/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "tree-vectorizer.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
			    misalign);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
			    misalign, where);
    }
}
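
/* For illustration, an analysis-time caller typically records into a
   cost vector for later processing, along the lines of

     record_stmt_cost (&body_cost_vec, 1, vector_stmt, stmt_info,
		       0, vect_body);

   whereas passing a NULL BODY_COST_VEC hands the cost straight to the
   target model via add_stmt_cost.  */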

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
		   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
		    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
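
/* For illustration, the helpers above cooperate when a group of
   accesses is handled through one array of vectors, roughly:

     tree array = create_vector_array (vectype, 2);
     write_vector_array (stmt, gsi, vec0, array, 0);
     write_vector_array (stmt, gsi, vec1, array, 1);

   where vec0 and vec1 are placeholder SSA names for two vectors of
   the group; create_array_ref then builds the single memory reference
   that replaces the original ones.  */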

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
		    enum vect_relevant relevant, bool live_p,
		    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use; if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.");
      return;
    }

  worklist->safe_push (stmt);
}

/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.");

	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
	    }
	}
    }

  return (*live_p || *relevant);
}
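
/* For illustration, in a loop such as

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;	// alters memory: relevant
         s = a[i];		// s used after the loop: live
       }

   the store is marked vect_used_in_scope because of its vdef, and the
   def of s is marked live because its only out-of-loop use sits in the
   loop-closed exit phi.  */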

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/*
   Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
	     enum vect_relevant relevant, vec<gimple> *worklist,
	     bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = def_stmt
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = def_stmt
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.	 T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  vec<gimple> worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===");

  worklist.create (64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  phi = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
	    }

	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt = gsi_stmt (si);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	    }

	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
	{
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
	 liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
	 propagated as is to the DEF_STMTs of its USEs:
	  live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
	  relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the liveness/relevance as follows:
	   live_p = false
	   relevant = vect_used_by_reduction
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
	    switch (tmp_relevant)
	      {
	        case vect_unused_in_scope:
	          relevant = vect_used_by_reduction;
	          break;

	        case vect_used_by_reduction:
	          if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
  	          /* fall through */

	        default:
	          if (dump_enabled_p ())
	            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "unsupported use of reduction.");
                  worklist.release ();
	          return false;
	      }

	    live_p = false;
	    break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of nested cycle.");

                worklist.release ();
                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of double reduction.");

                worklist.release ();
                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
	      tree op = gimple_assign_rhs1 (stmt);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
				    live_p, relevant, &worklist, false)
		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
				       live_p, relevant, &worklist, false))
		    {
		      worklist.release ();
		      return false;
		    }
		  i = 2;
		}
	      for (; i < gimple_num_ops (stmt); i++)
                {
		  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
				    &worklist, false))
                    {
                      worklist.release ();
                      return false;
                    }
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
				    &worklist, false))
                    {
                      worklist.release ();
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
			      &worklist, false))
              {
                worklist.release ();
                return false;
              }
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
	{
	  tree off;
	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
	  gcc_assert (decl);
	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
			    &worklist, true))
	    {
	      worklist.release ();
	      return false;
	    }
	}
    } /* while worklist */

  worklist.release ();
  return true;
}
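
/* For illustration, in a reduction loop such as

     for (i = 0; i < n; i++)
       s += a[i];

   the reduction statement enters the worklist as relevant; processing
   its uses then marks the load of a[i] as vect_used_by_reduction,
   while the address computation feeding a[i] is skipped by case 1 of
   process_use.  */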

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			stmt_vector_for_cost *prologue_cost_vec,
			stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
				  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}

/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
	(i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
				    vec_promote_demote, stmt_info, 0,
				    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
				      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}

/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
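
/* For illustration, in a group of four interleaved stores the first
   store reports a group size of 4 while the other three report 1, so
   the per-group overhead below is charged exactly once.  */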

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
		       bool store_lanes_p, enum vect_def_type dt,
		       slp_tree slp_node,
		       stmt_vector_for_cost *prologue_cost_vec,
		       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
				       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .",
                         group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}
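
/* For illustration, with group_size = 4 and ncopies = 1 the formula
   above charges 1 * log2(4) * 4 = 8 vec_perm operations for the
   interleaving, on top of the vector stores themselves.  */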

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
		      bool load_lanes_p, slp_tree slp_node,
		      stmt_vector_for_cost *prologue_cost_vec,
		      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
				       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
				       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
				       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
			 || group_size > 1 || slp_node),
			&inside_cost, &prologue_cost,
			prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}
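
/* For illustration, a strided load with ncopies = 2 and a 4-element
   vectype is costed above as 2 * 4 = 8 scalar_load operations plus
   2 vec_construct operations to assemble the vectors.  */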

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
          }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
	  basic_block new_bb;
	  edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

	  pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
	}
      else
	{
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
	}
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  if (CONSTANT_CLASS_P (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
	  else
	    {
	      new_temp = make_ssa_name (TREE_TYPE (type), NULL);
	      init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
							new_temp, val,
							NULL_TREE);
	      vect_init_vector_1 (stmt, init_stmt, gsi);
	      val = new_temp;
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);
  return vec_oprnd;
}
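
/* For illustration, calling vect_init_vector with VAL = 5 and a
   4-element integer vector TYPE emits something like

     cst_1 = { 5, 5, 5, 5 };

   in the loop preheader (when GSI is NULL) and returns the new SSA
   name cst_1.  */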

/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
				      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
        {
          dump_printf_loc (MSG_NOTE, vect_location, "def = ");
          loc_printed = 1;
          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
        }
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, " def_stmt = ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
	gcc_assert (vector_type);
	nunits = TYPE_VECTOR_SUBPARTS (vector_type);

	if (scalar_def)
	  *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Create vector_cst. nunits = %d", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
	gcc_assert (vector_type);

	if (scalar_def)
	  *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
	if (scalar_def)
	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else if (is_gimple_call (vec_stmt))
	  vec_oprnd = gimple_call_lhs (vec_stmt);
	else
	  vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
	struct loop *loop;

	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
	loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
	if (gimple_code (vec_stmt) == GIMPLE_PHI)
	  vec_oprnd = PHI_RESULT (vec_stmt);
	else
	  vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   Context:
        In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}

/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
				 vec<tree> *vec_oprnds0,
				 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}

/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
		   vec<tree> *vec_oprnds0,
		   vec<tree> *vec_oprnds1,
		   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      vec<tree> ops;
      ops.create (nops);
      vec<vec<tree> > vec_defs;
      vec_defs.create (nops);

      ops.quick_push (op0);
      if (op1)
	ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
	*vec_oprnds1 = vec_defs[1];

      ops.release ();
      vec_defs.release ();
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
	{
	  vec_oprnds1->create (1);
	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
	  vec_oprnds1->quick_push (vec_oprnd);
	}
    }
}

/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && !(gimple_call_flags (vec_stmt)
			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}

/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
							vectype_in);
}
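
/* For illustration, on a target that provides a vector square root,
   a call to a sqrt builtin with a double vector type may be mapped by
   the hook above to the target's vectorized sqrt builtin; a target
   without such a builtin returns NULL_TREE and the call is left
   unvectorized.  */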
1697 /* Function vectorizable_call.
1699 Check if STMT performs a function call that can be vectorized.
1700 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1701 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1702 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1705 vectorizable_call (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
1711 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
1712 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
1713 tree vectype_out
, vectype_in
;
1716 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1717 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1718 tree fndecl
, new_temp
, def
, rhs_type
;
1720 enum vect_def_type dt
[3]
1721 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
1722 gimple new_stmt
= NULL
;
1724 vec
<tree
> vargs
= vNULL
;
1725 enum { NARROW
, NONE
, WIDEN
} modifier
;
1729 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
1732 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1735 /* Is STMT a vectorizable call? */
1736 if (!is_gimple_call (stmt
))
1739 if (TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
1742 if (stmt_can_throw_internal (stmt
))
1745 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
1747 /* Process function arguments. */
1748 rhs_type
= NULL_TREE
;
1749 vectype_in
= NULL_TREE
;
1750 nargs
= gimple_call_num_args (stmt
);
1752 /* Bail out if the function has more than three arguments, we do not have
1753 interesting builtin functions to vectorize with more than two arguments
1754 except for fma. No arguments is also not good. */
1755 if (nargs
== 0 || nargs
> 3)
1758 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
1759 if (gimple_call_internal_p (stmt
)
1760 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
1763 rhs_type
= unsigned_type_node
;
1766 for (i
= 0; i
< nargs
; i
++)
1770 op
= gimple_call_arg (stmt
, i
);
1772 /* We can only handle calls with arguments of the same type. */
1774 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
1776 if (dump_enabled_p ())
1777 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1778 "argument types differ.");
1782 rhs_type
= TREE_TYPE (op
);
1784 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
1785 &def_stmt
, &def
, &dt
[i
], &opvectype
))
1787 if (dump_enabled_p ())
1788 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1794 vectype_in
= opvectype
;
1796 && opvectype
!= vectype_in
)
1798 if (dump_enabled_p ())
1799 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1800 "argument vector types differ.");
1804 /* If all arguments are external or constant defs use a vector type with
1805 the same size as the output vector type. */
1807 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
1809 gcc_assert (vectype_in
);
1812 if (dump_enabled_p ())
1814 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1815 "no vectype for scalar type ");
1816 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
1823 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
1824 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
1825 if (nunits_in
== nunits_out
/ 2)
1827 else if (nunits_out
== nunits_in
)
1829 else if (nunits_out
== nunits_in
/ 2)
1834 /* For now, we only vectorize functions if a target specific builtin
1835 is available. TODO -- in some cases, it might be profitable to
1836 insert the calls for pieces of the vector, in order to be able
1837 to vectorize other operations in the loop. */
1838 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
1839 if (fndecl
== NULL_TREE
)
1841 if (gimple_call_internal_p (stmt
)
1842 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
1845 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
1846 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
1847 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
1848 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
1850 /* We can handle IFN_GOMP_SIMD_LANE by returning a
1851 { 0, 1, 2, ... vf - 1 } vector. */
1852 gcc_assert (nargs
== 0);
1856 if (dump_enabled_p ())
1857 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1858 "function is not vectorizable.");
1863 gcc_assert (!gimple_vuse (stmt
));
1865 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
1867 else if (modifier
== NARROW
)
1868 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
1870 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
1872 /* Sanity check: make sure that at least one copy of the vectorized stmt
1873 needs to be generated. */
1874 gcc_assert (ncopies
>= 1);
1876 if (!vec_stmt
) /* transformation not required. */
1878 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1879 if (dump_enabled_p ())
1880 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ===");
1881 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
1887 if (dump_enabled_p ())
1888 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.");
1891 scalar_dest
= gimple_call_lhs (stmt
);
1892 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
1894 prev_stmt_info
= NULL
;
1898 for (j
= 0; j
< ncopies
; ++j
)
1900 /* Build argument list for the vectorized call. */
1902 vargs
.create (nargs
);
1908 vec
<vec
<tree
> > vec_defs
;
1909 vec_defs
.create (nargs
);
1910 vec
<tree
> vec_oprnds0
;
1912 for (i
= 0; i
< nargs
; i
++)
1913 vargs
.quick_push (gimple_call_arg (stmt
, i
));
1914 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
1915 vec_oprnds0
= vec_defs
[0];
1917 /* Arguments are ready. Create the new vector stmt. */
1918 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
1921 for (k
= 0; k
< nargs
; k
++)
1923 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
1924 vargs
[k
] = vec_oprndsk
[i
];
1926 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1927 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1928 gimple_call_set_lhs (new_stmt
, new_temp
);
1929 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1930 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
1933 for (i
= 0; i
< nargs
; i
++)
1935 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
1936 vec_oprndsi
.release ();
1938 vec_defs
.release ();
1942 for (i
= 0; i
< nargs
; i
++)
1944 op
= gimple_call_arg (stmt
, i
);
              = vect_get_vec_def_for_operand (op, stmt, NULL);
          else
            {
              vec_oprnd0 = gimple_call_arg (new_stmt, i);
              vec_oprnd0
                = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
            }

          vargs.quick_push (vec_oprnd0);
        }

      if (gimple_call_internal_p (stmt)
          && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
        {
          tree *v = XALLOCAVEC (tree, nunits_out);
          int k;
          for (k = 0; k < nunits_out; ++k)
            v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
          tree cst = build_vector (vectype_out, v);
          tree new_var
            = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
          gimple init_stmt = gimple_build_assign (new_var, cst);
          new_temp = make_ssa_name (new_var, init_stmt);
          gimple_assign_set_lhs (init_stmt, new_temp);
          vect_init_vector_1 (stmt, init_stmt, NULL);
          new_temp = make_ssa_name (vec_dest, NULL);
          new_stmt = gimple_build_assign (new_temp,
                                          gimple_assign_lhs (init_stmt));
        }
      else
        {
          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
        }
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs * 2);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              vec<vec<tree> > vec_defs;
              vec_defs.create (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
                {
                  size_t k;
                  vargs.truncate (0);
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              vec_defs.release ();
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
              vargs.quick_push (vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  vargs.release ();

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in DCE.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}
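/* Note: the "harmless rhs" replacement above means that after vectorizing
   e.g. x_1 = sqrtf (y_2) the scalar statement becomes x_1 = 0.0, while the
   real computation lives in the vector stmts; the dummy assignment is left
   for DCE to clean up.  (The particular call is only an illustration.)  */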
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }

  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
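/* As an illustration (not tied to a particular target), widening a V8HI
   multiplication to V4SI results is emitted as two halves, one per call
   of the function above:

       vs_lo = VEC_WIDEN_MULT_LO_EXPR <vx, vy>;
       vs_hi = VEC_WIDEN_MULT_HI_EXPR <vx, vy>;

   where the LO half covers elements 0..3 and the HI half elements 4..7
   (modulo endianness).  */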
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
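/* For example, a call with MULTI_STEP_CVT == 1 pushes two defs, then
   recurses once and pushes two more, leaving four vector defs in
   VEC_OPRNDS, which is exactly the input a two-step narrowing sequence
   needs.  */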
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the
   function recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        (*vec_oprnds)[i/2] = new_tmp;
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
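/* For example, a two-step demotion from int to char (a sketch assuming
   V4SI/V8HI/V16QI vectors) first packs four V4SI operands pairwise into
   two V8HI vectors, and the recursive call then packs those into one
   V16QI vector:

       vt0 = VEC_PACK_TRUNC_EXPR <vi0, vi1>;
       vt1 = VEC_PACK_TRUNC_EXPR <vi2, vi3>;
       vc0 = VEC_PACK_TRUNC_EXPR <vt0, vt1>;  */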
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        gimple stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = (*vec_oprnds1)[i];
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
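/* For example, promoting V16QI operands to V8HI results (a sketch assuming
   the target supports hi/lo unpacking) turns each input vector into two
   output vectors, so *VEC_OPRNDS0 doubles in length on every call:

       vh_lo = VEC_UNPACK_LO_EXPR <vq>;
       vh_hi = VEC_UNPACK_HI_EXPR <vq>;  */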
/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  tree new_temp;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out, vectype_in;
  int ncopies, i, j;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  int op_type;
  enum machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
           != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
          && (TYPE_PRECISION (rhs_type)
              != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision unsupported.");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.");
      return false;
    }

  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
                                   &def_stmt, &def, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                                 &def, &dt[1]);

      if (!ok)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.");
          return false;
        }
    }

  /* If op0 is an external or constant def, use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
        return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))
        break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "conversion not supported by target.");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                          &code1, &code2, &multi_step_cvt,
                                          &interm_types))
        {
          /* Binary widening operation can only be supported directly by the
             architecture.  */
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
          break;
        }

      if (code != FLOAT_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
           rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
           rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
        {
          cvt_type
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
          cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
          if (cvt_type == NULL_TREE)
            goto unsupported;

          if (GET_MODE_SIZE (rhs_mode) == fltsz)
            {
              if (!supportable_convert_operation (code, vectype_out,
                                                  cvt_type, &decl1, &codecvt1))
                goto unsupported;
            }
          else if (!supportable_widening_operation (code, stmt, vectype_out,
                                                    cvt_type, &codecvt1,
                                                    &codecvt2, &multi_step_cvt,
                                                    &interm_types))
            continue;
          else
            gcc_assert (multi_step_cvt == 0);

          if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
                                              vectype_in, &code1, &code2,
                                              &multi_step_cvt, &interm_types))
            break;
        }

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
        goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
        codecvt2 = ERROR_MARK;
      else
        {
          multi_step_cvt++;
          interm_types.safe_push (cvt_type);
          cvt_type = NULL_TREE;
        }
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      if (code != FIX_TRUNC_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
        goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
                                          &decl1, &codecvt1))
        goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }

  if (!vec_stmt)                /* transformation not required.  */
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_conversion ===");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
        {
          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
          vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
        }
      else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      interm_types.release ();
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform conversion. ncopies = %d.", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
        op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
        op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
                                          (cvt_type && modifier == WIDEN)
                                          ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
           interm_types.iterate (i, &intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          vec_dsts.quick_push (vec_dest);
        }
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
        {
          vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
          if (op_type == binary_op)
            vec_oprnds1.create (1);
        }
      else if (modifier == NARROW)
        vec_oprnds0.create (
                   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                               -1);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              /* Arguments are ready, create the new vector stmt.  */
              if (code1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
                  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
                                                           vop0, NULL);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              if (slp_node)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    {
                      unsigned int k;

                      vec_oprnd1 = op1;
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);

                      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
                                         NULL, slp_node, -1);
                    }
                  else
                    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
                                       &vec_oprnds1, slp_node, -1);
                }
              else
                {
                  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
                  vec_oprnds0.quick_push (vec_oprnd0);
                  if (op_type == binary_op)
                    {
                      if (code == WIDEN_LSHIFT_EXPR)
                        vec_oprnd1 = op1;
                      else
                        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
                                                                   NULL);
                      vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
              vec_oprnds0.truncate (0);
              vec_oprnds0.quick_push (vec_oprnd0);
              if (op_type == binary_op)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                                 vec_oprnd1);
                  vec_oprnds1.truncate (0);
                  vec_oprnds1.quick_push (vec_oprnd1);
                }
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          for (i = multi_step_cvt; i >= 0; i--)
            {
              tree this_dest = vec_dsts[i];
              enum tree_code c1 = code1, c2 = code2;
              if (i == 0 && codecvt2 != ERROR_MARK)
                {
                  c1 = codecvt1;
                  c2 = codecvt2;
                }
              vect_create_vectorized_promotion_stmts (&vec_oprnds0,
                                                      &vec_oprnds1,
                                                      stmt, this_dest, gsi,
                                                      c1, c2, decl1, decl2,
                                                      op_type);
            }

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              if (cvt_type)
                {
                  if (codecvt1 == CALL_EXPR)
                    {
                      new_stmt = gimple_build_call (decl1, 1, vop0);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  else
                    {
                      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                      new_temp = make_ssa_name (vec_dest, NULL);
                      new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                               new_temp,
                                                               vop0, NULL);
                    }

                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                new_stmt = SSA_NAME_DEF_STMT (vop0);

              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
              else
                {
                  if (!prev_stmt_info)
                    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }
            }
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (slp_node)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            {
              vec_oprnds0.truncate (0);
              vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                        vect_pow2 (multi_step_cvt) - 1);
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          if (cvt_type)
            FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
              {
                if (codecvt1 == CALL_EXPR)
                  {
                    new_stmt = gimple_build_call (decl1, 1, vop0);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_call_set_lhs (new_stmt, new_temp);
                  }
                else
                  {
                    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                    new_temp = make_ssa_name (vec_dest, NULL);
                    new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                             new_temp,
                                                             vop0, NULL);
                  }

                vect_finish_stmt_generation (stmt, new_stmt, gsi);
                vec_oprnds0[i] = new_temp;
              }

          vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
                                                 stmt, vec_dsts, gsi,
                                                 slp_node, code1,
                                                 &prev_stmt_info);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  tree vectype_in;
  tree new_temp;
  tree def;
  gimple def_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
           && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision "
                         "unsupported.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_assignment ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
        {
          if (CONVERT_EXPR_CODE_P (code)
              || code == VIEW_CONVERT_EXPR)
            vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
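/* For example, a same-width conversion such as int -> unsigned int is
   vectorized above as a plain vector copy through VIEW_CONVERT_EXPR, one
   vector stmt per copy when VF > nunits.  */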
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
                    gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
        || code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision shifts not supported.");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.");
      return false;
    }
  /* If op0 is an external or constant def, use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no vectype for scalar type ");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                             &def, &dt[1], &op1_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.");
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
           || dt[1] == vect_external_def
           || dt[1] == vect_internal_def)
    {
      /* In SLP, need to check whether the shift count is the same,
         in loops if it is a constant or invariant, it is always
         a scalar shift.  */
      if (slp_node)
        {
          vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
          gimple slpstmt;

          FOR_EACH_VEC_ELT (stmts, k, slpstmt)
            if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
              scalar_shift_arg = false;
        }
    }
  else
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "operand mode requires invariant argument.");
      return false;
    }

  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vector/vector shift/rotate found.");

      if (!op1_vectype)
        op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
          || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unusable type for last operand in"
                             " vector/vector shift/rotate.");
          return false;
        }
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
          && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "vector/scalar shift/rotate found.");
        }
      else
        {
          optab = optab_for_tree_code (code, vectype, optab_vector);
          if (optab
              && (optab_handler (optab, TYPE_MODE (vectype))
                  != CODE_FOR_nothing))
            {
              scalar_shift_arg = false;

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vector/vector shift/rotate found.");

              /* Unlike the other binary operators, shifts/rotates have
                 the rhs being int, instead of the same type as the lhs,
                 so make sure the scalar is the right type if we are
                 dealing with vectors of long long/long/short/char.  */
              if (dt[1] == vect_constant_def)
                op1 = fold_convert (TREE_TYPE (vectype), op1);
              else if (!useless_type_conversion_p (TREE_TYPE (vectype),
                                                   TREE_TYPE (op1)))
                {
                  if (slp_node
                      && TYPE_MODE (TREE_TYPE (vectype))
                         != TYPE_MODE (TREE_TYPE (op1)))
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "unusable type for last operand in"
                                         " vector/vector shift/rotate.");
                      return false;
                    }
                  if (vec_stmt && !slp_node)
                    {
                      op1 = fold_convert (TREE_TYPE (vectype), op1);
                      op1 = vect_init_vector (stmt, op1,
                                              TREE_TYPE (vectype), NULL);
                    }
                }
            }
        }
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_shift ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (scalar_shift_arg)
            {
              /* Vector shl and shr insn patterns can be defined with scalar
                 operand 2 (shift operand).  In this case, use constant or loop
                 invariant op1 directly, without extending it to vector mode
                 first.  */
              optab_op2_mode = insn_data[icode].operand[2].mode;
              if (!VECTOR_MODE_P (optab_op2_mode))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "operand 1 using scalar mode.");
                  vec_oprnd1 = op1;
                  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
                  vec_oprnds1.quick_push (vec_oprnd1);
                  if (slp_node)
                    {
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.
                         TODO: Allow different constants for different vector
                         stmts generated for an SLP instance.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }

          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
             (a special case for certain kind of vector shifts); otherwise,
             operand 1 should be of a vector type (the usual case).  */
          if (vec_oprnd1)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
        }
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = vec_oprnds1[i];
          new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
static tree permute_vec_elements (tree, tree, tree, gimple,
                                  gimple_stmt_iterator *);
/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  int icode;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  int nunits_in;
  int nunits_out;
  tree vectype_out;
  int ncopies;
  int j, i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "num. args = %d (not unary/binary/ternary op).",
                         op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision arithmetic not supported.");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.");
      return false;
    }
  /* If op0 is an external or constant def, use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             TREE_TYPE (op0));
        }

      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt[1]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.");
          return false;
        }
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt[2]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.");
          return false;
        }
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vf = 1;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    {
      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
        icode = LAST_INSN_CODE;
      else
        icode = CODE_FOR_nothing;
    }
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "no optab.");
          return false;
        }
      icode = (int) optab_handler (optab, vec_mode);
    }

  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && vf < vect_min_worthwhile_factor (code))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_operation ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (op_type == binary_op || op_type == ternary_op)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          if (op_type == ternary_op)
            {
              vec_oprnds2.create (1);
              vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
                                                                    stmt,
                                                                    NULL));
            }
        }
      else
        {
          vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
          if (op_type == ternary_op)
            {
              tree vec_oprnd = vec_oprnds2.pop ();
              vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
                                                                  vec_oprnd));
            }
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = ((op_type == binary_op || op_type == ternary_op)
                  ? vec_oprnds1[i] : NULL_TREE);
          vop2 = ((op_type == ternary_op)
                  ? vec_oprnds2[i] : NULL_TREE);
          new_stmt = gimple_build_assign_with_ops (code, vec_dest,
                                                   vop0, vop1, vop2);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}
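/* For example, an FMA_EXPR z = x * y + w is a ternary operation here: all
   three operands get vector defs and a single vector FMA_EXPR is created
   per copy (assuming the target provides a vector fma optab).  */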
/* A helper function to ensure data reference DR's base alignment
   for STMT_INFO.  */

static void
ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
{
  if (!dr->aux)
    return;

  if (((dataref_aux *)dr->aux)->base_misaligned)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      tree base_decl = ((dataref_aux *)dr->aux)->base_decl;

      DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base_decl) = 1;
      ((dataref_aux *)dr->aux)->base_misaligned = false;
    }
}
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                    slp_tree slp_node)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  enum machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int j;
  gimple next_stmt, first_stmt = NULL;
  bool grouped_store = false;
  bool store_lanes_p = false;
  unsigned int group_size, i;
  vec<tree> dr_chain = vNULL;
  vec<tree> oprnds = vNULL;
  vec<tree> result_chain = vNULL;
  bool inv_p;
  vec<tree> vec_oprnds = vNULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree aggr_type;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable store?  */

  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
      && is_pattern_stmt_p (stmt_info))
    scalar_dest = TREE_OPERAND (scalar_dest, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != BIT_FIELD_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && TREE_CODE (scalar_dest) != COMPONENT_REF
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
      && TREE_CODE (scalar_dest) != REALPART_EXPR
      && TREE_CODE (scalar_dest) != MEM_REF)
    return false;

  gcc_assert (gimple_assign_single_p (stmt));
  op = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "use not simple.");
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
                            ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
                            size_zero_node) < 0)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "negative step for store.");
      return false;
    }

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_store = true;
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_store_lanes_supported (vectype, group_size))
            store_lanes_p = true;
          else if (!vect_grouped_store_supported (vectype, group_size))
            return false;
        }

      if (first_stmt == stmt)
        {
          /* STMT is the leader of the group.  Check the operands of all the
             stmts of the group.  */
          next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
          while (next_stmt)
            {
              gcc_assert (gimple_assign_single_p (next_stmt));
              op = gimple_assign_rhs1 (next_stmt);
              if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
                                       &def_stmt, &def, &dt))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "use not simple.");
                  return false;
                }
              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
                             NULL, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  ensure_base_align (stmt_info, dr);

  if (grouped_store)
    {
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;

      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));

      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
          < GROUP_SIZE (vinfo_for_stmt (first_stmt))
          && !slp)
        {
          *vec_stmt = NULL;
          return true;
        }

      if (slp)
        {
          grouped_store = false;
          /* VEC_NUM is the number of vect stmts to be created for this
             group.  */
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
          op = gimple_assign_rhs1 (first_stmt);
        }
      else
        /* VEC_NUM is the number of vect stmts to be created for this
           group.  */
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform store. ncopies = %d", ncopies);

  dr_chain.create (group_size);
  oprnds.create (group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!store_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  if (store_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store
     stmt of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts
     are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.
  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt;

      if (j == 0)
        {
          if (slp)
            {
              /* Get vectorized arguments for SLP_NODE.  */
              vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
                                 NULL, slp_node, -1);

              vec_oprnd = vec_oprnds[0];
            }
          else
            {
              /* For interleaved stores we collect vectorized defs for all the
                 stores in the group in DR_CHAIN and OPRNDS.  DR_CHAIN is then
                 used as an input to vect_permute_store_chain(), and OPRNDS as
                 an input to vect_get_vec_def_for_stmt_copy() for the next
                 copy.

                 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN
                 and OPRNDS are of size 1.  */
              next_stmt = first_stmt;
              for (i = 0; i < group_size; i++)
                {
                  /* Since gaps are not supported for interleaved stores,
                     GROUP_SIZE is the exact number of stmts in the chain.
                     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
                     there is no interleaving, GROUP_SIZE is 1, and only one
                     iteration of the loop will be executed.  */
                  gcc_assert (next_stmt
                              && gimple_assign_single_p (next_stmt));
                  op = gimple_assign_rhs1 (next_stmt);

                  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
                                                            NULL);
                  dr_chain.quick_push (vec_oprnd);
                  oprnds.quick_push (vec_oprnd);
                  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
                }
            }

          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
          bool simd_lane_access_p
            = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
              && integer_zerop (DR_OFFSET (first_dr))
              && integer_zerop (DR_INIT (first_dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (DR_REF (first_dr))))
            {
              dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
              dataref_offset = build_int_cst (reference_alias_ptr_type
                                              (DR_REF (first_dr)), 0);
              inv_p = false;
            }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type,
                                          simd_lane_access_p ? loop : NULL,
                                          NULL_TREE, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, &inv_p);
          gcc_assert (bb_vinfo || !inv_p);
        }
      else
        {
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for
             the next copy.
             If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              op = oprnds[i];
              vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                                  &def, &dt);
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
              dr_chain[i] = vec_oprnd;
              oprnds[i] = vec_oprnd;
            }
          if (dataref_offset)
            dataref_offset
              = int_const_binop (PLUS_EXPR, dataref_offset,
                                 TYPE_SIZE_UNIT (aggr_type));
          else
            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                           TYPE_SIZE_UNIT (aggr_type));
        }

      if (store_lanes_p)
        {
          tree vec_array;

          /* Combine all the vectors into an array.  */
          vec_array = create_vector_array (vectype, vec_num);
          for (i = 0; i < vec_num; i++)
            {
              vec_oprnd = dr_chain[i];
              write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
            }

          /* Emit:
               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1,
                                                 vec_array);
          gimple_call_set_lhs (new_stmt, data_ref);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
        }
      else
        {
          new_stmt = NULL;
          if (grouped_store)
            {
              if (j == 0)
                result_chain.create (group_size);
              /* Permute.  */
              vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
                                        &result_chain);
            }

          next_stmt = first_stmt;
          for (i = 0; i < vec_num; i++)
            {
              unsigned align, misalign;

              if (i > 0)
                /* Bump the vector pointer.  */
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              if (slp)
                vec_oprnd = vec_oprnds[i];
              else if (grouped_store)
                /* For grouped stores vectorized defs are interleaved in
                   vect_permute_store_chain().  */
                vec_oprnd = result_chain[i];

              data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
                                 dataref_offset
                                 ? dataref_offset
                                 : build_int_cst (reference_alias_ptr_type
                                                  (DR_REF (first_dr)), 0));
              align = TYPE_ALIGN_UNIT (vectype);
              if (aligned_access_p (first_dr))
                misalign = 0;
              else if (DR_MISALIGNMENT (first_dr) == -1)
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  align = TYPE_ALIGN_UNIT (elem_type);
                  misalign = 0;
                }
              else
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  misalign = DR_MISALIGNMENT (first_dr);
                }
              if (dataref_offset == NULL_TREE)
                set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                        misalign);

              /* Arguments are ready.  Create the new vector stmt.  */
              new_stmt = gimple_build_assign (data_ref, vec_oprnd);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if (slp)
                continue;

              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
              if (!next_stmt)
                break;
            }
        }
      if (!slp)
        {
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  dr_chain.release ();
  oprnds.release ();
  result_chain.release ();
  vec_oprnds.release ();

  return true;
}
/* Given a vector type VECTYPE and permutation SEL returns
   the VECTOR_CST mask that implements the permutation of the
   vector elements.  If that is impossible to do, returns NULL.  */
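/* Example (for illustration; the element count is assumed): for a
   four-element vector type, SEL = {3, 2, 1, 0} would yield the constant
   mask {3, 2, 1, 0}, which reverses the vector - provided can_vec_perm_p
   reports that the target can perform that permutation.  */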
tree
vect_gen_perm_mask (tree vectype, unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL;

  mask_elt_type = lang_hooks.types.type_for_mode
                    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_elts = XALLOCAVEC (tree, nunits);
  for (i = nunits - 1; i >= 0; i--)
    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
  mask_vec = build_vector (mask_type, mask_elts);

  return mask_vec;
}
/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  return vect_gen_perm_mask (vectype, sel);
}
/* Given vector variables X and Y, that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */
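/* Recall the VEC_PERM_EXPR semantics: element I of the result is element
   MASK_VEC[I] of the concatenation of X and Y, so indices below the number
   of vector elements select from X and the remaining indices select
   from Y.  */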
static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
                      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple perm_stmt;

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest, NULL);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
                                            x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}
/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree elem_type;
  tree new_temp;
  enum machine_mode mode;
  gimple new_stmt = NULL;
  tree dummy;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple ptr_incr = NULL;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  int i, j, group_size, group_gap;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  gimple phi = NULL;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  bool load_lanes_p = false;
  gimple first_stmt;
  bool inv_p;
  bool negative = false;
  bool compute_in_loop = false;
  struct loop *at_loop;
  int vec_num;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int vf;
  tree aggr_type;
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    vf = 1;
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable load?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != ARRAY_REF
      && code != BIT_FIELD_REF
      && code != INDIRECT_REF
      && code != COMPONENT_REF
      && code != IMAGPART_EXPR
      && code != REALPART_EXPR
      && code != MEM_REF
      && TREE_CODE_CLASS (code) != tcc_declaration)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Aligned load, but unsupported type.");
      return false;
    }

  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_load = true;
      /* FORNOW */
      gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));

      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_load_lanes_supported (vectype, group_size))
            load_lanes_p = true;
          else if (!vect_grouped_load_supported (vectype, group_size))
            return false;
        }
    }
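  /* LOAD_LANES above refers to instructions that load a whole interleaved
     group and de-interleave it in one operation (e.g. the NEON
     vld2/vld3/vld4 family).  When such instructions are unavailable, the
     fallback checked above loads contiguous vectors and de-interleaves
     them with explicit permutations.  */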
  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      gimple def_stmt;
      tree def;
      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
                                       &gather_off, &gather_scale);
      gcc_assert (gather_decl);
      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &gather_dt,
                                 &gather_off_vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "gather index use not simple.");
          return false;
        }
    }
  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    ;
  else
    {
      negative = tree_int_cst_compare (nested_in_vect_loop
                                       ? STMT_VINFO_DR_STEP (stmt_info)
                                       : DR_STEP (dr),
                                       size_zero_node) < 0;
      if (negative && ncopies > 1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "multiple types with negative step.");
          return false;
        }

      if (negative)
        {
          if (grouped_load)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step for group load not supported");
              return false;
            }
          alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
          if (alignment_support_scheme != dr_aligned
              && alignment_support_scheme != dr_unaligned_supported)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step but alignment required.");
              return false;
            }
          if (!perm_mask_for_reverse (vectype))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "negative step and reversing not supported.");
              return false;
            }
        }
    }
  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
      return true;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform load. ncopies = %d", ncopies);

  /** Transform.  **/
  ensure_base_align (stmt_info, dr);

  if (STMT_VINFO_GATHER_P (stmt_info))
    {
      tree vec_oprnd0 = NULL_TREE, op;
      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
      tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
      edge pe = loop_preheader_edge (loop);
      gimple_seq seq;
      basic_block new_bb;
      enum { NARROW, NONE, WIDEN } modifier;
      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);

      if (nunits == gather_off_nunits)
        modifier = NONE;
      else if (nunits == gather_off_nunits / 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
          modifier = WIDEN;

          for (i = 0; i < gather_off_nunits; ++i)
            sel[i] = i | nunits;

          perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
        }
      else if (nunits == gather_off_nunits * 2)
        {
          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
          modifier = NARROW;

          for (i = 0; i < nunits; ++i)
            sel[i] = i < gather_off_nunits
                     ? i : i + nunits - gather_off_nunits;

          perm_mask = vect_gen_perm_mask (vectype, sel);
          gcc_assert (perm_mask != NULL_TREE);
          ncopies *= 2;
        }
      else
        gcc_unreachable ();
      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
      scaletype = TREE_VALUE (arglist);
      gcc_checking_assert (types_compatible_p (srctype, rettype)
                           && types_compatible_p (srctype, masktype));

      vec_dest = vect_create_destination_var (scalar_dest, vectype);

      ptr = fold_convert (ptrtype, gather_base);
      if (!is_gimple_min_invariant (ptr))
        {
          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }

      /* Currently we support only unconditional gather loads,
         so mask should be all ones.  */
      if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
        mask = build_int_cst (TREE_TYPE (masktype), -1);
      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
        {
          REAL_VALUE_TYPE r;
          long tmp[6];
          for (j = 0; j < 6; ++j)
            tmp[j] = -1;
          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
          mask = build_real (TREE_TYPE (masktype), r);
        }
      else
        gcc_unreachable ();
      mask = build_vector_from_val (masktype, mask);
      mask = vect_init_vector (stmt, mask, masktype, NULL);

      scale = build_int_cst (scaletype, gather_scale);

      prev_stmt_info = NULL;
      for (j = 0; j < ncopies; ++j)
        {
          if (modifier == WIDEN && (j & 1))
            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
                                       perm_mask, stmt, gsi);
          else if (j == 0)
            op = vec_oprnd0
              = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
          else
            op = vec_oprnd0
              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);

          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
                          == TYPE_VECTOR_SUBPARTS (idxtype));
              var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
              var = make_ssa_name (var, NULL);
              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
              new_stmt
                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
                                                op, NULL_TREE);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              op = var;
            }

          new_stmt
            = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);

          if (!useless_type_conversion_p (vectype, rettype))
            {
              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
                          == TYPE_VECTOR_SUBPARTS (rettype));
              var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
              op = make_ssa_name (var, new_stmt);
              gimple_call_set_lhs (new_stmt, op);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              var = make_ssa_name (vec_dest, NULL);
              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
              new_stmt
                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
                                                NULL_TREE);
            }
          else
            {
              var = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, var);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (modifier == NARROW)
            {
              if ((j & 1) == 0)
                {
                  prev_res = var;
                  continue;
                }
              var = permute_vec_elements (prev_res, var,
                                          perm_mask, stmt, gsi);
              new_stmt = SSA_NAME_DEF_STMT (var);
            }

          if (prev_stmt_info == NULL)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      gimple_stmt_iterator incr_gsi;
      bool insert_after;
      gimple incr;
      tree offvar;
      tree ivstep;
      tree running_off;
      vec<constructor_elt, va_gc> *v = NULL;
      gimple_seq stmts = NULL;
      tree stride_base, stride_step, alias_off;

      gcc_assert (!nested_in_vect_loop);

      stride_base
        = fold_build_pointer_plus
            (unshare_expr (DR_BASE_ADDRESS (dr)),
             size_binop (PLUS_EXPR,
                         convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
                         convert_to_ptrofftype (DR_INIT (dr))));
      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));

      /* For a load with loop-invariant (but other than power-of-2)
         stride (i.e. not a grouped access) like so:

           for (i = 0; i < n; i += stride)
             ... = array[i];

         we generate a new induction variable and new accesses to
         form a new vector (or vectors, depending on ncopies):

           for (j = 0; ; j += VF*stride)
             tmp1 = array[j];
             tmp2 = array[j + stride];
             ...
             vectemp = {tmp1, tmp2, ...}
             ...  */

      ivstep = stride_step;
      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
                            build_int_cst (TREE_TYPE (ivstep), vf));

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (stride_base, ivstep, NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));

      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

      prev_stmt_info = NULL;
      running_off = offvar;
      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
      for (j = 0; j < ncopies; j++)
        {
          tree vec_inv;

          vec_alloc (v, nunits);
          for (i = 0; i < nunits; i++)
            {
              tree newref, newoff;
              gimple incr;
              newref = build2 (MEM_REF, TREE_TYPE (vectype),
                               running_off, alias_off);

              newref = force_gimple_operand_gsi (gsi, newref, true,
                                                 NULL_TREE, true,
                                                 GSI_SAME_STMT);
              CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
              newoff = copy_ssa_name (running_off, NULL);
              incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
                                                   running_off, stride_step);
              vect_finish_stmt_generation (stmt, incr, gsi);

              running_off = newoff;
            }

          vec_inv = build_constructor (vectype, v);
          new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
          new_stmt = SSA_NAME_DEF_STMT (new_temp);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      return true;
    }
  if (grouped_load)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (slp
          && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
          && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
        first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];

      /* Check if the chain of loads is already vectorized.  */
      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
          /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
             ???  But we can only do so if there is exactly one
             as we have no way to get at the rest.  Leave the CSE
             opportunity alone.
             ???  With the group load eventually participating
             in multiple different permutations (having multiple
             slp nodes which refer to the same group) the CSE
             is even wrong code.  See PR56270.  */
          && !slp)
        {
          *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
          return true;
        }
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      /* VEC_NUM is the number of vect stmts to be created for this group.  */
      if (slp)
        {
          grouped_load = false;
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
            slp_perm = true;
          group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
        }
      else
        {
          vec_num = group_size;
          group_gap = 0;
        }
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
      group_gap = 0;
    }

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!load_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See in documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in RELATED_STMT field is used to vectorize
     stmt S2.  */
  /* In case of interleaving (non-unit grouped access):

     S1:  x2 = &base + 2
     S2:  x0 = &base
     S3:  x1 = &base + 1
     S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS1: vx0 = &base
     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

       p = initial_addr;
       indx = 0;
       loop {
         p = p + indx * vectype_size;
         vec_dest = *(p);
         indx = indx + 1;
       }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

       msq_init = *(floor(p1))
       p2 = initial_addr + VS - 1;
       realignment_token = call target_builtin;
       indx = 0;
       loop {
         p2 = p2 + indx * vectype_size
         lsq = *(floor(p2))
         vec_dest = realign_load (msq, lsq, realignment_token)
         indx = indx + 1;
         msq = lsq;
       }   */
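  /* In the scheme above, MSQ is the aligned vector load carried over from
     the previous iteration and LSQ the one loaded in the current iteration;
     realign_load extracts the desired unaligned data from the pair.  */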
  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */

  if (nested_in_vect_loop
      && (TREE_INT_CST_LOW (DR_STEP (dr))
          % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
    {
      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
      compute_in_loop = true;
    }

  if ((alignment_support_scheme == dr_explicit_realign_optimized
       || alignment_support_scheme == dr_explicit_realign)
      && !compute_in_loop)
    {
      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
                                    alignment_support_scheme, NULL_TREE,
                                    &at_loop);
      if (alignment_support_scheme == dr_explicit_realign_optimized)
        {
          phi = SSA_NAME_DEF_STMT (msq);
          offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
        }
    }
  else
    at_loop = loop;

  if (negative)
    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);

  if (load_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;
  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* 1. Create the vector or array pointer update chain.  */
      if (j == 0)
        {
          bool simd_lane_access_p
            = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
          if (simd_lane_access_p
              && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
              && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
              && integer_zerop (DR_OFFSET (first_dr))
              && integer_zerop (DR_INIT (first_dr))
              && alias_sets_conflict_p (get_alias_set (aggr_type),
                                        get_alias_set (DR_REF (first_dr)))
              && (alignment_support_scheme == dr_aligned
                  || alignment_support_scheme == dr_unaligned_supported))
            {
              dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
              dataref_offset = build_int_cst (reference_alias_ptr_type
                                              (DR_REF (first_dr)), 0);
              inv_p = false;
            }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
                                          offset, &dummy, gsi, &ptr_incr,
                                          simd_lane_access_p, &inv_p);
        }
      else if (dataref_offset)
        dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
                                          TYPE_SIZE_UNIT (aggr_type));
      else
        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                       TYPE_SIZE_UNIT (aggr_type));

      if (grouped_load || slp_perm)
        dr_chain.create (vec_num);

      if (load_lanes_p)
        {
          tree vec_array;

          vec_array = create_vector_array (vectype, vec_num);

          /* Emit:
               VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
          gimple_call_set_lhs (new_stmt, vec_array);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          /* Extract each vector into an SSA_NAME.  */
          for (i = 0; i < vec_num; i++)
            {
              new_temp = read_vector_array (stmt, gsi, scalar_dest,
                                            vec_array, i);
              dr_chain.quick_push (new_temp);
            }

          /* Record the mapping between SSA_NAMEs and statements.  */
          vect_record_grouped_load_vectors (stmt, dr_chain);
        }
      else
        {
          for (i = 0; i < vec_num; i++)
            {
              if (i > 0)
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              /* 2. Create the vector-load in the loop.  */
              switch (alignment_support_scheme)
                {
                case dr_aligned:
                case dr_unaligned_supported:
                  {
                    unsigned int align, misalign;

                    data_ref
                      = build2 (MEM_REF, vectype, dataref_ptr,
                                dataref_offset
                                ? dataref_offset
                                : build_int_cst (reference_alias_ptr_type
                                                 (DR_REF (first_dr)), 0));
                    align = TYPE_ALIGN_UNIT (vectype);
                    if (alignment_support_scheme == dr_aligned)
                      {
                        gcc_assert (aligned_access_p (first_dr));
                        misalign = 0;
                      }
                    else if (DR_MISALIGNMENT (first_dr) == -1)
                      {
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
                                                TYPE_ALIGN (elem_type));
                        align = TYPE_ALIGN_UNIT (elem_type);
                        misalign = 0;
                      }
                    else
                      {
                        TREE_TYPE (data_ref)
                          = build_aligned_type (TREE_TYPE (data_ref),
                                                TYPE_ALIGN (elem_type));
                        misalign = DR_MISALIGNMENT (first_dr);
                      }
                    if (dataref_offset == NULL_TREE)
                      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
                                              align, misalign);
                    break;
                  }
                case dr_explicit_realign:
                  {
                    tree ptr, bump;
                    tree vs_minus_1;

                    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);

                    if (compute_in_loop)
                      msq = vect_setup_realignment (first_stmt, gsi,
                                                    &realignment_token,
                                                    dr_explicit_realign,
                                                    dataref_ptr, NULL);

                    ptr = copy_ssa_name (dataref_ptr, NULL);
                    new_stmt = gimple_build_assign_with_ops
                                 (BIT_AND_EXPR, ptr, dataref_ptr,
                                  build_int_cst
                                  (TREE_TYPE (dataref_ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                    vec_dest = vect_create_destination_var (scalar_dest,
                                                            vectype);
                    new_stmt = gimple_build_assign (vec_dest, data_ref);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_assign_set_lhs (new_stmt, new_temp);
                    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
                    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    msq = new_temp;

                    bump = size_binop (MULT_EXPR, vs_minus_1,
                                       TYPE_SIZE_UNIT (elem_type));
                    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
                    new_stmt = gimple_build_assign_with_ops
                                 (BIT_AND_EXPR, NULL_TREE, ptr,
                                  build_int_cst
                                  (TREE_TYPE (ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                    ptr = copy_ssa_name (dataref_ptr, new_stmt);
                    gimple_assign_set_lhs (new_stmt, ptr);
                    vect_finish_stmt_generation (stmt, new_stmt, gsi);
                    data_ref
                      = build2 (MEM_REF, vectype, ptr,
                                build_int_cst (reference_alias_ptr_type
                                               (DR_REF (first_dr)), 0));
                    break;
                  }
                case dr_explicit_realign_optimized:
                  new_temp = copy_ssa_name (dataref_ptr, NULL);
                  new_stmt = gimple_build_assign_with_ops
                               (BIT_AND_EXPR, new_temp, dataref_ptr,
                                build_int_cst
                                (TREE_TYPE (dataref_ptr),
                                 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  data_ref
                    = build2 (MEM_REF, vectype, new_temp,
                              build_int_cst (reference_alias_ptr_type
                                             (DR_REF (first_dr)), 0));
                  break;
                default:
                  gcc_unreachable ();
                }
              vec_dest = vect_create_destination_var (scalar_dest, vectype);
              new_stmt = gimple_build_assign (vec_dest, data_ref);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_assign_set_lhs (new_stmt, new_temp);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              /* 3. Handle explicit realignment if necessary/supported.
                 Create in loop:
                   vec_dest = realign_load (msq, lsq, realignment_token)  */
              if (alignment_support_scheme == dr_explicit_realign_optimized
                  || alignment_support_scheme == dr_explicit_realign)
                {
                  lsq = gimple_assign_lhs (new_stmt);
                  if (!realignment_token)
                    realignment_token = dataref_ptr;
                  vec_dest = vect_create_destination_var (scalar_dest, vectype);
                  new_stmt
                    = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
                                                    vec_dest, msq, lsq,
                                                    realignment_token);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);

                  if (alignment_support_scheme == dr_explicit_realign_optimized)
                    {
                      gcc_assert (phi);
                      if (i == vec_num - 1 && j == ncopies - 1)
                        add_phi_arg (phi, lsq,
                                     loop_latch_edge (containing_loop),
                                     UNKNOWN_LOCATION);
                      msq = lsq;
                    }
                }

              /* 4. Handle invariant-load.  */
              if (inv_p && !bb_vinfo)
                {
                  gimple_stmt_iterator gsi2 = *gsi;
                  gcc_assert (!grouped_load);
                  gsi_next (&gsi2);
                  new_temp = vect_init_vector (stmt, scalar_dest,
                                               vectype, &gsi2);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }

              if (negative)
                {
                  tree perm_mask = perm_mask_for_reverse (vectype);
                  new_temp = permute_vec_elements (new_temp, new_temp,
                                                   perm_mask, stmt, gsi);
                  new_stmt = SSA_NAME_DEF_STMT (new_temp);
                }

              /* Collect vector loads and later create their permutation in
                 vect_transform_grouped_load ().  */
              if (grouped_load || slp_perm)
                dr_chain.quick_push (new_temp);

              /* Store vector loads in the corresponding SLP_NODE.  */
              if (slp && !slp_perm)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }
          /* Bump the vector pointer to account for a gap.  */
          if (slp && group_gap != 0)
            {
              tree bump = size_binop (MULT_EXPR,
                                      TYPE_SIZE_UNIT (elem_type),
                                      size_int (group_gap));
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                             stmt, bump);
            }
        }

      if (slp && !slp_perm)
        continue;

      if (slp_perm)
        {
          if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
                                             slp_node_instance, false))
            {
              dr_chain.release ();
              return false;
            }
        }
      else
        {
          if (grouped_load)
            {
              if (!load_lanes_p)
                vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
              *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
            }
          else
            {
              if (j == 0)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
              prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
      dr_chain.release ();
    }

  return true;
}
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
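/* For example, a condition like "a_1 < b_2" is simple if both SSA operands
   have supportable definitions; comparisons against integer, real or
   fixed-point constants are accepted directly.  */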
static bool
vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
                     bb_vec_info bb_vinfo, tree *comp_vectype)
{
  tree lhs, rhs;
  tree def;
  enum vect_def_type dt;
  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;

  if (!COMPARISON_CLASS_P (cond))
    return false;

  lhs = TREE_OPERAND (cond, 0);
  rhs = TREE_OPERAND (cond, 1);

  if (TREE_CODE (lhs) == SSA_NAME)
    {
      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
      if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
                                 &lhs_def_stmt, &def, &dt, &vectype1))
        return false;
    }
  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
           && TREE_CODE (lhs) != FIXED_CST)
    return false;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
      if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
                                 &rhs_def_stmt, &def, &dt, &vectype2))
        return false;
    }
  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
           && TREE_CODE (rhs) != FIXED_CST)
    return false;

  *comp_vectype = vectype1 ? vectype1 : vectype2;
  return true;
}
/* vectorizable_condition.

   Check if STMT is conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
   else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
bool
vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, tree reduc_def, int reduc_index,
                        slp_tree slp_node)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree comp_vectype = NULL_TREE;
  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree def;
  enum vect_def_type dt, dts[4];
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies;
  enum tree_code code;
  stmt_vec_info prev_stmt_info = NULL;
  int i, j;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  vec<tree> vec_oprnds3 = vNULL;
  tree vec_cmp_type;

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  if (reduc_index && ncopies > 1)
    return false; /* FORNOW */

  if (reduc_index && STMT_SLP_TYPE (stmt_info))
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
           && reduc_def))
    return false;

  /* FORNOW: not yet supported.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "value used after loop.");
      return false;
    }

  /* Is vectorizable conditional operation?  */
  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (code != COND_EXPR)
    return false;

  cond_expr = gimple_assign_rhs1 (stmt);
  then_clause = gimple_assign_rhs2 (stmt);
  else_clause = gimple_assign_rhs3 (stmt);

  if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
                            &comp_vectype)
      || !comp_vectype)
    return false;

  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
                               &then_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
           && TREE_CODE (then_clause) != REAL_CST
           && TREE_CODE (then_clause) != FIXED_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
                               &else_def_stmt, &def, &dt))
        return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
           && TREE_CODE (else_clause) != REAL_CST
           && TREE_CODE (else_clause) != FIXED_CST)
    return false;

  unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
  /* The result of a vector comparison should be signed type.  */
  tree cmp_type = build_nonstandard_integer_type (prec, 0);
  vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
  if (vec_cmp_type == NULL_TREE)
    return false;

  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (vectype, comp_vectype);
    }

  /* Transform.  */

  if (!slp_node)
    {
      vec_oprnds0.create (1);
      vec_oprnds1.create (1);
      vec_oprnds2.create (1);
      vec_oprnds3.create (1);
    }

  /* Handle def.  */
  scalar_dest = gimple_assign_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr.  */
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt = NULL;
      if (j == 0)
        {
          if (slp_node)
            {
              vec<tree> ops;
              ops.create (4);
              vec<vec<tree> > vec_defs;

              vec_defs.create (4);
              ops.safe_push (TREE_OPERAND (cond_expr, 0));
              ops.safe_push (TREE_OPERAND (cond_expr, 1));
              ops.safe_push (then_clause);
              ops.safe_push (else_clause);
              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
              vec_oprnds3 = vec_defs.pop ();
              vec_oprnds2 = vec_defs.pop ();
              vec_oprnds1 = vec_defs.pop ();
              vec_oprnds0 = vec_defs.pop ();

              ops.release ();
              vec_defs.release ();
            }
          else
            {
              gimple gtemp;
              vec_cond_lhs =
                vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
                                              stmt, NULL);
              vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
                                  loop_vinfo, NULL, &gtemp, &def, &dts[0]);

              vec_cond_rhs =
                vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
                                              stmt, NULL);
              vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
                                  loop_vinfo, NULL, &gtemp, &def, &dts[1]);
              if (reduc_index == 1)
                vec_then_clause = reduc_def;
              else
                {
                  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
                                                                  stmt, NULL);
                  vect_is_simple_use (then_clause, stmt, loop_vinfo,
                                      NULL, &gtemp, &def, &dts[2]);
                }
              if (reduc_index == 2)
                vec_else_clause = reduc_def;
              else
                {
                  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
                                                                  stmt, NULL);
                  vect_is_simple_use (else_clause, stmt, loop_vinfo,
                                      NULL, &gtemp, &def, &dts[3]);
                }
            }
        }
      else
        {
          vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
                                                         vec_oprnds0.pop ());
          vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
                                                         vec_oprnds1.pop ());
          vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
                                                            vec_oprnds2.pop ());
          vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
                                                            vec_oprnds3.pop ());
        }

      if (!slp_node)
        {
          vec_oprnds0.quick_push (vec_cond_lhs);
          vec_oprnds1.quick_push (vec_cond_rhs);
          vec_oprnds2.quick_push (vec_then_clause);
          vec_oprnds3.quick_push (vec_else_clause);
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
        {
          vec_cond_rhs = vec_oprnds1[i];
          vec_then_clause = vec_oprnds2[i];
          vec_else_clause = vec_oprnds3[i];

          vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
                                vec_cond_lhs, vec_cond_rhs);
          vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
                                  vec_compare, vec_then_clause,
                                  vec_else_clause);

          new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (slp_node)
        continue;

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();
  vec_oprnds3.release ();

  return true;
}
/* Make sure the statement is vectorizable.  */

bool
vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  tree scalar_type, vectype;
  gimple pattern_stmt;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  if (gimple_has_volatile_ops (stmt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: stmt has volatile operands");

      return false;
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal, don't analyze pattern stmts instead, the pattern stmts
     already will be part of SLP instance.  */

  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && pattern_stmt
          && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
        {
          /* Analyze PATTERN_STMT instead of the original stmt.  */
          stmt = pattern_stmt;
          stmt_info = vinfo_for_stmt (pattern_stmt);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "==> examining pattern statement: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.");

          return true;
        }
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
           && node == NULL
           && pattern_stmt
           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
    {
      /* Analyze PATTERN_STMT too.  */
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "==> examining pattern statement: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
        return false;
    }

  if (is_pattern_stmt_p (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
        {
          gimple pattern_def_stmt = gsi_stmt (si);
          if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
            {
              /* Analyze def stmt of STMT if it's a pattern stmt.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "==> examining pattern def statement: ");
                  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
                }

              if (!vect_analyze_stmt (pattern_def_stmt,
                                      need_to_vectorize, node))
                return false;
            }
        }
    }

  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
      case vect_internal_def:
        break;

      case vect_reduction_def:
      case vect_nested_cycle:
         gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
                     || relevance == vect_used_in_outer_by_reduction
                     || relevance == vect_unused_in_scope));
         break;

      case vect_induction_def:
      case vect_constant_def:
      case vect_external_def:
      case vect_unknown_def_type:
      default:
        gcc_unreachable ();
    }

  if (bb_vinfo)
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "get vectype for scalar type: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
        }

      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "not SLPed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
            }
          return false;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
        }

      STMT_VINFO_VECTYPE (stmt_info) = vectype;
    }

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
      *need_to_vectorize = true;
    }

  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
          || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
          || vectorizable_shift (stmt, NULL, NULL, NULL)
          || vectorizable_operation (stmt, NULL, NULL, NULL)
          || vectorizable_assignment (stmt, NULL, NULL, NULL)
          || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
          || vectorizable_call (stmt, NULL, NULL, NULL)
          || vectorizable_store (stmt, NULL, NULL, NULL)
          || vectorizable_reduction (stmt, NULL, NULL, NULL)
          || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
  else
    {
      if (bb_vinfo)
        ok = (vectorizable_conversion (stmt, NULL, NULL, node)
              || vectorizable_shift (stmt, NULL, NULL, node)
              || vectorizable_operation (stmt, NULL, NULL, node)
              || vectorizable_assignment (stmt, NULL, NULL, node)
              || vectorizable_load (stmt, NULL, NULL, node, NULL)
              || vectorizable_call (stmt, NULL, NULL, node)
              || vectorizable_store (stmt, NULL, NULL, node)
              || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
    }

  if (!ok)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: relevant stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }

      return false;
    }

  if (bb_vinfo)
    return true;

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
     need extra handling, except for vectorizable reductions.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    ok = vectorizable_live_operation (stmt, NULL, NULL);

  if (!ok)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: live stmt not ");
          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }

      return false;
    }

  return true;
}
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.  */

bool
vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
                     bool *grouped_store, slp_tree slp_node,
                     slp_instance slp_node_instance)
{
  bool is_store = false;
  gimple vec_stmt = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case type_demotion_vec_info_type:
    case type_promotion_vec_info_type:
    case type_conversion_vec_info_type:
      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case induc_vec_info_type:
      gcc_assert (!slp_node);
      done = vectorizable_induction (stmt, gsi, &vec_stmt);
      gcc_assert (done);
      break;

    case shift_vec_info_type:
      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case op_vec_info_type:
      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
                                slp_node_instance);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
        {
          /* In case of interleaving, the whole chain is vectorized when the
             last store in the chain is reached.  Store stmts before the last
             one are skipped, and their vec_stmt_info shouldn't be freed
             meanwhile.  */
          *grouped_store = true;
          if (STMT_VINFO_VEC_STMT (stmt_info))
            is_store = true;
        }
      else
        is_store = true;
      break;

    case condition_vec_info_type:
      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
      gcc_assert (done);
      break;

    case call_vec_info_type:
      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
      stmt = gsi_stmt (*gsi);
      break;

    case reduc_vec_info_type:
      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
      gcc_assert (done);
      break;

    default:
      if (!STMT_VINFO_LIVE_P (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "stmt not supported.");
          gcc_unreachable ();
        }
    }

  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
     is being vectorized, but outside the immediately enclosing loop.  */
  if (vec_stmt
      && STMT_VINFO_LOOP_VINFO (stmt_info)
      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
                                  STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
          || STMT_VINFO_RELEVANT (stmt_info) ==
                                           vect_used_in_outer_by_reduction))
    {
      struct loop *innerloop = LOOP_VINFO_LOOP (
                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      tree scalar_dest;
      gimple exit_phi;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Record the vdef for outer-loop vectorization.");

      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
         (to be used when vectorizing outer-loop stmts that use the DEF of
         STMT).  */
      if (gimple_code (stmt) == GIMPLE_PHI)
        scalar_dest = PHI_RESULT (stmt);
      else
        scalar_dest = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
        {
          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
            {
              exit_phi = USE_STMT (use_p);
              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
            }
        }
    }

  /* Handle stmts whose DEF is used outside the loop-nest that is
     being vectorized.  */
  if (STMT_VINFO_LIVE_P (stmt_info)
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
    {
      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
      gcc_assert (done);
    }

  if (vec_stmt)
    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
  gimple next = first_stmt;
  gimple tmp;
  gimple_stmt_iterator next_si;

  while (next)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (next);

      tmp = GROUP_NEXT_ELEMENT (stmt_info);
      if (is_pattern_stmt_p (stmt_info))
        next = STMT_VINFO_RELATED_STMT (stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      next_si = gsi_for_stmt (next);
      unlink_stmt_vdef (next);
      gsi_remove (&next_si, true);
      release_defs (next);
      free_stmt_vec_info (next);
      next = tmp;
    }
}
/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
                   bb_vec_info bb_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_VECTORIZABLE (res) = true;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;

  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
  STMT_SLP_TYPE (res) = loop_vect;
  GROUP_FIRST_ELEMENT (res) = NULL;
  GROUP_NEXT_ELEMENT (res) = NULL;
  GROUP_SIZE (res) = 0;
  GROUP_STORE_COUNT (res) = 0;
  GROUP_GAP (res) = 0;
  GROUP_SAME_DR_STMT (res) = NULL;

  return res;
}
/* Create a growable vector holding the stmt_vec_info structs.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.create (50);
}


/* Free the vector holding the stmt_vec_info structs.  */

void
free_stmt_vec_info_vec (void)
{
  unsigned int i;
  vec_void_p info;
  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
    if (info != NULL)
      free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
  gcc_assert (stmt_vec_info_vec.exists ());
  stmt_vec_info_vec.release ();
}
/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      stmt_vec_info patt_info
        = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
      if (patt_info)
        {
          gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
          if (seq)
            {
              gimple_stmt_iterator si;
              for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
                free_stmt_vec_info (gsi_stmt (si));
            }
          free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
        }
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  enum machine_mode simd_mode;
  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0)
    return NULL_TREE;

  if (GET_MODE_CLASS (inner_mode) != MODE_INT
      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
    return NULL_TREE;

  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
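  /* For example, a 1-bit boolean bit-field type in QImode (type precision 1,
     mode bitsize 8) is replaced here by an 8-bit integer type.  */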
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
          || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
                                                  TYPE_UNSIGNED (scalar_type));

  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
     When the component mode passes the above test simply use a type
     corresponding to that mode.  The theory is that any use that
     would cause problems with this will disable vectorization anyway.  */
  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
           && !INTEGRAL_TYPE_P (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);

  /* We can't build a vector type of elements with alignment bigger than
     their size.  */
  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
                                                  TYPE_UNSIGNED (scalar_type));

  /* If we fell back to using the mode fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;

  /* If no size was supplied use the mode the target prefers.  Otherwise
     lookup a vector mode of the specified size.  */
  if (size == 0)
    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
  else
    simd_mode = mode_for_vector (inner_mode, size / nbytes);
  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
  if (nunits <= 1)
    return NULL_TREE;

  vectype = build_vector_type (scalar_type, nunits);

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    return NULL_TREE;

  return vectype;
}
unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
                                                  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of STMT in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
6287 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
6288 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
6289 tree
*def
, enum vect_def_type
*dt
)
6292 stmt_vec_info stmt_vinfo
;
6293 struct loop
*loop
= NULL
;
6296 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6301 if (dump_enabled_p ())
6303 dump_printf_loc (MSG_NOTE
, vect_location
,
6304 "vect_is_simple_use: operand ");
6305 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
6308 if (CONSTANT_CLASS_P (operand
))
6310 *dt
= vect_constant_def
;
6314 if (is_gimple_min_invariant (operand
))
6317 *dt
= vect_external_def
;
6321 if (TREE_CODE (operand
) == PAREN_EXPR
)
6323 if (dump_enabled_p ())
6324 dump_printf_loc (MSG_NOTE
, vect_location
, "non-associatable copy.");
6325 operand
= TREE_OPERAND (operand
, 0);
6328 if (TREE_CODE (operand
) != SSA_NAME
)
6330 if (dump_enabled_p ())
6331 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6336 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
6337 if (*def_stmt
== NULL
)
6339 if (dump_enabled_p ())
6340 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6345 if (dump_enabled_p ())
6347 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
6348 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
6351 /* Empty stmt is expected only in case of a function argument.
6352 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
6353 if (gimple_nop_p (*def_stmt
))
6356 *dt
= vect_external_def
;
6360 bb
= gimple_bb (*def_stmt
);
6362 if ((loop
&& !flow_bb_inside_loop_p (loop
, bb
))
6363 || (!loop
&& bb
!= BB_VINFO_BB (bb_vinfo
))
6364 || (!loop
&& gimple_code (*def_stmt
) == GIMPLE_PHI
))
6365 *dt
= vect_external_def
;
6368 stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
6369 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
6372 if (*dt
== vect_unknown_def_type
6374 && *dt
== vect_double_reduction_def
6375 && gimple_code (stmt
) != GIMPLE_PHI
))
6377 if (dump_enabled_p ())
6378 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6379 "Unsupported pattern.");
6383 if (dump_enabled_p ())
6384 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: %d.", *dt
);
6386 switch (gimple_code (*def_stmt
))
6389 *def
= gimple_phi_result (*def_stmt
);
6393 *def
= gimple_assign_lhs (*def_stmt
);
6397 *def
= gimple_call_lhs (*def_stmt
);
6402 if (dump_enabled_p ())
6403 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6404 "unsupported defining stmt: ");
/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
		      bb_vec_info bb_vinfo, gimple *def_stmt,
		      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
			   def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
	  && !STMT_VINFO_RELEVANT (stmt_info)
	  && !STMT_VINFO_LIVE_P (stmt_info))
	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
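/* Sketch: a caller that needs the operand's vector type typically does
   something like the following (the NULL_TREE fallback is the caller's
   choice, as documented above; this is one common option):

     tree vectype;
     if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, NULL, &def_stmt,
				&def, &dt, &vectype))
       return false;
     if (!vectype)
       vectype = get_vectype_for_scalar_type (TREE_TYPE (op));  */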
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */
bool
supportable_widening_operation (enum tree_code code, gimple stmt,
				tree vectype_out, tree vectype_in,
				enum tree_code *code1, enum tree_code *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
	 two vectors (because the widened results do not fit into one vector).
	 The generated vector results would normally be expected to be
	 generated in the same order as in the original scalar computation,
	 i.e. if 8 results are generated in each vector iteration, they are
	 to be organized as follows:
		vect1: [res1,res2,res3,res4],
		vect2: [res5,res6,res7,res8].

	 However, in the special case that the result of the widening
	 operation is used in a reduction computation only, the order doesn't
	 matter (because when vectorizing a reduction we change the order of
	 the computation).  Some targets can take advantage of this and
	 generate more efficient code.  For example, targets like Altivec,
	 that support widen_mult using a sequence of {mult_even,mult_odd}
	 generate the following vectors:
		vect1: [res1,res3,res5,res7],
		vect2: [res2,res4,res6,res8].

	 When vectorizing outer-loops, we execute the inner-loop sequentially
	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
	 order of the computation in the inner-loop during outer-loop
	 vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
	 widen and then contract again, e.g. (short)((int)x * y >> 8).
	 Normally, pack_trunc performs an even/odd permute, whereas the
	 repack from an even/odd expansion would be an interleave, which
	 would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
	 are properly set up for the caller.  If we fail, we'll continue with
	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
	  && !nested_in_vect_loop_p (vect_loop, stmt)
	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
					     stmt, vectype_out, vectype_in,
					     code1, code2, multi_step_cvt,
					     interm_types))
	return true;
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;
    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
	 computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }
  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;
  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode,
					  TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
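/* Worked example, assuming generic x86-style vector modes: a char->int
   widening conversion with VECTYPE_IN == V16QImode and VECTYPE_OUT ==
   V4SImode cannot be done with a single vec_unpack_{lo,hi}, which only
   reaches V8HImode.  The loop above therefore pushes "vector(8) short"
   onto INTERM_TYPES and sets *MULTI_STEP_CVT to 1; the second unpack
   step reaches V4SImode, matching TYPE_MODE (wide_vectype), and the
   function returns true.  This is the char->short->int case from the
   function comment.  */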
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */
bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
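/* Worked example, assuming generic x86-style vector modes: an int->char
   narrowing conversion with VECTYPE_IN == V4SImode and VECTYPE_OUT ==
   V16QImode takes two vec_pack_trunc steps, V4SI->V8HI and V8HI->V16QI.
   The loop above records "vector(8) short" in INTERM_TYPES and sets
   *MULTI_STEP_CVT to 1 before the second step hits
   TYPE_MODE (narrow_vectype).  This is the int->short->char case from
   the function comment.  */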