/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "coretypes.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "tree-vectorizer.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (struct _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
{
  gimple stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
{
  if (body_cost_vec)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      add_stmt_info_to_vec (body_cost_vec, count, kind,
                            stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
                            misalign);
      return (unsigned)
        (builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
  else
    {
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
      void *target_cost_data;

      if (loop_vinfo)
        target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
      else
        target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
                            misalign, where);
    }
}
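/* Illustrative usage sketch (names such as body_cost_vec and ncopies below
   are placeholders, not taken from the surrounding code): a caller that is
   still analyzing a statement typically passes a cost vector,

       unsigned c = record_stmt_cost (&body_cost_vec, ncopies, vector_stmt,
                                      stmt_info, 0, vect_body);

   and only later hands the accumulated vector to the target model; passing
   a NULL vector instead charges the cost to the target data immediately via
   add_stmt_cost.  */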
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
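/* For illustration, with N == 2 the sequence built above amounts to

       vect_x.7 = VECT_ARRAY[2];

   where vect_x.7 stands in for the SSA name that is returned to the caller
   (the concrete names here are made up).  */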
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

static void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      bool found = false;

      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use, if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  {
                    found = true;
                    break;
                  }
              }
        }

      if (!found)
        {
          /* This is the last stmt in a sequence that was detected as a
             pattern that can potentially be vectorized.  Don't mark the stmt
             as relevant/live because it's not going to be vectorized.
             Instead mark the pattern-stmt that replaces it.  */

          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "last stmt in pattern. don't mark"
                             " relevant/live.");
          stmt_info = vinfo_for_stmt (pattern_stmt);
          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
          stmt = pattern_stmt;
        }
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.");
      return;
    }

  worklist->safe_push (stmt);
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.");

              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)   */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
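/* For example (the loop and names are illustrative only), in

       for (i = 0; i < n; i++)
         {
           a[i] = b[i] + 1;     <-- has a vdef, hence relevant
           sum += b[i];         <-- 'sum' is used after the loop, hence live
         }

   both assignments are reported as relevant and/or live, whereas the
   increment of 'i' on its own would be neither.  */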
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- STMT = (SSA_NAME) a[USE]
     -2- STMT = a[USE] = SSA_NAME
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
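/* For example (illustrative GIMPLE), in the load

       x_3 = a[i_5];

   the use of i_5 only serves as an array index, so for USE == i_5 this
   function returns false; in the store  a[i_5] = x_3  the use of x_3 is
   the stored value itself, so for USE == x_3 it returns true.  */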
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, vec<gimple> *worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...               */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  vec<gimple> worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_mark_stmts_to_be_vectorized ===");

  worklist.create (64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = worklist.pop ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
          live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
          relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
            switch (tmp_relevant)
              {
                case vect_unused_in_scope:
                  relevant = vect_used_by_reduction;
                  break;

                case vect_used_by_reduction:
                  if (gimple_code (stmt) == GIMPLE_PHI)
                    break;
                  /* fall through */

                default:
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "unsupported use of reduction.");
                  return false;
              }

            live_p = false;
            break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of nested cycle.");

                return false;
              }

            live_p = false;
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "unsupported use of double reduction.");

                return false;
              }

            live_p = false;
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    {
                      worklist.release ();
                      return false;
                    }
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      worklist.release ();
                      return false;
                    }
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      worklist.release ();
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              {
                worklist.release ();
                return false;
              }
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            {
              worklist.release ();
              return false;
            }
        }
    } /* while worklist */

  worklist.release ();
  return true;
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        stmt_vector_for_cost *prologue_cost_vec,
                        stmt_vector_for_cost *body_cost_vec)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                  stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}
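/* For illustration (the numbers are made up): with ncopies == 2 and one
   operand that is an external def, the code above records one vector_stmt
   in the prologue (to materialize the invariant vector) and two vector_stmt
   copies in the loop body; the reported costs are whatever the target
   assigns to those statement kinds.  */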
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, prologue_cost = 0;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  void *target_cost_data;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (loop_vinfo)
    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  else
    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);

  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
        (i + 1) : i;
      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
                                    vec_promote_demote, stmt_info, 0,
                                    vect_body);
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                      stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}
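/* Worked example (illustrative, following the loop above): a two-step
   demotion has PWR == 1, so the loop runs for i = 0, 1 and charges
   vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3 vec_promote_demote operations;
   each further step again doubles the count, as the comment before the
   function states.  */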
/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

static int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
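/* For example, for a group of four interleaved accesses S1..S4 with S1 as
   GROUP_FIRST_ELEMENT, the call for S1 returns 4 while the calls for S2..S4
   return 1, so the whole-group overhead is charged exactly once.  */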
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node,
                       stmt_vector_for_cost *prologue_cost_vec,
                       stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  unsigned int inside_cost = 0, prologue_cost = 0;
  struct data_reference *first_dr;
  gimple first_stmt;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        {
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          group_size = 1;
        }
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .",
                         group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}
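/* Worked example (illustrative): storing a group of 4 interleaved elements
   with ncopies == 1 and no store-lanes support costs
   nstmts = 1 * exact_log2 (4) * 4 = 8 vec_perm operations, on top of the
   vector_store or unaligned_store costs added by vect_get_store_cost.  */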
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
                      bool load_lanes_p, slp_tree slp_node,
                      stmt_vector_for_cost *prologue_cost_vec,
                      stmt_vector_for_cost *body_cost_vec)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, prologue_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses even and odd extract operations for each needed permute.  */
      int nstmts = ncopies * exact_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
                                       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .",
                         group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += record_stmt_cost (body_cost_vec,
                                       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
                                       scalar_load, stmt_info, 0, vect_body);
      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
                                       stmt_info, 0, vect_body);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
                        ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
                         || group_size > 1 || slp_node),
                        &inside_cost, &prologue_cost,
                        prologue_cost_vec, body_cost_vec, true);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .", inside_cost, prologue_cost);
}
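/* Worked example (illustrative): a strided load of a four-element vector
   type with ncopies == 2 is costed above as 2 * 4 = 8 scalar_load operations
   plus 2 vec_construct operations to assemble the vectors.  */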
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          DR_MISALIGNMENT (dr), vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.");
        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "created new init_stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */

tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          if (CONSTANT_CLASS_P (val))
            val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
          else
            {
              new_temp = make_ssa_name (TREE_TYPE (type), NULL);
              init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
                                                        new_temp, val,
                                                        NULL_TREE);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);

  return vec_oprnd;
}
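/* For example (illustrative), vectorizing  x = y + 5  with a four-element
   integer vector type leads to a call like

       vect_init_vector (stmt, 5, vectype, NULL);

   which emits something of the form  cst_1 = { 5, 5, 5, 5 };  in the loop
   preheader and returns the SSA name of that vector.  */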
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_get_vec_def_for_operand: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
                                      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (dump_enabled_p ())
    {
      int loc_printed = 0;
      if (def)
        {
          dump_printf_loc (MSG_NOTE, vect_location, "def = ");
          loc_printed = 1;
          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
        }
      if (def_stmt)
        {
          if (loc_printed)
            dump_printf (MSG_NOTE, "  def_stmt = ");
          else
            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt = ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Create vector_cst. nunits = %d", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0: vx.0 = memref0   VS1.1
                        VS1.1: vx.1 = memref1   VS1.2
                        VS1.2: vx.2 = memref2   VS1.3
                        VS1.3: vx.3 = memref3

   S2: z = x + ...      VSnew.0: vz0 = vx.0 + ...  VSnew.1
                        VSnew.1: vz1 = vx.1 + ...  VSnew.2
                        VSnew.2: vz2 = vx.2 + ...  VSnew.3
                        VSnew.3: vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */

tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);

  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 vec<tree> *vec_oprnds0,
                                 vec<tree> *vec_oprnds1)
{
  tree vec_oprnd = vec_oprnds0->pop ();

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  vec_oprnds0->quick_push (vec_oprnd);

  if (vec_oprnds1 && vec_oprnds1->length ())
    {
      vec_oprnd = vec_oprnds1->pop ();
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      vec_oprnds1->quick_push (vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   vec<tree> *vec_oprnds0,
                   vec<tree> *vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      vec<tree> ops;
      ops.create (nops);
      vec<vec<tree> > vec_defs;
      vec_defs.create (nops);

      ops.quick_push (op0);
      if (op1)
        ops.quick_push (op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = vec_defs[0];
      if (op1)
        *vec_oprnds1 = vec_defs[1];

      ops.release ();
      vec_defs.release ();
    }
  else
    {
      tree vec_oprnd;

      vec_oprnds0->create (1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      vec_oprnds0->quick_push (vec_oprnd);

      if (op1)
        {
          vec_oprnds1->create (1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          vec_oprnds1->quick_push (vec_oprnd);
        }
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (!fndecl
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                         vectype_in);
}
/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  vec<tree> vargs = vNULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable call?   */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument types differ.");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "argument vector types differ.");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "function is not vectorizable.");

      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              vec<vec<tree> > vec_defs;
              vec_defs.create (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
                {
                  size_t k;
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs[k] = vec_oprndsk[i];
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              vec_defs.release ();
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs.create (nargs * 2);
          else
            vargs.truncate (0);

          if (slp_node)
            {
              vec<vec<tree> > vec_defs;
              vec_defs.create (nargs);
              vec<tree> vec_oprnds0;

              for (i = 0; i < nargs; i++)
                vargs.quick_push (gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0 = vec_defs[0];

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
                {
                  size_t k;
                  vargs.truncate (0);
                  for (k = 0; k < nargs; k++)
                    {
                      vec<tree> vec_oprndsk = vec_defs[k];
                      vargs.quick_push (vec_oprndsk[i]);
                      vargs.quick_push (vec_oprndsk[i + 1]);
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  vec<tree> vec_oprndsi = vec_defs[i];
                  vec_oprndsi.release ();
                }
              vec_defs.release ();
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              vargs.quick_push (vec_oprnd0);
              vargs.quick_push (vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  vargs.release ();

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}
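/* For example (illustrative), a loop containing  a[i] = sqrtf (b[i])  is
   handled here by emitting ncopies calls to the target's vectorized sqrtf
   built-in, as returned by vectorizable_function, and the original scalar
   call is finally replaced by  lhs = 0.0  so that dce can remove it.  */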
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code is CODE, whose number of arguments is
   OP_TYPE, and whose result variable is VEC_DEST; its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

static gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support  */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }

  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
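
/* Rough illustration (the types and codes are only an example): a widening
   operation produces its result in two halves, one per call to this
   function, e.g.

       vshort_lo = VEC_UNPACK_LO_EXPR <vchar>;      <-- CODE1 half
       vshort_hi = VEC_UNPACK_HI_EXPR <vchar>;      <-- CODE2 half

   each half being either a plain assignment or a call to the target
   builtin DECL.  */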
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   the scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

static void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          vec<tree> *vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is the scalar
     operand) are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  vec_oprnds->quick_push (vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  vec_oprnds->quick_push (vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
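
/* Sketch of the resulting count (an assumption drawn from the callers
   below): each invocation pushes two vector defs and recurses while
   MULTI_STEP_CVT is positive, so a caller passing vect_pow2 (m) - 1
   collects 2 * vect_pow2 (m) defs in VEC_OPRNDS.  */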
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the
   function recursively.  */

static void
vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       vec<tree> vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = vec_dsts.pop ();

  for (i = 0; i < vec_oprnds->length (); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = (*vec_oprnds)[i];
      vop1 = (*vec_oprnds)[i + 1];
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        (*vec_oprnds)[i/2] = new_tmp;
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at
         the previous level.  */
      vec_oprnds->truncate ((i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }

  vec_dsts.quick_push (vec_dest);
}
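
/* Worked example (illustrative only): demoting int to char with 128-bit
   vectors takes two steps; four V4SI operands are first packed pairwise
   into two V8HI vectors, and the recursive call then packs those into a
   single V16QI vector using VEC_PACK_TRUNC_EXPR.  */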
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
                                        vec<tree> *vec_oprnds1,
                                        gimple stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  vec<tree> vec_tmp = vNULL;

  vec_tmp.create (vec_oprnds0->length () * 2);
  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = (*vec_oprnds1)[i];
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      vec_tmp.quick_push (new_tmp1);
      vec_tmp.quick_push (new_tmp2);
    }

  vec_oprnds0->release ();
  *vec_oprnds0 = vec_tmp;
}
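
/* Worked example (illustrative only): promoting char to int doubles the
   number of vectors at each step; one V16QI operand yields a lo and a hi
   V8HI half, and applying the two half-codes once more turns those into
   four V4SI vectors.  */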
/* Check if STMT performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  tree vectype_out, vectype_in;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  vec<tree> vec_dsts = vNULL;
  vec<tree> interm_types = vNULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  enum machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?   */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
           != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
          && (TYPE_PRECISION (rhs_type)
              != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision unsupported.");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ...);
      return false;
    }
  if (op_type == binary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
                                   &def_stmt, &def, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                                 &def, &dt[1]);

      if (!ok)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ...);
          return false;
        }
    }

  /* If op0 is an external or constant defs use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
        }
      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
        return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))
        break;
      /* FALLTHRU */
    unsupported:
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "conversion not supported by target.");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                          &code1, &code2, &multi_step_cvt,
                                          &interm_types))
        {
          /* Binary widening operation can only be supported directly by the
             architecture.  */
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
          break;
        }

      if (code != FLOAT_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
           rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
           rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
        {
          cvt_type
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
          cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
          if (cvt_type == NULL_TREE)
            goto unsupported;

          if (GET_MODE_SIZE (rhs_mode) == fltsz)
            {
              if (!supportable_convert_operation (code, vectype_out,
                                                  cvt_type, &decl1, &codecvt1))
                goto unsupported;
            }
          else if (!supportable_widening_operation (code, stmt, vectype_out,
                                                    cvt_type, &codecvt1,
                                                    &codecvt2, &multi_step_cvt,
                                                    &interm_types))
            continue;
          else
            gcc_assert (multi_step_cvt == 0);

          if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
                                              vectype_in, &code1, &code2,
                                              &multi_step_cvt, &interm_types))
            break;
        }

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
        goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
        codecvt2 = ERROR_MARK;
      else
        {
          multi_step_cvt++;
          interm_types.safe_push (cvt_type);
          cvt_type = NULL_TREE;
        }
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      if (code != FIX_TRUNC_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
        goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
                                          &decl1, &codecvt1))
        goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;
      goto unsupported;

    default:
      gcc_unreachable ();
    }

  if (!vec_stmt)                /* transformation not required.  */
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_conversion ===");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
        {
          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
          vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
        }
      else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      interm_types.release ();
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform conversion. ncopies = %d.", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
        op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
        op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from that types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts.create (multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
                                          (cvt_type && modifier == WIDEN)
                                          ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    {
      for (i = interm_types.length () - 1;
           interm_types.iterate (i, &intermediate_type); i--)
        {
          vec_dest = vect_create_destination_var (scalar_dest,
                                                  intermediate_type);
          vec_dsts.quick_push (vec_dest);
        }
    }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);

  if (!slp_node)
    {
      if (modifier == WIDEN)
        {
          vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
          if (op_type == binary_op)
            vec_oprnds1.create (1);
        }
      else if (modifier == NARROW)
        vec_oprnds0.create (
                   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
    }
  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1.create (slp_node->vec_stmts_size);

  last_oprnd = op0;
  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; j++)
        {
          if (j == 0)
            vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                               -1);
          else
            vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              /* Arguments are ready, create the new vector stmt.  */
              if (code1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
                  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
                                                           vop0, NULL);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_temp);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
            }

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (j == 0)
            {
              if (slp_node)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    {
                      vec_oprnd1 = op1;
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);

                      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0,
                                         NULL, slp_node, -1);
                    }
                  else
                    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
                                       &vec_oprnds1, slp_node, -1);
                }
              else
                {
                  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
                  vec_oprnds0.quick_push (vec_oprnd0);
                  if (op_type == binary_op)
                    {
                      if (code == WIDEN_LSHIFT_EXPR)
                        vec_oprnd1 = op1;
                      else
                        vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
                                                                   NULL);
                      vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
              vec_oprnds0.truncate (0);
              vec_oprnds0.quick_push (vec_oprnd0);
              if (op_type == binary_op)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                                 vec_oprnd1);
                  vec_oprnds1.truncate (0);
                  vec_oprnds1.quick_push (vec_oprnd1);
                }
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          for (i = multi_step_cvt; i >= 0; i--)
            {
              tree this_dest = vec_dsts[i];
              enum tree_code c1 = code1, c2 = code2;
              if (i == 0 && codecvt2 != ERROR_MARK)
                {
                  c1 = codecvt1;
                  c2 = codecvt2;
                }
              vect_create_vectorized_promotion_stmts (&vec_oprnds0,
                                                      &vec_oprnds1,
                                                      stmt, this_dest, gsi,
                                                      c1, c2, decl1, decl2,
                                                      op_type);
            }

          FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
            {
              if (cvt_type)
                {
                  if (codecvt1 == CALL_EXPR)
                    {
                      new_stmt = gimple_build_call (decl1, 1, vop0);
                      new_temp = make_ssa_name (vec_dest, new_stmt);
                      gimple_call_set_lhs (new_stmt, new_temp);
                    }
                  else
                    {
                      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                      new_temp = make_ssa_name (vec_dest, NULL);
                      new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                               new_temp,
                                                               vop0, NULL);
                    }

                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                new_stmt = SSA_NAME_DEF_STMT (vop0);

              if (slp_node)
                SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
              else
                {
                  if (!prev_stmt_info)
                    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }
            }
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case NARROW:
      /* In case the vectorization factor (VF) is bigger than the number
         of elements that we can fit in a vectype (nunits), we have to
         generate more than one vector stmt - i.e - we need to "unroll"
         the vector stmt by a factor VF/nunits.  */
      for (j = 0; j < ncopies; j++)
        {
          /* Handle uses.  */
          if (slp_node)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            {
              vec_oprnds0.truncate (0);
              vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                        vect_pow2 (multi_step_cvt) - 1);
            }

          /* Arguments are ready.  Create the new vector stmts.  */
          if (cvt_type)
            FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
              {
                if (codecvt1 == CALL_EXPR)
                  {
                    new_stmt = gimple_build_call (decl1, 1, vop0);
                    new_temp = make_ssa_name (vec_dest, new_stmt);
                    gimple_call_set_lhs (new_stmt, new_temp);
                  }
                else
                  {
                    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                    new_temp = make_ssa_name (vec_dest, NULL);
                    new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
                                                             vop0, NULL);
                  }

                vect_finish_stmt_generation (stmt, new_stmt, gsi);
                vec_oprnds0[i] = new_temp;
              }

          vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
                                                 stmt, vec_dsts, gsi,
                                                 slp_node, code1,
                                                 &prev_stmt_info);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_dsts.release ();
  interm_types.release ();

  return true;
}
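
/* Worked example for the NCOPIES computation above (illustrative only):
   with a vectorization factor of 16 and 128-bit vectors, widening short
   (V8HI) to int (V4SI) has nunits_in = 8, so ncopies = VF / nunits_in = 2;
   each copy is then expanded by the promotion code into two vector stmts,
   covering all 16 scalar iterations.  */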
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  vec<tree> vec_oprnds = vNULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ...);
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
           && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "type conversion to/from bit-precision "
                         "unsupported.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_assignment ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  for (j = 0; j < ncopies; j++)
    {
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
        {
          if (CONVERT_EXPR_CODE_P (code)
              || code == VIEW_CONVERT_EXPR)
            vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds.release ();
  return true;
}
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
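
/* Note (a reading of the checks above, not additional semantics): the
   routine prefers the vector-shifted-by-scalar optab and only falls back
   to the vector-shifted-by-vector one, mirroring the preference applied
   in vectorizable_shift below.  */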
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
                    gimple *vec_stmt, slp_tree slp_node)
{
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  enum machine_mode optab_op2_mode;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
        || code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision shifts not supported.");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ...);
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no vectype for scalar type ");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  op1 = gimple_assign_rhs2 (stmt);
  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                             &def, &dt[1], &op1_vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ...);
      return false;
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
           || dt[1] == vect_external_def
           || dt[1] == vect_internal_def)
    {
      /* In SLP, need to check whether the shift count is the same,
         in loops if it is a constant or invariant, it is always
         a scalar shift.  */
      if (slp_node)
        {
          vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
          gimple slpstmt;

          FOR_EACH_VEC_ELT (stmts, k, slpstmt)
            if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
              scalar_shift_arg = false;
        }
    }
  else
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "operand mode requires invariant argument.");
      return false;
    }

  /* Vector shifted by vector.  */
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vector/vector shift/rotate found.");

      if (!op1_vectype)
        op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
      if (op1_vectype == NULL_TREE
          || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unusable type for last operand in"
                             " vector/vector shift/rotate.");
          return false;
        }
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
          && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "vector/scalar shift/rotate found.");
        }
      else
        {
          optab = optab_for_tree_code (code, vectype, optab_vector);
          if (optab
              && (optab_handler (optab, TYPE_MODE (vectype))
                  != CODE_FOR_nothing))
            {
              scalar_shift_arg = false;

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vector/vector shift/rotate found.");

              /* Unlike the other binary operators, shifts/rotates have
                 the rhs being int, instead of the same type as the lhs,
                 so make sure the scalar is the right type if we are
                 dealing with vectors of long long/long/short/char.  */
              if (dt[1] == vect_constant_def)
                op1 = fold_convert (TREE_TYPE (vectype), op1);
              else if (!useless_type_conversion_p (TREE_TYPE (vectype),
                                                   TREE_TYPE (op1)))
                {
                  if (slp_node
                      && TYPE_MODE (TREE_TYPE (vectype))
                         != TYPE_MODE (TREE_TYPE (op1)))
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "unusable type for last operand in"
                                         " vector/vector shift/rotate.");
                      return false;
                    }
                  if (vec_stmt && !slp_node)
                    {
                      op1 = fold_convert (TREE_TYPE (vectype), op1);
                      op1 = vect_init_vector (stmt, op1,
                                              TREE_TYPE (vectype), NULL);
                    }
                }
            }
        }
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ...);
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (vf < vect_min_worthwhile_factor (code)
              && !vec_stmt))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && vf < vect_min_worthwhile_factor (code)
      && !vec_stmt)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_shift ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (scalar_shift_arg)
            {
              /* Vector shl and shr insn patterns can be defined with scalar
                 operand 2 (shift operand).  In this case, use constant or loop
                 invariant op1 directly, without extending it to vector mode
                 first.  */
              optab_op2_mode = insn_data[icode].operand[2].mode;
              if (!VECTOR_MODE_P (optab_op2_mode))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "operand 1 using scalar mode.");
                  vec_oprnd1 = op1;
                  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
                  vec_oprnds1.quick_push (vec_oprnd1);
                  if (slp_node)
                    {
                      /* Store vec_oprnd1 for every vector stmt to be created
                         for SLP_NODE.  We check during the analysis that all
                         the shift arguments are the same.
                         TODO: Allow different constants for different vector
                         stmts generated for an SLP instance.  */
                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                        vec_oprnds1.quick_push (vec_oprnd1);
                    }
                }
            }

          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
             (a special case for certain kind of vector shifts); otherwise,
             operand 1 should be of a vector type (the usual case).  */
          if (vec_oprnd1)
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
        }
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = vec_oprnds1[i];
          new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}
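
/* Rough illustration (made-up names): for

       s = a[i] << 3;

   the shift amount is invariant, so the vector/scalar form is preferred
   and each copy becomes a single stmt such as

       vect_s = vect_a << 3;

   whereas a per-element amount b[i] forces the vector/vector form

       vect_s = vect_a << vect_b;  */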
static tree
permute_vec_elements (tree, tree, tree, gimple,
                      gimple_stmt_iterator *);
/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, slp_tree slp_node)
{
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code;
  enum machine_mode vec_mode;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  /* For pointer addition, we should use the normal plus for
     the vector addition.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "num. args = %d (not unary/binary/ternary op).",
                         op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bit-precision arithmetic not supported.");
      return false;
    }

  op0 = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ...);
      return false;
    }
  /* If op0 is an external or constant def use a vector type with
     the same size as the output vector type.  */
  if (!vectype)
    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "no vectype for scalar type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             TREE_TYPE (op0));
        }
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (nunits_out != nunits_in)
    return false;

  if (op_type == binary_op || op_type == ternary_op)
    {
      op1 = gimple_assign_rhs2 (stmt);
      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt[1]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ...);
          return false;
        }
    }
  if (op_type == ternary_op)
    {
      op2 = gimple_assign_rhs3 (stmt);
      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt[2]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ...);
          return false;
        }
    }

  if (loop_vinfo)
    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  gcc_assert (ncopies >= 1);

  /* Shifts are handled in vectorizable_shift ().  */
  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    {
      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
        icode = LAST_INSN_CODE;
      else
        icode = CODE_FOR_nothing;
    }
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ...);
          return false;
        }
      icode = (int) optab_handler (optab, vec_mode);
    }

  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "op not supported by target.");
      /* Check only during analysis.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
        return false;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  Check only during analysis.  */
  if (!VECTOR_MODE_P (vec_mode)
      && !vec_stmt
      && vf < vect_min_worthwhile_factor (code))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not worthwhile without SIMD support.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_operation ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform binary/unary operation.");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

       before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

       step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
               there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

       step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (op_type == binary_op || op_type == ternary_op)
            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                               slp_node, -1);
          else
            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                               slp_node, -1);
          if (op_type == ternary_op)
            {
              vec_oprnds2.create (1);
              vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
                                                                    stmt,
                                                                    NULL));
            }
        }
      else
        {
          vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
          if (op_type == ternary_op)
            {
              tree vec_oprnd = vec_oprnds2.pop ();
              vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
                                                                      vec_oprnd));
            }
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
        {
          vop1 = ((op_type == binary_op || op_type == ternary_op)
                  ? vec_oprnds1[i] : NULL_TREE);
          vop2 = ((op_type == ternary_op)
                  ? vec_oprnds2[i] : NULL_TREE);
          new_stmt = gimple_build_assign_with_ops (code, vec_dest,
                                                   vop0, vop1, vop2);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
        }

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}
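
/* Note (an assumption for illustration): ternary rhs codes such as
   FMA_EXPR also reach this function; for them a third vector operand is
   collected in VEC_OPRNDS2 above and passed as the last operand of each
   new vector stmt.  */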
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                    slp_tree slp_node)
{
  tree vec_oprnd = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  enum machine_mode vec_mode;
  enum dr_alignment_support alignment_support_scheme;
  enum vect_def_type dt;
  stmt_vec_info prev_stmt_info = NULL;
  tree dataref_ptr = NULL_TREE;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  gimple next_stmt, first_stmt = NULL;
  bool grouped_store = false;
  bool store_lanes_p = false;
  unsigned int group_size, i;
  vec<tree> dr_chain = vNULL;
  vec<tree> oprnds = vNULL;
  vec<tree> result_chain = vNULL;
  vec<tree> vec_oprnds = vNULL;
  bool slp = (slp_node != NULL);
  unsigned int vec_num;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable store? */

  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
      && is_pattern_stmt_p (stmt_info))
    scalar_dest = TREE_OPERAND (scalar_dest, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF
      && TREE_CODE (scalar_dest) != COMPONENT_REF
      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
      && TREE_CODE (scalar_dest) != REALPART_EXPR
      && TREE_CODE (scalar_dest) != MEM_REF)
    return false;

  gcc_assert (gimple_assign_single_p (stmt));
  op = gimple_assign_rhs1 (stmt);
  if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                           &def, &dt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ...);
      return false;
    }

  elem_type = TREE_TYPE (vectype);
  vec_mode = TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
                            ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
                            size_zero_node) < 0)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "negative step for store.");
      return false;
    }

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      grouped_store = true;
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
      if (!slp && !PURE_SLP_STMT (stmt_info))
        {
          group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
          if (vect_store_lanes_supported (vectype, group_size))
            store_lanes_p = true;
          else if (!vect_grouped_store_supported (vectype, group_size))
            return false;
        }

      if (first_stmt == stmt)
        {
          /* STMT is the leader of the group.  Check the operands of all the
             stmts of the group.  */
          next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
          while (next_stmt)
            {
              gcc_assert (gimple_assign_single_p (next_stmt));
              op = gimple_assign_rhs1 (next_stmt);
              if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
                                       &def_stmt, &def, &dt))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     ...);
                  return false;
                }
              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
            }
        }
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
                             NULL, NULL, NULL);
      return true;
    }

  /* Transform.  */

  if (grouped_store)
    {
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

      GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;

      /* FORNOW */
      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));

      /* We vectorize all the stmts of the interleaving group when we
         reach the last stmt in the group.  */
      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
          < GROUP_SIZE (vinfo_for_stmt (first_stmt))
          && !slp)
        {
          *vec_stmt = NULL;
          return true;
        }

      if (slp)
        {
          grouped_store = false;
          /* VEC_NUM is the number of vect stmts to be created for this
             group.  */
          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
          op = gimple_assign_rhs1 (first_stmt);
        }
      else
        /* VEC_NUM is the number of vect stmts to be created for this
           group.  */
        vec_num = group_size;
    }
  else
    {
      first_stmt = stmt;
      first_dr = dr;
      group_size = vec_num = 1;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "transform store. ncopies = %d", ncopies);

  dr_chain.create (group_size);
  oprnds.create (group_size);

  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
  gcc_assert (alignment_support_scheme);
  /* Targets with store-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!store_lanes_p
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

  if (store_lanes_p)
    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
  else
    aggr_type = vectype;

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

        S1:  &base + 2 = x2
        S2:  &base = x0
        S3:  &base + 1 = x1
        S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

        VS1: &base = vx2
        VS2: &base + vec_size*1 = vx0
        VS3: &base + vec_size*2 = vx1
        VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

        VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
        VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
        ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.
  */

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      gimple new_stmt;

      if (j == 0)
        {
          if (slp)
            {
              /* Get vectorized arguments for SLP_NODE.  */
              vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
                                 NULL, slp_node, -1);

              vec_oprnd = vec_oprnds[0];
            }
          else
            {
              /* For interleaved stores we collect vectorized defs for all the
                 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
                 used as an input to vect_permute_store_chain(), and OPRNDS as
                 an input to vect_get_vec_def_for_stmt_copy() for the next copy.

                 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
                 OPRNDS are of size 1.  */
              next_stmt = first_stmt;
              for (i = 0; i < group_size; i++)
                {
                  /* Since gaps are not supported for interleaved stores,
                     GROUP_SIZE is the exact number of stmts in the chain.
                     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
                     there is no interleaving, GROUP_SIZE is 1, and only one
                     iteration of the loop will be executed.  */
                  gcc_assert (next_stmt
                              && gimple_assign_single_p (next_stmt));
                  op = gimple_assign_rhs1 (next_stmt);

                  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
                                                            NULL);
                  dr_chain.quick_push (vec_oprnd);
                  oprnds.quick_push (vec_oprnd);
                  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
                }
            }

          /* We should have caught mismatched types earlier.  */
          gcc_assert (useless_type_conversion_p (vectype,
                                                 TREE_TYPE (vec_oprnd)));
          dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
                                                  NULL_TREE, &dummy, gsi,
                                                  &ptr_incr, false, &inv_p);
          gcc_assert (bb_vinfo || !inv_p);
        }
      else
        {
          /* For interleaved stores we created vectorized defs for all the
             defs stored in OPRNDS in the previous iteration (previous copy).
             DR_CHAIN is then used as an input to vect_permute_store_chain(),
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
             next copy.
             If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
             OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              op = oprnds[i];
              vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                                  &def, &dt);
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
              dr_chain[i] = vec_oprnd;
              oprnds[i] = vec_oprnd;
            }
          dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                         TYPE_SIZE_UNIT (aggr_type));
        }

      if (store_lanes_p)
        {
          tree vec_array;

          /* Combine all the vectors into an array.  */
          vec_array = create_vector_array (vectype, vec_num);
          for (i = 0; i < vec_num; i++)
            {
              vec_oprnd = dr_chain[i];
              write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
            }

          /* Emit:
               MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
          data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
          new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1,
                                                 vec_array);
          gimple_call_set_lhs (new_stmt, data_ref);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
        }
      else
        {
          new_stmt = NULL;
          if (grouped_store)
            {
              result_chain.create (group_size);
              /* Permute.  */
              vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
                                        &result_chain);
            }

          next_stmt = first_stmt;
          for (i = 0; i < vec_num; i++)
            {
              unsigned align, misalign;

              if (i > 0)
                /* Bump the vector pointer.  */
                dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                               stmt, NULL_TREE);

              if (slp)
                vec_oprnd = vec_oprnds[i];
              else if (grouped_store)
                /* For grouped stores vectorized defs are interleaved in
                   vect_permute_store_chain().  */
                vec_oprnd = result_chain[i];

              data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
                                 build_int_cst (reference_alias_ptr_type
                                                (DR_REF (first_dr)), 0));
              align = TYPE_ALIGN_UNIT (vectype);
              if (aligned_access_p (first_dr))
                misalign = 0;
              else if (DR_MISALIGNMENT (first_dr) == -1)
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  align = TYPE_ALIGN_UNIT (elem_type);
                  misalign = 0;
                }
              else
                {
                  TREE_TYPE (data_ref)
                    = build_aligned_type (TREE_TYPE (data_ref),
                                          TYPE_ALIGN (elem_type));
                  misalign = DR_MISALIGNMENT (first_dr);
                }
              set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
                                      misalign);

              /* Arguments are ready.  Create the new vector stmt.  */
              new_stmt = gimple_build_assign (data_ref, vec_oprnd);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if (slp)
                continue;

              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
              if (!next_stmt)
                break;
            }
        }

      if (!slp)
        {
          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
    }

  dr_chain.release ();
  oprnds.release ();
  result_chain.release ();
  vec_oprnds.release ();

  return true;
}
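
/* Summary note (illustrative, not original text): for a grouped store the
   transform above either emits one .STORE_LANES internal call per copy,
   when the target supports it, or interleaves the group with
   vect_permute_store_chain and emits one plain vector store per chain
   element.  */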
/* Given a vector type VECTYPE and permutation SEL returns
   the VECTOR_CST mask that implements the permutation of the
   vector elements.  If that is impossible to do, returns NULL.  */

tree
vect_gen_perm_mask (tree vectype, unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL;

  mask_elt_type = lang_hooks.types.type_for_mode
		    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_elts = XALLOCAVEC (tree, nunits);
  for (i = nunits - 1; i >= 0; i--)
    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
  mask_vec = build_vector (mask_type, mask_elts);
/* Given a vector type VECTYPE, returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  return vect_gen_perm_mask (vectype, sel);
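
/* For illustration (hypothetical 4-element vector type, not part of the
   original code): the selector built above is { 3, 2, 1, 0 }, so the
   resulting VEC_PERM_EXPR mask reverses the lanes:

     vX  = { a, b, c, d }
     rev = VEC_PERM_EXPR <vX, vX, { 3, 2, 1, 0 }>   -->  { d, c, b, a }

   If the target cannot permute with this selector, vect_gen_perm_mask
   (and hence this function) returns NULL.  */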
/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple perm_stmt;

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest, NULL);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
					    x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
static bool
vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
		   slp_tree slp_node, slp_instance slp_node_instance)
{
  tree vec_dest = NULL;
  tree data_ref = NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_stmt_info;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  bool nested_in_vect_loop = false;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  enum machine_mode mode;
  gimple new_stmt = NULL;
  enum dr_alignment_support alignment_support_scheme;
  tree dataref_ptr = NULL_TREE;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int i, j, group_size, group_gap;
  tree msq = NULL_TREE, lsq;
  tree offset = NULL_TREE;
  tree realignment_token = NULL_TREE;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  bool load_lanes_p = false;
  bool negative = false;
  bool compute_in_loop = false;
  struct loop *at_loop;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  enum tree_code code;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
  int gather_scale = 1;
  enum vect_def_type gather_dt = vect_unknown_def_type;
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);
  /* FORNOW.  This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types in nested loop.");
4372 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4375 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4378 /* Is vectorizable load? */
4379 if (!is_gimple_assign (stmt
))
4382 scalar_dest
= gimple_assign_lhs (stmt
);
4383 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4386 code
= gimple_assign_rhs_code (stmt
);
4387 if (code
!= ARRAY_REF
4388 && code
!= INDIRECT_REF
4389 && code
!= COMPONENT_REF
4390 && code
!= IMAGPART_EXPR
4391 && code
!= REALPART_EXPR
4393 && TREE_CODE_CLASS (code
) != tcc_declaration
)
4396 if (!STMT_VINFO_DATA_REF (stmt_info
))
4399 elem_type
= TREE_TYPE (vectype
);
4400 mode
= TYPE_MODE (vectype
);
4402 /* FORNOW. In some cases can vectorize even if data-type not supported
4403 (e.g. - data copies). */
4404 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
4406 if (dump_enabled_p ())
4407 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4408 "Aligned load, but unsupported type.");
4412 /* Check if the load is a part of an interleaving chain. */
4413 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
4415 grouped_load
= true;
4417 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
4419 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4420 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
4422 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4423 if (vect_load_lanes_supported (vectype
, group_size
))
4424 load_lanes_p
= true;
4425 else if (!vect_grouped_load_supported (vectype
, group_size
))
4431 if (STMT_VINFO_GATHER_P (stmt_info
))
4435 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
4436 &gather_off
, &gather_scale
);
4437 gcc_assert (gather_decl
);
4438 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
4439 &def_stmt
, &def
, &gather_dt
,
4440 &gather_off_vectype
))
4442 if (dump_enabled_p ())
4443 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4444 "gather index use not simple.");
4448 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4452 negative
= tree_int_cst_compare (nested_in_vect_loop
4453 ? STMT_VINFO_DR_STEP (stmt_info
)
4455 size_zero_node
) < 0;
4456 if (negative
&& ncopies
> 1)
4458 if (dump_enabled_p ())
4459 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4460 "multiple types with negative step.");
4466 gcc_assert (!grouped_load
);
4467 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
4468 if (alignment_support_scheme
!= dr_aligned
4469 && alignment_support_scheme
!= dr_unaligned_supported
)
4471 if (dump_enabled_p ())
4472 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4473 "negative step but alignment required.");
4476 if (!perm_mask_for_reverse (vectype
))
4478 if (dump_enabled_p ())
4479 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4480 "negative step and reversing not supported.");
4486 if (!vec_stmt
) /* transformation not required. */
4488 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
4489 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
, NULL
, NULL
, NULL
);
4493 if (dump_enabled_p ())
4494 dump_printf_loc (MSG_NOTE
, vect_location
,
4495 "transform load. ncopies = %d", ncopies
);
4499 if (STMT_VINFO_GATHER_P (stmt_info
))
4501 tree vec_oprnd0
= NULL_TREE
, op
;
4502 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
4503 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
4504 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
4505 edge pe
= loop_preheader_edge (loop
);
4508 enum { NARROW
, NONE
, WIDEN
} modifier
;
4509 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
4511 if (nunits
== gather_off_nunits
)
4513 else if (nunits
== gather_off_nunits
/ 2)
4515 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
4518 for (i
= 0; i
< gather_off_nunits
; ++i
)
4519 sel
[i
] = i
| nunits
;
4521 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
4522 gcc_assert (perm_mask
!= NULL_TREE
);
4524 else if (nunits
== gather_off_nunits
* 2)
4526 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
4529 for (i
= 0; i
< nunits
; ++i
)
4530 sel
[i
] = i
< gather_off_nunits
4531 ? i
: i
+ nunits
- gather_off_nunits
;
4533 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
4534 gcc_assert (perm_mask
!= NULL_TREE
);
4540 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
4541 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4542 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4543 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4544 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4545 scaletype
= TREE_VALUE (arglist
);
4546 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
4547 && types_compatible_p (srctype
, masktype
));
4549 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4551 ptr
= fold_convert (ptrtype
, gather_base
);
4552 if (!is_gimple_min_invariant (ptr
))
4554 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
4555 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
4556 gcc_assert (!new_bb
);
4559 /* Currently we support only unconditional gather loads,
4560 so mask should be all ones. */
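	  /* For example (hypothetical masktype, not from the original code):
	     with a V4SI mask the constant built below is { -1, -1, -1, -1 };
	     with a V4SF mask each element is the float whose bit pattern is
	     all ones, so every lane of the gather is enabled.  */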
4561 if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
4562 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
4563 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
4567 for (j
= 0; j
< 6; ++j
)
4569 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
4570 mask
= build_real (TREE_TYPE (masktype
), r
);
4574 mask
= build_vector_from_val (masktype
, mask
);
4575 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
4577 scale
= build_int_cst (scaletype
, gather_scale
);
4579 prev_stmt_info
= NULL
;
4580 for (j
= 0; j
< ncopies
; ++j
)
4582 if (modifier
== WIDEN
&& (j
& 1))
4583 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
4584 perm_mask
, stmt
, gsi
);
4587 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
4590 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
4592 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
4594 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
4595 == TYPE_VECTOR_SUBPARTS (idxtype
));
4596 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
4597 var
= make_ssa_name (var
, NULL
);
4598 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
4600 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
4602 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4607 = gimple_build_call (gather_decl
, 5, mask
, ptr
, op
, mask
, scale
);
4609 if (!useless_type_conversion_p (vectype
, rettype
))
4611 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
4612 == TYPE_VECTOR_SUBPARTS (rettype
));
4613 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
4614 op
= make_ssa_name (var
, new_stmt
);
4615 gimple_call_set_lhs (new_stmt
, op
);
4616 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4617 var
= make_ssa_name (vec_dest
, NULL
);
4618 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
4620 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
4625 var
= make_ssa_name (vec_dest
, new_stmt
);
4626 gimple_call_set_lhs (new_stmt
, var
);
4629 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4631 if (modifier
== NARROW
)
4638 var
= permute_vec_elements (prev_res
, var
,
4639 perm_mask
, stmt
, gsi
);
4640 new_stmt
= SSA_NAME_DEF_STMT (var
);
4643 if (prev_stmt_info
== NULL
)
4644 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4646 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4647 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4651 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4653 gimple_stmt_iterator incr_gsi
;
4659 vec
<constructor_elt
, va_gc
> *v
= NULL
;
4660 gimple_seq stmts
= NULL
;
4661 tree stride_base
, stride_step
, alias_off
;
4663 gcc_assert (!nested_in_vect_loop
);
4666 = fold_build_pointer_plus
4667 (unshare_expr (DR_BASE_ADDRESS (dr
)),
4668 size_binop (PLUS_EXPR
,
4669 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr
))),
4670 convert_to_ptrofftype (DR_INIT(dr
))));
4671 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (dr
)));
      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}
       */
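      /* For illustration (hypothetical values, not part of the original
	 code): with a stride of 3 and VF = 4 the generated loop corresponds
	 to:

	   for (j = 0; ; j += 4*3)
	     {
	       tmp1 = array[j];
	       tmp2 = array[j + 3];
	       tmp3 = array[j + 6];
	       tmp4 = array[j + 9];
	       vectemp = {tmp1, tmp2, tmp3, tmp4};
	     }
       */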
4689 ivstep
= stride_step
;
4690 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
4691 build_int_cst (TREE_TYPE (ivstep
), vf
));
4693 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
4695 create_iv (stride_base
, ivstep
, NULL
,
4696 loop
, &incr_gsi
, insert_after
,
4698 incr
= gsi_stmt (incr_gsi
);
4699 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
4701 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
4703 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
4705 prev_stmt_info
= NULL
;
4706 running_off
= offvar
;
4707 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0);
4708 for (j
= 0; j
< ncopies
; j
++)
4712 vec_alloc (v
, nunits
);
4713 for (i
= 0; i
< nunits
; i
++)
4715 tree newref
, newoff
;
4717 newref
= build2 (MEM_REF
, TREE_TYPE (vectype
),
4718 running_off
, alias_off
);
4720 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
4723 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
4724 newoff
= copy_ssa_name (running_off
, NULL
);
4725 incr
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, newoff
,
4726 running_off
, stride_step
);
4727 vect_finish_stmt_generation (stmt
, incr
, gsi
);
4729 running_off
= newoff
;
4732 vec_inv
= build_constructor (vectype
, v
);
4733 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
4734 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
4737 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4739 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4740 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4747 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4749 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance
).exists ()
4750 && first_stmt
!= SLP_TREE_SCALAR_STMTS (slp_node
)[0])
4751 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
4753 /* Check if the chain of loads is already vectorized. */
4754 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
)))
4756 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4759 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4760 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4762 /* VEC_NUM is the number of vect stmts to be created for this group. */
4765 grouped_load
= false;
4766 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
4767 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance
).exists ())
4769 group_gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
4773 vec_num
= group_size
;
4781 group_size
= vec_num
= 1;
4785 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
4786 gcc_assert (alignment_support_scheme
);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
  gcc_assert (!load_lanes_p
	      || alignment_support_scheme == dr_aligned
	      || alignment_support_scheme == dr_unaligned_supported);
4793 /* In case the vectorization factor (VF) is bigger than the number
4794 of elements that we can fit in a vectype (nunits), we have to generate
4795 more than one vector stmt - i.e - we need to "unroll" the
4796 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4797 from one copy of the vector stmt to the next, in the field
4798 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4799 stages to find the correct vector defs to be used when vectorizing
4800 stmts that use the defs of the current stmt. The example below
4801 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4802 need to create 4 vectorized stmts):
4804 before vectorization:
4805 RELATED_STMT VEC_STMT
4809 step 1: vectorize stmt S1:
4810 We first create the vector stmt VS1_0, and, as usual, record a
4811 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4812 Next, we create the vector stmt VS1_1, and record a pointer to
4813 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4814 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4816 RELATED_STMT VEC_STMT
4817 VS1_0: vx0 = memref0 VS1_1 -
4818 VS1_1: vx1 = memref1 VS1_2 -
4819 VS1_2: vx2 = memref2 VS1_3 -
4820 VS1_3: vx3 = memref3 - -
4821 S1: x = load - VS1_0
4824 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4825 information we recorded in RELATED_STMT field is used to vectorize
4828 /* In case of interleaving (non-unit grouped access):
4835 Vectorized loads are created in the order of memory accesses
4836 starting from the access of the first stmt of the chain:
4839 VS2: vx1 = &base + vec_size*1
4840 VS3: vx3 = &base + vec_size*2
4841 VS4: vx4 = &base + vec_size*3
4843 Then permutation statements are generated:
4845 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4846 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4849 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4850 (the order of the data-refs in the output of vect_permute_load_chain
4851 corresponds to the order of scalar stmts in the interleaving chain - see
4852 the documentation of vect_permute_load_chain()).
4853 The generation of permutation stmts and recording them in
4854 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4856 In case of both multiple types and interleaving, the vector loads and
4857 permutation stmts above are created for every copy. The result vector
4858 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4859 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
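
  /* For illustration (hypothetical group of two interleaved loads and
     4-element vectors, not part of the original code), the permutations
     above are the extract-even/extract-odd selectors:

       vx0 = { a0, b0, a1, b1 }     vx1 = { a2, b2, a3, b3 }
       vx5 = VEC_PERM_EXPR <vx0, vx1, { 0, 2, 4, 6 }>  -->  { a0, a1, a2, a3 }
       vx6 = VEC_PERM_EXPR <vx0, vx1, { 1, 3, 5, 7 }>  -->  { b0, b1, b2, b3 }  */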
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

       p = initial_addr;
       indx = 0;
       loop {
	 p = p + indx * vectype_size;
	 vec_dest = *(p);
	 indx = indx + 1;
       }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

       msq_init = *(floor(p1))
       p2 = initial_addr + VS - 1;
       realignment_token = call target_builtin;
       indx = 0;
       loop {
	 p2 = p2 + indx * vectype_size
	 lsq = *(floor(p2))
	 vec_dest = realign_load (msq, lsq, realignment_token)
	 indx = indx + 1;
	 msq = lsq;
       }   */
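
  /* For illustration (hypothetical 16-byte vectors with p misaligned by 4
     bytes, not part of the original code): msq is the aligned vector
     covering the first bytes of *p and lsq the next aligned vector;
     realign_load shifts the pair by the offset encoded in
     realignment_token so that

       realign_load (msq, lsq, realignment_token) == *(p)

     i.e. two aligned loads plus one permute replace one unaligned load.  */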
4890 /* If the misalignment remains the same throughout the execution of the
4891 loop, we can create the init_addr and permutation mask at the loop
4892 preheader. Otherwise, it needs to be created inside the loop.
4893 This can only occur when vectorizing memory accesses in the inner-loop
4894 nested within an outer-loop that is being vectorized. */
4896 if (nested_in_vect_loop
4897 && (TREE_INT_CST_LOW (DR_STEP (dr
))
4898 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
4900 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
4901 compute_in_loop
= true;
4904 if ((alignment_support_scheme
== dr_explicit_realign_optimized
4905 || alignment_support_scheme
== dr_explicit_realign
)
4906 && !compute_in_loop
)
4908 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
4909 alignment_support_scheme
, NULL_TREE
,
4911 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
4913 phi
= SSA_NAME_DEF_STMT (msq
);
4914 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
4921 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
4924 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
4926 aggr_type
= vectype
;
4928 prev_stmt_info
= NULL
;
4929 for (j
= 0; j
< ncopies
; j
++)
4931 /* 1. Create the vector or array pointer update chain. */
4933 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
4934 offset
, &dummy
, gsi
,
4935 &ptr_incr
, false, &inv_p
);
4937 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
4938 TYPE_SIZE_UNIT (aggr_type
));
4940 if (grouped_load
|| slp_perm
)
4941 dr_chain
.create (vec_num
);
4947 vec_array
= create_vector_array (vectype
, vec_num
);
4950 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4951 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
4952 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
4953 gimple_call_set_lhs (new_stmt
, vec_array
);
4954 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4956 /* Extract each vector into an SSA_NAME. */
4957 for (i
= 0; i
< vec_num
; i
++)
4959 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
4961 dr_chain
.quick_push (new_temp
);
4964 /* Record the mapping between SSA_NAMEs and statements. */
4965 vect_record_grouped_load_vectors (stmt
, dr_chain
);
4969 for (i
= 0; i
< vec_num
; i
++)
4972 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
4975 /* 2. Create the vector-load in the loop. */
4976 switch (alignment_support_scheme
)
4979 case dr_unaligned_supported
:
4981 unsigned int align
, misalign
;
4984 = build2 (MEM_REF
, vectype
, dataref_ptr
,
4985 build_int_cst (reference_alias_ptr_type
4986 (DR_REF (first_dr
)), 0));
4987 align
= TYPE_ALIGN_UNIT (vectype
);
4988 if (alignment_support_scheme
== dr_aligned
)
4990 gcc_assert (aligned_access_p (first_dr
));
4993 else if (DR_MISALIGNMENT (first_dr
) == -1)
4995 TREE_TYPE (data_ref
)
4996 = build_aligned_type (TREE_TYPE (data_ref
),
4997 TYPE_ALIGN (elem_type
));
4998 align
= TYPE_ALIGN_UNIT (elem_type
);
5003 TREE_TYPE (data_ref
)
5004 = build_aligned_type (TREE_TYPE (data_ref
),
5005 TYPE_ALIGN (elem_type
));
5006 misalign
= DR_MISALIGNMENT (first_dr
);
5008 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
5012 case dr_explicit_realign
:
5017 vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
5019 if (compute_in_loop
)
5020 msq
= vect_setup_realignment (first_stmt
, gsi
,
5022 dr_explicit_realign
,
5025 ptr
= copy_ssa_name (dataref_ptr
, NULL
);
5026 new_stmt
= gimple_build_assign_with_ops
5027 (BIT_AND_EXPR
, ptr
, dataref_ptr
,
5029 (TREE_TYPE (dataref_ptr
),
5030 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5031 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5033 = build2 (MEM_REF
, vectype
, ptr
,
5034 build_int_cst (reference_alias_ptr_type
5035 (DR_REF (first_dr
)), 0));
5036 vec_dest
= vect_create_destination_var (scalar_dest
,
5038 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
5039 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5040 gimple_assign_set_lhs (new_stmt
, new_temp
);
5041 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
5042 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
5043 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5046 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
5047 TYPE_SIZE_UNIT (elem_type
));
5048 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
5049 new_stmt
= gimple_build_assign_with_ops
5050 (BIT_AND_EXPR
, NULL_TREE
, ptr
,
5053 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5054 ptr
= copy_ssa_name (dataref_ptr
, new_stmt
);
5055 gimple_assign_set_lhs (new_stmt
, ptr
);
5056 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5058 = build2 (MEM_REF
, vectype
, ptr
,
5059 build_int_cst (reference_alias_ptr_type
5060 (DR_REF (first_dr
)), 0));
5063 case dr_explicit_realign_optimized
:
5064 new_temp
= copy_ssa_name (dataref_ptr
, NULL
);
5065 new_stmt
= gimple_build_assign_with_ops
5066 (BIT_AND_EXPR
, new_temp
, dataref_ptr
,
5068 (TREE_TYPE (dataref_ptr
),
5069 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5070 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5072 = build2 (MEM_REF
, vectype
, new_temp
,
5073 build_int_cst (reference_alias_ptr_type
5074 (DR_REF (first_dr
)), 0));
5079 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5080 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
5081 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5082 gimple_assign_set_lhs (new_stmt
, new_temp
);
5083 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5085 /* 3. Handle explicit realignment if necessary/supported.
5087 vec_dest = realign_load (msq, lsq, realignment_token) */
5088 if (alignment_support_scheme
== dr_explicit_realign_optimized
5089 || alignment_support_scheme
== dr_explicit_realign
)
5091 lsq
= gimple_assign_lhs (new_stmt
);
5092 if (!realignment_token
)
5093 realignment_token
= dataref_ptr
;
5094 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5096 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR
,
5099 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5100 gimple_assign_set_lhs (new_stmt
, new_temp
);
5101 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5103 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
5106 if (i
== vec_num
- 1 && j
== ncopies
- 1)
5107 add_phi_arg (phi
, lsq
,
5108 loop_latch_edge (containing_loop
),
5114 /* 4. Handle invariant-load. */
5115 if (inv_p
&& !bb_vinfo
)
5117 gimple_stmt_iterator gsi2
= *gsi
;
5118 gcc_assert (!grouped_load
);
5120 new_temp
= vect_init_vector (stmt
, scalar_dest
,
5122 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5127 tree perm_mask
= perm_mask_for_reverse (vectype
);
5128 new_temp
= permute_vec_elements (new_temp
, new_temp
,
5129 perm_mask
, stmt
, gsi
);
5130 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5133 /* Collect vector loads and later create their permutation in
5134 vect_transform_grouped_load (). */
5135 if (grouped_load
|| slp_perm
)
5136 dr_chain
.quick_push (new_temp
);
5138 /* Store vector loads in the corresponding SLP_NODE. */
5139 if (slp
&& !slp_perm
)
5140 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5142 /* Bump the vector pointer to account for a gap. */
5143 if (slp
&& group_gap
!= 0)
5145 tree bump
= size_binop (MULT_EXPR
,
5146 TYPE_SIZE_UNIT (elem_type
),
5147 size_int (group_gap
));
5148 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5153 if (slp
&& !slp_perm
)
5158 if (!vect_transform_slp_perm_load (stmt
, dr_chain
, gsi
, vf
,
5159 slp_node_instance
, false))
5161 dr_chain
.release ();
5170 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
5171 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5176 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5178 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5179 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5182 dr_chain
.release ();
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
5201 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
5202 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
5206 enum vect_def_type dt
;
5207 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
5209 if (!COMPARISON_CLASS_P (cond
))
5212 lhs
= TREE_OPERAND (cond
, 0);
5213 rhs
= TREE_OPERAND (cond
, 1);
5215 if (TREE_CODE (lhs
) == SSA_NAME
)
5217 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
5218 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
5219 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
5222 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
5223 && TREE_CODE (lhs
) != FIXED_CST
)
5226 if (TREE_CODE (rhs
) == SSA_NAME
)
5228 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
5229 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
5230 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
5233 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
5234 && TREE_CODE (rhs
) != FIXED_CST
)
5237 *comp_vectype
= vectype1
? vectype1
: vectype2
;
/* vectorizable_condition.

   Check if STMT is a conditional modify expression that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.

   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5255 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
5256 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
5259 tree scalar_dest
= NULL_TREE
;
5260 tree vec_dest
= NULL_TREE
;
5261 tree cond_expr
, then_clause
, else_clause
;
5262 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5263 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5264 tree comp_vectype
= NULL_TREE
;
5265 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
5266 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
5267 tree vec_compare
, vec_cond_expr
;
5269 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5271 enum vect_def_type dt
, dts
[4];
5272 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5274 enum tree_code code
;
5275 stmt_vec_info prev_stmt_info
= NULL
;
5277 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5278 vec
<tree
> vec_oprnds0
= vNULL
;
5279 vec
<tree
> vec_oprnds1
= vNULL
;
5280 vec
<tree
> vec_oprnds2
= vNULL
;
5281 vec
<tree
> vec_oprnds3
= vNULL
;
5284 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
5287 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5289 gcc_assert (ncopies
>= 1);
5290 if (reduc_index
&& ncopies
> 1)
5291 return false; /* FORNOW */
5293 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
5296 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5299 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5300 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
5304 /* FORNOW: not yet supported. */
5305 if (STMT_VINFO_LIVE_P (stmt_info
))
5307 if (dump_enabled_p ())
5308 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5309 "value used after loop.");
5313 /* Is vectorizable conditional operation? */
5314 if (!is_gimple_assign (stmt
))
5317 code
= gimple_assign_rhs_code (stmt
);
5319 if (code
!= COND_EXPR
)
5322 cond_expr
= gimple_assign_rhs1 (stmt
);
5323 then_clause
= gimple_assign_rhs2 (stmt
);
5324 else_clause
= gimple_assign_rhs3 (stmt
);
5326 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
5331 if (TREE_CODE (then_clause
) == SSA_NAME
)
5333 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
5334 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5335 &then_def_stmt
, &def
, &dt
))
5338 else if (TREE_CODE (then_clause
) != INTEGER_CST
5339 && TREE_CODE (then_clause
) != REAL_CST
5340 && TREE_CODE (then_clause
) != FIXED_CST
)
5343 if (TREE_CODE (else_clause
) == SSA_NAME
)
5345 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
5346 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5347 &else_def_stmt
, &def
, &dt
))
5350 else if (TREE_CODE (else_clause
) != INTEGER_CST
5351 && TREE_CODE (else_clause
) != REAL_CST
5352 && TREE_CODE (else_clause
) != FIXED_CST
)
  unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
  /* The result of a vector comparison should be of signed integer type.  */
  tree cmp_type = build_nonstandard_integer_type (prec, 0);
  vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
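  /* For example (hypothetical types, not part of the original code): if
     VECTYPE is V4SF the element precision is 32, so CMP_TYPE is a 32-bit
     signed integer type and VEC_CMP_TYPE is V4SI - the mask type produced
     by a V4SF comparison.  */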
5359 if (vec_cmp_type
== NULL_TREE
)
5364 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
5365 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
5372 vec_oprnds0
.create (1);
5373 vec_oprnds1
.create (1);
5374 vec_oprnds2
.create (1);
5375 vec_oprnds3
.create (1);
5379 scalar_dest
= gimple_assign_lhs (stmt
);
5380 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5382 /* Handle cond expr. */
5383 for (j
= 0; j
< ncopies
; j
++)
5385 gimple new_stmt
= NULL
;
5392 vec
<vec
<tree
> > vec_defs
;
5394 vec_defs
.create (4);
5395 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
5396 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
5397 ops
.safe_push (then_clause
);
5398 ops
.safe_push (else_clause
);
5399 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
5400 vec_oprnds3
= vec_defs
.pop ();
5401 vec_oprnds2
= vec_defs
.pop ();
5402 vec_oprnds1
= vec_defs
.pop ();
5403 vec_oprnds0
= vec_defs
.pop ();
5406 vec_defs
.release ();
5412 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
5414 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
5415 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
5418 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
5420 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
5421 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
5422 if (reduc_index
== 1)
5423 vec_then_clause
= reduc_def
;
5426 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
5428 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
5429 NULL
, >emp
, &def
, &dts
[2]);
5431 if (reduc_index
== 2)
5432 vec_else_clause
= reduc_def
;
5435 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
5437 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
5438 NULL
, >emp
, &def
, &dts
[3]);
5444 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
5445 vec_oprnds0
.pop ());
5446 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
5447 vec_oprnds1
.pop ());
5448 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
5449 vec_oprnds2
.pop ());
5450 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
5451 vec_oprnds3
.pop ());
5456 vec_oprnds0
.quick_push (vec_cond_lhs
);
5457 vec_oprnds1
.quick_push (vec_cond_rhs
);
5458 vec_oprnds2
.quick_push (vec_then_clause
);
5459 vec_oprnds3
.quick_push (vec_else_clause
);
5462 /* Arguments are ready. Create the new vector stmt. */
5463 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
5465 vec_cond_rhs
= vec_oprnds1
[i
];
5466 vec_then_clause
= vec_oprnds2
[i
];
5467 vec_else_clause
= vec_oprnds3
[i
];
5469 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
5470 vec_cond_lhs
, vec_cond_rhs
);
5471 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
5472 vec_compare
, vec_then_clause
, vec_else_clause
);
5474 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
5475 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5476 gimple_assign_set_lhs (new_stmt
, new_temp
);
5477 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5479 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5486 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5488 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5490 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5493 vec_oprnds0
.release ();
5494 vec_oprnds1
.release ();
5495 vec_oprnds2
.release ();
5496 vec_oprnds3
.release ();
5502 /* Make sure the statement is vectorizable. */
5505 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
5507 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5508 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5509 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
5511 tree scalar_type
, vectype
;
5512 gimple pattern_stmt
;
5513 gimple_seq pattern_def_seq
;
5515 if (dump_enabled_p ())
5517 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
5518 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
5521 if (gimple_has_volatile_ops (stmt
))
5523 if (dump_enabled_p ())
5524 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5525 "not vectorized: stmt has volatile operands");
  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     Pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal; don't analyze pattern stmts there, since the pattern stmts
     will already be part of the SLP instance.  */
5544 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
5545 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5546 && !STMT_VINFO_LIVE_P (stmt_info
))
5548 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5550 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5551 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5553 /* Analyze PATTERN_STMT instead of the original stmt. */
5554 stmt
= pattern_stmt
;
5555 stmt_info
= vinfo_for_stmt (pattern_stmt
);
5556 if (dump_enabled_p ())
5558 dump_printf_loc (MSG_NOTE
, vect_location
,
5559 "==> examining pattern statement: ");
5560 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
5565 if (dump_enabled_p ())
5566 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.");
5571 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5574 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5575 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5577 /* Analyze PATTERN_STMT too. */
5578 if (dump_enabled_p ())
5580 dump_printf_loc (MSG_NOTE
, vect_location
,
5581 "==> examining pattern statement: ");
5582 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
5585 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
5589 if (is_pattern_stmt_p (stmt_info
)
5591 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
5593 gimple_stmt_iterator si
;
5595 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
5597 gimple pattern_def_stmt
= gsi_stmt (si
);
5598 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
5599 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
5601 /* Analyze def stmt of STMT if it's a pattern stmt. */
5602 if (dump_enabled_p ())
5604 dump_printf_loc (MSG_NOTE
, vect_location
,
5605 "==> examining pattern def statement: ");
5606 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
5609 if (!vect_analyze_stmt (pattern_def_stmt
,
5610 need_to_vectorize
, node
))
5616 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
5618 case vect_internal_def
:
5621 case vect_reduction_def
:
5622 case vect_nested_cycle
:
5623 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
5624 || relevance
== vect_used_in_outer_by_reduction
5625 || relevance
== vect_unused_in_scope
));
5628 case vect_induction_def
:
5629 case vect_constant_def
:
5630 case vect_external_def
:
5631 case vect_unknown_def_type
:
5638 gcc_assert (PURE_SLP_STMT (stmt_info
));
5640 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
5641 if (dump_enabled_p ())
5643 dump_printf_loc (MSG_NOTE
, vect_location
,
5644 "get vectype for scalar type: ");
5645 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
5648 vectype
= get_vectype_for_scalar_type (scalar_type
);
5651 if (dump_enabled_p ())
5653 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5654 "not SLPed: unsupported data-type ");
5655 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
5661 if (dump_enabled_p ())
5663 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
5664 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
5667 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
5670 if (STMT_VINFO_RELEVANT_P (stmt_info
))
5672 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
5673 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
5674 *need_to_vectorize
= true;
5679 && (STMT_VINFO_RELEVANT_P (stmt_info
)
5680 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
5681 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
5682 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
5683 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
5684 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
5685 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
5686 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
5687 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
5688 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
5689 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
5693 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, node
)
5694 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
5695 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
5696 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
5697 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
5698 || vectorizable_call (stmt
, NULL
, NULL
, node
)
5699 || vectorizable_store (stmt
, NULL
, NULL
, node
)
5700 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
5705 if (dump_enabled_p ())
5707 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5708 "not vectorized: relevant stmt not ");
5709 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
5710 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
5719 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5720 need extra handling, except for vectorizable reductions. */
5721 if (STMT_VINFO_LIVE_P (stmt_info
)
5722 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
5723 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
5727 if (dump_enabled_p ())
5729 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5730 "not vectorized: live stmt not ");
5731 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
5732 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
5742 /* Function vect_transform_stmt.
5744 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5747 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
5748 bool *grouped_store
, slp_tree slp_node
,
5749 slp_instance slp_node_instance
)
5751 bool is_store
= false;
5752 gimple vec_stmt
= NULL
;
5753 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5756 switch (STMT_VINFO_TYPE (stmt_info
))
5758 case type_demotion_vec_info_type
:
5759 case type_promotion_vec_info_type
:
5760 case type_conversion_vec_info_type
:
5761 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
5765 case induc_vec_info_type
:
5766 gcc_assert (!slp_node
);
5767 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
5771 case shift_vec_info_type
:
5772 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
5776 case op_vec_info_type
:
5777 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
5781 case assignment_vec_info_type
:
5782 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
5786 case load_vec_info_type
:
5787 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
5792 case store_vec_info_type
:
5793 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
5795 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
	  /* In case of interleaving, the whole chain is vectorized when the
	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their vec_stmt_info shouldn't be freed
	     meanwhile.  */
	  *grouped_store = true;
	  if (STMT_VINFO_VEC_STMT (stmt_info))
5809 case condition_vec_info_type
:
5810 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
5814 case call_vec_info_type
:
5815 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
5816 stmt
= gsi_stmt (*gsi
);
5819 case reduc_vec_info_type
:
5820 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
5825 if (!STMT_VINFO_LIVE_P (stmt_info
))
5827 if (dump_enabled_p ())
5828 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5829 "stmt not supported.");
5834 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5835 is being vectorized, but outside the immediately enclosing loop. */
5837 && STMT_VINFO_LOOP_VINFO (stmt_info
)
5838 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5839 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
5840 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
5841 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
5842 || STMT_VINFO_RELEVANT (stmt_info
) ==
5843 vect_used_in_outer_by_reduction
))
5845 struct loop
*innerloop
= LOOP_VINFO_LOOP (
5846 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
5847 imm_use_iterator imm_iter
;
5848 use_operand_p use_p
;
5852 if (dump_enabled_p ())
5853 dump_printf_loc (MSG_NOTE
, vect_location
,
5854 "Record the vdef for outer-loop vectorization.");
	  /* Find the relevant loop-exit phi-node, and record the vec_stmt
	     there (to be used when vectorizing outer-loop stmts that use the
	     DEF of STMT).  */
5859 if (gimple_code (stmt
) == GIMPLE_PHI
)
5860 scalar_dest
= PHI_RESULT (stmt
);
5862 scalar_dest
= gimple_assign_lhs (stmt
);
5864 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
5866 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
5868 exit_phi
= USE_STMT (use_p
);
5869 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
5874 /* Handle stmts whose DEF is used outside the loop-nest that is
5875 being vectorized. */
5876 if (STMT_VINFO_LIVE_P (stmt_info
)
5877 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
5879 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
5884 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (gimple first_stmt)
{
5896 gimple next
= first_stmt
;
5898 gimple_stmt_iterator next_si
;
5902 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
5904 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
5905 if (is_pattern_stmt_p (stmt_info
))
5906 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
5907 /* Free the attached stmt_vec_info and remove the stmt. */
5908 next_si
= gsi_for_stmt (next
);
5909 unlink_stmt_vdef (next
);
5910 gsi_remove (&next_si
, true);
5911 release_defs (next
);
5912 free_stmt_vec_info (next
);
5918 /* Function new_stmt_vec_info.
5920 Create and initialize a new stmt_vec_info struct for STMT. */
5923 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
5924 bb_vec_info bb_vinfo
)
5927 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
5929 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
5930 STMT_VINFO_STMT (res
) = stmt
;
5931 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
5932 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
5933 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
5934 STMT_VINFO_LIVE_P (res
) = false;
5935 STMT_VINFO_VECTYPE (res
) = NULL
;
5936 STMT_VINFO_VEC_STMT (res
) = NULL
;
5937 STMT_VINFO_VECTORIZABLE (res
) = true;
5938 STMT_VINFO_IN_PATTERN_P (res
) = false;
5939 STMT_VINFO_RELATED_STMT (res
) = NULL
;
5940 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
5941 STMT_VINFO_DATA_REF (res
) = NULL
;
5943 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
5944 STMT_VINFO_DR_OFFSET (res
) = NULL
;
5945 STMT_VINFO_DR_INIT (res
) = NULL
;
5946 STMT_VINFO_DR_STEP (res
) = NULL
;
5947 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
5949 if (gimple_code (stmt
) == GIMPLE_PHI
5950 && is_loop_header_bb_p (gimple_bb (stmt
)))
5951 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
5953 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
5955 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
5956 STMT_SLP_TYPE (res
) = loop_vect
;
5957 GROUP_FIRST_ELEMENT (res
) = NULL
;
5958 GROUP_NEXT_ELEMENT (res
) = NULL
;
5959 GROUP_SIZE (res
) = 0;
5960 GROUP_STORE_COUNT (res
) = 0;
5961 GROUP_GAP (res
) = 0;
5962 GROUP_SAME_DR_STMT (res
) = NULL
;
5963 GROUP_READ_WRITE_DEPENDENCE (res
) = false;
5969 /* Create a hash table for stmt_vec_info. */
5972 init_stmt_vec_info_vec (void)
5974 gcc_assert (!stmt_vec_info_vec
.exists ());
5975 stmt_vec_info_vec
.create (50);
5979 /* Free hash table for stmt_vec_info. */
5982 free_stmt_vec_info_vec (void)
5986 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
5988 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info
) info
));
5989 gcc_assert (stmt_vec_info_vec
.exists ());
5990 stmt_vec_info_vec
.release ();
5994 /* Free stmt vectorization related info. */
5997 free_stmt_vec_info (gimple stmt
)
5999 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6004 /* Check if this statement has a related "pattern stmt"
6005 (introduced by the vectorizer during the pattern recognition
6006 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
6008 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
6010 stmt_vec_info patt_info
6011 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
6014 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
6017 gimple_stmt_iterator si
;
6018 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
6019 free_stmt_vec_info (gsi_stmt (si
));
6021 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info
));
6025 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
6026 set_vinfo_for_stmt (stmt
, NULL
);
/* Function get_vectype_for_scalar_type_and_size.

   Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
   by the target.  */

static tree
get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
{
6039 enum machine_mode inner_mode
= TYPE_MODE (scalar_type
);
6040 enum machine_mode simd_mode
;
6041 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
6048 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
6049 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
  if (INTEGRAL_TYPE_P (scalar_type)
      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
						  TYPE_UNSIGNED (scalar_type));
6064 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6065 When the component mode passes the above test simply use a type
6066 corresponding to that mode. The theory is that any use that
6067 would cause problems with this will disable vectorization anyway. */
6068 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
6069 && !INTEGRAL_TYPE_P (scalar_type
)
6070 && !POINTER_TYPE_P (scalar_type
))
6071 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
6073 /* We can't build a vector type of elements with alignment bigger than
6075 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
6076 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
6077 TYPE_UNSIGNED (scalar_type
));
  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
  if (scalar_type == NULL_TREE)
    return NULL_TREE;
6084 /* If no size was supplied use the mode the target prefers. Otherwise
6085 lookup a vector mode of the specified size. */
6087 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
6089 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
6090 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
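  /* For example (hypothetical target, not part of the original code): with
     16-byte SIMD registers and a 4-byte inner mode, nbytes is 4, so
     nunits = 16 / 4 = 4 and the vector type built below has 4 units
     (e.g. V4SI for a 32-bit integer type).  */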
6094 vectype
= build_vector_type (scalar_type
, nunits
);
6095 if (dump_enabled_p ())
6097 dump_printf_loc (MSG_NOTE
, vect_location
,
6098 "get vectype with %d units of type ", nunits
);
6099 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
6105 if (dump_enabled_p ())
6107 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
6108 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
6111 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
6112 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
6114 if (dump_enabled_p ())
6115 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6116 "mode not supported by target.");
6123 unsigned int current_vector_size
;
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
6134 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
6135 current_vector_size
);
6137 && current_vector_size
== 0)
6138 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}
/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of STMT in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */
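
/* For illustration (hypothetical loop, not part of the original code), in

     for (i = 0; i < n; i++)
       a[i] = b[i] + c * x;

   the value loaded from b[i] (defined in the current iteration,
   vect_internal_def), the constant c (vect_constant_def) and the
   loop-invariant x (vect_external_def) are all simple uses, whereas an
   operand carried in from a previous iteration (a reduction or induction
   PHI) is classified by its def type and may not be supportable here.  */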
6171 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
6172 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
6173 tree
*def
, enum vect_def_type
*dt
)
6176 stmt_vec_info stmt_vinfo
;
6177 struct loop
*loop
= NULL
;
6180 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6185 if (dump_enabled_p ())
6187 dump_printf_loc (MSG_NOTE
, vect_location
,
6188 "vect_is_simple_use: operand ");
6189 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
6192 if (CONSTANT_CLASS_P (operand
))
6194 *dt
= vect_constant_def
;
6198 if (is_gimple_min_invariant (operand
))
6201 *dt
= vect_external_def
;
6205 if (TREE_CODE (operand
) == PAREN_EXPR
)
6207 if (dump_enabled_p ())
6208 dump_printf_loc (MSG_NOTE
, vect_location
, "non-associatable copy.");
6209 operand
= TREE_OPERAND (operand
, 0);
6212 if (TREE_CODE (operand
) != SSA_NAME
)
6214 if (dump_enabled_p ())
6215 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6220 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
6221 if (*def_stmt
== NULL
)
6223 if (dump_enabled_p ())
6224 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6229 if (dump_enabled_p ())
6231 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
6232 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
6235 /* Empty stmt is expected only in case of a function argument.
6236 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
6237 if (gimple_nop_p (*def_stmt
))
6240 *dt
= vect_external_def
;
6244 bb
= gimple_bb (*def_stmt
);
6246 if ((loop
&& !flow_bb_inside_loop_p (loop
, bb
))
6247 || (!loop
&& bb
!= BB_VINFO_BB (bb_vinfo
))
6248 || (!loop
&& gimple_code (*def_stmt
) == GIMPLE_PHI
))
6249 *dt
= vect_external_def
;
6252 stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
6253 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
6256 if (*dt
== vect_unknown_def_type
6258 && *dt
== vect_double_reduction_def
6259 && gimple_code (stmt
) != GIMPLE_PHI
))
6261 if (dump_enabled_p ())
6262 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6263 "Unsupported pattern.");
6267 if (dump_enabled_p ())
6268 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: %d.", *dt
);
6270 switch (gimple_code (*def_stmt
))
6273 *def
= gimple_phi_result (*def_stmt
);
6277 *def
= gimple_assign_lhs (*def_stmt
);
6281 *def
= gimple_call_lhs (*def_stmt
);
6286 if (dump_enabled_p ())
6287 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6288 "unsupported defining stmt: ");
/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */
6305 vect_is_simple_use_1 (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
6306 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
6307 tree
*def
, enum vect_def_type
*dt
, tree
*vectype
)
6309 if (!vect_is_simple_use (operand
, stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
6313 /* Now get a vector type if the def is internal, otherwise supply
6314 NULL_TREE and leave it up to the caller to figure out a proper
6315 type for the use stmt. */
6316 if (*dt
== vect_internal_def
6317 || *dt
== vect_induction_def
6318 || *dt
== vect_reduction_def
6319 || *dt
== vect_double_reduction_def
6320 || *dt
== vect_nested_cycle
)
6322 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
6324 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
6325 && !STMT_VINFO_RELEVANT (stmt_info
)
6326 && !STMT_VINFO_LIVE_P (stmt_info
))
6327 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
6329 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6330 gcc_assert (*vectype
!= NULL_TREE
);
6332 else if (*dt
== vect_uninitialized_def
6333 || *dt
== vect_constant_def
6334 || *dt
== vect_external_def
)
6335 *vectype
= NULL_TREE
;
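/* Usage illustration (editor's sketch, guarded out).  vect_is_simple_use_1
   is the variant callers use when they also need the operand's vector type;
   for constant or external defs *VECTYPE comes back as NULL_TREE and the
   caller derives a type itself, e.g. from the scalar type via
   get_vectype_for_scalar_type (declared in tree-vectorizer.h).  The helper
   name example_get_operand_vectype is hypothetical.  */
#if 0
static bool
example_get_operand_vectype (tree op0, gimple stmt, loop_vec_info loop_vinfo,
                             bb_vec_info bb_vinfo, tree *vectype_out)
{
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;
  tree vectype;

  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt, &vectype))
    return false;

  if (vectype == NULL_TREE)
    /* Constant or external def: pick a vector type based on the scalar
       type of OP0 instead.  */
    vectype = get_vectype_for_scalar_type (TREE_TYPE (op0));

  *vectype_out = vectype;
  return vectype != NULL_TREE;
}
#endif
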
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
                                tree vectype_out, tree vectype_in,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                vec<tree> *interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      /* The result of a vectorized widening operation usually requires
         two vectors (because the widened results do not fit into one vector).
         The generated vector results would normally be expected to be
         generated in the same order as in the original scalar computation,
         i.e. if 8 results are generated in each vector iteration, they are
         to be organized as follows:
                vect1: [res1,res2,res3,res4],
                vect2: [res5,res6,res7,res8].

         However, in the special case that the result of the widening
         operation is used in a reduction computation only, the order doesn't
         matter (because when vectorizing a reduction we change the order of
         the computation).  Some targets can take advantage of this and
         generate more efficient code.  For example, targets like Altivec,
         that support widen_mult using a sequence of {mult_even,mult_odd}
         generate the following vectors:
                vect1: [res1,res3,res5,res7],
                vect2: [res2,res4,res6,res8].

         When vectorizing outer-loops, we execute the inner-loop sequentially
         (each vectorized inner-loop iteration contributes to VF outer-loop
         iterations in parallel).  We therefore don't allow changing the
         order of the computation in the inner-loop during outer-loop
         vectorization.  */
      /* TODO: Another case in which order doesn't *really* matter is when we
         widen and then contract again, e.g. (short)((int)x * y >> 8).
         Normally, pack_trunc performs an even/odd permute, whereas the
         repack from an even/odd expansion would be an interleave, which
         would be significantly simpler for e.g. AVX2.  */
      /* In any case, in order to avoid duplicating the code below, recurse
         on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
         are properly set up for the caller.  If we fail, we'll continue with
         a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
      if (vect_loop
          && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
          && !nested_in_vect_loop_p (vect_loop, stmt)
          && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
                                             stmt, vectype_out, vectype_in,
                                             code1, code2, multi_step_cvt,
                                             interm_types))
        return true;
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case VEC_WIDEN_MULT_EVEN_EXPR:
      /* Support the recursion induced just above.  */
      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
      c2 = VEC_WIDEN_MULT_ODD_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}

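/* Concept illustration (editor's sketch, guarded out; a standalone program,
   not GCC internals).  The WIDEN_MULT_EXPR comment above notes that when the
   widened products feed only a reduction, the even/odd output order produced
   by a {mult_even,mult_odd} sequence (as on Altivec) is as acceptable as the
   in-order lo/hi split, because a sum is insensitive to the order of its
   terms.  The snippet below simply demonstrates that both orderings of the
   eight widened products reduce to the same value.  */
#if 0
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  int16_t a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  int16_t b[8] = { 8, 7, 6, 5, 4, 3, 2, 1 };
  int32_t lo_hi[8], even_odd[8];
  int32_t sum1 = 0, sum2 = 0;
  int i;

  /* In-order split: vect1 = products 0..3, vect2 = products 4..7.  */
  for (i = 0; i < 8; i++)
    lo_hi[i] = (int32_t) a[i] * b[i];

  /* Even/odd split: vect1 = products 0,2,4,6, vect2 = products 1,3,5,7.  */
  for (i = 0; i < 4; i++)
    {
      even_odd[i] = (int32_t) a[2 * i] * b[2 * i];
      even_odd[i + 4] = (int32_t) a[2 * i + 1] * b[2 * i + 1];
    }

  for (i = 0; i < 8; i++)
    {
      sum1 += lo_hi[i];
      sum2 += even_odd[i];
    }

  /* The reduction result is the same for either element order.  */
  assert (sum1 == sum2);
  printf ("dot product = %d\n", sum1);
  return 0;
}
#endif
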
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 vec<tree> *interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}

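/* Concept illustration (editor's sketch, guarded out; a standalone program,
   not GCC internals).  Per the comment above, a narrowing such as
   int -> char may be reported as a multi-step conversion with
   MULTI_STEP_CVT == 1 and short as the single entry in INTERM_TYPES
   (whether that happens depends on the target's pack_trunc support).
   Element-wise, the two packing steps are just two modular truncations,
   which compose to the direct truncation; the snippet checks this for a
   few values, assuming the usual two's-complement modulo conversion that
   GCC implements.  */
#if 0
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  int32_t x[4] = { 1, 300, -5, 70000 };
  int i;

  for (i = 0; i < 4; i++)
    {
      int16_t step1 = (int16_t) x[i];   /* int -> short (first pack).    */
      int8_t step2 = (int8_t) step1;    /* short -> char (second pack).  */
      assert (step2 == (int8_t) x[i]);  /* Same as narrowing directly.   */
    }
  return 0;
}
#endif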