/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
35 #include "recog.h" /* FIXME: for insn_data */
37 #include "diagnostic-core.h"
38 #include "tree-vectorizer.h"
41 /* For lang_hooks.types.type_for_mode. */
42 #include "langhooks.h"
44 /* Return the vectorized type for the given statement. */
47 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
49 return STMT_VINFO_VECTYPE (stmt_info
);
52 /* Return TRUE iff the given statement is in an inner loop relative to
53 the loop being vectorized. */
55 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
57 gimple stmt
= STMT_VINFO_STMT (stmt_info
);
58 basic_block bb
= gimple_bb (stmt
);
59 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
65 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
67 return (bb
->loop_father
== loop
->inner
);
70 /* Record the cost of a statement, either by directly informing the
71 target model or by saving it in a vector for later processing.
72 Return a preliminary estimate of the statement's cost. */
75 record_stmt_cost (stmt_vector_for_cost
*stmt_cost_vec
, int count
,
76 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
81 tree vectype
= stmt_vectype (stmt_info
);
82 add_stmt_info_to_vec (stmt_cost_vec
, count
, kind
,
83 STMT_VINFO_STMT (stmt_info
), misalign
);
85 (targetm
.vectorize
.builtin_vectorization_cost (kind
, vectype
, misalign
)
91 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
92 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
93 void *target_cost_data
;
96 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
98 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
100 return add_stmt_cost (target_cost_data
, count
, kind
, stmt_info
, misalign
);
104 /* Return a variable of type ELEM_TYPE[NELEMS]. */
107 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
109 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
113 /* ARRAY is an array of vectors created by create_vector_array.
114 Return an SSA_NAME for the vector in index N. The reference
115 is part of the vectorization of STMT and the vector is associated
116 with scalar destination SCALAR_DEST. */
119 read_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
120 tree array
, unsigned HOST_WIDE_INT n
)
122 tree vect_type
, vect
, vect_name
, array_ref
;
125 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
126 vect_type
= TREE_TYPE (TREE_TYPE (array
));
127 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
128 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
129 build_int_cst (size_type_node
, n
),
130 NULL_TREE
, NULL_TREE
);
132 new_stmt
= gimple_build_assign (vect
, array_ref
);
133 vect_name
= make_ssa_name (vect
, new_stmt
);
134 gimple_assign_set_lhs (new_stmt
, vect_name
);
135 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
140 /* ARRAY is an array of vectors created by create_vector_array.
141 Emit code to store SSA_NAME VECT in index N of the array.
142 The store is part of the vectorization of STMT. */
145 write_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
146 tree array
, unsigned HOST_WIDE_INT n
)
151 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
152 build_int_cst (size_type_node
, n
),
153 NULL_TREE
, NULL_TREE
);
155 new_stmt
= gimple_build_assign (array_ref
, vect
);
156 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
159 /* PTR is a pointer to an array of type TYPE. Return a representation
160 of *PTR. The memory reference replaces those in FIRST_DR
164 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
166 tree mem_ref
, alias_ptr_type
;
168 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
169 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
170 /* Arrays have the same alignment as their type. */
171 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
175 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
177 /* Function vect_mark_relevant.
179 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
182 vect_mark_relevant (VEC(gimple
,heap
) **worklist
, gimple stmt
,
183 enum vect_relevant relevant
, bool live_p
,
184 bool used_in_pattern
)
186 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
187 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
188 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
191 if (vect_print_dump_info (REPORT_DETAILS
))
192 fprintf (vect_dump
, "mark relevant %d, live %d.", relevant
, live_p
);
194 /* If this stmt is an original stmt in a pattern, we might need to mark its
195 related pattern stmt instead of the original stmt. However, such stmts
196 may have their own uses that are not in any pattern, in such cases the
197 stmt itself should be marked. */
198 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
201 if (!used_in_pattern
)
203 imm_use_iterator imm_iter
;
207 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
208 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
210 if (is_gimple_assign (stmt
))
211 lhs
= gimple_assign_lhs (stmt
);
213 lhs
= gimple_call_lhs (stmt
);
215 /* This use is out of pattern use, if LHS has other uses that are
216 pattern uses, we should mark the stmt itself, and not the pattern
218 if (TREE_CODE (lhs
) == SSA_NAME
)
219 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
221 if (is_gimple_debug (USE_STMT (use_p
)))
223 use_stmt
= USE_STMT (use_p
);
225 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
228 if (vinfo_for_stmt (use_stmt
)
229 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
239 /* This is the last stmt in a sequence that was detected as a
240 pattern that can potentially be vectorized. Don't mark the stmt
241 as relevant/live because it's not going to be vectorized.
242 Instead mark the pattern-stmt that replaces it. */
244 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
246 if (vect_print_dump_info (REPORT_DETAILS
))
247 fprintf (vect_dump
, "last stmt in pattern. don't mark"
249 stmt_info
= vinfo_for_stmt (pattern_stmt
);
250 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
251 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
252 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
257 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
258 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
259 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
261 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
262 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
264 if (vect_print_dump_info (REPORT_DETAILS
))
265 fprintf (vect_dump
, "already marked relevant/live.");
269 VEC_safe_push (gimple
, heap
, *worklist
, stmt
);
273 /* Function vect_stmt_relevant_p.
275 Return true if STMT in loop that is represented by LOOP_VINFO is
276 "relevant for vectorization".
278 A stmt is considered "relevant for vectorization" if:
279 - it has uses outside the loop.
280 - it has vdefs (it alters memory).
281 - control stmts in the loop (except for the exit condition).
283 CHECKME: what other side effects would the vectorizer allow? */
286 vect_stmt_relevant_p (gimple stmt
, loop_vec_info loop_vinfo
,
287 enum vect_relevant
*relevant
, bool *live_p
)
289 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
291 imm_use_iterator imm_iter
;
295 *relevant
= vect_unused_in_scope
;
298 /* cond stmt other than loop exit cond. */
299 if (is_ctrl_stmt (stmt
)
300 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
301 != loop_exit_ctrl_vec_info_type
)
302 *relevant
= vect_used_in_scope
;
304 /* changing memory. */
305 if (gimple_code (stmt
) != GIMPLE_PHI
)
306 if (gimple_vdef (stmt
))
308 if (vect_print_dump_info (REPORT_DETAILS
))
309 fprintf (vect_dump
, "vec_stmt_relevant_p: stmt has vdefs.");
310 *relevant
= vect_used_in_scope
;
313 /* uses outside the loop. */
314 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
316 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
318 basic_block bb
= gimple_bb (USE_STMT (use_p
));
319 if (!flow_bb_inside_loop_p (loop
, bb
))
321 if (vect_print_dump_info (REPORT_DETAILS
))
322 fprintf (vect_dump
, "vec_stmt_relevant_p: used out of loop.");
324 if (is_gimple_debug (USE_STMT (use_p
)))
327 /* We expect all such uses to be in the loop exit phis
328 (because of loop closed form) */
329 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
330 gcc_assert (bb
== single_exit (loop
)->dest
);
337 return (*live_p
|| *relevant
);
341 /* Function exist_non_indexing_operands_for_use_p
343 USE is one of the uses attached to STMT. Check if USE is
344 used in STMT for anything other than indexing an array. */
347 exist_non_indexing_operands_for_use_p (tree use
, gimple stmt
)
350 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
352 /* USE corresponds to some operand in STMT. If there is no data
353 reference in STMT, then any operand that corresponds to USE
354 is not indexing an array. */
355 if (!STMT_VINFO_DATA_REF (stmt_info
))
358 /* STMT has a data_ref. FORNOW this means that its of one of
362 (This should have been verified in analyze_data_refs).
364 'var' in the second case corresponds to a def, not a use,
365 so USE cannot correspond to any operands that are not used
368 Therefore, all we need to check is if STMT falls into the
369 first case, and whether var corresponds to USE. */
371 if (!gimple_assign_copy_p (stmt
))
373 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
375 operand
= gimple_assign_rhs1 (stmt
);
376 if (TREE_CODE (operand
) != SSA_NAME
)
387 Function process_use.
390 - a USE in STMT in a loop represented by LOOP_VINFO
391 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
392 that defined USE. This is done by calling mark_relevant and passing it
393 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
394 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
398 Generally, LIVE_P and RELEVANT are used to define the liveness and
399 relevance info of the DEF_STMT of this USE:
400 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
401 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
403 - case 1: If USE is used only for address computations (e.g. array indexing),
404 which does not need to be directly vectorized, then the liveness/relevance
405 of the respective DEF_STMT is left unchanged.
406 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
407 skip DEF_STMT cause it had already been processed.
408 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
409 be modified accordingly.
411 Return true if everything is as expected. Return false otherwise. */
414 process_use (gimple stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
415 enum vect_relevant relevant
, VEC(gimple
,heap
) **worklist
,
418 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
419 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
420 stmt_vec_info dstmt_vinfo
;
421 basic_block bb
, def_bb
;
424 enum vect_def_type dt
;
426 /* case 1: we are only interested in uses that need to be vectorized. Uses
427 that are used for address computation are not considered relevant. */
428 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
431 if (!vect_is_simple_use (use
, stmt
, loop_vinfo
, NULL
, &def_stmt
, &def
, &dt
))
433 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
434 fprintf (vect_dump
, "not vectorized: unsupported use in stmt.");
438 if (!def_stmt
|| gimple_nop_p (def_stmt
))
441 def_bb
= gimple_bb (def_stmt
);
442 if (!flow_bb_inside_loop_p (loop
, def_bb
))
444 if (vect_print_dump_info (REPORT_DETAILS
))
445 fprintf (vect_dump
, "def_stmt is out of loop.");
449 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
450 DEF_STMT must have already been processed, because this should be the
451 only way that STMT, which is a reduction-phi, was put in the worklist,
452 as there should be no other uses for DEF_STMT in the loop. So we just
453 check that everything is as expected, and we are done. */
454 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
455 bb
= gimple_bb (stmt
);
456 if (gimple_code (stmt
) == GIMPLE_PHI
457 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
458 && gimple_code (def_stmt
) != GIMPLE_PHI
459 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
460 && bb
->loop_father
== def_bb
->loop_father
)
462 if (vect_print_dump_info (REPORT_DETAILS
))
463 fprintf (vect_dump
, "reduc-stmt defining reduc-phi in the same nest.");
464 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
465 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
466 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
467 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
468 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
472 /* case 3a: outer-loop stmt defining an inner-loop stmt:
473 outer-loop-header-bb:
479 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
481 if (vect_print_dump_info (REPORT_DETAILS
))
482 fprintf (vect_dump
, "outer-loop def-stmt defining inner-loop stmt.");
486 case vect_unused_in_scope
:
487 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
488 vect_used_in_scope
: vect_unused_in_scope
;
491 case vect_used_in_outer_by_reduction
:
492 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
493 relevant
= vect_used_by_reduction
;
496 case vect_used_in_outer
:
497 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
498 relevant
= vect_used_in_scope
;
501 case vect_used_in_scope
:
509 /* case 3b: inner-loop stmt defining an outer-loop stmt:
510 outer-loop-header-bb:
514 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
516 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
518 if (vect_print_dump_info (REPORT_DETAILS
))
519 fprintf (vect_dump
, "inner-loop def-stmt defining outer-loop stmt.");
523 case vect_unused_in_scope
:
524 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
525 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
526 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
529 case vect_used_by_reduction
:
530 relevant
= vect_used_in_outer_by_reduction
;
533 case vect_used_in_scope
:
534 relevant
= vect_used_in_outer
;
542 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
,
543 is_pattern_stmt_p (stmt_vinfo
));
548 /* Function vect_mark_stmts_to_be_vectorized.
550 Not all stmts in the loop need to be vectorized. For example:
559 Stmt 1 and 3 do not need to be vectorized, because loop control and
560 addressing of vectorized data-refs are handled differently.
562 This pass detects such stmts. */
565 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
567 VEC(gimple
,heap
) *worklist
;
568 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
569 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
570 unsigned int nbbs
= loop
->num_nodes
;
571 gimple_stmt_iterator si
;
574 stmt_vec_info stmt_vinfo
;
578 enum vect_relevant relevant
, tmp_relevant
;
579 enum vect_def_type def_type
;
581 if (vect_print_dump_info (REPORT_DETAILS
))
582 fprintf (vect_dump
, "=== vect_mark_stmts_to_be_vectorized ===");
584 worklist
= VEC_alloc (gimple
, heap
, 64);
586 /* 1. Init worklist. */
587 for (i
= 0; i
< nbbs
; i
++)
590 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
593 if (vect_print_dump_info (REPORT_DETAILS
))
595 fprintf (vect_dump
, "init: phi relevant? ");
596 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
599 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
600 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
, false);
602 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
604 stmt
= gsi_stmt (si
);
605 if (vect_print_dump_info (REPORT_DETAILS
))
607 fprintf (vect_dump
, "init: stmt relevant? ");
608 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
611 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
612 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
, false);
616 /* 2. Process_worklist */
617 while (VEC_length (gimple
, worklist
) > 0)
622 stmt
= VEC_pop (gimple
, worklist
);
623 if (vect_print_dump_info (REPORT_DETAILS
))
625 fprintf (vect_dump
, "worklist: examine stmt: ");
626 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
629 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
630 (DEF_STMT) as relevant/irrelevant and live/dead according to the
631 liveness and relevance properties of STMT. */
632 stmt_vinfo
= vinfo_for_stmt (stmt
);
633 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
634 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
636 /* Generally, the liveness and relevance properties of STMT are
637 propagated as is to the DEF_STMTs of its USEs:
638 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
639 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
641 One exception is when STMT has been identified as defining a reduction
642 variable; in this case we set the liveness/relevance as follows:
644 relevant = vect_used_by_reduction
645 This is because we distinguish between two kinds of relevant stmts -
646 those that are used by a reduction computation, and those that are
647 (also) used by a regular computation. This allows us later on to
648 identify stmts that are used solely by a reduction, and therefore the
649 order of the results that they produce does not have to be kept. */
651 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
652 tmp_relevant
= relevant
;
655 case vect_reduction_def
:
656 switch (tmp_relevant
)
658 case vect_unused_in_scope
:
659 relevant
= vect_used_by_reduction
;
662 case vect_used_by_reduction
:
663 if (gimple_code (stmt
) == GIMPLE_PHI
)
668 if (vect_print_dump_info (REPORT_DETAILS
))
669 fprintf (vect_dump
, "unsupported use of reduction.");
671 VEC_free (gimple
, heap
, worklist
);
678 case vect_nested_cycle
:
679 if (tmp_relevant
!= vect_unused_in_scope
680 && tmp_relevant
!= vect_used_in_outer_by_reduction
681 && tmp_relevant
!= vect_used_in_outer
)
683 if (vect_print_dump_info (REPORT_DETAILS
))
684 fprintf (vect_dump
, "unsupported use of nested cycle.");
686 VEC_free (gimple
, heap
, worklist
);
693 case vect_double_reduction_def
:
694 if (tmp_relevant
!= vect_unused_in_scope
695 && tmp_relevant
!= vect_used_by_reduction
)
697 if (vect_print_dump_info (REPORT_DETAILS
))
698 fprintf (vect_dump
, "unsupported use of double reduction.");
700 VEC_free (gimple
, heap
, worklist
);
711 if (is_pattern_stmt_p (stmt_vinfo
))
713 /* Pattern statements are not inserted into the code, so
714 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
715 have to scan the RHS or function arguments instead. */
716 if (is_gimple_assign (stmt
))
718 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
719 tree op
= gimple_assign_rhs1 (stmt
);
722 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
724 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
725 live_p
, relevant
, &worklist
, false)
726 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
727 live_p
, relevant
, &worklist
, false))
729 VEC_free (gimple
, heap
, worklist
);
734 for (; i
< gimple_num_ops (stmt
); i
++)
736 op
= gimple_op (stmt
, i
);
737 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
740 VEC_free (gimple
, heap
, worklist
);
745 else if (is_gimple_call (stmt
))
747 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
749 tree arg
= gimple_call_arg (stmt
, i
);
750 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
753 VEC_free (gimple
, heap
, worklist
);
760 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
762 tree op
= USE_FROM_PTR (use_p
);
763 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
766 VEC_free (gimple
, heap
, worklist
);
771 if (STMT_VINFO_GATHER_P (stmt_vinfo
))
774 tree decl
= vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
776 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
779 VEC_free (gimple
, heap
, worklist
);
783 } /* while worklist */
785 VEC_free (gimple
, heap
, worklist
);
790 /* Function vect_model_simple_cost.
792 Models cost for simple operations, i.e. those that only emit ncopies of a
793 single op. Right now, this does not account for multiple insns that could
794 be generated for the single vector op. We will handle that shortly. */
797 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
798 enum vect_def_type
*dt
, slp_tree slp_node
,
799 stmt_vector_for_cost
*stmt_cost_vec
)
802 int inside_cost
= 0, outside_cost
= 0;
804 /* The SLP costs were already calculated during SLP tree build. */
805 if (PURE_SLP_STMT (stmt_info
))
808 /* FORNOW: Assuming maximum 2 args per stmts. */
809 for (i
= 0; i
< 2; i
++)
811 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
812 outside_cost
+= vect_get_stmt_cost (vector_stmt
);
815 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
816 stmt_vinfo_set_outside_of_loop_cost (stmt_info
, slp_node
, outside_cost
);
818 /* Pass the inside-of-loop statements to the target-specific cost model. */
819 inside_cost
= record_stmt_cost (stmt_cost_vec
, ncopies
, vector_stmt
,
822 if (vect_print_dump_info (REPORT_COST
))
823 fprintf (vect_dump
, "vect_model_simple_cost: inside_cost = %d, "
824 "outside_cost = %d .", inside_cost
, outside_cost
);
828 /* Model cost for type demotion and promotion operations. PWR is normally
829 zero for single-step promotions and demotions. It will be one if
830 two-step promotion/demotion is required, and so on. Each additional
831 step doubles the number of instructions required. */
834 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
835 enum vect_def_type
*dt
, int pwr
)
838 int inside_cost
= 0, outside_cost
= 0;
839 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
840 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
841 void *target_cost_data
;
843 /* The SLP costs were already calculated during SLP tree build. */
844 if (PURE_SLP_STMT (stmt_info
))
848 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
850 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
852 for (i
= 0; i
< pwr
+ 1; i
++)
854 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
856 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
857 vec_promote_demote
, stmt_info
, 0);
860 /* FORNOW: Assuming maximum 2 args per stmts. */
861 for (i
= 0; i
< 2; i
++)
863 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
864 outside_cost
+= vect_get_stmt_cost (vector_stmt
);
867 if (vect_print_dump_info (REPORT_COST
))
868 fprintf (vect_dump
, "vect_model_promotion_demotion_cost: inside_cost = %d, "
869 "outside_cost = %d .", inside_cost
, outside_cost
);
871 /* Set the costs in STMT_INFO. */
872 stmt_vinfo_set_outside_of_loop_cost (stmt_info
, NULL
, outside_cost
);
875 /* Function vect_cost_group_size
877 For grouped load or store, return the group_size only if it is the first
878 load or store of a group, else return 1. This ensures that group size is
879 only returned once per group. */
882 vect_cost_group_size (stmt_vec_info stmt_info
)
884 gimple first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
886 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
887 return GROUP_SIZE (stmt_info
);
893 /* Function vect_model_store_cost
895 Models cost for stores. In the case of grouped accesses, one access
896 has the overhead of the grouped access attributed to it. */
899 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
900 bool store_lanes_p
, enum vect_def_type dt
,
901 slp_tree slp_node
, stmt_vector_for_cost
*stmt_cost_vec
)
904 unsigned int inside_cost
= 0, outside_cost
= 0;
905 struct data_reference
*first_dr
;
908 /* The SLP costs were already calculated during SLP tree build. */
909 if (PURE_SLP_STMT (stmt_info
))
912 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
913 outside_cost
= vect_get_stmt_cost (scalar_to_vec
);
915 /* Grouped access? */
916 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
920 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
925 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
926 group_size
= vect_cost_group_size (stmt_info
);
929 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
931 /* Not a grouped access. */
935 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
938 /* We assume that the cost of a single store-lanes instruction is
939 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
940 access is instead being provided by a permute-and-store operation,
941 include the cost of the permutes. */
942 if (!store_lanes_p
&& group_size
> 1)
944 /* Uses a high and low interleave operation for each needed permute. */
946 int nstmts
= ncopies
* exact_log2 (group_size
) * group_size
;
947 inside_cost
= record_stmt_cost (stmt_cost_vec
, nstmts
, vec_perm
,
950 if (vect_print_dump_info (REPORT_COST
))
951 fprintf (vect_dump
, "vect_model_store_cost: strided group_size = %d .",
955 /* Costs of the stores. */
956 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, stmt_cost_vec
);
958 if (vect_print_dump_info (REPORT_COST
))
959 fprintf (vect_dump
, "vect_model_store_cost: inside_cost = %d, "
960 "outside_cost = %d .", inside_cost
, outside_cost
);
962 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
963 stmt_vinfo_set_outside_of_loop_cost (stmt_info
, slp_node
, outside_cost
);
967 /* Calculate cost of DR's memory access. */
969 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
970 unsigned int *inside_cost
,
971 stmt_vector_for_cost
*stmt_cost_vec
)
973 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
974 gimple stmt
= DR_STMT (dr
);
975 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
977 switch (alignment_support_scheme
)
981 *inside_cost
+= record_stmt_cost (stmt_cost_vec
, ncopies
,
982 vector_store
, stmt_info
, 0);
984 if (vect_print_dump_info (REPORT_COST
))
985 fprintf (vect_dump
, "vect_model_store_cost: aligned.");
990 case dr_unaligned_supported
:
992 /* Here, we assign an additional cost for the unaligned store. */
993 *inside_cost
+= record_stmt_cost (stmt_cost_vec
, ncopies
,
994 unaligned_store
, stmt_info
,
995 DR_MISALIGNMENT (dr
));
997 if (vect_print_dump_info (REPORT_COST
))
998 fprintf (vect_dump
, "vect_model_store_cost: unaligned supported by "
1004 case dr_unaligned_unsupported
:
1006 *inside_cost
= VECT_MAX_COST
;
1008 if (vect_print_dump_info (REPORT_COST
))
1009 fprintf (vect_dump
, "vect_model_store_cost: unsupported access.");
1020 /* Function vect_model_load_cost
1022 Models cost for loads. In the case of grouped accesses, the last access
1023 has the overhead of the grouped access attributed to it. Since unaligned
1024 accesses are supported for loads, we also account for the costs of the
1025 access scheme chosen. */
1028 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
, bool load_lanes_p
,
1029 slp_tree slp_node
, stmt_vector_for_cost
*stmt_cost_vec
)
1033 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1034 unsigned int inside_cost
= 0, outside_cost
= 0;
1036 /* The SLP costs were already calculated during SLP tree build. */
1037 if (PURE_SLP_STMT (stmt_info
))
1040 /* Grouped accesses? */
1041 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1042 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1044 group_size
= vect_cost_group_size (stmt_info
);
1045 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1047 /* Not a grouped access. */
1054 /* We assume that the cost of a single load-lanes instruction is
1055 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1056 access is instead being provided by a load-and-permute operation,
1057 include the cost of the permutes. */
1058 if (!load_lanes_p
&& group_size
> 1)
1060 /* Uses an even and odd extract operations for each needed permute. */
1061 int nstmts
= ncopies
* exact_log2 (group_size
) * group_size
;
1062 inside_cost
+= record_stmt_cost (stmt_cost_vec
, nstmts
, vec_perm
,
1065 if (vect_print_dump_info (REPORT_COST
))
1066 fprintf (vect_dump
, "vect_model_load_cost: strided group_size = %d .",
1070 /* The loads themselves. */
1071 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1073 /* N scalar loads plus gathering them into a vector. */
1074 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1075 inside_cost
+= record_stmt_cost (stmt_cost_vec
,
1076 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1077 scalar_load
, stmt_info
, 0);
1078 inside_cost
+= record_stmt_cost (stmt_cost_vec
, ncopies
, vec_construct
,
1082 vect_get_load_cost (first_dr
, ncopies
,
1083 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1084 || group_size
> 1 || slp_node
),
1085 &inside_cost
, &outside_cost
, stmt_cost_vec
);
1087 if (vect_print_dump_info (REPORT_COST
))
1088 fprintf (vect_dump
, "vect_model_load_cost: inside_cost = %d, "
1089 "outside_cost = %d .", inside_cost
, outside_cost
);
1091 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
1092 stmt_vinfo_set_outside_of_loop_cost (stmt_info
, slp_node
, outside_cost
);
1096 /* Calculate cost of DR's memory access. */
1098 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1099 bool add_realign_cost
, unsigned int *inside_cost
,
1100 unsigned int *outside_cost
,
1101 stmt_vector_for_cost
*stmt_cost_vec
)
1103 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1104 gimple stmt
= DR_STMT (dr
);
1105 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1107 switch (alignment_support_scheme
)
1111 *inside_cost
+= record_stmt_cost (stmt_cost_vec
, ncopies
,
1112 vector_load
, stmt_info
, 0);
1114 if (vect_print_dump_info (REPORT_COST
))
1115 fprintf (vect_dump
, "vect_model_load_cost: aligned.");
1119 case dr_unaligned_supported
:
1121 /* Here, we assign an additional cost for the unaligned load. */
1122 *inside_cost
+= record_stmt_cost (stmt_cost_vec
, ncopies
,
1123 unaligned_load
, stmt_info
,
1124 DR_MISALIGNMENT (dr
));
1126 if (vect_print_dump_info (REPORT_COST
))
1127 fprintf (vect_dump
, "vect_model_load_cost: unaligned supported by "
1132 case dr_explicit_realign
:
1134 *inside_cost
+= record_stmt_cost (stmt_cost_vec
, ncopies
* 2,
1135 vector_load
, stmt_info
, 0);
1136 *inside_cost
+= record_stmt_cost (stmt_cost_vec
, ncopies
,
1137 vec_perm
, stmt_info
, 0);
1139 /* FIXME: If the misalignment remains fixed across the iterations of
1140 the containing loop, the following cost should be added to the
1142 if (targetm
.vectorize
.builtin_mask_for_load
)
1143 *inside_cost
+= record_stmt_cost (stmt_cost_vec
, 1, vector_stmt
,
1146 if (vect_print_dump_info (REPORT_COST
))
1147 fprintf (vect_dump
, "vect_model_load_cost: explicit realign");
1151 case dr_explicit_realign_optimized
:
1153 if (vect_print_dump_info (REPORT_COST
))
1154 fprintf (vect_dump
, "vect_model_load_cost: unaligned software "
1157 /* Unaligned software pipeline has a load of an address, an initial
1158 load, and possibly a mask operation to "prime" the loop. However,
1159 if this is an access in a group of loads, which provide grouped
1160 access, then the above cost should only be considered for one
1161 access in the group. Inside the loop, there is a load op
1162 and a realignment op. */
1164 if (add_realign_cost
)
1166 *outside_cost
= 2 * vect_get_stmt_cost (vector_stmt
);
1167 if (targetm
.vectorize
.builtin_mask_for_load
)
1168 *outside_cost
+= vect_get_stmt_cost (vector_stmt
);
1171 *inside_cost
+= record_stmt_cost (stmt_cost_vec
, ncopies
,
1172 vector_load
, stmt_info
, 0);
1173 *inside_cost
+= record_stmt_cost (stmt_cost_vec
, ncopies
,
1174 vec_perm
, stmt_info
, 0);
1176 if (vect_print_dump_info (REPORT_COST
))
1178 "vect_model_load_cost: explicit realign optimized");
1183 case dr_unaligned_unsupported
:
1185 *inside_cost
= VECT_MAX_COST
;
1187 if (vect_print_dump_info (REPORT_COST
))
1188 fprintf (vect_dump
, "vect_model_load_cost: unsupported access.");
1198 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1199 the loop preheader for the vectorized stmt STMT. */
1202 vect_init_vector_1 (gimple stmt
, gimple new_stmt
, gimple_stmt_iterator
*gsi
)
1205 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1208 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1209 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1213 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1217 if (nested_in_vect_loop_p (loop
, stmt
))
1220 pe
= loop_preheader_edge (loop
);
1221 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1222 gcc_assert (!new_bb
);
1226 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1228 gimple_stmt_iterator gsi_bb_start
;
1230 gcc_assert (bb_vinfo
);
1231 bb
= BB_VINFO_BB (bb_vinfo
);
1232 gsi_bb_start
= gsi_after_labels (bb
);
1233 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1237 if (vect_print_dump_info (REPORT_DETAILS
))
1239 fprintf (vect_dump
, "created new init_stmt: ");
1240 print_gimple_stmt (vect_dump
, new_stmt
, 0, TDF_SLIM
);
1244 /* Function vect_init_vector.
1246 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1247 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1248 vector type a vector with all elements equal to VAL is created first.
1249 Place the initialization at BSI if it is not NULL. Otherwise, place the
1250 initialization at the loop preheader.
1251 Return the DEF of INIT_STMT.
1252 It will be used in the vectorization of STMT. */
1255 vect_init_vector (gimple stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1262 if (TREE_CODE (type
) == VECTOR_TYPE
1263 && TREE_CODE (TREE_TYPE (val
)) != VECTOR_TYPE
)
1265 if (!types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1267 if (CONSTANT_CLASS_P (val
))
1268 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (type
), val
);
1271 new_var
= create_tmp_reg (TREE_TYPE (type
), NULL
);
1272 add_referenced_var (new_var
);
1273 init_stmt
= gimple_build_assign_with_ops (NOP_EXPR
,
1276 new_temp
= make_ssa_name (new_var
, init_stmt
);
1277 gimple_assign_set_lhs (init_stmt
, new_temp
);
1278 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1282 val
= build_vector_from_val (type
, val
);
1285 new_var
= vect_get_new_vect_var (type
, vect_simple_var
, "cst_");
1286 init_stmt
= gimple_build_assign (new_var
, val
);
1287 new_temp
= make_ssa_name (new_var
, init_stmt
);
1288 gimple_assign_set_lhs (init_stmt
, new_temp
);
1289 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1290 vec_oprnd
= gimple_assign_lhs (init_stmt
);
1295 /* Function vect_get_vec_def_for_operand.
1297 OP is an operand in STMT. This function returns a (vector) def that will be
1298 used in the vectorized stmt for STMT.
1300 In the case that OP is an SSA_NAME which is defined in the loop, then
1301 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1303 In case OP is an invariant or constant, a new stmt that creates a vector def
1304 needs to be introduced. */
1307 vect_get_vec_def_for_operand (tree op
, gimple stmt
, tree
*scalar_def
)
1312 stmt_vec_info def_stmt_info
= NULL
;
1313 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1314 unsigned int nunits
;
1315 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1317 enum vect_def_type dt
;
1321 if (vect_print_dump_info (REPORT_DETAILS
))
1323 fprintf (vect_dump
, "vect_get_vec_def_for_operand: ");
1324 print_generic_expr (vect_dump
, op
, TDF_SLIM
);
1327 is_simple_use
= vect_is_simple_use (op
, stmt
, loop_vinfo
, NULL
,
1328 &def_stmt
, &def
, &dt
);
1329 gcc_assert (is_simple_use
);
1330 if (vect_print_dump_info (REPORT_DETAILS
))
1334 fprintf (vect_dump
, "def = ");
1335 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
1339 fprintf (vect_dump
, " def_stmt = ");
1340 print_gimple_stmt (vect_dump
, def_stmt
, 0, TDF_SLIM
);
1346 /* Case 1: operand is a constant. */
1347 case vect_constant_def
:
1349 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1350 gcc_assert (vector_type
);
1351 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1356 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1357 if (vect_print_dump_info (REPORT_DETAILS
))
1358 fprintf (vect_dump
, "Create vector_cst. nunits = %d", nunits
);
1360 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1363 /* Case 2: operand is defined outside the loop - loop invariant. */
1364 case vect_external_def
:
1366 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1367 gcc_assert (vector_type
);
1372 /* Create 'vec_inv = {inv,inv,..,inv}' */
1373 if (vect_print_dump_info (REPORT_DETAILS
))
1374 fprintf (vect_dump
, "Create vector_inv.");
1376 return vect_init_vector (stmt
, def
, vector_type
, NULL
);
1379 /* Case 3: operand is defined inside the loop. */
1380 case vect_internal_def
:
1383 *scalar_def
= NULL
/* FIXME tuples: def_stmt*/;
1385 /* Get the def from the vectorized stmt. */
1386 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1388 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1389 /* Get vectorized pattern statement. */
1391 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1392 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1393 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1394 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1395 gcc_assert (vec_stmt
);
1396 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1397 vec_oprnd
= PHI_RESULT (vec_stmt
);
1398 else if (is_gimple_call (vec_stmt
))
1399 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1401 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1405 /* Case 4: operand is defined by a loop header phi - reduction */
1406 case vect_reduction_def
:
1407 case vect_double_reduction_def
:
1408 case vect_nested_cycle
:
1412 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1413 loop
= (gimple_bb (def_stmt
))->loop_father
;
1415 /* Get the def before the loop */
1416 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
1417 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
1420 /* Case 5: operand is defined by loop-header phi - induction. */
1421 case vect_induction_def
:
1423 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1425 /* Get the def from the vectorized stmt. */
1426 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1427 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1428 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1429 vec_oprnd
= PHI_RESULT (vec_stmt
);
1431 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1441 /* Function vect_get_vec_def_for_stmt_copy
1443 Return a vector-def for an operand. This function is used when the
1444 vectorized stmt to be created (by the caller to this function) is a "copy"
1445 created in case the vectorized result cannot fit in one vector, and several
1446 copies of the vector-stmt are required. In this case the vector-def is
1447 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1448 of the stmt that defines VEC_OPRND.
1449 DT is the type of the vector def VEC_OPRND.
1452 In case the vectorization factor (VF) is bigger than the number
1453 of elements that can fit in a vectype (nunits), we have to generate
1454 more than one vector stmt to vectorize the scalar stmt. This situation
1455 arises when there are multiple data-types operated upon in the loop; the
1456 smallest data-type determines the VF, and as a result, when vectorizing
1457 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1458 vector stmt (each computing a vector of 'nunits' results, and together
1459 computing 'VF' results in each iteration). This function is called when
1460 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1461 which VF=16 and nunits=4, so the number of copies required is 4):
1463 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1465 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1466 VS1.1: vx.1 = memref1 VS1.2
1467 VS1.2: vx.2 = memref2 VS1.3
1468 VS1.3: vx.3 = memref3
1470 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1471 VSnew.1: vz1 = vx.1 + ... VSnew.2
1472 VSnew.2: vz2 = vx.2 + ... VSnew.3
1473 VSnew.3: vz3 = vx.3 + ...
1475 The vectorization of S1 is explained in vectorizable_load.
1476 The vectorization of S2:
1477 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1478 the function 'vect_get_vec_def_for_operand' is called to
1479 get the relevant vector-def for each operand of S2. For operand x it
1480 returns the vector-def 'vx.0'.
1482 To create the remaining copies of the vector-stmt (VSnew.j), this
1483 function is called to get the relevant vector-def for each operand. It is
1484 obtained from the respective VS1.j stmt, which is recorded in the
1485 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1487 For example, to obtain the vector-def 'vx.1' in order to create the
1488 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1489 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1490 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1491 and return its def ('vx.1').
1492 Overall, to create the above sequence this function will be called 3 times:
1493 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1494 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1495 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1498 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1500 gimple vec_stmt_for_operand
;
1501 stmt_vec_info def_stmt_info
;
1503 /* Do nothing; can reuse same def. */
1504 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1507 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1508 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1509 gcc_assert (def_stmt_info
);
1510 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1511 gcc_assert (vec_stmt_for_operand
);
1512 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1513 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1514 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1516 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1521 /* Get vectorized definitions for the operands to create a copy of an original
1522 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1525 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1526 VEC(tree
,heap
) **vec_oprnds0
,
1527 VEC(tree
,heap
) **vec_oprnds1
)
1529 tree vec_oprnd
= VEC_pop (tree
, *vec_oprnds0
);
1531 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1532 VEC_quick_push (tree
, *vec_oprnds0
, vec_oprnd
);
1534 if (vec_oprnds1
&& *vec_oprnds1
)
1536 vec_oprnd
= VEC_pop (tree
, *vec_oprnds1
);
1537 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1538 VEC_quick_push (tree
, *vec_oprnds1
, vec_oprnd
);
1543 /* Get vectorized definitions for OP0 and OP1.
1544 REDUC_INDEX is the index of reduction operand in case of reduction,
1545 and -1 otherwise. */
1548 vect_get_vec_defs (tree op0
, tree op1
, gimple stmt
,
1549 VEC (tree
, heap
) **vec_oprnds0
,
1550 VEC (tree
, heap
) **vec_oprnds1
,
1551 slp_tree slp_node
, int reduc_index
)
1555 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1556 VEC (tree
, heap
) *ops
= VEC_alloc (tree
, heap
, nops
);
1557 VEC (slp_void_p
, heap
) *vec_defs
= VEC_alloc (slp_void_p
, heap
, nops
);
1559 VEC_quick_push (tree
, ops
, op0
);
1561 VEC_quick_push (tree
, ops
, op1
);
1563 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
1565 *vec_oprnds0
= (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 0);
1567 *vec_oprnds1
= (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 1);
1569 VEC_free (tree
, heap
, ops
);
1570 VEC_free (slp_void_p
, heap
, vec_defs
);
1576 *vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
1577 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1578 VEC_quick_push (tree
, *vec_oprnds0
, vec_oprnd
);
1582 *vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
1583 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
1584 VEC_quick_push (tree
, *vec_oprnds1
, vec_oprnd
);
1590 /* Function vect_finish_stmt_generation.
1592 Insert a new stmt. */
1595 vect_finish_stmt_generation (gimple stmt
, gimple vec_stmt
,
1596 gimple_stmt_iterator
*gsi
)
1598 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1599 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1600 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1602 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1604 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1606 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, loop_vinfo
,
1609 if (vect_print_dump_info (REPORT_DETAILS
))
1611 fprintf (vect_dump
, "add new stmt: ");
1612 print_gimple_stmt (vect_dump
, vec_stmt
, 0, TDF_SLIM
);
1615 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1618 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1619 a function declaration if the target has a vectorized version
1620 of the function, or NULL_TREE if the function cannot be vectorized. */
1623 vectorizable_function (gimple call
, tree vectype_out
, tree vectype_in
)
1625 tree fndecl
= gimple_call_fndecl (call
);
1627 /* We only handle functions that do not read or clobber memory -- i.e.
1628 const or novops ones. */
1629 if (!(gimple_call_flags (call
) & (ECF_CONST
| ECF_NOVOPS
)))
1633 || TREE_CODE (fndecl
) != FUNCTION_DECL
1634 || !DECL_BUILT_IN (fndecl
))
1637 return targetm
.vectorize
.builtin_vectorized_function (fndecl
, vectype_out
,
1641 /* Function vectorizable_call.
1643 Check if STMT performs a function call that can be vectorized.
1644 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1645 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1646 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1649 vectorizable_call (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
1655 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
1656 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
1657 tree vectype_out
, vectype_in
;
1660 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1661 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1662 tree fndecl
, new_temp
, def
, rhs_type
;
1664 enum vect_def_type dt
[3]
1665 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
1666 gimple new_stmt
= NULL
;
1668 VEC(tree
, heap
) *vargs
= NULL
;
1669 enum { NARROW
, NONE
, WIDEN
} modifier
;
1673 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
1676 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1679 /* Is STMT a vectorizable call? */
1680 if (!is_gimple_call (stmt
))
1683 if (TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
1686 if (stmt_can_throw_internal (stmt
))
1689 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
1691 /* Process function arguments. */
1692 rhs_type
= NULL_TREE
;
1693 vectype_in
= NULL_TREE
;
1694 nargs
= gimple_call_num_args (stmt
);
1696 /* Bail out if the function has more than three arguments, we do not have
1697 interesting builtin functions to vectorize with more than two arguments
1698 except for fma. No arguments is also not good. */
1699 if (nargs
== 0 || nargs
> 3)
1702 for (i
= 0; i
< nargs
; i
++)
1706 op
= gimple_call_arg (stmt
, i
);
1708 /* We can only handle calls with arguments of the same type. */
1710 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
1712 if (vect_print_dump_info (REPORT_DETAILS
))
1713 fprintf (vect_dump
, "argument types differ.");
1717 rhs_type
= TREE_TYPE (op
);
1719 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
1720 &def_stmt
, &def
, &dt
[i
], &opvectype
))
1722 if (vect_print_dump_info (REPORT_DETAILS
))
1723 fprintf (vect_dump
, "use not simple.");
1728 vectype_in
= opvectype
;
1730 && opvectype
!= vectype_in
)
1732 if (vect_print_dump_info (REPORT_DETAILS
))
1733 fprintf (vect_dump
, "argument vector types differ.");
1737 /* If all arguments are external or constant defs use a vector type with
1738 the same size as the output vector type. */
1740 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
1742 gcc_assert (vectype_in
);
1745 if (vect_print_dump_info (REPORT_DETAILS
))
1747 fprintf (vect_dump
, "no vectype for scalar type ");
1748 print_generic_expr (vect_dump
, rhs_type
, TDF_SLIM
);
1755 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
1756 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
1757 if (nunits_in
== nunits_out
/ 2)
1759 else if (nunits_out
== nunits_in
)
1761 else if (nunits_out
== nunits_in
/ 2)
1766 /* For now, we only vectorize functions if a target specific builtin
1767 is available. TODO -- in some cases, it might be profitable to
1768 insert the calls for pieces of the vector, in order to be able
1769 to vectorize other operations in the loop. */
1770 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
1771 if (fndecl
== NULL_TREE
)
1773 if (vect_print_dump_info (REPORT_DETAILS
))
1774 fprintf (vect_dump
, "function is not vectorizable.");
1779 gcc_assert (!gimple_vuse (stmt
));
1781 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
1783 else if (modifier
== NARROW
)
1784 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
1786 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
1788 /* Sanity check: make sure that at least one copy of the vectorized stmt
1789 needs to be generated. */
1790 gcc_assert (ncopies
>= 1);
1792 if (!vec_stmt
) /* transformation not required. */
1794 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1795 if (vect_print_dump_info (REPORT_DETAILS
))
1796 fprintf (vect_dump
, "=== vectorizable_call ===");
1797 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
1803 if (vect_print_dump_info (REPORT_DETAILS
))
1804 fprintf (vect_dump
, "transform call.");
1807 scalar_dest
= gimple_call_lhs (stmt
);
1808 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
1810 prev_stmt_info
= NULL
;
1814 for (j
= 0; j
< ncopies
; ++j
)
1816 /* Build argument list for the vectorized call. */
1818 vargs
= VEC_alloc (tree
, heap
, nargs
);
1820 VEC_truncate (tree
, vargs
, 0);
1824 VEC (slp_void_p
, heap
) *vec_defs
1825 = VEC_alloc (slp_void_p
, heap
, nargs
);
1826 VEC (tree
, heap
) *vec_oprnds0
;
1828 for (i
= 0; i
< nargs
; i
++)
1829 VEC_quick_push (tree
, vargs
, gimple_call_arg (stmt
, i
));
1830 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
1832 = (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 0);
1834 /* Arguments are ready. Create the new vector stmt. */
1835 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vec_oprnd0
)
1838 for (k
= 0; k
< nargs
; k
++)
1840 VEC (tree
, heap
) *vec_oprndsk
1841 = (VEC (tree
, heap
) *)
1842 VEC_index (slp_void_p
, vec_defs
, k
);
1843 VEC_replace (tree
, vargs
, k
,
1844 VEC_index (tree
, vec_oprndsk
, i
));
1846 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1847 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1848 gimple_call_set_lhs (new_stmt
, new_temp
);
1849 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1850 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
1854 for (i
= 0; i
< nargs
; i
++)
1856 VEC (tree
, heap
) *vec_oprndsi
1857 = (VEC (tree
, heap
) *)
1858 VEC_index (slp_void_p
, vec_defs
, i
);
1859 VEC_free (tree
, heap
, vec_oprndsi
);
1861 VEC_free (slp_void_p
, heap
, vec_defs
);
1865 for (i
= 0; i
< nargs
; i
++)
1867 op
= gimple_call_arg (stmt
, i
);
1870 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1873 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
1875 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1878 VEC_quick_push (tree
, vargs
, vec_oprnd0
);
1881 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1882 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1883 gimple_call_set_lhs (new_stmt
, new_temp
);
1884 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1887 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
1889 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
1891 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
1897 for (j
= 0; j
< ncopies
; ++j
)
1899 /* Build argument list for the vectorized call. */
1901 vargs
= VEC_alloc (tree
, heap
, nargs
* 2);
1903 VEC_truncate (tree
, vargs
, 0);
1907 VEC (slp_void_p
, heap
) *vec_defs
1908 = VEC_alloc (slp_void_p
, heap
, nargs
);
1909 VEC (tree
, heap
) *vec_oprnds0
;
1911 for (i
= 0; i
< nargs
; i
++)
1912 VEC_quick_push (tree
, vargs
, gimple_call_arg (stmt
, i
));
1913 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
1915 = (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 0);
1917 /* Arguments are ready. Create the new vector stmt. */
1918 for (i
= 0; VEC_iterate (tree
, vec_oprnds0
, i
, vec_oprnd0
);
1922 VEC_truncate (tree
, vargs
, 0);
1923 for (k
= 0; k
< nargs
; k
++)
1925 VEC (tree
, heap
) *vec_oprndsk
1926 = (VEC (tree
, heap
) *)
1927 VEC_index (slp_void_p
, vec_defs
, k
);
1928 VEC_quick_push (tree
, vargs
,
1929 VEC_index (tree
, vec_oprndsk
, i
));
1930 VEC_quick_push (tree
, vargs
,
1931 VEC_index (tree
, vec_oprndsk
, i
+ 1));
1933 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1934 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1935 gimple_call_set_lhs (new_stmt
, new_temp
);
1936 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1937 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
1941 for (i
= 0; i
< nargs
; i
++)
1943 VEC (tree
, heap
) *vec_oprndsi
1944 = (VEC (tree
, heap
) *)
1945 VEC_index (slp_void_p
, vec_defs
, i
);
1946 VEC_free (tree
, heap
, vec_oprndsi
);
1948 VEC_free (slp_void_p
, heap
, vec_defs
);
1952 for (i
= 0; i
< nargs
; i
++)
1954 op
= gimple_call_arg (stmt
, i
);
1958 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1960 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1964 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
1966 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
1968 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1971 VEC_quick_push (tree
, vargs
, vec_oprnd0
);
1972 VEC_quick_push (tree
, vargs
, vec_oprnd1
);
1975 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1976 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1977 gimple_call_set_lhs (new_stmt
, new_temp
);
1978 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1981 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
1983 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
1985 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
1988 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
1993 /* No current target implements this case. */
1997 VEC_free (tree
, heap
, vargs
);
1999 /* Update the exception handling table with the vector stmt if necessary. */
2000 if (maybe_clean_or_replace_eh_stmt (stmt
, *vec_stmt
))
2001 gimple_purge_dead_eh_edges (gimple_bb (stmt
));
2003 /* The call in STMT might prevent it from being removed in dce.
2004 We however cannot remove it here, due to the way the ssa name
2005 it defines is mapped to the new definition. So just replace
2006 rhs of the statement with something harmless. */
2011 type
= TREE_TYPE (scalar_dest
);
2012 if (is_pattern_stmt_p (stmt_info
))
2013 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2015 lhs
= gimple_call_lhs (stmt
);
2016 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2017 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2018 set_vinfo_for_stmt (stmt
, NULL
);
2019 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2020 gsi_replace (gsi
, new_stmt
, false);
2021 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
2027 /* Function vect_gen_widened_results_half
2029 Create a vector stmt whose code, type, number of arguments, and result
2030 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
2031 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2032 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2033 needs to be created (DECL is a function-decl of a target-builtin).
2034 STMT is the original scalar stmt that we are vectorizing. */
2037 vect_gen_widened_results_half (enum tree_code code
,
2039 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
2040 tree vec_dest
, gimple_stmt_iterator
*gsi
,
2046 /* Generate half of the widened result: */
2047 if (code
== CALL_EXPR
)
2049 /* Target specific support */
2050 if (op_type
== binary_op
)
2051 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
2053 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
2054 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2055 gimple_call_set_lhs (new_stmt
, new_temp
);
2059 /* Generic support */
2060 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
2061 if (op_type
!= binary_op
)
2063 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vec_oprnd0
,
2065 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2066 gimple_assign_set_lhs (new_stmt
, new_temp
);
2068 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2074 /* Get vectorized definitions for loop-based vectorization. For the first
2075 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2076 scalar operand), and for the rest we get a copy with
2077 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2078 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2079 The vectors are collected into VEC_OPRNDS. */
2082 vect_get_loop_based_defs (tree
*oprnd
, gimple stmt
, enum vect_def_type dt
,
2083 VEC (tree
, heap
) **vec_oprnds
, int multi_step_cvt
)
2087 /* Get first vector operand. */
2088 /* All the vector operands except the very first one (that is scalar oprnd)
2090 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
2091 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
2093 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
2095 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
2097 /* Get second vector operand. */
2098 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
2099 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
2103 /* For conversion in multiple steps, continue to get operands
2106 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
2110 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2111 For multi-step conversions store the resulting vectors and call the function
2115 vect_create_vectorized_demotion_stmts (VEC (tree
, heap
) **vec_oprnds
,
2116 int multi_step_cvt
, gimple stmt
,
2117 VEC (tree
, heap
) *vec_dsts
,
2118 gimple_stmt_iterator
*gsi
,
2119 slp_tree slp_node
, enum tree_code code
,
2120 stmt_vec_info
*prev_stmt_info
)
2123 tree vop0
, vop1
, new_tmp
, vec_dest
;
2125 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2127 vec_dest
= VEC_pop (tree
, vec_dsts
);
2129 for (i
= 0; i
< VEC_length (tree
, *vec_oprnds
); i
+= 2)
2131 /* Create demotion operation. */
2132 vop0
= VEC_index (tree
, *vec_oprnds
, i
);
2133 vop1
= VEC_index (tree
, *vec_oprnds
, i
+ 1);
2134 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
2135 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
2136 gimple_assign_set_lhs (new_stmt
, new_tmp
);
2137 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2140 /* Store the resulting vector for next recursive call. */
2141 VEC_replace (tree
, *vec_oprnds
, i
/2, new_tmp
);
2144 /* This is the last step of the conversion sequence. Store the
2145 vectors in SLP_NODE or in vector info of the scalar statement
2146 (or in STMT_VINFO_RELATED_STMT chain). */
2148 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
2151 if (!*prev_stmt_info
)
2152 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2154 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
2156 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2161 /* For multi-step demotion operations we first generate demotion operations
2162 from the source type to the intermediate types, and then combine the
2163 results (stored in VEC_OPRNDS) in demotion operation to the destination
2167 /* At each level of recursion we have half of the operands we had at the
2169 VEC_truncate (tree
, *vec_oprnds
, (i
+1)/2);
2170 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
2171 stmt
, vec_dsts
, gsi
, slp_node
,
2172 VEC_PACK_TRUNC_EXPR
,
2176 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
2180 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2181 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2182 the resulting vectors and call the function recursively. */
2185 vect_create_vectorized_promotion_stmts (VEC (tree
, heap
) **vec_oprnds0
,
2186 VEC (tree
, heap
) **vec_oprnds1
,
2187 gimple stmt
, tree vec_dest
,
2188 gimple_stmt_iterator
*gsi
,
2189 enum tree_code code1
,
2190 enum tree_code code2
, tree decl1
,
2191 tree decl2
, int op_type
)
2194 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
2195 gimple new_stmt1
, new_stmt2
;
2196 VEC (tree
, heap
) *vec_tmp
= NULL
;
2198 vec_tmp
= VEC_alloc (tree
, heap
, VEC_length (tree
, *vec_oprnds0
) * 2);
2199 FOR_EACH_VEC_ELT (tree
, *vec_oprnds0
, i
, vop0
)
2201 if (op_type
== binary_op
)
2202 vop1
= VEC_index (tree
, *vec_oprnds1
, i
);
2206 /* Generate the two halves of promotion operation. */
2207 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
2208 op_type
, vec_dest
, gsi
, stmt
);
2209 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
2210 op_type
, vec_dest
, gsi
, stmt
);
2211 if (is_gimple_call (new_stmt1
))
2213 new_tmp1
= gimple_call_lhs (new_stmt1
);
2214 new_tmp2
= gimple_call_lhs (new_stmt2
);
2218 new_tmp1
= gimple_assign_lhs (new_stmt1
);
2219 new_tmp2
= gimple_assign_lhs (new_stmt2
);
2222 /* Store the results for the next step. */
2223 VEC_quick_push (tree
, vec_tmp
, new_tmp1
);
2224 VEC_quick_push (tree
, vec_tmp
, new_tmp2
);
2227 VEC_free (tree
, heap
, *vec_oprnds0
);
2228 *vec_oprnds0
= vec_tmp
;
2232 /* Check if STMT performs a conversion operation, that can be vectorized.
2233 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2234 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2235 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Summary of what the visible code establishes:
   - Accepted rhs codes are those satisfying CONVERT_EXPR_CODE_P plus
     FIX_TRUNC_EXPR, FLOAT_EXPR, WIDEN_MULT_EXPR and WIDEN_LSHIFT_EXPR.
   - MODIFIER (NARROW, NONE or WIDEN) is derived from comparing the
     element counts of the input and output vector types.
   - When VEC_STMT is NULL only analysis is done: STMT_VINFO_TYPE is set
     to a conversion, demotion or promotion kind and a cost-model routine
     is called.
   - SLP_NODE, when non-NULL, receives the generated stmts through
     SLP_TREE_VEC_STMTS.
   NOTE(review): this listing has lost some source lines (braces, else
   branches, return statements and a few declarations), so parts of the
   control flow are implied rather than visible.  */
2238 vectorizable_conversion (gimple stmt
, gimple_stmt_iterator
*gsi
,
2239 gimple
*vec_stmt
, slp_tree slp_node
)
2243 tree op0
, op1
= NULL_TREE
;
2244 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2245 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2246 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2247 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
2248 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
2249 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
2253 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2254 gimple new_stmt
= NULL
;
2255 stmt_vec_info prev_stmt_info
;
2258 tree vectype_out
, vectype_in
;
2260 tree lhs_type
, rhs_type
;
/* Which of the three code-generation strategies applies; tested further
   down against NARROW, NONE and WIDEN.  */
2261 enum { NARROW
, NONE
, WIDEN
} modifier
;
2262 VEC (tree
,heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
2264 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2265 int multi_step_cvt
= 0;
2266 VEC (tree
, heap
) *vec_dsts
= NULL
, *interm_types
= NULL
;
2267 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
2269 enum machine_mode rhs_mode
;
2270 unsigned short fltsz
;
2272 /* Is STMT a vectorizable conversion? */
2274 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2277 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2280 if (!is_gimple_assign (stmt
))
2283 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
2286 code
= gimple_assign_rhs_code (stmt
);
2287 if (!CONVERT_EXPR_CODE_P (code
)
2288 && code
!= FIX_TRUNC_EXPR
2289 && code
!= FLOAT_EXPR
2290 && code
!= WIDEN_MULT_EXPR
2291 && code
!= WIDEN_LSHIFT_EXPR
)
2294 op_type
= TREE_CODE_LENGTH (code
);
2296 /* Check types of lhs and rhs. */
2297 scalar_dest
= gimple_assign_lhs (stmt
);
2298 lhs_type
= TREE_TYPE (scalar_dest
);
2299 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2301 op0
= gimple_assign_rhs1 (stmt
);
2302 rhs_type
= TREE_TYPE (op0
);
/* Apart from FIX_TRUNC_EXPR and FLOAT_EXPR, the condition below fires
   unless both types are integral or both are scalar float.  */
2304 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
2305 && !((INTEGRAL_TYPE_P (lhs_type
)
2306 && INTEGRAL_TYPE_P (rhs_type
))
2307 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
2308 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
/* Integral types whose precision differs from the precision of their
   machine mode are reported as unsupported bit-precision conversions.  */
2311 if ((INTEGRAL_TYPE_P (lhs_type
)
2312 && (TYPE_PRECISION (lhs_type
)
2313 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
2314 || (INTEGRAL_TYPE_P (rhs_type
)
2315 && (TYPE_PRECISION (rhs_type
)
2316 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
)))))
2318 if (vect_print_dump_info (REPORT_DETAILS
))
2320 "type conversion to/from bit-precision unsupported.");
2324 /* Check the operands of the operation. */
2325 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
2326 &def_stmt
, &def
, &dt
[0], &vectype_in
))
2328 if (vect_print_dump_info (REPORT_DETAILS
))
2329 fprintf (vect_dump
, "use not simple.");
2332 if (op_type
== binary_op
)
2336 op1
= gimple_assign_rhs2 (stmt
);
2337 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
2338 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2340 if (CONSTANT_CLASS_P (op0
))
2341 ok
= vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
,
2342 &def_stmt
, &def
, &dt
[1], &vectype_in
);
2344 ok
= vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
2349 if (vect_print_dump_info (REPORT_DETAILS
))
2350 fprintf (vect_dump
, "use not simple.");
2355 /* If op0 is an external or constant defs use a vector type of
2356 the same size as the output vector type. */
2358 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2360 gcc_assert (vectype_in
);
2363 if (vect_print_dump_info (REPORT_DETAILS
))
2365 fprintf (vect_dump
, "no vectype for scalar type ");
2366 print_generic_expr (vect_dump
, rhs_type
, TDF_SLIM
);
/* Compare the element counts of the input and output vector types.
   NOTE(review): the assignments to MODIFIER in each branch are not
   visible in this listing.  */
2372 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2373 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2374 if (nunits_in
< nunits_out
)
2376 else if (nunits_out
== nunits_in
)
2381 /* Multiple types in SLP are handled by creating the appropriate number of
2382 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2384 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2386 else if (modifier
== NARROW
)
2387 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2389 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2391 /* Sanity check: make sure that at least one copy of the vectorized stmt
2392 needs to be generated. */
2393 gcc_assert (ncopies
>= 1);
2395 /* Supportable by target? */
2399 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
2401 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
2406 if (vect_print_dump_info (REPORT_DETAILS
))
2407 fprintf (vect_dump
, "conversion not supported by target.");
2411 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
2412 &code1
, &code2
, &multi_step_cvt
,
2415 /* Binary widening operation can only be supported directly by the
2417 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
2421 if (code
!= FLOAT_EXPR
2422 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
2423 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
2426 rhs_mode
= TYPE_MODE (rhs_type
);
2427 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
2428 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
2429 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
2430 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
2433 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
2434 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
2435 if (cvt_type
== NULL_TREE
)
2438 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
2440 if (!supportable_convert_operation (code
, vectype_out
,
2441 cvt_type
, &decl1
, &codecvt1
))
2444 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
2445 cvt_type
, &codecvt1
,
2446 &codecvt2
, &multi_step_cvt
,
2450 gcc_assert (multi_step_cvt
== 0);
2452 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
2453 vectype_in
, &code1
, &code2
,
2454 &multi_step_cvt
, &interm_types
))
2458 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
2461 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
2462 codecvt2
= ERROR_MARK
;
2466 VEC_safe_push (tree
, heap
, interm_types
, cvt_type
);
2467 cvt_type
= NULL_TREE
;
2472 gcc_assert (op_type
== unary_op
);
2473 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
2474 &code1
, &multi_step_cvt
,
2478 if (code
!= FIX_TRUNC_EXPR
2479 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
2480 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
2483 rhs_mode
= TYPE_MODE (rhs_type
);
2485 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
2486 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
2487 if (cvt_type
== NULL_TREE
)
2489 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
2492 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
2493 &code1
, &multi_step_cvt
,
2502 if (!vec_stmt
) /* transformation not required. */
/* Analysis only: record the kind of vectorized stmt in STMT_VINFO_TYPE,
   account for its cost and release INTERM_TYPES.  */
2504 if (vect_print_dump_info (REPORT_DETAILS
))
2505 fprintf (vect_dump
, "=== vectorizable_conversion ===");
2506 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
2508 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
2509 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2511 else if (modifier
== NARROW
)
2513 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
2514 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
2518 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
2519 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
2521 VEC_free (tree
, heap
, interm_types
);
2526 if (vect_print_dump_info (REPORT_DETAILS
))
2527 fprintf (vect_dump
, "transform conversion. ncopies = %d.", ncopies
);
/* For a binary operation with one constant operand, convert the constant
   to the type of the other operand.  */
2529 if (op_type
== binary_op
)
2531 if (CONSTANT_CLASS_P (op0
))
2532 op0
= fold_convert (TREE_TYPE (op1
), op0
);
2533 else if (CONSTANT_CLASS_P (op1
))
2534 op1
= fold_convert (TREE_TYPE (op0
), op1
);
2537 /* In case of multi-step conversion, we first generate conversion operations
2538 to the intermediate types, and then from that types to the final one.
2539 We create vector destinations for the intermediate type (TYPES) received
2540 from supportable_*_operation, and store them in the correct order
2541 for future use in vect_create_vectorized_*_stmts (). */
2542 vec_dsts
= VEC_alloc (tree
, heap
, multi_step_cvt
+ 1);
2543 vec_dest
= vect_create_destination_var (scalar_dest
,
2544 (cvt_type
&& modifier
== WIDEN
)
2545 ? cvt_type
: vectype_out
);
2546 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
2550 for (i
= VEC_length (tree
, interm_types
) - 1;
2551 VEC_iterate (tree
, interm_types
, i
, intermediate_type
); i
--)
2553 vec_dest
= vect_create_destination_var (scalar_dest
,
2555 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
2560 vec_dest
= vect_create_destination_var (scalar_dest
,
2562 ? vectype_out
: cvt_type
);
2566 if (modifier
== NONE
)
2567 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
2568 else if (modifier
== WIDEN
)
2570 vec_oprnds0
= VEC_alloc (tree
, heap
,
2572 ? vect_pow2 (multi_step_cvt
) : 1));
2573 if (op_type
== binary_op
)
2574 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
2577 vec_oprnds0
= VEC_alloc (tree
, heap
,
2579 ? vect_pow2 (multi_step_cvt
) : 1));
2581 else if (code
== WIDEN_LSHIFT_EXPR
)
2582 vec_oprnds1
= VEC_alloc (tree
, heap
, slp_node
->vec_stmts_size
);
2585 prev_stmt_info
= NULL
;
/* This loop emits, for every operand vector, either a call to DECL1 (when
   CODE1 is CALL_EXPR) or a unary assignment using CODE1.
   NOTE(review): the enclosing dispatch on MODIFIER is not visible here.  */
2589 for (j
= 0; j
< ncopies
; j
++)
2592 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
2595 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
2597 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
2599 /* Arguments are ready, create the new vector stmt. */
2600 if (code1
== CALL_EXPR
)
2602 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2603 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2604 gimple_call_set_lhs (new_stmt
, new_temp
);
2608 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
2609 new_stmt
= gimple_build_assign_with_ops (code1
, vec_dest
,
2611 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2612 gimple_assign_set_lhs (new_stmt
, new_temp
);
2615 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2617 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
2622 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2624 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2625 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2630 /* In case the vectorization factor (VF) is bigger than the number
2631 of elements that we can fit in a vectype (nunits), we have to
2632 generate more than one vector stmt - i.e - we need to "unroll"
2633 the vector stmt by a factor VF/nunits. */
2634 for (j
= 0; j
< ncopies
; j
++)
2641 if (code
== WIDEN_LSHIFT_EXPR
)
2646 /* Store vec_oprnd1 for every vector stmt to be created
2647 for SLP_NODE. We check during the analysis that all
2648 the shift arguments are the same. */
2649 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
2650 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
2652 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
2656 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
2657 &vec_oprnds1
, slp_node
, -1);
2661 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
2662 VEC_quick_push (tree
, vec_oprnds0
, vec_oprnd0
);
2663 if (op_type
== binary_op
)
2665 if (code
== WIDEN_LSHIFT_EXPR
)
2668 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
,
2670 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
2676 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
2677 VEC_truncate (tree
, vec_oprnds0
, 0);
2678 VEC_quick_push (tree
, vec_oprnds0
, vec_oprnd0
);
2679 if (op_type
== binary_op
)
2681 if (code
== WIDEN_LSHIFT_EXPR
)
2684 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
2686 VEC_truncate (tree
, vec_oprnds1
, 0);
2687 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
2691 /* Arguments are ready. Create the new vector stmts. */
2692 for (i
= multi_step_cvt
; i
>= 0; i
--)
2694 tree this_dest
= VEC_index (tree
, vec_dsts
, i
);
2695 enum tree_code c1
= code1
, c2
= code2
;
2696 if (i
== 0 && codecvt2
!= ERROR_MARK
)
2701 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
2703 stmt
, this_dest
, gsi
,
2704 c1
, c2
, decl1
, decl2
,
2708 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
2712 if (codecvt1
== CALL_EXPR
)
2714 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2715 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2716 gimple_call_set_lhs (new_stmt
, new_temp
);
2720 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
2721 new_temp
= make_ssa_name (vec_dest
, NULL
);
2722 new_stmt
= gimple_build_assign_with_ops (codecvt1
,
2727 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2730 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
2733 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
2737 if (!prev_stmt_info
)
2738 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2740 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2741 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2746 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2750 /* In case the vectorization factor (VF) is bigger than the number
2751 of elements that we can fit in a vectype (nunits), we have to
2752 generate more than one vector stmt - i.e - we need to "unroll"
2753 the vector stmt by a factor VF/nunits. */
2754 for (j
= 0; j
< ncopies
; j
++)
2758 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
2762 VEC_truncate (tree
, vec_oprnds0
, 0);
2763 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
2764 vect_pow2 (multi_step_cvt
) - 1);
2767 /* Arguments are ready. Create the new vector stmts. */
2769 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
2771 if (codecvt1
== CALL_EXPR
)
2773 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2774 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2775 gimple_call_set_lhs (new_stmt
, new_temp
);
2779 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
2780 new_temp
= make_ssa_name (vec_dest
, NULL
);
2781 new_stmt
= gimple_build_assign_with_ops (codecvt1
, new_temp
,
2785 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2786 VEC_replace (tree
, vec_oprnds0
, i
, new_temp
);
2789 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
2790 stmt
, vec_dsts
, gsi
,
2795 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* Release the temporary operand, destination and intermediate-type
   vectors before leaving.  */
2799 VEC_free (tree
, heap
, vec_oprnds0
);
2800 VEC_free (tree
, heap
, vec_oprnds1
);
2801 VEC_free (tree
, heap
, vec_dsts
);
2802 VEC_free (tree
, heap
, interm_types
);
2808 /* Function vectorizable_assignment.
2810 Check if STMT performs an assignment (copy) that can be vectorized.
2811 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2812 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2813 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Summary of what the visible code establishes:
   - The operand is taken from rhs1 for single-rhs assignments, PAREN_EXPR
     and codes satisfying CONVERT_EXPR_CODE_P; for VIEW_CONVERT_EXPR it is
     operand 0 of that rhs.
   - Each generated stmt is a plain assignment to a new SSA name; for the
     conversion codes the operand is first wrapped in a VIEW_CONVERT_EXPR
     to VECTYPE.
   - When VEC_STMT is NULL only analysis is done: STMT_VINFO_TYPE is set
     to assignment_vec_info_type and vect_model_simple_cost is called.
   NOTE(review): this listing has lost some source lines (braces, else
   branches, return statements), so parts of the control flow are implied.  */
2816 vectorizable_assignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
2817 gimple
*vec_stmt
, slp_tree slp_node
)
2822 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2823 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2824 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2828 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2829 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2832 VEC(tree
,heap
) *vec_oprnds
= NULL
;
2834 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2835 gimple new_stmt
= NULL
;
2836 stmt_vec_info prev_stmt_info
= NULL
;
2837 enum tree_code code
;
2840 /* Multiple types in SLP are handled by creating the appropriate number of
2841 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2843 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2846 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2848 gcc_assert (ncopies
>= 1);
2850 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2853 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2856 /* Is vectorizable assignment? */
2857 if (!is_gimple_assign (stmt
))
2860 scalar_dest
= gimple_assign_lhs (stmt
);
2861 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
2864 code
= gimple_assign_rhs_code (stmt
);
2865 if (gimple_assign_single_p (stmt
)
2866 || code
== PAREN_EXPR
2867 || CONVERT_EXPR_CODE_P (code
))
2868 op
= gimple_assign_rhs1 (stmt
);
/* For VIEW_CONVERT_EXPR the value being converted is operand 0 of the
   rhs expression.  */
2872 if (code
== VIEW_CONVERT_EXPR
)
2873 op
= TREE_OPERAND (op
, 0);
2875 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2876 &def_stmt
, &def
, &dt
[0], &vectype_in
))
2878 if (vect_print_dump_info (REPORT_DETAILS
))
2879 fprintf (vect_dump
, "use not simple.");
2883 /* We can handle NOP_EXPR conversions that do not change the number
2884 of elements or the vector size. */
2885 if ((CONVERT_EXPR_CODE_P (code
)
2886 || code
== VIEW_CONVERT_EXPR
)
2888 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
2889 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
2890 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
2893 /* We do not handle bit-precision changes. */
2894 if ((CONVERT_EXPR_CODE_P (code
)
2895 || code
== VIEW_CONVERT_EXPR
)
2896 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
2897 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
2898 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
2899 || ((TYPE_PRECISION (TREE_TYPE (op
))
2900 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
2901 /* But a conversion that does not change the bit-pattern is ok. */
2902 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
2903 > TYPE_PRECISION (TREE_TYPE (op
)))
2904 && TYPE_UNSIGNED (TREE_TYPE (op
))))
2906 if (vect_print_dump_info (REPORT_DETAILS
))
2907 fprintf (vect_dump
, "type conversion to/from bit-precision "
2912 if (!vec_stmt
) /* transformation not required. */
2914 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
2915 if (vect_print_dump_info (REPORT_DETAILS
))
2916 fprintf (vect_dump
, "=== vectorizable_assignment ===");
2917 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2922 if (vect_print_dump_info (REPORT_DETAILS
))
2923 fprintf (vect_dump
, "transform assignment.");
2926 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
/* Emit NCOPIES rounds of vectorized assignments, one stmt per operand
   vector in each round.  */
2929 for (j
= 0; j
< ncopies
; j
++)
2933 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
2935 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
2937 /* Arguments are ready. create the new vector stmt. */
2938 FOR_EACH_VEC_ELT (tree
, vec_oprnds
, i
, vop
)
2940 if (CONVERT_EXPR_CODE_P (code
)
2941 || code
== VIEW_CONVERT_EXPR
)
2942 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
2943 new_stmt
= gimple_build_assign (vec_dest
, vop
);
2944 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2945 gimple_assign_set_lhs (new_stmt
, new_temp
);
2946 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2948 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
/* The new stmt is either stored in STMT_VINFO_VEC_STMT and *VEC_STMT or
   linked from the previous copy through STMT_VINFO_RELATED_STMT.
   NOTE(review): the condition selecting between the two is not visible
   in this listing.  */
2955 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2957 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2959 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2962 VEC_free (tree
, heap
, vec_oprnds
);
2967 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2968 either as shift by a scalar or by a vector. */
/* The vector type is obtained from SCALAR_TYPE with
   get_vectype_for_scalar_type.  The optab_scalar variant is looked up
   first and the optab_vector variant second; each time the handler for
   the mode of the vector type is compared against CODE_FOR_nothing.
   NOTE(review): the return statements and some conditions are not
   visible in this listing.  */
2971 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
2974 enum machine_mode vec_mode
;
2979 vectype
= get_vectype_for_scalar_type (scalar_type
);
2983 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
2985 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
2987 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
2989 || (optab_handler (optab
, TYPE_MODE (vectype
))
2990 == CODE_FOR_nothing
))
/* Final check of the selected optab against the mode of the vector type.  */
2994 vec_mode
= TYPE_MODE (vectype
);
2995 icode
= (int) optab_handler (optab
, vec_mode
);
2996 if (icode
== CODE_FOR_nothing
)
3003 /* Function vectorizable_shift.
3005 Check if STMT performs a shift operation that can be vectorized.
3006 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3007 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3008 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Summary of what the visible code establishes:
   - Only LSHIFT_EXPR, RSHIFT_EXPR, LROTATE_EXPR and RROTATE_EXPR are
     accepted.
   - SCALAR_SHIFT_ARG starts out true and is cleared when the shift amount
     has to be treated as a vector; it selects between the optab_scalar
     and optab_vector variants of the optab.
   - When VEC_STMT is NULL only analysis is done: STMT_VINFO_TYPE is set
     to shift_vec_info_type and vect_model_simple_cost is called.
   NOTE(review): this listing has lost some source lines (braces, else
   branches, return statements and a few declarations), so parts of the
   control flow are implied.  */
3011 vectorizable_shift (gimple stmt
, gimple_stmt_iterator
*gsi
,
3012 gimple
*vec_stmt
, slp_tree slp_node
)
3016 tree op0
, op1
= NULL
;
3017 tree vec_oprnd1
= NULL_TREE
;
3018 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3020 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3021 enum tree_code code
;
3022 enum machine_mode vec_mode
;
3026 enum machine_mode optab_op2_mode
;
3029 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3030 gimple new_stmt
= NULL
;
3031 stmt_vec_info prev_stmt_info
;
3038 VEC (tree
, heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
3041 bool scalar_shift_arg
= true;
3042 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3045 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3048 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3051 /* Is STMT a vectorizable shift/rotate operation? */
3052 if (!is_gimple_assign (stmt
))
3055 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3058 code
= gimple_assign_rhs_code (stmt
);
3060 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
3061 || code
== RROTATE_EXPR
))
3064 scalar_dest
= gimple_assign_lhs (stmt
);
3065 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3066 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3067 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
3069 if (vect_print_dump_info (REPORT_DETAILS
))
3070 fprintf (vect_dump
, "bit-precision shifts not supported.");
3074 op0
= gimple_assign_rhs1 (stmt
);
3075 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3076 &def_stmt
, &def
, &dt
[0], &vectype
))
3078 if (vect_print_dump_info (REPORT_DETAILS
))
3079 fprintf (vect_dump
, "use not simple.");
3082 /* If op0 is an external or constant def use a vector type with
3083 the same size as the output vector type. */
3085 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
3087 gcc_assert (vectype
);
3090 if (vect_print_dump_info (REPORT_DETAILS
))
3092 fprintf (vect_dump
, "no vectype for scalar type ");
3093 print_generic_expr (vect_dump
, TREE_TYPE (op0
), TDF_SLIM
);
/* The input and output vector types are compared by element count.  */
3099 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3100 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3101 if (nunits_out
!= nunits_in
)
3104 op1
= gimple_assign_rhs2 (stmt
);
3105 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3106 &def
, &dt
[1], &op1_vectype
))
3108 if (vect_print_dump_info (REPORT_DETAILS
))
3109 fprintf (vect_dump
, "use not simple.");
3114 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3118 /* Multiple types in SLP are handled by creating the appropriate number of
3119 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3121 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3124 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3126 gcc_assert (ncopies
>= 1);
3128 /* Determine whether the shift amount is a vector, or scalar. If the
3129 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3131 if (dt
[1] == vect_internal_def
&& !slp_node
)
3132 scalar_shift_arg
= false;
3133 else if (dt
[1] == vect_constant_def
3134 || dt
[1] == vect_external_def
3135 || dt
[1] == vect_internal_def
)
3137 /* In SLP, need to check whether the shift count is the same,
3138 in loops if it is a constant or invariant, it is always
3142 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3145 FOR_EACH_VEC_ELT (gimple
, stmts
, k
, slpstmt
)
3146 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
3147 scalar_shift_arg
= false;
3152 if (vect_print_dump_info (REPORT_DETAILS
))
3153 fprintf (vect_dump
, "operand mode requires invariant argument.");
3157 /* Vector shifted by vector. */
3158 if (!scalar_shift_arg
)
3160 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3161 if (vect_print_dump_info (REPORT_DETAILS
))
3162 fprintf (vect_dump
, "vector/vector shift/rotate found.");
3164 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
3165 if (op1_vectype
== NULL_TREE
3166 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
3168 if (vect_print_dump_info (REPORT_DETAILS
))
3169 fprintf (vect_dump
, "unusable type for last operand in"
3170 " vector/vector shift/rotate.");
3174 /* See if the machine has a vector shifted by scalar insn and if not
3175 then see if it has a vector shifted by vector insn. */
3178 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
3180 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
3182 if (vect_print_dump_info (REPORT_DETAILS
))
3183 fprintf (vect_dump
, "vector/scalar shift/rotate found.");
3187 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3189 && (optab_handler (optab
, TYPE_MODE (vectype
))
3190 != CODE_FOR_nothing
))
3192 scalar_shift_arg
= false;
3194 if (vect_print_dump_info (REPORT_DETAILS
))
3195 fprintf (vect_dump
, "vector/vector shift/rotate found.");
3197 /* Unlike the other binary operators, shifts/rotates have
3198 the rhs being int, instead of the same type as the lhs,
3199 so make sure the scalar is the right type if we are
3200 dealing with vectors of long long/long/short/char. */
3201 if (dt
[1] == vect_constant_def
)
3202 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
3203 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
3207 && TYPE_MODE (TREE_TYPE (vectype
))
3208 != TYPE_MODE (TREE_TYPE (op1
)))
3210 if (vect_print_dump_info (REPORT_DETAILS
))
3211 fprintf (vect_dump
, "unusable type for last operand in"
3212 " vector/vector shift/rotate.");
3215 if (vec_stmt
&& !slp_node
)
3217 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
3218 op1
= vect_init_vector (stmt
, op1
,
3219 TREE_TYPE (vectype
), NULL
);
3226 /* Supportable by target? */
3229 if (vect_print_dump_info (REPORT_DETAILS
))
3230 fprintf (vect_dump
, "no optab.");
3233 vec_mode
= TYPE_MODE (vectype
);
3234 icode
= (int) optab_handler (optab
, vec_mode
);
3235 if (icode
== CODE_FOR_nothing
)
3237 if (vect_print_dump_info (REPORT_DETAILS
))
3238 fprintf (vect_dump
, "op not supported by target.");
3239 /* Check only during analysis. */
3240 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
3241 || (vf
< vect_min_worthwhile_factor (code
)
3244 if (vect_print_dump_info (REPORT_DETAILS
))
3245 fprintf (vect_dump
, "proceeding using word mode.");
3248 /* Worthwhile without SIMD support? Check only during analysis. */
3249 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
3250 && vf
< vect_min_worthwhile_factor (code
)
3253 if (vect_print_dump_info (REPORT_DETAILS
))
3254 fprintf (vect_dump
, "not worthwhile without SIMD support.");
3258 if (!vec_stmt
) /* transformation not required. */
3260 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
3261 if (vect_print_dump_info (REPORT_DETAILS
))
3262 fprintf (vect_dump
, "=== vectorizable_shift ===");
3263 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3269 if (vect_print_dump_info (REPORT_DETAILS
))
3270 fprintf (vect_dump
, "transform binary/unary operation.");
3273 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3275 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3276 created in the previous stages of the recursion, so no allocation is
3277 needed, except for the case of shift with scalar shift argument. In that
3278 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3279 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3280 In case of loop-based vectorization we allocate VECs of size 1. We
3281 allocate VEC_OPRNDS1 only in case of binary operation. */
3284 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
3285 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
3287 else if (scalar_shift_arg
)
3288 vec_oprnds1
= VEC_alloc (tree
, heap
, slp_node
->vec_stmts_size
);
3290 prev_stmt_info
= NULL
;
/* Emit NCOPIES rounds of vectorized shifts.  */
3291 for (j
= 0; j
< ncopies
; j
++)
3296 if (scalar_shift_arg
)
3298 /* Vector shl and shr insn patterns can be defined with scalar
3299 operand 2 (shift operand). In this case, use constant or loop
3300 invariant op1 directly, without extending it to vector mode
3302 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
3303 if (!VECTOR_MODE_P (optab_op2_mode
))
3305 if (vect_print_dump_info (REPORT_DETAILS
))
3306 fprintf (vect_dump
, "operand 1 using scalar mode.");
3308 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
3311 /* Store vec_oprnd1 for every vector stmt to be created
3312 for SLP_NODE. We check during the analysis that all
3313 the shift arguments are the same.
3314 TODO: Allow different constants for different vector
3315 stmts generated for an SLP instance. */
3316 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3317 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
3322 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3323 (a special case for certain kind of vector shifts); otherwise,
3324 operand 1 should be of a vector type (the usual case). */
3326 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3329 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
3333 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
3335 /* Arguments are ready. Create the new vector stmt. */
3336 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
/* The shift amount for the i-th stmt is read from the same index of
   VEC_OPRNDS1.  */
3338 vop1
= VEC_index (tree
, vec_oprnds1
, i
);
3339 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
3340 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3341 gimple_assign_set_lhs (new_stmt
, new_temp
);
3342 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3344 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
3351 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3353 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3354 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3357 VEC_free (tree
, heap
, vec_oprnds0
);
3358 VEC_free (tree
, heap
, vec_oprnds1
);
/* Forward declaration of permute_vec_elements; its definition is not
   visible in this part of the file.  */
3364 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
,
3365 gimple_stmt_iterator
*);
3368 /* Function vectorizable_operation.
3370 Check if STMT performs a binary, unary or ternary operation that can
3371 be vectorized.
3372 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3373 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3374 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3377 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
3378 gimple
*vec_stmt
, slp_tree slp_node
)
3382 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
3383 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3385 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3386 enum tree_code code
;
3387 enum machine_mode vec_mode
;
3394 enum vect_def_type dt
[3]
3395 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
3396 gimple new_stmt
= NULL
;
3397 stmt_vec_info prev_stmt_info
;
3403 VEC(tree
,heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
, *vec_oprnds2
= NULL
;
3404 tree vop0
, vop1
, vop2
;
3405 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3408 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3411 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3414 /* Is STMT a vectorizable binary/unary operation? */
3415 if (!is_gimple_assign (stmt
))
3418 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3421 code
= gimple_assign_rhs_code (stmt
);
3423 /* For pointer addition, we should use the normal plus for
3424 the vector addition. */
3425 if (code
== POINTER_PLUS_EXPR
)
3428 /* Support only unary or binary operations. */
3429 op_type
= TREE_CODE_LENGTH (code
);
3430 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
3432 if (vect_print_dump_info (REPORT_DETAILS
))
3433 fprintf (vect_dump
, "num. args = %d (not unary/binary/ternary op).",
3438 scalar_dest
= gimple_assign_lhs (stmt
);
3439 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3441 /* Most operations cannot handle bit-precision types without extra
3443 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3444 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
3445 /* Exception are bitwise binary operations. */
3446 && code
!= BIT_IOR_EXPR
3447 && code
!= BIT_XOR_EXPR
3448 && code
!= BIT_AND_EXPR
)
3450 if (vect_print_dump_info (REPORT_DETAILS
))
3451 fprintf (vect_dump
, "bit-precision arithmetic not supported.");
3455 op0
= gimple_assign_rhs1 (stmt
);
3456 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3457 &def_stmt
, &def
, &dt
[0], &vectype
))
3459 if (vect_print_dump_info (REPORT_DETAILS
))
3460 fprintf (vect_dump
, "use not simple.");
3463 /* If op0 is an external or constant def use a vector type with
3464 the same size as the output vector type. */
3466 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
3468 gcc_assert (vectype
);
3471 if (vect_print_dump_info (REPORT_DETAILS
))
3473 fprintf (vect_dump
, "no vectype for scalar type ");
3474 print_generic_expr (vect_dump
, TREE_TYPE (op0
), TDF_SLIM
);
3480 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3481 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3482 if (nunits_out
!= nunits_in
)
3485 if (op_type
== binary_op
|| op_type
== ternary_op
)
3487 op1
= gimple_assign_rhs2 (stmt
);
3488 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3491 if (vect_print_dump_info (REPORT_DETAILS
))
3492 fprintf (vect_dump
, "use not simple.");
3496 if (op_type
== ternary_op
)
3498 op2
= gimple_assign_rhs3 (stmt
);
3499 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3502 if (vect_print_dump_info (REPORT_DETAILS
))
3503 fprintf (vect_dump
, "use not simple.");
3509 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3513 /* Multiple types in SLP are handled by creating the appropriate number of
3514 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3516 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3519 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3521 gcc_assert (ncopies
>= 1);
3523 /* Shifts are handled in vectorizable_shift (). */
3524 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
3525 || code
== RROTATE_EXPR
)
3528 /* Supportable by target? */
3530 vec_mode
= TYPE_MODE (vectype
);
3531 if (code
== MULT_HIGHPART_EXPR
)
3533 if (can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
)))
3534 icode
= LAST_INSN_CODE
;
3536 icode
= CODE_FOR_nothing
;
3540 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
3543 if (vect_print_dump_info (REPORT_DETAILS
))
3544 fprintf (vect_dump
, "no optab.");
3547 icode
= (int) optab_handler (optab
, vec_mode
);
3550 if (icode
== CODE_FOR_nothing
)
3552 if (vect_print_dump_info (REPORT_DETAILS
))
3553 fprintf (vect_dump
, "op not supported by target.");
3554 /* Check only during analysis. */
3555 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
3556 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
3558 if (vect_print_dump_info (REPORT_DETAILS
))
3559 fprintf (vect_dump
, "proceeding using word mode.");
3562 /* Worthwhile without SIMD support? Check only during analysis. */
3563 if (!VECTOR_MODE_P (vec_mode
)
3565 && vf
< vect_min_worthwhile_factor (code
))
3567 if (vect_print_dump_info (REPORT_DETAILS
))
3568 fprintf (vect_dump
, "not worthwhile without SIMD support.");
3572 if (!vec_stmt
) /* transformation not required. */
3574 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
3575 if (vect_print_dump_info (REPORT_DETAILS
))
3576 fprintf (vect_dump
, "=== vectorizable_operation ===");
3577 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3583 if (vect_print_dump_info (REPORT_DETAILS
))
3584 fprintf (vect_dump
, "transform binary/unary operation.");
3587 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3589 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3590 created in the previous stages of the recursion, so no allocation is
3591 needed, except for the case of shift with scalar shift argument. In that
3592 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3593 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3594 In case of loop-based vectorization we allocate VECs of size 1. We
3595 allocate VEC_OPRNDS1 only in case of binary operation. */
3598 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
3599 if (op_type
== binary_op
|| op_type
== ternary_op
)
3600 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
3601 if (op_type
== ternary_op
)
3602 vec_oprnds2
= VEC_alloc (tree
, heap
, 1);
3605 /* In case the vectorization factor (VF) is bigger than the number
3606 of elements that we can fit in a vectype (nunits), we have to generate
3607 more than one vector stmt - i.e - we need to "unroll" the
3608 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3609 from one copy of the vector stmt to the next, in the field
3610 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3611 stages to find the correct vector defs to be used when vectorizing
3612 stmts that use the defs of the current stmt. The example below
3613 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3614 we need to create 4 vectorized stmts):
3616 before vectorization:
3617 RELATED_STMT VEC_STMT
3621 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3623 RELATED_STMT VEC_STMT
3624 VS1_0: vx0 = memref0 VS1_1 -
3625 VS1_1: vx1 = memref1 VS1_2 -
3626 VS1_2: vx2 = memref2 VS1_3 -
3627 VS1_3: vx3 = memref3 - -
3628 S1: x = load - VS1_0
3631 step2: vectorize stmt S2 (done here):
3632 To vectorize stmt S2 we first need to find the relevant vector
3633 def for the first operand 'x'. This is, as usual, obtained from
3634 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3635 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3636 relevant vector def 'vx0'. Having found 'vx0' we can generate
3637 the vector stmt VS2_0, and as usual, record it in the
3638 STMT_VINFO_VEC_STMT of stmt S2.
3639 When creating the second copy (VS2_1), we obtain the relevant vector
3640 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3641 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3642 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3643 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3644 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3645 chain of stmts and pointers:
3646 RELATED_STMT VEC_STMT
3647 VS1_0: vx0 = memref0 VS1_1 -
3648 VS1_1: vx1 = memref1 VS1_2 -
3649 VS1_2: vx2 = memref2 VS1_3 -
3650 VS1_3: vx3 = memref3 - -
3651 S1: x = load - VS1_0
3652 VS2_0: vz0 = vx0 + v1 VS2_1 -
3653 VS2_1: vz1 = vx1 + v1 VS2_2 -
3654 VS2_2: vz2 = vx2 + v1 VS2_3 -
3655 VS2_3: vz3 = vx3 + v1 - -
3656 S2: z = x + 1 - VS2_0 */
3658 prev_stmt_info
= NULL
;
3659 for (j
= 0; j
< ncopies
; j
++)
3664 if (op_type
== binary_op
|| op_type
== ternary_op
)
3665 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
3668 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3670 if (op_type
== ternary_op
)
3672 vec_oprnds2
= VEC_alloc (tree
, heap
, 1);
3673 VEC_quick_push (tree
, vec_oprnds2
,
3674 vect_get_vec_def_for_operand (op2
, stmt
, NULL
));
3679 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
3680 if (op_type
== ternary_op
)
3682 tree vec_oprnd
= VEC_pop (tree
, vec_oprnds2
);
3683 VEC_quick_push (tree
, vec_oprnds2
,
3684 vect_get_vec_def_for_stmt_copy (dt
[2],
3689 /* Arguments are ready. Create the new vector stmt. */
3690 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
3692 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
3693 ? VEC_index (tree
, vec_oprnds1
, i
) : NULL_TREE
);
3694 vop2
= ((op_type
== ternary_op
)
3695 ? VEC_index (tree
, vec_oprnds2
, i
) : NULL_TREE
);
3696 new_stmt
= gimple_build_assign_with_ops3 (code
, vec_dest
,
3698 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3699 gimple_assign_set_lhs (new_stmt
, new_temp
);
3700 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3702 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
3709 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3711 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3712 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3715 VEC_free (tree
, heap
, vec_oprnds0
);
3717 VEC_free (tree
, heap
, vec_oprnds1
);
3719 VEC_free (tree
, heap
, vec_oprnds2
);
3725 /* Function vectorizable_store.
3727 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
can be vectorized.
3729 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3730 stmt to replace it, put it in VEC_STMT, and insert it at *GSI.
3731 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): every line of this listing carries a stale leading line
   number, and the jumps in that numbering suggest that some original lines
   (the return type, a few declarations, braces, early returns and the
   closing lines of several comments) are not present here.  Check the
   control flow against a pristine copy of the file before acting on it.

   Outline of what the visible statements do:
   - NCOPIES is computed as LOOP_VINFO_VECT_FACTOR divided by NUNITS and is
     asserted to be at least 1; the SLP comment says it is always 1 there.
   - The analysis checks cover: a nested loop combined with NCOPIES > 1,
     relevance of the statement, its def type, the tree code of the lhs,
     simple use of the rhs, a mov_optab handler for the vector mode, the
     presence of a data reference, and a negative step.
   - For grouped accesses the group is tested with
     vect_store_lanes_supported and vect_grouped_store_supported, and the
     group leader also checks the rhs of every other member.
   - When VEC_STMT is null, STMT_VINFO_TYPE is set to store_vec_info_type
     and the cost is recorded through vect_model_store_cost.
   - In the transformation part vector defs are collected per copy and
     written either through an IFN_STORE_LANES call or through MEM_REF
     assignments; the listing also shows a call to
     vect_permute_store_chain for the interleaved case.  */
3734 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
3740 tree vec_oprnd
= NULL_TREE
;
3741 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3742 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
3743 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3745 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3746 struct loop
*loop
= NULL
;
3747 enum machine_mode vec_mode
;
3749 enum dr_alignment_support alignment_support_scheme
;
3752 enum vect_def_type dt
;
3753 stmt_vec_info prev_stmt_info
= NULL
;
3754 tree dataref_ptr
= NULL_TREE
;
3755 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3758 gimple next_stmt
, first_stmt
= NULL
;
3759 bool grouped_store
= false;
3760 bool store_lanes_p
= false;
3761 unsigned int group_size
, i
;
3762 VEC(tree
,heap
) *dr_chain
= NULL
, *oprnds
= NULL
, *result_chain
= NULL
;
3764 VEC(tree
,heap
) *vec_oprnds
= NULL
;
3765 bool slp
= (slp_node
!= NULL
);
3766 unsigned int vec_num
;
3767 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3771 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3773 /* Multiple types in SLP are handled by creating the appropriate number of
3774 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3776 if (slp
|| PURE_SLP_STMT (stmt_info
))
3779 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3781 gcc_assert (ncopies
>= 1);
3783 /* FORNOW. This restriction should be relaxed. */
3784 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
3786 if (vect_print_dump_info (REPORT_DETAILS
))
3787 fprintf (vect_dump
, "multiple types in nested loop.");
/* Filter on relevance (skipped when BB_VINFO is set) and on the def type
   being vect_internal_def.  The statement executed when a filter fires is
   not visible in this listing -- presumably an early return of false;
   TODO confirm.  */
3791 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3794 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3797 /* Is vectorizable store? */
3799 if (!is_gimple_assign (stmt
))
3802 scalar_dest
= gimple_assign_lhs (stmt
);
/* A VIEW_CONVERT_EXPR on the lhs of a pattern stmt is looked through by
   taking its operand 0; the tree code of the result is then compared
   against the reference codes listed below.  */
3803 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
3804 && is_pattern_stmt_p (stmt_info
))
3805 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
3806 if (TREE_CODE (scalar_dest
) != ARRAY_REF
3807 && TREE_CODE (scalar_dest
) != INDIRECT_REF
3808 && TREE_CODE (scalar_dest
) != COMPONENT_REF
3809 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
3810 && TREE_CODE (scalar_dest
) != REALPART_EXPR
3811 && TREE_CODE (scalar_dest
) != MEM_REF
)
3814 gcc_assert (gimple_assign_single_p (stmt
));
3815 op
= gimple_assign_rhs1 (stmt
);
3816 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3819 if (vect_print_dump_info (REPORT_DETAILS
))
3820 fprintf (vect_dump
, "use not simple.");
3824 elem_type
= TREE_TYPE (vectype
);
3825 vec_mode
= TYPE_MODE (vectype
);
3827 /* FORNOW. In some cases can vectorize even if data-type not supported
3828 (e.g. - array initialization with 0). */
3829 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
3832 if (!STMT_VINFO_DATA_REF (stmt_info
))
/* Compare the step with zero: STMT_VINFO_DR_STEP is used when the stmt is
   nested in the vectorized loop, DR_STEP of the data reference otherwise.
   A negative step is reported in the dump as unsupported for stores.  */
3835 if (tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
3836 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
3837 size_zero_node
) < 0)
3839 if (vect_print_dump_info (REPORT_DETAILS
))
3840 fprintf (vect_dump
, "negative step for store.");
3844 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3846 grouped_store
= true;
3847 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
3848 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
3850 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3851 if (vect_store_lanes_supported (vectype
, group_size
))
3852 store_lanes_p
= true;
3853 else if (!vect_grouped_store_supported (vectype
, group_size
))
3857 if (first_stmt
== stmt
)
3859 /* STMT is the leader of the group. Check the operands of all the
3860 stmts of the group. */
3861 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
3864 gcc_assert (gimple_assign_single_p (next_stmt
));
3865 op
= gimple_assign_rhs1 (next_stmt
);
3866 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
3867 &def_stmt
, &def
, &dt
))
3869 if (vect_print_dump_info (REPORT_DETAILS
))
3870 fprintf (vect_dump
, "use not simple.");
3873 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
/* Analysis only: record the statement kind and hand NCOPIES, the
   store-lanes flag and DT to the store cost model.  */
3878 if (!vec_stmt
) /* transformation not required. */
3880 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
3881 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
, NULL
, NULL
);
3889 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3890 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
/* Count this member of the group; the count is compared with GROUP_SIZE
   just below to decide whether the last stmt of the group was reached.  */
3892 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
3895 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
3897 /* We vectorize all the stmts of the interleaving group when we
3898 reach the last stmt in the group. */
3899 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
3900 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
3909 grouped_store
= false;
3910 /* VEC_NUM is the number of vect stmts to be created for this
3912 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3913 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
3914 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3915 op
= gimple_assign_rhs1 (first_stmt
);
3918 /* VEC_NUM is the number of vect stmts to be created for this
3920 vec_num
= group_size
;
3926 group_size
= vec_num
= 1;
3929 if (vect_print_dump_info (REPORT_DETAILS
))
3930 fprintf (vect_dump
, "transform store. ncopies = %d",ncopies
);
3932 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
3933 oprnds
= VEC_alloc (tree
, heap
, group_size
);
3935 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
3936 gcc_assert (alignment_support_scheme
);
3937 /* Targets with store-lane instructions must not require explicit
3939 gcc_assert (!store_lanes_p
3940 || alignment_support_scheme
== dr_aligned
3941 || alignment_support_scheme
== dr_unaligned_supported
);
3944 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
3946 aggr_type
= vectype
;
3948 /* In case the vectorization factor (VF) is bigger than the number
3949 of elements that we can fit in a vectype (nunits), we have to generate
3950 more than one vector stmt - i.e - we need to "unroll" the
3951 vector stmt by a factor VF/nunits. For more details see documentation in
3952 vect_get_vec_def_for_stmt_copy. */
3954 /* In case of interleaving (non-unit grouped access):
3961 We create vectorized stores starting from base address (the access of the
3962 first stmt in the chain (S2 in the above example), when the last store stmt
3963 of the chain (S4) is reached:
3966 VS2: &base + vec_size*1 = vx0
3967 VS3: &base + vec_size*2 = vx1
3968 VS4: &base + vec_size*3 = vx3
3970 Then permutation statements are generated:
3972 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
3973 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
3976 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3977 (the order of the data-refs in the output of vect_permute_store_chain
3978 corresponds to the order of scalar stmts in the interleaving chain - see
3979 the documentation of vect_permute_store_chain()).
3981 In case of both multiple types and interleaving, above vector stores and
3982 permutation stmts are created for every copy. The result vector stmts are
3983 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3984 STMT_VINFO_RELATED_STMT for the next copies.
3987 prev_stmt_info
= NULL
;
3988 for (j
= 0; j
< ncopies
; j
++)
3997 /* Get vectorized arguments for SLP_NODE. */
3998 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
3999 NULL
, slp_node
, -1);
4001 vec_oprnd
= VEC_index (tree
, vec_oprnds
, 0);
4005 /* For interleaved stores we collect vectorized defs for all the
4006 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4007 used as an input to vect_permute_store_chain(), and OPRNDS as
4008 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4010 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4011 OPRNDS are of size 1. */
4012 next_stmt
= first_stmt
;
4013 for (i
= 0; i
< group_size
; i
++)
4015 /* Since gaps are not supported for interleaved stores,
4016 GROUP_SIZE is the exact number of stmts in the chain.
4017 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4018 there is no interleaving, GROUP_SIZE is 1, and only one
4019 iteration of the loop will be executed. */
4020 gcc_assert (next_stmt
4021 && gimple_assign_single_p (next_stmt
));
4022 op
= gimple_assign_rhs1 (next_stmt
);
4024 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
4026 VEC_quick_push(tree
, dr_chain
, vec_oprnd
);
4027 VEC_quick_push(tree
, oprnds
, vec_oprnd
);
4028 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4032 /* We should have caught mismatched types earlier. */
4033 gcc_assert (useless_type_conversion_p (vectype
,
4034 TREE_TYPE (vec_oprnd
)));
4035 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, aggr_type
, NULL
,
4036 NULL_TREE
, &dummy
, gsi
,
4037 &ptr_incr
, false, &inv_p
);
4038 gcc_assert (bb_vinfo
|| !inv_p
);
4042 /* For interleaved stores we created vectorized defs for all the
4043 defs stored in OPRNDS in the previous iteration (previous copy).
4044 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4045 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4047 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4048 OPRNDS are of size 1. */
4049 for (i
= 0; i
< group_size
; i
++)
4051 op
= VEC_index (tree
, oprnds
, i
);
4052 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4054 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
4055 VEC_replace(tree
, dr_chain
, i
, vec_oprnd
);
4056 VEC_replace(tree
, oprnds
, i
, vec_oprnd
);
4058 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
4059 TYPE_SIZE_UNIT (aggr_type
));
4066 /* Combine all the vectors into an array. */
4067 vec_array
= create_vector_array (vectype
, vec_num
);
4068 for (i
= 0; i
< vec_num
; i
++)
4070 vec_oprnd
= VEC_index (tree
, dr_chain
, i
);
4071 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
4075 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4076 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
4077 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
4078 gimple_call_set_lhs (new_stmt
, data_ref
);
4079 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4086 result_chain
= VEC_alloc (tree
, heap
, group_size
);
4088 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
4092 next_stmt
= first_stmt
;
4093 for (i
= 0; i
< vec_num
; i
++)
4095 unsigned align
, misalign
;
4098 /* Bump the vector pointer. */
4099 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
4103 vec_oprnd
= VEC_index (tree
, vec_oprnds
, i
);
4104 else if (grouped_store
)
4105 /* For grouped stores vectorized defs are interleaved in
4106 vect_permute_store_chain(). */
4107 vec_oprnd
= VEC_index (tree
, result_chain
, i
);
4109 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
4110 build_int_cst (reference_alias_ptr_type
4111 (DR_REF (first_dr
)), 0));
/* ALIGN starts as the alignment of the vector type.  The first branch tests
   for an aligned access.  A DR_MISALIGNMENT of -1 switches both the type of
   DATA_REF and ALIGN to the alignment of the element type.  The remaining
   branch also retypes DATA_REF to element alignment and reads the
   misalignment from FIRST_DR.  */
4112 align
= TYPE_ALIGN_UNIT (vectype
);
4113 if (aligned_access_p (first_dr
))
4115 else if (DR_MISALIGNMENT (first_dr
) == -1)
4117 TREE_TYPE (data_ref
)
4118 = build_aligned_type (TREE_TYPE (data_ref
),
4119 TYPE_ALIGN (elem_type
));
4120 align
= TYPE_ALIGN_UNIT (elem_type
);
4125 TREE_TYPE (data_ref
)
4126 = build_aligned_type (TREE_TYPE (data_ref
),
4127 TYPE_ALIGN (elem_type
));
4128 misalign
= DR_MISALIGNMENT (first_dr
);
4130 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
4133 /* Arguments are ready. Create the new vector stmt. */
4134 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
4135 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4140 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4148 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4150 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4151 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Release the operand vectors allocated for the transformation.  */
4155 VEC_free (tree
, heap
, dr_chain
);
4156 VEC_free (tree
, heap
, oprnds
);
4158 VEC_free (tree
, heap
, result_chain
);
4160 VEC_free (tree
, heap
, vec_oprnds
);
4165 /* Given a vector type VECTYPE and permutation SEL returns
4166 the VECTOR_CST mask that implements the permutation of the
4167 vector elements. If that is impossible to do, returns NULL.

VECTYPE is the vector type whose elements are permuted.  SEL is read at
indices 0 up to TYPE_VECTOR_SUBPARTS (VECTYPE) - 1, and each entry becomes
one element of the mask.

NOTE(review): the stale line-number prefixes jump inside this function, so
the return type, the declarations of I and NUNITS, the statement guarded by
the can_vec_perm_p test and the final return are not visible in this
listing; confirm them against a pristine copy of the file. */
4170 vect_gen_perm_mask (tree vectype
, unsigned char *sel
)
4172 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
4175 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
/* Ask can_vec_perm_p whether the mode of VECTYPE supports SEL.  The guarded
   statement is not visible here -- presumably the NULL return that the
   header describes.  */
4177 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
/* The mask element type is whatever type_for_mode yields for the integer
   mode matching the element mode of VECTYPE; the trailing 1 is presumably
   the unsignedp argument -- TODO confirm.  MASK_TYPE is the vector type
   that get_vectype_for_scalar_type returns for that element type.  */
4180 mask_elt_type
= lang_hooks
.types
.type_for_mode
4181 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
4182 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
/* Build one constant per selector entry with build_int_cst, walking the
   indices from NUNITS - 1 down to 0, then wrap the array in a vector
   constant of MASK_TYPE.  */
4184 mask_elts
= XALLOCAVEC (tree
, nunits
);
4185 for (i
= nunits
- 1; i
>= 0; i
--)
4186 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
4187 mask_vec
= build_vector (mask_type
, mask_elts
);
/* NOTE(review): perm_mask_for_reverse fills a selector SEL of NUNITS bytes
   with NUNITS - 1 - I at index I (so the last element comes first) and
   returns the result of vect_gen_perm_mask (VECTYPE, SEL).

   In this listing the header comment that follows has lost its closing
   line, so as written it is not terminated before the function text; the
   return type and the declarations of I, NUNITS and SEL are not visible
   either.  Restore these from a pristine copy of the file.  */
4192 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4193 reversal of the vector elements. If that is impossible to do,
4197 perm_mask_for_reverse (tree vectype
)
4202 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4203 sel
= XALLOCAVEC (unsigned char, nunits
);
4205 for (i
= 0; i
< nunits
; ++i
)
4206 sel
[i
] = nunits
- 1 - i
;
4208 return vect_gen_perm_mask (vectype
, sel
);
4211 /* Given a vector variable X and Y, that was generated for the scalar
4212 STMT, generate instructions to permute the vector elements of X and Y
4213 using permutation mask MASK_VEC, insert them at *GSI and return the
4214 permuted vector variable.

The destination variable is created from the lhs of STMT with the type of
X, and a fresh SSA name made from it is the lhs of the VEC_PERM_EXPR
assignment.  The new statement is inserted through
vect_finish_stmt_generation.

NOTE(review): the return type, the declaration of PERM_STMT, the trailing
operands of the VEC_PERM_EXPR assignment and the final return are not
visible in this listing; confirm them against a pristine copy. */
4217 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple stmt
,
4218 gimple_stmt_iterator
*gsi
)
4220 tree vectype
= TREE_TYPE (x
);
4221 tree perm_dest
, data_ref
;
4224 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
4225 data_ref
= make_ssa_name (perm_dest
, NULL
);
4227 /* Generate the permute statement. */
/* The operands that follow DATA_REF in the call below are on a line that is
   missing here -- presumably X, Y and MASK_VEC; TODO confirm.  */
4228 perm_stmt
= gimple_build_assign_with_ops3 (VEC_PERM_EXPR
, data_ref
,
4230 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
4235 /* vectorizable_load.
4237 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4239 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4240 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4241 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4244 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
4245 slp_tree slp_node
, slp_instance slp_node_instance
)
4248 tree vec_dest
= NULL
;
4249 tree data_ref
= NULL
;
4250 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4251 stmt_vec_info prev_stmt_info
;
4252 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4253 struct loop
*loop
= NULL
;
4254 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
4255 bool nested_in_vect_loop
= false;
4256 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
4257 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4260 enum machine_mode mode
;
4261 gimple new_stmt
= NULL
;
4263 enum dr_alignment_support alignment_support_scheme
;
4264 tree dataref_ptr
= NULL_TREE
;
4266 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4268 int i
, j
, group_size
;
4269 tree msq
= NULL_TREE
, lsq
;
4270 tree offset
= NULL_TREE
;
4271 tree realignment_token
= NULL_TREE
;
4273 VEC(tree
,heap
) *dr_chain
= NULL
;
4274 bool grouped_load
= false;
4275 bool load_lanes_p
= false;
4278 bool negative
= false;
4279 bool compute_in_loop
= false;
4280 struct loop
*at_loop
;
4282 bool slp
= (slp_node
!= NULL
);
4283 bool slp_perm
= false;
4284 enum tree_code code
;
4285 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4288 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
4289 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
4290 tree stride_base
, stride_step
;
4291 int gather_scale
= 1;
4292 enum vect_def_type gather_dt
= vect_unknown_def_type
;
4296 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4297 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
4298 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4303 /* Multiple types in SLP are handled by creating the appropriate number of
4304 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4306 if (slp
|| PURE_SLP_STMT (stmt_info
))
4309 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4311 gcc_assert (ncopies
>= 1);
4313 /* FORNOW. This restriction should be relaxed. */
4314 if (nested_in_vect_loop
&& ncopies
> 1)
4316 if (vect_print_dump_info (REPORT_DETAILS
))
4317 fprintf (vect_dump
, "multiple types in nested loop.");
4321 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4324 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4327 /* Is vectorizable load? */
4328 if (!is_gimple_assign (stmt
))
4331 scalar_dest
= gimple_assign_lhs (stmt
);
4332 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4335 code
= gimple_assign_rhs_code (stmt
);
4336 if (code
!= ARRAY_REF
4337 && code
!= INDIRECT_REF
4338 && code
!= COMPONENT_REF
4339 && code
!= IMAGPART_EXPR
4340 && code
!= REALPART_EXPR
4342 && TREE_CODE_CLASS (code
) != tcc_declaration
)
4345 if (!STMT_VINFO_DATA_REF (stmt_info
))
4348 elem_type
= TREE_TYPE (vectype
);
4349 mode
= TYPE_MODE (vectype
);
4351 /* FORNOW. In some cases can vectorize even if data-type not supported
4352 (e.g. - data copies). */
4353 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
4355 if (vect_print_dump_info (REPORT_DETAILS
))
4356 fprintf (vect_dump
, "Aligned load, but unsupported type.");
4360 /* Check if the load is a part of an interleaving chain. */
4361 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
4363 grouped_load
= true;
4365 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
4367 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4368 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
4370 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4371 if (vect_load_lanes_supported (vectype
, group_size
))
4372 load_lanes_p
= true;
4373 else if (!vect_grouped_load_supported (vectype
, group_size
))
4379 if (STMT_VINFO_GATHER_P (stmt_info
))
4383 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
4384 &gather_off
, &gather_scale
);
4385 gcc_assert (gather_decl
);
4386 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
4387 &def_stmt
, &def
, &gather_dt
,
4388 &gather_off_vectype
))
4390 if (vect_print_dump_info (REPORT_DETAILS
))
4391 fprintf (vect_dump
, "gather index use not simple.");
4395 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4397 if (!vect_check_strided_load (stmt
, loop_vinfo
,
4398 &stride_base
, &stride_step
))
4403 negative
= tree_int_cst_compare (nested_in_vect_loop
4404 ? STMT_VINFO_DR_STEP (stmt_info
)
4406 size_zero_node
) < 0;
4407 if (negative
&& ncopies
> 1)
4409 if (vect_print_dump_info (REPORT_DETAILS
))
4410 fprintf (vect_dump
, "multiple types with negative step.");
4416 gcc_assert (!grouped_load
);
4417 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
4418 if (alignment_support_scheme
!= dr_aligned
4419 && alignment_support_scheme
!= dr_unaligned_supported
)
4421 if (vect_print_dump_info (REPORT_DETAILS
))
4422 fprintf (vect_dump
, "negative step but alignment required.");
4425 if (!perm_mask_for_reverse (vectype
))
4427 if (vect_print_dump_info (REPORT_DETAILS
))
4428 fprintf (vect_dump
, "negative step and reversing not supported.");
4434 if (!vec_stmt
) /* transformation not required. */
4436 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
4437 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
, NULL
, NULL
);
4441 if (vect_print_dump_info (REPORT_DETAILS
))
4442 fprintf (vect_dump
, "transform load. ncopies = %d", ncopies
);
4446 if (STMT_VINFO_GATHER_P (stmt_info
))
4448 tree vec_oprnd0
= NULL_TREE
, op
;
4449 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
4450 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
4451 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
4452 edge pe
= loop_preheader_edge (loop
);
4455 enum { NARROW
, NONE
, WIDEN
} modifier
;
4456 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
4458 if (nunits
== gather_off_nunits
)
4460 else if (nunits
== gather_off_nunits
/ 2)
4462 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
4465 for (i
= 0; i
< gather_off_nunits
; ++i
)
4466 sel
[i
] = i
| nunits
;
4468 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
4469 gcc_assert (perm_mask
!= NULL_TREE
);
4471 else if (nunits
== gather_off_nunits
* 2)
4473 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
4476 for (i
= 0; i
< nunits
; ++i
)
4477 sel
[i
] = i
< gather_off_nunits
4478 ? i
: i
+ nunits
- gather_off_nunits
;
4480 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
4481 gcc_assert (perm_mask
!= NULL_TREE
);
4487 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
4488 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4489 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4490 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4491 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4492 scaletype
= TREE_VALUE (arglist
);
4493 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
4494 && types_compatible_p (srctype
, masktype
));
4496 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4498 ptr
= fold_convert (ptrtype
, gather_base
);
4499 if (!is_gimple_min_invariant (ptr
))
4501 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
4502 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
4503 gcc_assert (!new_bb
);
4506 /* Currently we support only unconditional gather loads,
4507 so mask should be all ones. */
4508 if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
4509 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
4510 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
4514 for (j
= 0; j
< 6; ++j
)
4516 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
4517 mask
= build_real (TREE_TYPE (masktype
), r
);
4521 mask
= build_vector_from_val (masktype
, mask
);
4522 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
4524 scale
= build_int_cst (scaletype
, gather_scale
);
4526 prev_stmt_info
= NULL
;
4527 for (j
= 0; j
< ncopies
; ++j
)
4529 if (modifier
== WIDEN
&& (j
& 1))
4530 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
4531 perm_mask
, stmt
, gsi
);
4534 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
4537 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
4539 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
4541 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
4542 == TYPE_VECTOR_SUBPARTS (idxtype
));
4543 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
4544 var
= make_ssa_name (var
, NULL
);
4545 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
4547 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
4549 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4554 = gimple_build_call (gather_decl
, 5, mask
, ptr
, op
, mask
, scale
);
4556 if (!useless_type_conversion_p (vectype
, rettype
))
4558 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
4559 == TYPE_VECTOR_SUBPARTS (rettype
));
4560 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
4561 op
= make_ssa_name (var
, new_stmt
);
4562 gimple_call_set_lhs (new_stmt
, op
);
4563 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4564 var
= make_ssa_name (vec_dest
, NULL
);
4565 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
4567 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
4572 var
= make_ssa_name (vec_dest
, new_stmt
);
4573 gimple_call_set_lhs (new_stmt
, var
);
4576 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4578 if (modifier
== NARROW
)
4585 var
= permute_vec_elements (prev_res
, var
,
4586 perm_mask
, stmt
, gsi
);
4587 new_stmt
= SSA_NAME_DEF_STMT (var
);
4590 if (prev_stmt_info
== NULL
)
4591 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4593 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4594 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4598 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4600 gimple_stmt_iterator incr_gsi
;
4604 tree ref
= DR_REF (dr
);
4607 VEC(constructor_elt
, gc
) *v
= NULL
;
4608 gimple_seq stmts
= NULL
;
4610 gcc_assert (stride_base
&& stride_step
);
4612 /* For a load with loop-invariant (but other than power-of-2)
4613 stride (i.e. not a grouped access) like so:
4615 for (i = 0; i < n; i += stride)
4618 we generate a new induction variable and new accesses to
4619 form a new vector (or vectors, depending on ncopies):
4621 for (j = 0; ; j += VF*stride)
4623 tmp2 = array[j + stride];
4625 vectemp = {tmp1, tmp2, ...}
4628 ivstep
= stride_step
;
4629 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
4630 build_int_cst (TREE_TYPE (ivstep
), vf
));
4632 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
4634 create_iv (stride_base
, ivstep
, NULL
,
4635 loop
, &incr_gsi
, insert_after
,
4637 incr
= gsi_stmt (incr_gsi
);
4638 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
4640 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
4642 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
4644 prev_stmt_info
= NULL
;
4645 running_off
= offvar
;
4646 for (j
= 0; j
< ncopies
; j
++)
4650 v
= VEC_alloc (constructor_elt
, gc
, nunits
);
4651 for (i
= 0; i
< nunits
; i
++)
4653 tree newref
, newoff
;
4655 if (TREE_CODE (ref
) == ARRAY_REF
)
4656 newref
= build4 (ARRAY_REF
, TREE_TYPE (ref
),
4657 unshare_expr (TREE_OPERAND (ref
, 0)),
4659 NULL_TREE
, NULL_TREE
);
4661 newref
= build2 (MEM_REF
, TREE_TYPE (ref
),
4663 TREE_OPERAND (ref
, 1));
4665 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
4668 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
4669 newoff
= SSA_NAME_VAR (running_off
);
4670 if (POINTER_TYPE_P (TREE_TYPE (newoff
)))
4671 incr
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, newoff
,
4672 running_off
, stride_step
);
4674 incr
= gimple_build_assign_with_ops (PLUS_EXPR
, newoff
,
4675 running_off
, stride_step
);
4676 newoff
= make_ssa_name (newoff
, incr
);
4677 gimple_assign_set_lhs (incr
, newoff
);
4678 vect_finish_stmt_generation (stmt
, incr
, gsi
);
4680 running_off
= newoff
;
4683 vec_inv
= build_constructor (vectype
, v
);
4684 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
4685 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
4688 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4690 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4691 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4698 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4700 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance
)
4701 && first_stmt
!= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0))
4702 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
4704 /* Check if the chain of loads is already vectorized. */
4705 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
)))
4707 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4710 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4711 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4713 /* VEC_NUM is the number of vect stmts to be created for this group. */
4716 grouped_load
= false;
4717 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
4718 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance
))
4722 vec_num
= group_size
;
4728 group_size
= vec_num
= 1;
4731 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
4732 gcc_assert (alignment_support_scheme
);
4733 /* Targets with load-lane instructions must not require explicit
4735 gcc_assert (!load_lanes_p
4736 || alignment_support_scheme
== dr_aligned
4737 || alignment_support_scheme
== dr_unaligned_supported
);
4739 /* In case the vectorization factor (VF) is bigger than the number
4740 of elements that we can fit in a vectype (nunits), we have to generate
4741 more than one vector stmt, i.e., we need to "unroll" the
4742 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4743 from one copy of the vector stmt to the next, in the field
4744 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4745 stages to find the correct vector defs to be used when vectorizing
4746 stmts that use the defs of the current stmt. The example below
4747 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4748 need to create 4 vectorized stmts):
4750 before vectorization:
4751 RELATED_STMT VEC_STMT
4755 step 1: vectorize stmt S1:
4756 We first create the vector stmt VS1_0, and, as usual, record a
4757 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4758 Next, we create the vector stmt VS1_1, and record a pointer to
4759 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4760 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4762 RELATED_STMT VEC_STMT
4763 VS1_0: vx0 = memref0 VS1_1 -
4764 VS1_1: vx1 = memref1 VS1_2 -
4765 VS1_2: vx2 = memref2 VS1_3 -
4766 VS1_3: vx3 = memref3 - -
4767 S1: x = load - VS1_0
4770 See the documentation of vect_get_vec_def_for_stmt_copy for how the
4771 information we recorded in RELATED_STMT field is used to vectorize
4774 /* In case of interleaving (non-unit grouped access):
4781 Vectorized loads are created in the order of memory accesses
4782 starting from the access of the first stmt of the chain:
4785 VS2: vx1 = &base + vec_size*1
4786 VS3: vx3 = &base + vec_size*2
4787 VS4: vx4 = &base + vec_size*3
4789 Then permutation statements are generated:
4791 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4792 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4795 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4796 (the order of the data-refs in the output of vect_permute_load_chain
4797 corresponds to the order of scalar stmts in the interleaving chain - see
4798 the documentation of vect_permute_load_chain()).
4799 The generation of permutation stmts and recording them in
4800 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4802 In case of both multiple types and interleaving, the vector loads and
4803 permutation stmts above are created for every copy. The result vector
4804 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4805 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4807 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4808 on a target that supports unaligned accesses (dr_unaligned_supported)
4809 we generate the following code:
4813 p = p + indx * vectype_size;
4818 Otherwise, the data reference is potentially unaligned on a target that
4819 does not support unaligned accesses (dr_explicit_realign_optimized) -
4820 then generate the following code, in which the data in each iteration is
4821 obtained by two vector loads, one from the previous iteration, and one
4822 from the current iteration:
4824 msq_init = *(floor(p1))
4825 p2 = initial_addr + VS - 1;
4826 realignment_token = call target_builtin;
4829 p2 = p2 + indx * vectype_size
4831 vec_dest = realign_load (msq, lsq, realignment_token)
4836 /* If the misalignment remains the same throughout the execution of the
4837 loop, we can create the init_addr and permutation mask at the loop
4838 preheader. Otherwise, it needs to be created inside the loop.
4839 This can only occur when vectorizing memory accesses in the inner-loop
4840 nested within an outer-loop that is being vectorized. */
4842 if (nested_in_vect_loop
4843 && (TREE_INT_CST_LOW (DR_STEP (dr
))
4844 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
4846 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
4847 compute_in_loop
= true;
4850 if ((alignment_support_scheme
== dr_explicit_realign_optimized
4851 || alignment_support_scheme
== dr_explicit_realign
)
4852 && !compute_in_loop
)
4854 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
4855 alignment_support_scheme
, NULL_TREE
,
4857 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
4859 phi
= SSA_NAME_DEF_STMT (msq
);
4860 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
4867 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
4870 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
4872 aggr_type
= vectype
;
4874 prev_stmt_info
= NULL
;
4875 for (j
= 0; j
< ncopies
; j
++)
4877 /* 1. Create the vector or array pointer update chain. */
4879 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
4880 offset
, &dummy
, gsi
,
4881 &ptr_incr
, false, &inv_p
);
4883 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
4884 TYPE_SIZE_UNIT (aggr_type
));
4886 if (grouped_load
|| slp_perm
)
4887 dr_chain
= VEC_alloc (tree
, heap
, vec_num
);
4893 vec_array
= create_vector_array (vectype
, vec_num
);
4896 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4897 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
4898 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
4899 gimple_call_set_lhs (new_stmt
, vec_array
);
4900 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4902 /* Extract each vector into an SSA_NAME. */
4903 for (i
= 0; i
< vec_num
; i
++)
4905 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
4907 VEC_quick_push (tree
, dr_chain
, new_temp
);
4910 /* Record the mapping between SSA_NAMEs and statements. */
4911 vect_record_grouped_load_vectors (stmt
, dr_chain
);
4915 for (i
= 0; i
< vec_num
; i
++)
4918 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
4921 /* 2. Create the vector-load in the loop. */
4922 switch (alignment_support_scheme
)
4925 case dr_unaligned_supported
:
4927 unsigned int align
, misalign
;
4930 = build2 (MEM_REF
, vectype
, dataref_ptr
,
4931 build_int_cst (reference_alias_ptr_type
4932 (DR_REF (first_dr
)), 0));
4933 align
= TYPE_ALIGN_UNIT (vectype
);
4934 if (alignment_support_scheme
== dr_aligned
)
4936 gcc_assert (aligned_access_p (first_dr
));
4939 else if (DR_MISALIGNMENT (first_dr
) == -1)
4941 TREE_TYPE (data_ref
)
4942 = build_aligned_type (TREE_TYPE (data_ref
),
4943 TYPE_ALIGN (elem_type
));
4944 align
= TYPE_ALIGN_UNIT (elem_type
);
4949 TREE_TYPE (data_ref
)
4950 = build_aligned_type (TREE_TYPE (data_ref
),
4951 TYPE_ALIGN (elem_type
));
4952 misalign
= DR_MISALIGNMENT (first_dr
);
4954 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
4958 case dr_explicit_realign
:
4963 vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
4965 if (compute_in_loop
)
4966 msq
= vect_setup_realignment (first_stmt
, gsi
,
4968 dr_explicit_realign
,
4971 new_stmt
= gimple_build_assign_with_ops
4972 (BIT_AND_EXPR
, NULL_TREE
, dataref_ptr
,
4974 (TREE_TYPE (dataref_ptr
),
4975 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
4976 ptr
= make_ssa_name (SSA_NAME_VAR (dataref_ptr
), new_stmt
);
4977 gimple_assign_set_lhs (new_stmt
, ptr
);
4978 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4980 = build2 (MEM_REF
, vectype
, ptr
,
4981 build_int_cst (reference_alias_ptr_type
4982 (DR_REF (first_dr
)), 0));
4983 vec_dest
= vect_create_destination_var (scalar_dest
,
4985 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
4986 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4987 gimple_assign_set_lhs (new_stmt
, new_temp
);
4988 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
4989 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
4990 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4993 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
4994 TYPE_SIZE_UNIT (elem_type
));
4995 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
4996 new_stmt
= gimple_build_assign_with_ops
4997 (BIT_AND_EXPR
, NULL_TREE
, ptr
,
5000 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5001 ptr
= make_ssa_name (SSA_NAME_VAR (dataref_ptr
), new_stmt
);
5002 gimple_assign_set_lhs (new_stmt
, ptr
);
5003 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5005 = build2 (MEM_REF
, vectype
, ptr
,
5006 build_int_cst (reference_alias_ptr_type
5007 (DR_REF (first_dr
)), 0));
5010 case dr_explicit_realign_optimized
:
5011 new_stmt
= gimple_build_assign_with_ops
5012 (BIT_AND_EXPR
, NULL_TREE
, dataref_ptr
,
5014 (TREE_TYPE (dataref_ptr
),
5015 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5016 new_temp
= make_ssa_name (SSA_NAME_VAR (dataref_ptr
),
5018 gimple_assign_set_lhs (new_stmt
, new_temp
);
5019 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5021 = build2 (MEM_REF
, vectype
, new_temp
,
5022 build_int_cst (reference_alias_ptr_type
5023 (DR_REF (first_dr
)), 0));
5028 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5029 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
5030 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5031 gimple_assign_set_lhs (new_stmt
, new_temp
);
5032 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5034 /* 3. Handle explicit realignment if necessary/supported.
5036 vec_dest = realign_load (msq, lsq, realignment_token) */
5037 if (alignment_support_scheme
== dr_explicit_realign_optimized
5038 || alignment_support_scheme
== dr_explicit_realign
)
5040 lsq
= gimple_assign_lhs (new_stmt
);
5041 if (!realignment_token
)
5042 realignment_token
= dataref_ptr
;
5043 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5045 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR
,
5048 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5049 gimple_assign_set_lhs (new_stmt
, new_temp
);
5050 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5052 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
5055 if (i
== vec_num
- 1 && j
== ncopies
- 1)
5056 add_phi_arg (phi
, lsq
,
5057 loop_latch_edge (containing_loop
),
5063 /* 4. Handle invariant-load. */
5064 if (inv_p
&& !bb_vinfo
)
5066 gimple_stmt_iterator gsi2
= *gsi
;
5067 gcc_assert (!grouped_load
);
5069 new_temp
= vect_init_vector (stmt
, scalar_dest
,
5071 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5076 tree perm_mask
= perm_mask_for_reverse (vectype
);
5077 new_temp
= permute_vec_elements (new_temp
, new_temp
,
5078 perm_mask
, stmt
, gsi
);
5079 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5082 /* Collect vector loads and later create their permutation in
5083 vect_transform_grouped_load (). */
5084 if (grouped_load
|| slp_perm
)
5085 VEC_quick_push (tree
, dr_chain
, new_temp
);
5087 /* Store vector loads in the corresponding SLP_NODE. */
5088 if (slp
&& !slp_perm
)
5089 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
5094 if (slp
&& !slp_perm
)
5099 if (!vect_transform_slp_perm_load (stmt
, dr_chain
, gsi
, vf
,
5100 slp_node_instance
, false))
5102 VEC_free (tree
, heap
, dr_chain
);
5111 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
5112 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5117 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5119 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5120 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5124 VEC_free (tree
, heap
, dr_chain
);
5130 /* Function vect_is_simple_cond.
   Input:
5133 LOOP_VINFO, BB_VINFO - vectorization info; both are handed unchanged to
   vect_is_simple_use_1 for every SSA_NAME operand of COND.
5134 COND - Condition that is checked for simple use.
   STMT - the statement handed to vect_is_simple_use_1 along with each operand.
   Output:
5137 *COMP_VECTYPE - the vector type for the comparison: the type reported
   for the first operand when it is non-NULL, otherwise the type reported
   for the second operand.
5139 Returns whether a COND can be vectorized. Checks whether
5140 condition operands are supportable using vect_is_simple_use_1; operands
   that are not SSA_NAMEs are tested against INTEGER_CST, REAL_CST and
   FIXED_CST. */
5143 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
5144 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
5148 enum vect_def_type dt
;
5149 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
/* COND is tested for being a comparison expression. */
5151 if (!COMPARISON_CLASS_P (cond
))
/* Split the comparison into its two operands. */
5154 lhs
= TREE_OPERAND (cond
, 0);
5155 rhs
= TREE_OPERAND (cond
, 1);
/* First operand: an SSA_NAME goes through vect_is_simple_use_1, which is
   also given &vectype1.  */
5157 if (TREE_CODE (lhs
) == SSA_NAME
)
5159 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
5160 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
5161 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
/* Any other first operand is tested against the three constant codes.  */
5164 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
5165 && TREE_CODE (lhs
) != FIXED_CST
)
/* Second operand: same two tests, with the vector type going to vectype2.  */
5168 if (TREE_CODE (rhs
) == SSA_NAME
)
5170 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
5171 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
5172 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
5175 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
5176 && TREE_CODE (rhs
) != FIXED_CST
)
/* Prefer the first operand's vector type; fall back to the second's.  */
5179 *comp_vectype
= vectype1
? vectype1
: vectype2
;
5183 /* vectorizable_condition.
5185 Check if STMT is conditional modify expression that can be vectorized.
5186 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5187 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at GSI.
5190 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
5191 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
5192 else clause if it is 2).
5194 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5197 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
5198 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
5201 tree scalar_dest
= NULL_TREE
;
5202 tree vec_dest
= NULL_TREE
;
5203 tree cond_expr
, then_clause
, else_clause
;
5204 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5205 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5206 tree comp_vectype
= NULL_TREE
;
5207 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
5208 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
5209 tree vec_compare
, vec_cond_expr
;
5211 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5213 enum vect_def_type dt
, dts
[4];
5214 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5216 enum tree_code code
;
5217 stmt_vec_info prev_stmt_info
= NULL
;
5219 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5220 VEC (tree
, heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
5221 VEC (tree
, heap
) *vec_oprnds2
= NULL
, *vec_oprnds3
= NULL
;
5223 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
/* Number of vector stmts to generate: the loop's vectorization factor
   divided by the number of elements in VECTYPE.  */
5226 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5228 gcc_assert (ncopies
>= 1);
/* A reduction (non-zero REDUC_INDEX) needing more than one copy is
   rejected.  */
5229 if (reduc_index
&& ncopies
> 1)
5230 return false; /* FORNOW */
5232 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
5235 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5238 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5239 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
5243 /* FORNOW: not yet supported. */
5244 if (STMT_VINFO_LIVE_P (stmt_info
))
5246 if (vect_print_dump_info (REPORT_DETAILS
))
5247 fprintf (vect_dump
, "value used after loop.");
5251 /* Is vectorizable conditional operation? */
5252 if (!is_gimple_assign (stmt
))
5255 code
= gimple_assign_rhs_code (stmt
);
5257 if (code
!= COND_EXPR
)
/* Pick the COND_EXPR apart: condition, then-value, else-value.  */
5260 cond_expr
= gimple_assign_rhs1 (stmt
);
5261 then_clause
= gimple_assign_rhs2 (stmt
);
5262 else_clause
= gimple_assign_rhs3 (stmt
);
/* The condition is checked by vect_is_simple_cond.  */
5264 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
/* THEN operand: an SSA_NAME is checked with vect_is_simple_use, anything
   else is tested against INTEGER_CST, REAL_CST and FIXED_CST.  */
5269 if (TREE_CODE (then_clause
) == SSA_NAME
)
5271 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
5272 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5273 &then_def_stmt
, &def
, &dt
))
5276 else if (TREE_CODE (then_clause
) != INTEGER_CST
5277 && TREE_CODE (then_clause
) != REAL_CST
5278 && TREE_CODE (then_clause
) != FIXED_CST
)
/* ELSE operand: same tests as for the THEN operand.  */
5281 if (TREE_CODE (else_clause
) == SSA_NAME
)
5283 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
5284 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5285 &else_def_stmt
, &def
, &dt
))
5288 else if (TREE_CODE (else_clause
) != INTEGER_CST
5289 && TREE_CODE (else_clause
) != REAL_CST
5290 && TREE_CODE (else_clause
) != FIXED_CST
)
/* Mark STMT as a condition and return what expand_vec_cond_expr_p reports
   for VECTYPE and COMP_VECTYPE.  */
5295 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
5296 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
/* Allocate the four operand vectors with room for one element each.  */
5303 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
5304 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
5305 vec_oprnds2
= VEC_alloc (tree
, heap
, 1);
5306 vec_oprnds3
= VEC_alloc (tree
, heap
, 1);
5310 scalar_dest
= gimple_assign_lhs (stmt
);
5311 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5313 /* Handle cond expr. */
5314 for (j
= 0; j
< ncopies
; j
++)
5316 gimple new_stmt
= NULL
;
/* Push the four scalar operands (the two comparison operands, then the
   THEN and ELSE values), fetch their defs with vect_get_slp_defs, and pop
   the results in the reverse of the push order.  */
5321 VEC (tree
, heap
) *ops
= VEC_alloc (tree
, heap
, 4);
5322 VEC (slp_void_p
, heap
) *vec_defs
;
5324 vec_defs
= VEC_alloc (slp_void_p
, heap
, 4);
5325 VEC_safe_push (tree
, heap
, ops
, TREE_OPERAND (cond_expr
, 0));
5326 VEC_safe_push (tree
, heap
, ops
, TREE_OPERAND (cond_expr
, 1));
5327 VEC_safe_push (tree
, heap
, ops
, then_clause
);
5328 VEC_safe_push (tree
, heap
, ops
, else_clause
);
5329 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
5330 vec_oprnds3
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5331 vec_oprnds2
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5332 vec_oprnds1
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5333 vec_oprnds0
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5335 VEC_free (tree
, heap
, ops
);
5336 VEC_free (slp_void_p
, heap
, vec_defs
);
/* NOTE(review): the '>emp' argument in the four vect_is_simple_use calls
   below sits where the earlier calls pass the address of a gimple
   (e.g. &then_def_stmt); it looks like a mis-decoded '&gtemp' - confirm
   against the upstream file.  Each call fills one slot of DTS.  */
5342 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
5344 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
5345 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
5348 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
5350 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
5351 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
/* REDUC_INDEX 1 substitutes REDUC_DEF for the THEN value.  */
5352 if (reduc_index
== 1)
5353 vec_then_clause
= reduc_def
;
5356 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
5358 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
5359 NULL
, >emp
, &def
, &dts
[2]);
/* REDUC_INDEX 2 substitutes REDUC_DEF for the ELSE value.  */
5361 if (reduc_index
== 2)
5362 vec_else_clause
= reduc_def
;
5365 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
5367 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
5368 NULL
, >emp
, &def
, &dts
[3]);
/* Each operand is obtained by popping the def stored in its vector and
   passing it, with the matching DTS entry, to
   vect_get_vec_def_for_stmt_copy.  */
5374 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
5375 VEC_pop (tree
, vec_oprnds0
));
5376 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
5377 VEC_pop (tree
, vec_oprnds1
));
5378 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
5379 VEC_pop (tree
, vec_oprnds2
));
5380 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
5381 VEC_pop (tree
, vec_oprnds3
));
/* Store the current operands in the operand vectors.  */
5386 VEC_quick_push (tree
, vec_oprnds0
, vec_cond_lhs
);
5387 VEC_quick_push (tree
, vec_oprnds1
, vec_cond_rhs
);
5388 VEC_quick_push (tree
, vec_oprnds2
, vec_then_clause
);
5389 VEC_quick_push (tree
, vec_oprnds3
, vec_else_clause
);
5392 /* Arguments are ready. Create the new vector stmt. */
5393 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vec_cond_lhs
)
5395 vec_cond_rhs
= VEC_index (tree
, vec_oprnds1
, i
);
5396 vec_then_clause
= VEC_index (tree
, vec_oprnds2
, i
);
5397 vec_else_clause
= VEC_index (tree
, vec_oprnds3
, i
);
/* Build the comparison with COND_EXPR's tree code, wrap it in a
   VEC_COND_EXPR, assign it to a fresh SSA name of VEC_DEST and emit it
   through vect_finish_stmt_generation.  */
5399 vec_compare
= build2 (TREE_CODE (cond_expr
), vectype
,
5400 vec_cond_lhs
, vec_cond_rhs
);
5401 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
5402 vec_compare
, vec_then_clause
, vec_else_clause
);
5404 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
5405 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5406 gimple_assign_set_lhs (new_stmt
, new_temp
);
5407 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5409 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
/* NEW_STMT is recorded in STMT_VINFO_VEC_STMT and *VEC_STMT, or in the
   STMT_VINFO_RELATED_STMT of the previous copy; PREV_STMT_INFO then
   advances to NEW_STMT's info.  */
5416 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5418 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5420 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Release the four operand vectors.  */
5423 VEC_free (tree
, heap
, vec_oprnds0
);
5424 VEC_free (tree
, heap
, vec_oprnds1
);
5425 VEC_free (tree
, heap
, vec_oprnds2
);
5426 VEC_free (tree
, heap
, vec_oprnds3
);
5432 /* Make sure the statement is vectorizable.
   STMT is the statement to examine.  *NEED_TO_VECTORIZE is set to true on
   the STMT_VINFO_RELEVANT_P path.  NODE is forwarded to the second list of
   vectorizable_* checks and to the recursive calls made for pattern
   statements and pattern def statements.  */
5435 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
5437 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5438 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5439 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
5441 tree scalar_type
, vectype
;
5442 gimple pattern_stmt
;
5443 gimple_seq pattern_def_seq
;
5445 if (vect_print_dump_info (REPORT_DETAILS
))
5447 fprintf (vect_dump
, "==> examining statement: ");
5448 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
/* A statement with volatile operands is reported as not vectorized.  */
5451 if (gimple_has_volatile_ops (stmt
))
5453 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
5454 fprintf (vect_dump
, "not vectorized: stmt has volatile operands");
5459 /* Skip stmts that do not need to be vectorized. In loops this is expected
5461 - the COND_EXPR which is the loop exit condition
5462 - any LABEL_EXPRs in the loop
5463 - computations that are used only for array indexing or loop control.
5464 In basic blocks we only analyze statements that are a part of some SLP
5465 instance, therefore, all the statements are relevant.
5467 Pattern statement needs to be analyzed instead of the original statement
5468 if the original statement is not relevant. Otherwise, we analyze both
5469 statements. In basic blocks we are called from some SLP instance
5470 traversal, don't analyze pattern stmts instead, the pattern stmts
5471 already will be part of SLP instance. */
/* The candidate pattern statement is read from STMT's RELATED_STMT field.  */
5473 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
5474 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5475 && !STMT_VINFO_LIVE_P (stmt_info
))
5477 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5479 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5480 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5482 /* Analyze PATTERN_STMT instead of the original stmt. */
5483 stmt
= pattern_stmt
;
5484 stmt_info
= vinfo_for_stmt (pattern_stmt
);
5485 if (vect_print_dump_info (REPORT_DETAILS
))
5487 fprintf (vect_dump
, "==> examining pattern statement: ");
5488 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5493 if (vect_print_dump_info (REPORT_DETAILS
))
5494 fprintf (vect_dump
, "irrelevant.");
5499 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5502 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5503 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5505 /* Analyze PATTERN_STMT too. */
5506 if (vect_print_dump_info (REPORT_DETAILS
))
5508 fprintf (vect_dump
, "==> examining pattern statement: ");
5509 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
/* Recurse on the pattern statement with the same flag pointer and NODE.  */
5512 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
/* For a pattern statement with a non-empty pattern def sequence, walk the
   sequence and recurse on each relevant or live statement in it.  */
5516 if (is_pattern_stmt_p (stmt_info
)
5518 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
5520 gimple_stmt_iterator si
;
5522 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
5524 gimple pattern_def_stmt
= gsi_stmt (si
);
5525 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
5526 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
5528 /* Analyze def stmt of STMT if it's a pattern stmt. */
5529 if (vect_print_dump_info (REPORT_DETAILS
))
5531 fprintf (vect_dump
, "==> examining pattern def statement: ");
5532 print_gimple_stmt (vect_dump
, pattern_def_stmt
, 0, TDF_SLIM
);
5535 if (!vect_analyze_stmt (pattern_def_stmt
,
5536 need_to_vectorize
, node
))
/* Dispatch on the def type; the assertion below ties the nested-cycle
   label to loop (not basic-block) vectorization and to three RELEVANCE
   values.  */
5542 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
5544 case vect_internal_def
:
5547 case vect_reduction_def
:
5548 case vect_nested_cycle
:
5549 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
5550 || relevance
== vect_used_in_outer_by_reduction
5551 || relevance
== vect_unused_in_scope
));
5554 case vect_induction_def
:
5555 case vect_constant_def
:
5556 case vect_external_def
:
5557 case vect_unknown_def_type
:
5564 gcc_assert (PURE_SLP_STMT (stmt_info
));
/* Derive the vector type from the scalar type of STMT's lhs and store it
   in STMT_VINFO_VECTYPE.  */
5566 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
5567 if (vect_print_dump_info (REPORT_DETAILS
))
5569 fprintf (vect_dump
, "get vectype for scalar type: ");
5570 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
5573 vectype
= get_vectype_for_scalar_type (scalar_type
);
5576 if (vect_print_dump_info (REPORT_DETAILS
))
5578 fprintf (vect_dump
, "not SLPed: unsupported data-type ");
5579 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
5584 if (vect_print_dump_info (REPORT_DETAILS
))
5586 fprintf (vect_dump
, "vectype: ");
5587 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
5590 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
/* A relevant statement must not already have a vector mode and must have
   a vectype; it sets *NEED_TO_VECTORIZE.  */
5593 if (STMT_VINFO_RELEVANT_P (stmt_info
))
5595 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
5596 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
5597 *need_to_vectorize
= true;
/* Two lists of vectorizable_* checks follow, each called with NULL for
   the iterator and vec-stmt arguments.  The first passes NULL as the SLP
   node; the second passes NODE and has no vectorizable_reduction entry.  */
5602 && (STMT_VINFO_RELEVANT_P (stmt_info
)
5603 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
5604 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
5605 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
5606 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
5607 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
5608 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
5609 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
5610 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
5611 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
5612 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
5616 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, node
)
5617 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
5618 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
5619 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
5620 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
5621 || vectorizable_call (stmt
, NULL
, NULL
, node
)
5622 || vectorizable_store (stmt
, NULL
, NULL
, node
)
5623 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
5628 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
5630 fprintf (vect_dump
, "not vectorized: relevant stmt not ");
5631 fprintf (vect_dump
, "supported: ");
5632 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5641 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5642 need extra handling, except for vectorizable reductions. */
5643 if (STMT_VINFO_LIVE_P (stmt_info
)
5644 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
5645 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
5649 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
5651 fprintf (vect_dump
, "not vectorized: live stmt not ");
5652 fprintf (vect_dump
, "supported: ");
5653 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5663 /* Function vect_transform_stmt.
5665 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5668 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
5669 bool *grouped_store
, slp_tree slp_node
,
5670 slp_instance slp_node_instance
)
5672 bool is_store
= false;
5673 gimple vec_stmt
= NULL
;
5674 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5677 switch (STMT_VINFO_TYPE (stmt_info
))
5679 case type_demotion_vec_info_type
:
5680 case type_promotion_vec_info_type
:
5681 case type_conversion_vec_info_type
:
5682 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
5686 case induc_vec_info_type
:
5687 gcc_assert (!slp_node
);
5688 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
5692 case shift_vec_info_type
:
5693 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
5697 case op_vec_info_type
:
5698 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
5702 case assignment_vec_info_type
:
5703 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
5707 case load_vec_info_type
:
5708 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
5713 case store_vec_info_type
:
5714 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
5716 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
5718 /* In case of interleaving, the whole chain is vectorized when the
5719 last store in the chain is reached. Store stmts before the last
5720 one are skipped, and there vec_stmt_info shouldn't be freed
5722 *grouped_store
= true;
5723 if (STMT_VINFO_VEC_STMT (stmt_info
))
5730 case condition_vec_info_type
:
5731 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
5735 case call_vec_info_type
:
5736 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
5737 stmt
= gsi_stmt (*gsi
);
5740 case reduc_vec_info_type
:
5741 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
5746 if (!STMT_VINFO_LIVE_P (stmt_info
))
5748 if (vect_print_dump_info (REPORT_DETAILS
))
5749 fprintf (vect_dump
, "stmt not supported.");
5754 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5755 is being vectorized, but outside the immediately enclosing loop. */
5757 && STMT_VINFO_LOOP_VINFO (stmt_info
)
5758 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5759 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
5760 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
5761 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
5762 || STMT_VINFO_RELEVANT (stmt_info
) ==
5763 vect_used_in_outer_by_reduction
))
5765 struct loop
*innerloop
= LOOP_VINFO_LOOP (
5766 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
5767 imm_use_iterator imm_iter
;
5768 use_operand_p use_p
;
5772 if (vect_print_dump_info (REPORT_DETAILS
))
5773 fprintf (vect_dump
, "Record the vdef for outer-loop vectorization.");
5775 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5776 (to be used when vectorizing outer-loop stmts that use the DEF of STMT). */
5778 if (gimple_code (stmt
) == GIMPLE_PHI
)
5779 scalar_dest
= PHI_RESULT (stmt
);
5781 scalar_dest
= gimple_assign_lhs (stmt
);
5783 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
5785 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
5787 exit_phi
= USE_STMT (use_p
);
5788 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
5793 /* Handle stmts whose DEF is used outside the loop-nest that is
5794 being vectorized. */
5795 if (STMT_VINFO_LIVE_P (stmt_info
)
5796 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
5798 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
5803 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
5809 /* Remove a group of stores (for SLP or interleaving), free their stmt_vec_info. */
5813 vect_remove_stores (gimple first_stmt
)
5815 gimple next
= first_stmt
;
5817 gimple_stmt_iterator next_si
;
5821 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
5823 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
5824 if (is_pattern_stmt_p (stmt_info
))
5825 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
5826 /* Free the attached stmt_vec_info and remove the stmt. */
5827 next_si
= gsi_for_stmt (next
);
5828 unlink_stmt_vdef (next
);
5829 gsi_remove (&next_si
, true);
5830 release_defs (next
);
5831 free_stmt_vec_info (next
);
5837 /* Function new_stmt_vec_info.
5839 Create and initialize a new stmt_vec_info struct for STMT. */
5842 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
5843 bb_vec_info bb_vinfo
)
5846 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
5848 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
5849 STMT_VINFO_STMT (res
) = stmt
;
5850 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
5851 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
5852 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
5853 STMT_VINFO_LIVE_P (res
) = false;
5854 STMT_VINFO_VECTYPE (res
) = NULL
;
5855 STMT_VINFO_VEC_STMT (res
) = NULL
;
5856 STMT_VINFO_VECTORIZABLE (res
) = true;
5857 STMT_VINFO_IN_PATTERN_P (res
) = false;
5858 STMT_VINFO_RELATED_STMT (res
) = NULL
;
5859 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
5860 STMT_VINFO_DATA_REF (res
) = NULL
;
5862 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
5863 STMT_VINFO_DR_OFFSET (res
) = NULL
;
5864 STMT_VINFO_DR_INIT (res
) = NULL
;
5865 STMT_VINFO_DR_STEP (res
) = NULL
;
5866 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
5868 if (gimple_code (stmt
) == GIMPLE_PHI
5869 && is_loop_header_bb_p (gimple_bb (stmt
)))
5870 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
5872 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
5874 STMT_VINFO_SAME_ALIGN_REFS (res
) = VEC_alloc (dr_p
, heap
, 5);
5875 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res
) = 0;
5876 STMT_SLP_TYPE (res
) = loop_vect
;
5877 GROUP_FIRST_ELEMENT (res
) = NULL
;
5878 GROUP_NEXT_ELEMENT (res
) = NULL
;
5879 GROUP_SIZE (res
) = 0;
5880 GROUP_STORE_COUNT (res
) = 0;
5881 GROUP_GAP (res
) = 0;
5882 GROUP_SAME_DR_STMT (res
) = NULL
;
5883 GROUP_READ_WRITE_DEPENDENCE (res
) = false;
5889 /* Allocate the stmt_vec_info_vec vector: a heap VEC of vec_void_p
   created by VEC_alloc with a size argument of 50.  Despite the older
   wording of this comment, the container is a vector, not a hash table.
   Asserts that stmt_vec_info_vec has not been allocated yet. */
5892 init_stmt_vec_info_vec (void)
5894 gcc_assert (!stmt_vec_info_vec
);
5895 stmt_vec_info_vec
= VEC_alloc (vec_void_p
, heap
, 50);
5899 /* Release the stmt_vec_info_vec vector (a heap VEC of vec_void_p, not a
   hash table) with VEC_free.  Asserts that the vector is currently
   allocated. */
5902 free_stmt_vec_info_vec (void)
5904 gcc_assert (stmt_vec_info_vec
);
5905 VEC_free (vec_void_p
, heap
, stmt_vec_info_vec
);
5909 /* Free stmt vectorization related info. */
5912 free_stmt_vec_info (gimple stmt
)
5914 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5919 /* Check if this statement has a related "pattern stmt"
5920 (introduced by the vectorizer during the pattern recognition
5921 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info too. */
5923 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
5925 stmt_vec_info patt_info
5926 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
5929 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
5932 gimple_stmt_iterator si
;
5933 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
5934 free_stmt_vec_info (gsi_stmt (si
));
5936 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info
));
5940 VEC_free (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmt_info
));
5941 set_vinfo_for_stmt (stmt
, NULL
);
5946 /* Function get_vectype_for_scalar_type_and_size.
5948 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported by the target. */
5952 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
5954 enum machine_mode inner_mode
= TYPE_MODE (scalar_type
);
5955 enum machine_mode simd_mode
;
5956 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
5963 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
5964 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
5967 /* We can't build a vector type of elements with alignment bigger than their size. */
5969 if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
5972 /* For vector types of elements whose mode precision doesn't
5973 match their type's precision we use an element type of mode
5974 precision. The vectorization routines will have to make sure
5975 they support the proper result truncation/extension.
5976 We also make sure to build vector types with INTEGER_TYPE
5977 component type only. */
5978 if (INTEGRAL_TYPE_P (scalar_type
)
5979 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
5980 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
5981 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
5982 TYPE_UNSIGNED (scalar_type
));
5984 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5985 When the component mode passes the above test simply use a type
5986 corresponding to that mode. The theory is that any use that
5987 would cause problems with this will disable vectorization anyway. */
5988 if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
5989 && !INTEGRAL_TYPE_P (scalar_type
)
5990 && !POINTER_TYPE_P (scalar_type
))
5991 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
5993 /* If no size was supplied use the mode the target prefers. Otherwise
5994 lookup a vector mode of the specified size. */
5996 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
5998 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
5999 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
6003 vectype
= build_vector_type (scalar_type
, nunits
);
6004 if (vect_print_dump_info (REPORT_DETAILS
))
6006 fprintf (vect_dump
, "get vectype with %d units of type ", nunits
);
6007 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
6013 if (vect_print_dump_info (REPORT_DETAILS
))
6015 fprintf (vect_dump
, "vectype: ");
6016 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
6019 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
6020 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
6022 if (vect_print_dump_info (REPORT_DETAILS
))
6023 fprintf (vect_dump
, "mode not supported by target.");
/* Vector size that get_vectype_for_scalar_type passes as the SIZE
   argument of get_vectype_for_scalar_type_and_size.  While it is 0,
   get_vectype_for_scalar_type may set it to GET_MODE_SIZE of the mode of
   the vector type it obtained (NOTE(review): part of that condition is
   not visible here -- confirm). */
6030 unsigned int current_vector_size
;
6032 /* Function get_vectype_for_scalar_type.
6034 Returns the vector type corresponding to SCALAR_TYPE as supported by the target. */
6038 get_vectype_for_scalar_type (tree scalar_type
)
6041 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
6042 current_vector_size
);
6044 && current_vector_size
== 0)
6045 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
6049 /* Function get_same_sized_vectype
6051 Returns the vector type corresponding to SCALAR_TYPE that has the same
6052 size as VECTOR_TYPE, if supported by the target.  The size used is
   GET_MODE_SIZE of VECTOR_TYPE's mode, and the lookup is delegated to
   get_vectype_for_scalar_type_and_size. */
6055 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
6057 return get_vectype_for_scalar_type_and_size
6058 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
6061 /* Function vect_is_simple_use.
6064 LOOP_VINFO - the vect info of the loop that is being vectorized.
6065 BB_VINFO - the vect info of the basic block that is being vectorized.
6066 OPERAND - operand of STMT in the loop or bb.
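6067 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
DEF - the value defined by DEF_STMT when one is found (its PHI result, assignment LHS or call LHS).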
6069 Returns whether a stmt with OPERAND can be vectorized.
6070 For loops, supportable operands are constants, loop invariants, and operands
6071 that are defined by the current iteration of the loop. Unsupportable
6072 operands are those that are defined by a previous iteration of the loop (as
6073 is the case in reduction/induction computations).
6074 For basic blocks, supportable operands are constants and bb invariants.
6075 For now, operands defined outside the basic block are not supported. */
6078 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
6079 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
6080 tree
*def
, enum vect_def_type
*dt
)
6083 stmt_vec_info stmt_vinfo
;
6084 struct loop
*loop
= NULL
;
6087 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6092 if (vect_print_dump_info (REPORT_DETAILS
))
6094 fprintf (vect_dump
, "vect_is_simple_use: operand ");
6095 print_generic_expr (vect_dump
, operand
, TDF_SLIM
);
6098 if (CONSTANT_CLASS_P (operand
))
6100 *dt
= vect_constant_def
;
6104 if (is_gimple_min_invariant (operand
))
6107 *dt
= vect_external_def
;
6111 if (TREE_CODE (operand
) == PAREN_EXPR
)
6113 if (vect_print_dump_info (REPORT_DETAILS
))
6114 fprintf (vect_dump
, "non-associatable copy.");
6115 operand
= TREE_OPERAND (operand
, 0);
6118 if (TREE_CODE (operand
) != SSA_NAME
)
6120 if (vect_print_dump_info (REPORT_DETAILS
))
6121 fprintf (vect_dump
, "not ssa-name.");
6125 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
6126 if (*def_stmt
== NULL
)
6128 if (vect_print_dump_info (REPORT_DETAILS
))
6129 fprintf (vect_dump
, "no def_stmt.");
6133 if (vect_print_dump_info (REPORT_DETAILS
))
6135 fprintf (vect_dump
, "def_stmt: ");
6136 print_gimple_stmt (vect_dump
, *def_stmt
, 0, TDF_SLIM
);
6139 /* Empty stmt is expected only in case of a function argument.
6140 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
6141 if (gimple_nop_p (*def_stmt
))
6144 *dt
= vect_external_def
;
6148 bb
= gimple_bb (*def_stmt
);
6150 if ((loop
&& !flow_bb_inside_loop_p (loop
, bb
))
6151 || (!loop
&& bb
!= BB_VINFO_BB (bb_vinfo
))
6152 || (!loop
&& gimple_code (*def_stmt
) == GIMPLE_PHI
))
6153 *dt
= vect_external_def
;
6156 stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
6157 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
6160 if (*dt
== vect_unknown_def_type
6162 && *dt
== vect_double_reduction_def
6163 && gimple_code (stmt
) != GIMPLE_PHI
))
6165 if (vect_print_dump_info (REPORT_DETAILS
))
6166 fprintf (vect_dump
, "Unsupported pattern.");
6170 if (vect_print_dump_info (REPORT_DETAILS
))
6171 fprintf (vect_dump
, "type of def: %d.",*dt
);
6173 switch (gimple_code (*def_stmt
))
6176 *def
= gimple_phi_result (*def_stmt
);
6180 *def
= gimple_assign_lhs (*def_stmt
);
6184 *def
= gimple_call_lhs (*def_stmt
);
6189 if (vect_print_dump_info (REPORT_DETAILS
))
6190 fprintf (vect_dump
, "unsupported defining stmt: ");
6197 /* Function vect_is_simple_use_1.
6199 Same as vect_is_simple_use but also determines the vector operand
6200 type of OPERAND and stores it to *VECTYPE. If the definition of
6201 OPERAND is vect_uninitialized_def, vect_constant_def or
6202 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6203 is responsible to compute the best suited vector type for the operand. */
6207 vect_is_simple_use_1 (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
6208 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
6209 tree
*def
, enum vect_def_type
*dt
, tree
*vectype
)
6211 if (!vect_is_simple_use (operand
, stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
6215 /* Now get a vector type if the def is internal, otherwise supply
6216 NULL_TREE and leave it up to the caller to figure out a proper
6217 type for the use stmt. */
6218 if (*dt
== vect_internal_def
6219 || *dt
== vect_induction_def
6220 || *dt
== vect_reduction_def
6221 || *dt
== vect_double_reduction_def
6222 || *dt
== vect_nested_cycle
)
6224 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
6226 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
6227 && !STMT_VINFO_RELEVANT (stmt_info
)
6228 && !STMT_VINFO_LIVE_P (stmt_info
))
6229 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
6231 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6232 gcc_assert (*vectype
!= NULL_TREE
);
6234 else if (*dt
== vect_uninitialized_def
6235 || *dt
== vect_constant_def
6236 || *dt
== vect_external_def
)
6237 *vectype
= NULL_TREE
;
6245 /* Function supportable_widening_operation
6247 Check whether an operation represented by the code CODE is a
6248 widening operation that is supported by the target platform in
6249 vector form (i.e., when operating on arguments of type VECTYPE_IN
6250 producing a result of type VECTYPE_OUT).
6252 Widening operations we currently support are NOP (CONVERT), FLOAT
6253 and WIDEN_MULT. This function checks if these operations are supported
6254 by the target platform either directly (via vector tree-codes), or via
6258 - CODE1 and CODE2 are codes of vector operations to be used when
6259 vectorizing the operation, if available.
6260 - MULTI_STEP_CVT determines the number of required intermediate steps in
6261 case of multi-step conversion (like char->short->int - in that case
6262 MULTI_STEP_CVT will be 1).
6263 - INTERM_TYPES contains the intermediate type required to perform the
6264 widening operation (short in the above example). */
6267 supportable_widening_operation (enum tree_code code
, gimple stmt
,
6268 tree vectype_out
, tree vectype_in
,
6269 enum tree_code
*code1
, enum tree_code
*code2
,
6270 int *multi_step_cvt
,
6271 VEC (tree
, heap
) **interm_types
)
6273 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6274 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6275 struct loop
*vect_loop
= NULL
;
6276 enum machine_mode vec_mode
;
6277 enum insn_code icode1
, icode2
;
6278 optab optab1
, optab2
;
6279 tree vectype
= vectype_in
;
6280 tree wide_vectype
= vectype_out
;
6281 enum tree_code c1
, c2
;
6283 tree prev_type
, intermediate_type
;
6284 enum machine_mode intermediate_mode
, prev_mode
;
6285 optab optab3
, optab4
;
6287 *multi_step_cvt
= 0;
6289 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
6293 case WIDEN_MULT_EXPR
:
6294 /* The result of a vectorized widening operation usually requires
6295 two vectors (because the widened results do not fit into one vector).
6296 The generated vector results would normally be expected to be
6297 generated in the same order as in the original scalar computation,
6298 i.e. if 8 results are generated in each vector iteration, they are
6299 to be organized as follows:
6300 vect1: [res1,res2,res3,res4],
6301 vect2: [res5,res6,res7,res8].
6303 However, in the special case that the result of the widening
6304 operation is used in a reduction computation only, the order doesn't
6305 matter (because when vectorizing a reduction we change the order of
6306 the computation). Some targets can take advantage of this and
6307 generate more efficient code. For example, targets like Altivec,
6308 that support widen_mult using a sequence of {mult_even,mult_odd}
6309 generate the following vectors:
6310 vect1: [res1,res3,res5,res7],
6311 vect2: [res2,res4,res6,res8].
6313 When vectorizing outer-loops, we execute the inner-loop sequentially
6314 (each vectorized inner-loop iteration contributes to VF outer-loop
6315 iterations in parallel). We therefore don't allow to change the
6316 order of the computation in the inner-loop during outer-loop vectorization. */
6318 /* TODO: Another case in which order doesn't *really* matter is when we
6319 widen and then contract again, e.g. (short)((int)x * y >> 8).
6320 Normally, pack_trunc performs an even/odd permute, whereas the
6321 repack from an even/odd expansion would be an interleave, which
6322 would be significantly simpler for e.g. AVX2. */
6323 /* In any case, in order to avoid duplicating the code below, recurse
6324 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6325 are properly set up for the caller. If we fail, we'll continue with
6326 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6328 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
6329 && !nested_in_vect_loop_p (vect_loop
, stmt
)
6330 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
6331 stmt
, vectype_out
, vectype_in
,
6332 code1
, code2
, multi_step_cvt
,
6335 c1
= VEC_WIDEN_MULT_LO_EXPR
;
6336 c2
= VEC_WIDEN_MULT_HI_EXPR
;
6339 case VEC_WIDEN_MULT_EVEN_EXPR
:
6340 /* Support the recursion induced just above. */
6341 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
6342 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
6345 case WIDEN_LSHIFT_EXPR
:
6346 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
6347 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
6351 c1
= VEC_UNPACK_LO_EXPR
;
6352 c2
= VEC_UNPACK_HI_EXPR
;
6356 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
6357 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
6360 case FIX_TRUNC_EXPR
:
6361 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6362 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6363 computing the operation. */
6370 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
6372 enum tree_code ctmp
= c1
;
6377 if (code
== FIX_TRUNC_EXPR
)
6379 /* The signedness is determined from output operand. */
6380 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
6381 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
6385 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
6386 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
6389 if (!optab1
|| !optab2
)
6392 vec_mode
= TYPE_MODE (vectype
);
6393 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
6394 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
6400 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
6401 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
6404 /* Check if it's a multi-step conversion that can be done using intermediate types. */
6407 prev_type
= vectype
;
6408 prev_mode
= vec_mode
;
6410 if (!CONVERT_EXPR_CODE_P (code
))
6413 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6414 intermediate steps in promotion sequence. We try up to
6415 MAX_INTERM_CVT_STEPS steps to get to WIDE_VECTYPE, and fail if we do not. */
6417 *interm_types
= VEC_alloc (tree
, heap
, MAX_INTERM_CVT_STEPS
);
6418 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
6420 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
6422 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
6423 TYPE_UNSIGNED (prev_type
));
6424 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
6425 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
6427 if (!optab3
|| !optab4
6428 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
6429 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
6430 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
6431 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
6432 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
6433 == CODE_FOR_nothing
)
6434 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
6435 == CODE_FOR_nothing
))
6438 VEC_quick_push (tree
, *interm_types
, intermediate_type
);
6439 (*multi_step_cvt
)++;
6441 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
6442 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
6445 prev_type
= intermediate_type
;
6446 prev_mode
= intermediate_mode
;
6449 VEC_free (tree
, heap
, *interm_types
);
6454 /* Function supportable_narrowing_operation
6456 Check whether an operation represented by the code CODE is a
6457 narrowing operation that is supported by the target platform in
6458 vector form (i.e., when operating on arguments of type VECTYPE_IN
6459 and producing a result of type VECTYPE_OUT).
6461 Narrowing operations we currently support are NOP (CONVERT) and
6462 FIX_TRUNC. This function checks if these operations are supported by
6463 the target platform directly via vector tree-codes.
6466 - CODE1 is the code of a vector operation to be used when
6467 vectorizing the operation, if available.
6468 - MULTI_STEP_CVT determines the number of required intermediate steps in
6469 case of multi-step conversion (like int->short->char - in that case
6470 MULTI_STEP_CVT will be 1).
6471 - INTERM_TYPES contains the intermediate type required to perform the
6472 narrowing operation (short in the above example). */
6475 supportable_narrowing_operation (enum tree_code code
,
6476 tree vectype_out
, tree vectype_in
,
6477 enum tree_code
*code1
, int *multi_step_cvt
,
6478 VEC (tree
, heap
) **interm_types
)
6480 enum machine_mode vec_mode
;
6481 enum insn_code icode1
;
6482 optab optab1
, interm_optab
;
6483 tree vectype
= vectype_in
;
6484 tree narrow_vectype
= vectype_out
;
6486 tree intermediate_type
;
6487 enum machine_mode intermediate_mode
, prev_mode
;
6491 *multi_step_cvt
= 0;
6495 c1
= VEC_PACK_TRUNC_EXPR
;
6498 case FIX_TRUNC_EXPR
:
6499 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
6503 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6504 tree code and optabs used for computing the operation. */
6511 if (code
== FIX_TRUNC_EXPR
)
6512 /* The signedness is determined from output operand. */
6513 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
6515 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
6520 vec_mode
= TYPE_MODE (vectype
);
6521 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
6526 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
6529 /* Check if it's a multi-step conversion that can be done using intermediate types. */
6531 prev_mode
= vec_mode
;
6532 if (code
== FIX_TRUNC_EXPR
)
6533 uns
= TYPE_UNSIGNED (vectype_out
);
6535 uns
= TYPE_UNSIGNED (vectype
);
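6537 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6538 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6539 costly than signed. NOTE(review): the test below calls optab_handler on optab1 (not interm_optab) with the same vec_mode used for icode1, so icode2 looks identical to icode1 and the mode comparison trivially true -- confirm whether interm_optab was intended. */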
6540 if (code
== FIX_TRUNC_EXPR
&& uns
)
6542 enum insn_code icode2
;
6545 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
6547 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
6548 if (interm_optab
!= NULL
6549 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
6550 && insn_data
[icode1
].operand
[0].mode
6551 == insn_data
[icode2
].operand
[0].mode
)
6554 optab1
= interm_optab
;
6559 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6560 intermediate steps in demotion sequence. We try up to
6561 MAX_INTERM_CVT_STEPS steps to get to NARROW_VECTYPE, and fail if we do not. */
6562 *interm_types
= VEC_alloc (tree
, heap
, MAX_INTERM_CVT_STEPS
);
6563 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
6565 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
6567 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
6569 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
6572 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
6573 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
6574 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
6575 == CODE_FOR_nothing
))
6578 VEC_quick_push (tree
, *interm_types
, intermediate_type
);
6579 (*multi_step_cvt
)++;
6581 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
6584 prev_mode
= intermediate_mode
;
6585 optab1
= interm_optab
;
6588 VEC_free (tree
, heap
, *interm_types
);