1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
35 #include "recog.h" /* FIXME: for insn_data */
37 #include "diagnostic-core.h"
38 #include "tree-vectorizer.h"
41 /* For lang_hooks.types.type_for_mode. */
42 #include "langhooks.h"
44 /* Return the vectorized type for the given statement. */
47 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
49 return STMT_VINFO_VECTYPE (stmt_info
);
52 /* Return TRUE iff the given statement is in an inner loop relative to
53 the loop being vectorized. */
55 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
57 gimple stmt
= STMT_VINFO_STMT (stmt_info
);
58 basic_block bb
= gimple_bb (stmt
);
59 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
65 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
67 return (bb
->loop_father
== loop
->inner
);
70 /* Record the cost of a statement, either by directly informing the
71 target model or by saving it in a vector for later processing.
72 Return a preliminary estimate of the statement's cost. */
75 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
76 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
77 int misalign
, enum vect_cost_model_location where
)
81 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
82 add_stmt_info_to_vec (body_cost_vec
, count
, kind
,
83 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
86 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
91 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
92 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
93 void *target_cost_data
;
96 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
98 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
100 return add_stmt_cost (target_cost_data
, count
, kind
, stmt_info
,
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
108 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
110 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
120 read_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
121 tree array
, unsigned HOST_WIDE_INT n
)
123 tree vect_type
, vect
, vect_name
, array_ref
;
126 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
127 vect_type
= TREE_TYPE (TREE_TYPE (array
));
128 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
129 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
130 build_int_cst (size_type_node
, n
),
131 NULL_TREE
, NULL_TREE
);
133 new_stmt
= gimple_build_assign (vect
, array_ref
);
134 vect_name
= make_ssa_name (vect
, new_stmt
);
135 gimple_assign_set_lhs (new_stmt
, vect_name
);
136 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
146 write_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
147 tree array
, unsigned HOST_WIDE_INT n
)
152 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
153 build_int_cst (size_type_node
, n
),
154 NULL_TREE
, NULL_TREE
);
156 new_stmt
= gimple_build_assign (array_ref
, vect
);
157 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
165 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
167 tree mem_ref
, alias_ptr_type
;
169 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
170 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
171 /* Arrays have the same alignment as their type. */
172 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
176 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
178 /* Function vect_mark_relevant.
180 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
183 vect_mark_relevant (VEC(gimple
,heap
) **worklist
, gimple stmt
,
184 enum vect_relevant relevant
, bool live_p
,
185 bool used_in_pattern
)
187 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
188 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
189 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
192 if (vect_print_dump_info (REPORT_DETAILS
))
193 fprintf (vect_dump
, "mark relevant %d, live %d.", relevant
, live_p
);
195 /* If this stmt is an original stmt in a pattern, we might need to mark its
196 related pattern stmt instead of the original stmt. However, such stmts
197 may have their own uses that are not in any pattern, in such cases the
198 stmt itself should be marked. */
199 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
202 if (!used_in_pattern
)
204 imm_use_iterator imm_iter
;
208 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
209 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
211 if (is_gimple_assign (stmt
))
212 lhs
= gimple_assign_lhs (stmt
);
214 lhs
= gimple_call_lhs (stmt
);
216 /* This use is out of pattern use, if LHS has other uses that are
217 pattern uses, we should mark the stmt itself, and not the pattern
219 if (TREE_CODE (lhs
) == SSA_NAME
)
220 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
222 if (is_gimple_debug (USE_STMT (use_p
)))
224 use_stmt
= USE_STMT (use_p
);
226 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
229 if (vinfo_for_stmt (use_stmt
)
230 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
240 /* This is the last stmt in a sequence that was detected as a
241 pattern that can potentially be vectorized. Don't mark the stmt
242 as relevant/live because it's not going to be vectorized.
243 Instead mark the pattern-stmt that replaces it. */
245 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
247 if (vect_print_dump_info (REPORT_DETAILS
))
248 fprintf (vect_dump
, "last stmt in pattern. don't mark"
250 stmt_info
= vinfo_for_stmt (pattern_stmt
);
251 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
252 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
253 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
258 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
259 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
260 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
262 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
263 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
265 if (vect_print_dump_info (REPORT_DETAILS
))
266 fprintf (vect_dump
, "already marked relevant/live.");
270 VEC_safe_push (gimple
, heap
, *worklist
, stmt
);
274 /* Function vect_stmt_relevant_p.
276 Return true if STMT in loop that is represented by LOOP_VINFO is
277 "relevant for vectorization".
279 A stmt is considered "relevant for vectorization" if:
280 - it has uses outside the loop.
281 - it has vdefs (it alters memory).
282 - control stmts in the loop (except for the exit condition).
284 CHECKME: what other side effects would the vectorizer allow? */
287 vect_stmt_relevant_p (gimple stmt
, loop_vec_info loop_vinfo
,
288 enum vect_relevant
*relevant
, bool *live_p
)
290 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
292 imm_use_iterator imm_iter
;
296 *relevant
= vect_unused_in_scope
;
299 /* cond stmt other than loop exit cond. */
300 if (is_ctrl_stmt (stmt
)
301 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
302 != loop_exit_ctrl_vec_info_type
)
303 *relevant
= vect_used_in_scope
;
305 /* changing memory. */
306 if (gimple_code (stmt
) != GIMPLE_PHI
)
307 if (gimple_vdef (stmt
))
309 if (vect_print_dump_info (REPORT_DETAILS
))
310 fprintf (vect_dump
, "vec_stmt_relevant_p: stmt has vdefs.");
311 *relevant
= vect_used_in_scope
;
314 /* uses outside the loop. */
315 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
317 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
319 basic_block bb
= gimple_bb (USE_STMT (use_p
));
320 if (!flow_bb_inside_loop_p (loop
, bb
))
322 if (vect_print_dump_info (REPORT_DETAILS
))
323 fprintf (vect_dump
, "vec_stmt_relevant_p: used out of loop.");
325 if (is_gimple_debug (USE_STMT (use_p
)))
328 /* We expect all such uses to be in the loop exit phis
329 (because of loop closed form) */
330 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
331 gcc_assert (bb
== single_exit (loop
)->dest
);
338 return (*live_p
|| *relevant
);
342 /* Function exist_non_indexing_operands_for_use_p
344 USE is one of the uses attached to STMT. Check if USE is
345 used in STMT for anything other than indexing an array. */
348 exist_non_indexing_operands_for_use_p (tree use
, gimple stmt
)
351 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
353 /* USE corresponds to some operand in STMT. If there is no data
354 reference in STMT, then any operand that corresponds to USE
355 is not indexing an array. */
356 if (!STMT_VINFO_DATA_REF (stmt_info
))
359 /* STMT has a data_ref. FORNOW this means that its of one of
363 (This should have been verified in analyze_data_refs).
365 'var' in the second case corresponds to a def, not a use,
366 so USE cannot correspond to any operands that are not used
369 Therefore, all we need to check is if STMT falls into the
370 first case, and whether var corresponds to USE. */
372 if (!gimple_assign_copy_p (stmt
))
374 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
376 operand
= gimple_assign_rhs1 (stmt
);
377 if (TREE_CODE (operand
) != SSA_NAME
)
388 Function process_use.
391 - a USE in STMT in a loop represented by LOOP_VINFO
392 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
393 that defined USE. This is done by calling mark_relevant and passing it
394 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
395 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
399 Generally, LIVE_P and RELEVANT are used to define the liveness and
400 relevance info of the DEF_STMT of this USE:
401 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
402 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
404 - case 1: If USE is used only for address computations (e.g. array indexing),
405 which does not need to be directly vectorized, then the liveness/relevance
406 of the respective DEF_STMT is left unchanged.
407 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
408 skip DEF_STMT cause it had already been processed.
409 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
410 be modified accordingly.
412 Return true if everything is as expected. Return false otherwise. */
415 process_use (gimple stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
416 enum vect_relevant relevant
, VEC(gimple
,heap
) **worklist
,
419 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
420 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
421 stmt_vec_info dstmt_vinfo
;
422 basic_block bb
, def_bb
;
425 enum vect_def_type dt
;
427 /* case 1: we are only interested in uses that need to be vectorized. Uses
428 that are used for address computation are not considered relevant. */
429 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
432 if (!vect_is_simple_use (use
, stmt
, loop_vinfo
, NULL
, &def_stmt
, &def
, &dt
))
434 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
435 fprintf (vect_dump
, "not vectorized: unsupported use in stmt.");
439 if (!def_stmt
|| gimple_nop_p (def_stmt
))
442 def_bb
= gimple_bb (def_stmt
);
443 if (!flow_bb_inside_loop_p (loop
, def_bb
))
445 if (vect_print_dump_info (REPORT_DETAILS
))
446 fprintf (vect_dump
, "def_stmt is out of loop.");
450 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
451 DEF_STMT must have already been processed, because this should be the
452 only way that STMT, which is a reduction-phi, was put in the worklist,
453 as there should be no other uses for DEF_STMT in the loop. So we just
454 check that everything is as expected, and we are done. */
455 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
456 bb
= gimple_bb (stmt
);
457 if (gimple_code (stmt
) == GIMPLE_PHI
458 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
459 && gimple_code (def_stmt
) != GIMPLE_PHI
460 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
461 && bb
->loop_father
== def_bb
->loop_father
)
463 if (vect_print_dump_info (REPORT_DETAILS
))
464 fprintf (vect_dump
, "reduc-stmt defining reduc-phi in the same nest.");
465 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
466 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
467 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
468 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
469 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
473 /* case 3a: outer-loop stmt defining an inner-loop stmt:
474 outer-loop-header-bb:
480 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
482 if (vect_print_dump_info (REPORT_DETAILS
))
483 fprintf (vect_dump
, "outer-loop def-stmt defining inner-loop stmt.");
487 case vect_unused_in_scope
:
488 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
489 vect_used_in_scope
: vect_unused_in_scope
;
492 case vect_used_in_outer_by_reduction
:
493 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
494 relevant
= vect_used_by_reduction
;
497 case vect_used_in_outer
:
498 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
499 relevant
= vect_used_in_scope
;
502 case vect_used_in_scope
:
510 /* case 3b: inner-loop stmt defining an outer-loop stmt:
511 outer-loop-header-bb:
515 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
517 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
519 if (vect_print_dump_info (REPORT_DETAILS
))
520 fprintf (vect_dump
, "inner-loop def-stmt defining outer-loop stmt.");
524 case vect_unused_in_scope
:
525 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
526 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
527 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
530 case vect_used_by_reduction
:
531 relevant
= vect_used_in_outer_by_reduction
;
534 case vect_used_in_scope
:
535 relevant
= vect_used_in_outer
;
543 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
,
544 is_pattern_stmt_p (stmt_vinfo
));
549 /* Function vect_mark_stmts_to_be_vectorized.
551 Not all stmts in the loop need to be vectorized. For example:
560 Stmt 1 and 3 do not need to be vectorized, because loop control and
561 addressing of vectorized data-refs are handled differently.
563 This pass detects such stmts. */
566 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
568 VEC(gimple
,heap
) *worklist
;
569 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
570 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
571 unsigned int nbbs
= loop
->num_nodes
;
572 gimple_stmt_iterator si
;
575 stmt_vec_info stmt_vinfo
;
579 enum vect_relevant relevant
, tmp_relevant
;
580 enum vect_def_type def_type
;
582 if (vect_print_dump_info (REPORT_DETAILS
))
583 fprintf (vect_dump
, "=== vect_mark_stmts_to_be_vectorized ===");
585 worklist
= VEC_alloc (gimple
, heap
, 64);
587 /* 1. Init worklist. */
588 for (i
= 0; i
< nbbs
; i
++)
591 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
594 if (vect_print_dump_info (REPORT_DETAILS
))
596 fprintf (vect_dump
, "init: phi relevant? ");
597 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
600 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
601 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
, false);
603 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
605 stmt
= gsi_stmt (si
);
606 if (vect_print_dump_info (REPORT_DETAILS
))
608 fprintf (vect_dump
, "init: stmt relevant? ");
609 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
612 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
613 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
, false);
617 /* 2. Process_worklist */
618 while (VEC_length (gimple
, worklist
) > 0)
623 stmt
= VEC_pop (gimple
, worklist
);
624 if (vect_print_dump_info (REPORT_DETAILS
))
626 fprintf (vect_dump
, "worklist: examine stmt: ");
627 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
630 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
631 (DEF_STMT) as relevant/irrelevant and live/dead according to the
632 liveness and relevance properties of STMT. */
633 stmt_vinfo
= vinfo_for_stmt (stmt
);
634 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
635 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
637 /* Generally, the liveness and relevance properties of STMT are
638 propagated as is to the DEF_STMTs of its USEs:
639 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
640 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
642 One exception is when STMT has been identified as defining a reduction
643 variable; in this case we set the liveness/relevance as follows:
645 relevant = vect_used_by_reduction
646 This is because we distinguish between two kinds of relevant stmts -
647 those that are used by a reduction computation, and those that are
648 (also) used by a regular computation. This allows us later on to
649 identify stmts that are used solely by a reduction, and therefore the
650 order of the results that they produce does not have to be kept. */
652 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
653 tmp_relevant
= relevant
;
656 case vect_reduction_def
:
657 switch (tmp_relevant
)
659 case vect_unused_in_scope
:
660 relevant
= vect_used_by_reduction
;
663 case vect_used_by_reduction
:
664 if (gimple_code (stmt
) == GIMPLE_PHI
)
669 if (vect_print_dump_info (REPORT_DETAILS
))
670 fprintf (vect_dump
, "unsupported use of reduction.");
672 VEC_free (gimple
, heap
, worklist
);
679 case vect_nested_cycle
:
680 if (tmp_relevant
!= vect_unused_in_scope
681 && tmp_relevant
!= vect_used_in_outer_by_reduction
682 && tmp_relevant
!= vect_used_in_outer
)
684 if (vect_print_dump_info (REPORT_DETAILS
))
685 fprintf (vect_dump
, "unsupported use of nested cycle.");
687 VEC_free (gimple
, heap
, worklist
);
694 case vect_double_reduction_def
:
695 if (tmp_relevant
!= vect_unused_in_scope
696 && tmp_relevant
!= vect_used_by_reduction
)
698 if (vect_print_dump_info (REPORT_DETAILS
))
699 fprintf (vect_dump
, "unsupported use of double reduction.");
701 VEC_free (gimple
, heap
, worklist
);
712 if (is_pattern_stmt_p (stmt_vinfo
))
714 /* Pattern statements are not inserted into the code, so
715 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
716 have to scan the RHS or function arguments instead. */
717 if (is_gimple_assign (stmt
))
719 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
720 tree op
= gimple_assign_rhs1 (stmt
);
723 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
725 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
726 live_p
, relevant
, &worklist
, false)
727 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
728 live_p
, relevant
, &worklist
, false))
730 VEC_free (gimple
, heap
, worklist
);
735 for (; i
< gimple_num_ops (stmt
); i
++)
737 op
= gimple_op (stmt
, i
);
738 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
741 VEC_free (gimple
, heap
, worklist
);
746 else if (is_gimple_call (stmt
))
748 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
750 tree arg
= gimple_call_arg (stmt
, i
);
751 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
754 VEC_free (gimple
, heap
, worklist
);
761 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
763 tree op
= USE_FROM_PTR (use_p
);
764 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
767 VEC_free (gimple
, heap
, worklist
);
772 if (STMT_VINFO_GATHER_P (stmt_vinfo
))
775 tree decl
= vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
777 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
780 VEC_free (gimple
, heap
, worklist
);
784 } /* while worklist */
786 VEC_free (gimple
, heap
, worklist
);
791 /* Function vect_model_simple_cost.
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
798 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
799 enum vect_def_type
*dt
,
800 stmt_vector_for_cost
*prologue_cost_vec
,
801 stmt_vector_for_cost
*body_cost_vec
)
804 int inside_cost
= 0, prologue_cost
= 0;
806 /* The SLP costs were already calculated during SLP tree build. */
807 if (PURE_SLP_STMT (stmt_info
))
810 /* FORNOW: Assuming maximum 2 args per stmts. */
811 for (i
= 0; i
< 2; i
++)
812 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
813 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
814 stmt_info
, 0, vect_prologue
);
816 /* Pass the inside-of-loop statements to the target-specific cost model. */
817 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
818 stmt_info
, 0, vect_body
);
820 if (vect_print_dump_info (REPORT_COST
))
821 fprintf (vect_dump
, "vect_model_simple_cost: inside_cost = %d, "
822 "prologue_cost = %d .", inside_cost
, prologue_cost
);
826 /* Model cost for type demotion and promotion operations. PWR is normally
827 zero for single-step promotions and demotions. It will be one if
828 two-step promotion/demotion is required, and so on. Each additional
829 step doubles the number of instructions required. */
832 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
833 enum vect_def_type
*dt
, int pwr
)
836 int inside_cost
= 0, prologue_cost
= 0;
837 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
838 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
839 void *target_cost_data
;
841 /* The SLP costs were already calculated during SLP tree build. */
842 if (PURE_SLP_STMT (stmt_info
))
846 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
848 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
850 for (i
= 0; i
< pwr
+ 1; i
++)
852 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
854 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
855 vec_promote_demote
, stmt_info
, 0,
859 /* FORNOW: Assuming maximum 2 args per stmts. */
860 for (i
= 0; i
< 2; i
++)
861 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
862 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
863 stmt_info
, 0, vect_prologue
);
865 if (vect_print_dump_info (REPORT_COST
))
866 fprintf (vect_dump
, "vect_model_promotion_demotion_cost: inside_cost = %d, "
867 "prologue_cost = %d .", inside_cost
, prologue_cost
);
870 /* Function vect_cost_group_size
872 For grouped load or store, return the group_size only if it is the first
873 load or store of a group, else return 1. This ensures that group size is
874 only returned once per group. */
877 vect_cost_group_size (stmt_vec_info stmt_info
)
879 gimple first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
881 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
882 return GROUP_SIZE (stmt_info
);
888 /* Function vect_model_store_cost
890 Models cost for stores. In the case of grouped accesses, one access
891 has the overhead of the grouped access attributed to it. */
894 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
895 bool store_lanes_p
, enum vect_def_type dt
,
897 stmt_vector_for_cost
*prologue_cost_vec
,
898 stmt_vector_for_cost
*body_cost_vec
)
901 unsigned int inside_cost
= 0, prologue_cost
= 0;
902 struct data_reference
*first_dr
;
905 /* The SLP costs were already calculated during SLP tree build. */
906 if (PURE_SLP_STMT (stmt_info
))
909 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
910 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
911 stmt_info
, 0, vect_prologue
);
913 /* Grouped access? */
914 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
918 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
923 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
924 group_size
= vect_cost_group_size (stmt_info
);
927 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
929 /* Not a grouped access. */
933 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
940 if (!store_lanes_p
&& group_size
> 1)
942 /* Uses a high and low interleave operation for each needed permute. */
944 int nstmts
= ncopies
* exact_log2 (group_size
) * group_size
;
945 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
946 stmt_info
, 0, vect_body
);
948 if (vect_print_dump_info (REPORT_COST
))
949 fprintf (vect_dump
, "vect_model_store_cost: strided group_size = %d .",
953 /* Costs of the stores. */
954 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, body_cost_vec
);
956 if (vect_print_dump_info (REPORT_COST
))
957 fprintf (vect_dump
, "vect_model_store_cost: inside_cost = %d, "
958 "prologue_cost = %d .", inside_cost
, prologue_cost
);
962 /* Calculate cost of DR's memory access. */
964 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
965 unsigned int *inside_cost
,
966 stmt_vector_for_cost
*body_cost_vec
)
968 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
969 gimple stmt
= DR_STMT (dr
);
970 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
972 switch (alignment_support_scheme
)
976 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
977 vector_store
, stmt_info
, 0,
980 if (vect_print_dump_info (REPORT_COST
))
981 fprintf (vect_dump
, "vect_model_store_cost: aligned.");
986 case dr_unaligned_supported
:
988 /* Here, we assign an additional cost for the unaligned store. */
989 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
990 unaligned_store
, stmt_info
,
991 DR_MISALIGNMENT (dr
), vect_body
);
993 if (vect_print_dump_info (REPORT_COST
))
994 fprintf (vect_dump
, "vect_model_store_cost: unaligned supported by "
1000 case dr_unaligned_unsupported
:
1002 *inside_cost
= VECT_MAX_COST
;
1004 if (vect_print_dump_info (REPORT_COST
))
1005 fprintf (vect_dump
, "vect_model_store_cost: unsupported access.");
1016 /* Function vect_model_load_cost
1018 Models cost for loads. In the case of grouped accesses, the last access
1019 has the overhead of the grouped access attributed to it. Since unaligned
1020 accesses are supported for loads, we also account for the costs of the
1021 access scheme chosen. */
1024 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1025 bool load_lanes_p
, slp_tree slp_node
,
1026 stmt_vector_for_cost
*prologue_cost_vec
,
1027 stmt_vector_for_cost
*body_cost_vec
)
1031 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1032 unsigned int inside_cost
= 0, prologue_cost
= 0;
1034 /* The SLP costs were already calculated during SLP tree build. */
1035 if (PURE_SLP_STMT (stmt_info
))
1038 /* Grouped accesses? */
1039 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1040 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1042 group_size
= vect_cost_group_size (stmt_info
);
1043 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1045 /* Not a grouped access. */
1052 /* We assume that the cost of a single load-lanes instruction is
1053 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1054 access is instead being provided by a load-and-permute operation,
1055 include the cost of the permutes. */
1056 if (!load_lanes_p
&& group_size
> 1)
1058 /* Uses an even and odd extract operations for each needed permute. */
1059 int nstmts
= ncopies
* exact_log2 (group_size
) * group_size
;
1060 inside_cost
+= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1061 stmt_info
, 0, vect_body
);
1063 if (vect_print_dump_info (REPORT_COST
))
1064 fprintf (vect_dump
, "vect_model_load_cost: strided group_size = %d .",
1068 /* The loads themselves. */
1069 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1071 /* N scalar loads plus gathering them into a vector. */
1072 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1073 inside_cost
+= record_stmt_cost (body_cost_vec
,
1074 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1075 scalar_load
, stmt_info
, 0, vect_body
);
1076 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1077 stmt_info
, 0, vect_body
);
1080 vect_get_load_cost (first_dr
, ncopies
,
1081 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1082 || group_size
> 1 || slp_node
),
1083 &inside_cost
, &prologue_cost
,
1084 prologue_cost_vec
, body_cost_vec
, true);
1086 if (vect_print_dump_info (REPORT_COST
))
1087 fprintf (vect_dump
, "vect_model_load_cost: inside_cost = %d, "
1088 "prologue_cost = %d .", inside_cost
, prologue_cost
);
1092 /* Calculate cost of DR's memory access. */
/* NOTE(review): this chunk is a lossy extraction; structural lines of the
   original (return type, braces, `case dr_aligned:', `break;') are missing,
   so comments below describe only what is visible.  */
1094 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1095 bool add_realign_cost
, unsigned int *inside_cost
,
1096 unsigned int *prologue_cost
,
1097 stmt_vector_for_cost
*prologue_cost_vec
,
1098 stmt_vector_for_cost
*body_cost_vec
,
1099 bool record_prologue_costs
)
/* Ask how the target can honor DR's (mis)alignment, then cost the load
   according to the chosen scheme.  */
1101 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1102 gimple stmt
= DR_STMT (dr
);
1103 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1105 switch (alignment_support_scheme
)
/* Aligned access: one vector_load per copy.  (The `case dr_aligned:'
   label is not visible in this extraction.)  */
1109 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1110 stmt_info
, 0, vect_body
);
1112 if (vect_print_dump_info (REPORT_COST
))
1113 fprintf (vect_dump
, "vect_model_load_cost: aligned.");
1117 case dr_unaligned_supported
:
1119 /* Here, we assign an additional cost for the unaligned load. */
/* The misalignment amount is passed through so the target cost model can
   refine the estimate.  */
1120 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1121 unaligned_load
, stmt_info
,
1122 DR_MISALIGNMENT (dr
), vect_body
);
1124 if (vect_print_dump_info (REPORT_COST
))
1125 fprintf (vect_dump
, "vect_model_load_cost: unaligned supported by "
1130 case dr_explicit_realign
:
/* Explicit realignment: two vector loads plus one permute per copy.  */
1132 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1133 vector_load
, stmt_info
, 0, vect_body
);
1134 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1135 vec_perm
, stmt_info
, 0, vect_body
);
1137 /* FIXME: If the misalignment remains fixed across the iterations of
1138 the containing loop, the following cost should be added to the
1140 if (targetm
.vectorize
.builtin_mask_for_load
)
1141 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1142 stmt_info
, 0, vect_body
);
1144 if (vect_print_dump_info (REPORT_COST
))
1145 fprintf (vect_dump
, "vect_model_load_cost: explicit realign");
1149 case dr_explicit_realign_optimized
:
1151 if (vect_print_dump_info (REPORT_COST
))
1152 fprintf (vect_dump
, "vect_model_load_cost: unaligned software "
1155 /* Unaligned software pipeline has a load of an address, an initial
1156 load, and possibly a mask operation to "prime" the loop. However,
1157 if this is an access in a group of loads, which provide grouped
1158 access, then the above cost should only be considered for one
1159 access in the group. Inside the loop, there is a load op
1160 and a realignment op. */
/* Prologue costs are charged only when requested and when this access
   pays the realignment setup for its group.  */
1162 if (add_realign_cost
&& record_prologue_costs
)
1164 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1165 vector_stmt
, stmt_info
,
1167 if (targetm
.vectorize
.builtin_mask_for_load
)
1168 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1169 vector_stmt
, stmt_info
,
1173 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1174 stmt_info
, 0, vect_body
);
1175 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1176 stmt_info
, 0, vect_body
);
1178 if (vect_print_dump_info (REPORT_COST
))
1180 "vect_model_load_cost: explicit realign optimized");
1185 case dr_unaligned_unsupported
:
/* Unsupported access: pin the cost at the maximum so this scheme is
   never chosen.  */
1187 *inside_cost
= VECT_MAX_COST
;
1189 if (vect_print_dump_info (REPORT_COST
))
1190 fprintf (vect_dump
, "vect_model_load_cost: unsupported access.");
1200 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1201 the loop preheader for the vectorized stmt STMT. */
1204 vect_init_vector_1 (gimple stmt
, gimple new_stmt
, gimple_stmt_iterator
*gsi
)
/* If an iterator was supplied, insert right there.  (The `if (gsi)' guard
   line is not visible in this lossy extraction.)  */
1207 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1210 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1211 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
/* Loop vectorization: emit NEW_STMT on the preheader edge of the
   relevant loop.  */
1215 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1219 if (nested_in_vect_loop_p (loop
, stmt
))
1222 pe
= loop_preheader_edge (loop
);
/* Inserting on the preheader edge must not split it into a new block.  */
1223 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1224 gcc_assert (!new_bb
);
/* Basic-block vectorization: insert just after the labels of the BB.  */
1228 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1230 gimple_stmt_iterator gsi_bb_start
;
1232 gcc_assert (bb_vinfo
);
1233 bb
= BB_VINFO_BB (bb_vinfo
);
1234 gsi_bb_start
= gsi_after_labels (bb
);
1235 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1239 if (vect_print_dump_info (REPORT_DETAILS
))
1241 fprintf (vect_dump
, "created new init_stmt: ");
1242 print_gimple_stmt (vect_dump
, new_stmt
, 0, TDF_SLIM
);
1246 /* Function vect_init_vector.
1248 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1249 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1250 vector type a vector with all elements equal to VAL is created first.
1251 Place the initialization at BSI if it is not NULL. Otherwise, place the
1252 initialization at the loop preheader.
1253 Return the DEF of INIT_STMT.
1254 It will be used in the vectorization of STMT. */
1257 vect_init_vector (gimple stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
/* When TYPE is a vector but VAL is scalar: first bring VAL to the vector's
   element type if needed, then splat it across all lanes.  */
1264 if (TREE_CODE (type
) == VECTOR_TYPE
1265 && TREE_CODE (TREE_TYPE (val
)) != VECTOR_TYPE
)
1267 if (!types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
/* Constants can be re-typed by folding a VIEW_CONVERT_EXPR ...  */
1269 if (CONSTANT_CLASS_P (val
))
1270 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (type
), val
);
/* ... non-constants need an explicit NOP_EXPR conversion stmt, inserted
   the same way as the final init stmt.  */
1273 new_temp
= make_ssa_name (TREE_TYPE (type
), NULL
);
1274 init_stmt
= gimple_build_assign_with_ops (NOP_EXPR
,
1277 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
/* Build the {val,val,...,val} vector value.  */
1281 val
= build_vector_from_val (type
, val
);
/* Materialize the (possibly vector) value into a fresh SSA name and
   insert the init stmt at GSI or the preheader.  */
1284 new_var
= vect_get_new_vect_var (type
, vect_simple_var
, "cst_");
1285 init_stmt
= gimple_build_assign (new_var
, val
);
1286 new_temp
= make_ssa_name (new_var
, init_stmt
);
1287 gimple_assign_set_lhs (init_stmt
, new_temp
);
1288 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1289 vec_oprnd
= gimple_assign_lhs (init_stmt
);
1294 /* Function vect_get_vec_def_for_operand.
1296 OP is an operand in STMT. This function returns a (vector) def that will be
1297 used in the vectorized stmt for STMT.
1299 In the case that OP is an SSA_NAME which is defined in the loop, then
1300 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1302 In case OP is an invariant or constant, a new stmt that creates a vector def
1303 needs to be introduced. */
1306 vect_get_vec_def_for_operand (tree op
, gimple stmt
, tree
*scalar_def
)
1311 stmt_vec_info def_stmt_info
= NULL
;
1312 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1313 unsigned int nunits
;
1314 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1316 enum vect_def_type dt
;
1320 if (vect_print_dump_info (REPORT_DETAILS
))
1322 fprintf (vect_dump
, "vect_get_vec_def_for_operand: ");
1323 print_generic_expr (vect_dump
, op
, TDF_SLIM
);
/* Classify OP; analysis already verified it is a "simple use", hence
   the assert.  */
1326 is_simple_use
= vect_is_simple_use (op
, stmt
, loop_vinfo
, NULL
,
1327 &def_stmt
, &def
, &dt
);
1328 gcc_assert (is_simple_use
);
1329 if (vect_print_dump_info (REPORT_DETAILS
))
1333 fprintf (vect_dump
, "def = ");
1334 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
1338 fprintf (vect_dump
, " def_stmt = ");
1339 print_gimple_stmt (vect_dump
, def_stmt
, 0, TDF_SLIM
);
/* Dispatch on the def kind (the enclosing `switch (dt)' line is missing
   from this extraction).  */
1345 /* Case 1: operand is a constant. */
1346 case vect_constant_def
:
1348 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1349 gcc_assert (vector_type
);
1350 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1355 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1356 if (vect_print_dump_info (REPORT_DETAILS
))
1357 fprintf (vect_dump
, "Create vector_cst. nunits = %d", nunits
);
1359 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1362 /* Case 2: operand is defined outside the loop - loop invariant. */
1363 case vect_external_def
:
1365 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1366 gcc_assert (vector_type
);
1371 /* Create 'vec_inv = {inv,inv,..,inv}' */
1372 if (vect_print_dump_info (REPORT_DETAILS
))
1373 fprintf (vect_dump
, "Create vector_inv.");
1375 return vect_init_vector (stmt
, def
, vector_type
, NULL
);
1378 /* Case 3: operand is defined inside the loop. */
1379 case vect_internal_def
:
1382 *scalar_def
= NULL
/* FIXME tuples: def_stmt*/;
1384 /* Get the def from the vectorized stmt. */
1385 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1387 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1388 /* Get vectorized pattern statement. */
1390 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1391 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1392 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1393 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1394 gcc_assert (vec_stmt
);
/* Extract the def: PHIs via PHI_RESULT, calls via the call lhs,
   otherwise the assignment lhs.  */
1395 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1396 vec_oprnd
= PHI_RESULT (vec_stmt
);
1397 else if (is_gimple_call (vec_stmt
))
1398 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1400 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1404 /* Case 4: operand is defined by a loop header phi - reduction */
1405 case vect_reduction_def
:
1406 case vect_double_reduction_def
:
1407 case vect_nested_cycle
:
1411 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1412 loop
= (gimple_bb (def_stmt
))->loop_father
;
1414 /* Get the def before the loop */
1415 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
1416 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
1419 /* Case 5: operand is defined by loop-header phi - induction. */
1420 case vect_induction_def
:
1422 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1424 /* Get the def from the vectorized stmt. */
1425 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1426 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1427 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1428 vec_oprnd
= PHI_RESULT (vec_stmt
);
1430 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1440 /* Function vect_get_vec_def_for_stmt_copy
1442 Return a vector-def for an operand. This function is used when the
1443 vectorized stmt to be created (by the caller to this function) is a "copy"
1444 created in case the vectorized result cannot fit in one vector, and several
1445 copies of the vector-stmt are required. In this case the vector-def is
1446 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1447 of the stmt that defines VEC_OPRND.
1448 DT is the type of the vector def VEC_OPRND.
1451 In case the vectorization factor (VF) is bigger than the number
1452 of elements that can fit in a vectype (nunits), we have to generate
1453 more than one vector stmt to vectorize the scalar stmt. This situation
1454 arises when there are multiple data-types operated upon in the loop; the
1455 smallest data-type determines the VF, and as a result, when vectorizing
1456 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1457 vector stmt (each computing a vector of 'nunits' results, and together
1458 computing 'VF' results in each iteration). This function is called when
1459 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1460 which VF=16 and nunits=4, so the number of copies required is 4):
1462 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1464 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1465 VS1.1: vx.1 = memref1 VS1.2
1466 VS1.2: vx.2 = memref2 VS1.3
1467 VS1.3: vx.3 = memref3
1469 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1470 VSnew.1: vz1 = vx.1 + ... VSnew.2
1471 VSnew.2: vz2 = vx.2 + ... VSnew.3
1472 VSnew.3: vz3 = vx.3 + ...
1474 The vectorization of S1 is explained in vectorizable_load.
1475 The vectorization of S2:
1476 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1477 the function 'vect_get_vec_def_for_operand' is called to
1478 get the relevant vector-def for each operand of S2. For operand x it
1479 returns the vector-def 'vx.0'.
1481 To create the remaining copies of the vector-stmt (VSnew.j), this
1482 function is called to get the relevant vector-def for each operand. It is
1483 obtained from the respective VS1.j stmt, which is recorded in the
1484 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1486 For example, to obtain the vector-def 'vx.1' in order to create the
1487 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1488 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1489 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1490 and return its def ('vx.1').
1491 Overall, to create the above sequence this function will be called 3 times:
1492 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1493 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1494 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
/* See the function comment above: return the def for the next "copy" of a
   multi-copy vectorized stmt, given the previous copy's def VEC_OPRND.  */
1497 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1499 gimple vec_stmt_for_operand
;
1500 stmt_vec_info def_stmt_info
;
1502 /* Do nothing; can reuse same def. */
1503 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
/* Walk from VEC_OPRND's defining stmt to its STMT_VINFO_RELATED_STMT
   "next copy" and return that copy's def.  */
1506 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1507 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1508 gcc_assert (def_stmt_info
);
1509 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1510 gcc_assert (vec_stmt_for_operand
);
1511 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
/* PHIs carry their def in PHI_RESULT rather than a plain gimple lhs.  */
1512 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1513 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1515 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1520 /* Get vectorized definitions for the operands to create a copy of an original
1521 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1524 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1525 VEC(tree
,heap
) **vec_oprnds0
,
1526 VEC(tree
,heap
) **vec_oprnds1
)
/* Replace the top of *VEC_OPRNDS0 with the def for the next copy.  */
1528 tree vec_oprnd
= VEC_pop (tree
, *vec_oprnds0
);
1530 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1531 VEC_quick_push (tree
, *vec_oprnds0
, vec_oprnd
);
/* The second operand vector is optional; advance it the same way when
   present.  */
1533 if (vec_oprnds1
&& *vec_oprnds1
)
1535 vec_oprnd
= VEC_pop (tree
, *vec_oprnds1
);
1536 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1537 VEC_quick_push (tree
, *vec_oprnds1
, vec_oprnd
);
1542 /* Get vectorized definitions for OP0 and OP1.
1543 REDUC_INDEX is the index of reduction operand in case of reduction,
1544 and -1 otherwise. */
1547 vect_get_vec_defs (tree op0
, tree op1
, gimple stmt
,
1548 VEC (tree
, heap
) **vec_oprnds0
,
1549 VEC (tree
, heap
) **vec_oprnds1
,
1550 slp_tree slp_node
, int reduc_index
)
/* SLP path: collect defs for all operands at once via vect_get_slp_defs.
   (The `if (slp_node)' guard line is missing from this extraction.)  */
1554 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1555 VEC (tree
, heap
) *ops
= VEC_alloc (tree
, heap
, nops
);
1556 VEC (slp_void_p
, heap
) *vec_defs
= VEC_alloc (slp_void_p
, heap
, nops
);
1558 VEC_quick_push (tree
, ops
, op0
);
1560 VEC_quick_push (tree
, ops
, op1
);
1562 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
/* Hand back the per-operand def vectors produced by the SLP walk.  */
1564 *vec_oprnds0
= (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 0);
1566 *vec_oprnds1
= (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 1);
1568 VEC_free (tree
, heap
, ops
);
1569 VEC_free (slp_void_p
, heap
, vec_defs
);
/* Loop path: one def per operand via vect_get_vec_def_for_operand.  */
1575 *vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
1576 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1577 VEC_quick_push (tree
, *vec_oprnds0
, vec_oprnd
);
1581 *vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
1582 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
1583 VEC_quick_push (tree
, *vec_oprnds1
, vec_oprnd
);
1589 /* Function vect_finish_stmt_generation.
1591 Insert a new stmt. */
1594 vect_finish_stmt_generation (gimple stmt
, gimple vec_stmt
,
1595 gimple_stmt_iterator
*gsi
)
1597 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1598 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1599 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1601 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
/* Place the vector stmt just before the scalar stmt being vectorized.  */
1603 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
/* Register vectorizer bookkeeping info for the new stmt.  */
1605 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, loop_vinfo
,
1608 if (vect_print_dump_info (REPORT_DETAILS
))
1610 fprintf (vect_dump
, "add new stmt: ");
1611 print_gimple_stmt (vect_dump
, vec_stmt
, 0, TDF_SLIM
);
/* Preserve the scalar stmt's source location for debug/diagnostics.  */
1614 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1617 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1618 a function declaration if the target has a vectorized version
1619 of the function, or NULL_TREE if the function cannot be vectorized. */
1622 vectorizable_function (gimple call
, tree vectype_out
, tree vectype_in
)
1624 tree fndecl
= gimple_call_fndecl (call
);
1626 /* We only handle functions that do not read or clobber memory -- i.e.
1627 const or novops ones. */
1628 if (!(gimple_call_flags (call
) & (ECF_CONST
| ECF_NOVOPS
)))
/* Only built-in FUNCTION_DECLs are candidates (the early-return lines
   are missing from this extraction).  */
1632 || TREE_CODE (fndecl
) != FUNCTION_DECL
1633 || !DECL_BUILT_IN (fndecl
))
/* Delegate to the target hook that maps a builtin to its vector form.  */
1636 return targetm
.vectorize
.builtin_vectorized_function (fndecl
, vectype_out
,
1640 /* Function vectorizable_call.
1642 Check if STMT performs a function call that can be vectorized.
1643 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1644 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1645 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1648 vectorizable_call (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
1654 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
1655 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
1656 tree vectype_out
, vectype_in
;
1659 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1660 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1661 tree fndecl
, new_temp
, def
, rhs_type
;
1663 enum vect_def_type dt
[3]
1664 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
1665 gimple new_stmt
= NULL
;
1667 VEC(tree
, heap
) *vargs
= NULL
;
1668 enum { NARROW
, NONE
, WIDEN
} modifier
;
/* --- Applicability checks: relevant, internal-def, a call with an
   SSA_NAME lhs, and no internal EH throw.  */
1672 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
1675 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1678 /* Is STMT a vectorizable call? */
1679 if (!is_gimple_call (stmt
))
1682 if (TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
1685 if (stmt_can_throw_internal (stmt
))
1688 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
1690 /* Process function arguments. */
1691 rhs_type
= NULL_TREE
;
1692 vectype_in
= NULL_TREE
;
1693 nargs
= gimple_call_num_args (stmt
);
1695 /* Bail out if the function has more than three arguments, we do not have
1696 interesting builtin functions to vectorize with more than two arguments
1697 except for fma. No arguments is also not good. */
1698 if (nargs
== 0 || nargs
> 3)
/* Validate each argument: all must share one scalar type and one
   vector type, and each must be a "simple use".  */
1701 for (i
= 0; i
< nargs
; i
++)
1705 op
= gimple_call_arg (stmt
, i
);
1707 /* We can only handle calls with arguments of the same type. */
1709 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
1711 if (vect_print_dump_info (REPORT_DETAILS
))
1712 fprintf (vect_dump
, "argument types differ.");
1716 rhs_type
= TREE_TYPE (op
);
1718 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
1719 &def_stmt
, &def
, &dt
[i
], &opvectype
))
1721 if (vect_print_dump_info (REPORT_DETAILS
))
1722 fprintf (vect_dump
, "use not simple.");
1727 vectype_in
= opvectype
;
1729 && opvectype
!= vectype_in
)
1731 if (vect_print_dump_info (REPORT_DETAILS
))
1732 fprintf (vect_dump
, "argument vector types differ.");
1736 /* If all arguments are external or constant defs use a vector type with
1737 the same size as the output vector type. */
1739 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
1741 gcc_assert (vectype_in
);
1744 if (vect_print_dump_info (REPORT_DETAILS
))
1746 fprintf (vect_dump
, "no vectype for scalar type ");
1747 print_generic_expr (vect_dump
, rhs_type
, TDF_SLIM
);
/* Classify the lane-count relation between input and output vectors
   (sets `modifier' to WIDEN/NONE/NARROW; assignments are missing from
   this extraction).  */
1754 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
1755 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
1756 if (nunits_in
== nunits_out
/ 2)
1758 else if (nunits_out
== nunits_in
)
1760 else if (nunits_out
== nunits_in
/ 2)
1765 /* For now, we only vectorize functions if a target specific builtin
1766 is available. TODO -- in some cases, it might be profitable to
1767 insert the calls for pieces of the vector, in order to be able
1768 to vectorize other operations in the loop. */
1769 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
1770 if (fndecl
== NULL_TREE
)
1772 if (vect_print_dump_info (REPORT_DETAILS
))
1773 fprintf (vect_dump
, "function is not vectorizable.");
1778 gcc_assert (!gimple_vuse (stmt
));
/* Number of vector stmt copies needed to cover the vectorization
   factor.  */
1780 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
1782 else if (modifier
== NARROW
)
1783 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
1785 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
1787 /* Sanity check: make sure that at least one copy of the vectorized stmt
1788 needs to be generated. */
1789 gcc_assert (ncopies
>= 1);
1791 if (!vec_stmt
) /* transformation not required. */
1793 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1794 if (vect_print_dump_info (REPORT_DETAILS
))
1795 fprintf (vect_dump
, "=== vectorizable_call ===");
1796 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
/* --- Transformation --- */
1802 if (vect_print_dump_info (REPORT_DETAILS
))
1803 fprintf (vect_dump
, "transform call.");
1806 scalar_dest
= gimple_call_lhs (stmt
);
1807 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
1809 prev_stmt_info
= NULL
;
/* NONE modifier: one vectorized call per copy, same lane counts.  */
1813 for (j
= 0; j
< ncopies
; ++j
)
1815 /* Build argument list for the vectorized call. */
1817 vargs
= VEC_alloc (tree
, heap
, nargs
);
1819 VEC_truncate (tree
, vargs
, 0);
1823 VEC (slp_void_p
, heap
) *vec_defs
1824 = VEC_alloc (slp_void_p
, heap
, nargs
);
1825 VEC (tree
, heap
) *vec_oprnds0
;
1827 for (i
= 0; i
< nargs
; i
++)
1828 VEC_quick_push (tree
, vargs
, gimple_call_arg (stmt
, i
));
1829 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
1831 = (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 0);
1833 /* Arguments are ready. Create the new vector stmt. */
1834 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vec_oprnd0
)
1837 for (k
= 0; k
< nargs
; k
++)
1839 VEC (tree
, heap
) *vec_oprndsk
1840 = (VEC (tree
, heap
) *)
1841 VEC_index (slp_void_p
, vec_defs
, k
);
1842 VEC_replace (tree
, vargs
, k
,
1843 VEC_index (tree
, vec_oprndsk
, i
));
1845 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1846 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1847 gimple_call_set_lhs (new_stmt
, new_temp
);
1848 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1849 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
1853 for (i
= 0; i
< nargs
; i
++)
1855 VEC (tree
, heap
) *vec_oprndsi
1856 = (VEC (tree
, heap
) *)
1857 VEC_index (slp_void_p
, vec_defs
, i
);
1858 VEC_free (tree
, heap
, vec_oprndsi
);
1860 VEC_free (slp_void_p
, heap
, vec_defs
);
/* Non-SLP: j==0 takes the initial defs, later copies chain via
   vect_get_vec_def_for_stmt_copy.  */
1864 for (i
= 0; i
< nargs
; i
++)
1866 op
= gimple_call_arg (stmt
, i
);
1869 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1872 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
1874 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1877 VEC_quick_push (tree
, vargs
, vec_oprnd0
);
1880 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1881 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1882 gimple_call_set_lhs (new_stmt
, new_temp
);
1883 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Chain the generated copies through STMT_VINFO_RELATED_STMT.  */
1886 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
1888 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
1890 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* NARROW modifier: each vectorized call consumes two input vectors, so
   the argument list holds nargs * 2 entries.  */
1896 for (j
= 0; j
< ncopies
; ++j
)
1898 /* Build argument list for the vectorized call. */
1900 vargs
= VEC_alloc (tree
, heap
, nargs
* 2);
1902 VEC_truncate (tree
, vargs
, 0);
1906 VEC (slp_void_p
, heap
) *vec_defs
1907 = VEC_alloc (slp_void_p
, heap
, nargs
);
1908 VEC (tree
, heap
) *vec_oprnds0
;
1910 for (i
= 0; i
< nargs
; i
++)
1911 VEC_quick_push (tree
, vargs
, gimple_call_arg (stmt
, i
));
1912 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
1914 = (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 0);
1916 /* Arguments are ready. Create the new vector stmt. */
1917 for (i
= 0; VEC_iterate (tree
, vec_oprnds0
, i
, vec_oprnd0
);
1921 VEC_truncate (tree
, vargs
, 0);
1922 for (k
= 0; k
< nargs
; k
++)
1924 VEC (tree
, heap
) *vec_oprndsk
1925 = (VEC (tree
, heap
) *)
1926 VEC_index (slp_void_p
, vec_defs
, k
);
1927 VEC_quick_push (tree
, vargs
,
1928 VEC_index (tree
, vec_oprndsk
, i
));
1929 VEC_quick_push (tree
, vargs
,
1930 VEC_index (tree
, vec_oprndsk
, i
+ 1));
1932 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1933 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1934 gimple_call_set_lhs (new_stmt
, new_temp
);
1935 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1936 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
1940 for (i
= 0; i
< nargs
; i
++)
1942 VEC (tree
, heap
) *vec_oprndsi
1943 = (VEC (tree
, heap
) *)
1944 VEC_index (slp_void_p
, vec_defs
, i
);
1945 VEC_free (tree
, heap
, vec_oprndsi
);
1947 VEC_free (slp_void_p
, heap
, vec_defs
);
/* Non-SLP narrow path: push two defs per argument per copy.  */
1951 for (i
= 0; i
< nargs
; i
++)
1953 op
= gimple_call_arg (stmt
, i
);
1957 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1959 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1963 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
1965 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
1967 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1970 VEC_quick_push (tree
, vargs
, vec_oprnd0
);
1971 VEC_quick_push (tree
, vargs
, vec_oprnd1
);
1974 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1975 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1976 gimple_call_set_lhs (new_stmt
, new_temp
);
1977 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1980 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
1982 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
1984 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
1987 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* WIDEN modifier falls through here.  */
1992 /* No current target implements this case. */
1996 VEC_free (tree
, heap
, vargs
);
1998 /* Update the exception handling table with the vector stmt if necessary. */
1999 if (maybe_clean_or_replace_eh_stmt (stmt
, *vec_stmt
))
2000 gimple_purge_dead_eh_edges (gimple_bb (stmt
));
2002 /* The call in STMT might prevent it from being removed in dce.
2003 We however cannot remove it here, due to the way the ssa name
2004 it defines is mapped to the new definition. So just replace
2005 rhs of the statement with something harmless. */
2010 type
= TREE_TYPE (scalar_dest
);
2011 if (is_pattern_stmt_p (stmt_info
))
2012 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2014 lhs
= gimple_call_lhs (stmt
);
/* Replace the scalar call by `lhs = 0' and transfer the stmt_info.  */
2015 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2016 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2017 set_vinfo_for_stmt (stmt
, NULL
);
2018 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2019 gsi_replace (gsi
, new_stmt
, false);
2020 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
2026 /* Function vect_gen_widened_results_half
2028 Create a vector stmt whose code, type, number of arguments, and result
2029 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
2030 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2031 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2032 needs to be created (DECL is a function-decl of a target-builtin).
2033 STMT is the original scalar stmt that we are vectorizing. */
2036 vect_gen_widened_results_half (enum tree_code code
,
2038 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
2039 tree vec_dest
, gimple_stmt_iterator
*gsi
,
2045 /* Generate half of the widened result: */
2046 if (code
== CALL_EXPR
)
2048 /* Target specific support */
/* Builtin path: one- or two-operand call to the target's DECL.  */
2049 if (op_type
== binary_op
)
2050 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
2052 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
2053 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2054 gimple_call_set_lhs (new_stmt
, new_temp
);
2058 /* Generic support */
2059 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
2060 if (op_type
!= binary_op
)
2062 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vec_oprnd0
,
2064 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2065 gimple_assign_set_lhs (new_stmt
, new_temp
);
/* Insert the half-result stmt and register vectorizer info for it.  */
2067 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2073 /* Get vectorized definitions for loop-based vectorization. For the first
2074 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2075 scalar operand), and for the rest we get a copy with
2076 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2077 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2078 The vectors are collected into VEC_OPRNDS. */
2081 vect_get_loop_based_defs (tree
*oprnd
, gimple stmt
, enum vect_def_type dt
,
2082 VEC (tree
, heap
) **vec_oprnds
, int multi_step_cvt
)
2086 /* Get first vector operand. */
2087 /* All the vector operands except the very first one (that is scalar oprnd)
2089 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
2090 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
2092 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
2094 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
2096 /* Get second vector operand. */
2097 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
2098 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
2102 /* For conversion in multiple steps, continue to get operands
/* Recurse one level per remaining conversion step.  */
2105 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
2109 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2110 For multi-step conversions store the resulting vectors and call the function
2114 vect_create_vectorized_demotion_stmts (VEC (tree
, heap
) **vec_oprnds
,
2115 int multi_step_cvt
, gimple stmt
,
2116 VEC (tree
, heap
) *vec_dsts
,
2117 gimple_stmt_iterator
*gsi
,
2118 slp_tree slp_node
, enum tree_code code
,
2119 stmt_vec_info
*prev_stmt_info
)
2122 tree vop0
, vop1
, new_tmp
, vec_dest
;
2124 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* The destination type for this step is on top of the VEC_DSTS stack.  */
2126 vec_dest
= VEC_pop (tree
, vec_dsts
);
/* Pairwise demotion: combine operands i and i+1 into one result vector.  */
2128 for (i
= 0; i
< VEC_length (tree
, *vec_oprnds
); i
+= 2)
2130 /* Create demotion operation. */
2131 vop0
= VEC_index (tree
, *vec_oprnds
, i
);
2132 vop1
= VEC_index (tree
, *vec_oprnds
, i
+ 1);
2133 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
2134 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
2135 gimple_assign_set_lhs (new_stmt
, new_tmp
);
2136 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2139 /* Store the resulting vector for next recursive call. */
2140 VEC_replace (tree
, *vec_oprnds
, i
/2, new_tmp
);
2143 /* This is the last step of the conversion sequence. Store the
2144 vectors in SLP_NODE or in vector info of the scalar statement
2145 (or in STMT_VINFO_RELATED_STMT chain). */
2147 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
2150 if (!*prev_stmt_info
)
2151 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2153 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
2155 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2160 /* For multi-step demotion operations we first generate demotion operations
2161 from the source type to the intermediate types, and then combine the
2162 results (stored in VEC_OPRNDS) in demotion operation to the destination
2166 /* At each level of recursion we have half of the operands we had at the
2168 VEC_truncate (tree
, *vec_oprnds
, (i
+1)/2);
2169 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
2170 stmt
, vec_dsts
, gsi
, slp_node
,
2171 VEC_PACK_TRUNC_EXPR
,
/* Push the destination back so callers can reuse the VEC_DSTS stack.  */
2175 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
2179 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2180 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2181 the resulting vectors and call the function recursively. */
2184 vect_create_vectorized_promotion_stmts (VEC (tree
, heap
) **vec_oprnds0
,
2185 VEC (tree
, heap
) **vec_oprnds1
,
2186 gimple stmt
, tree vec_dest
,
2187 gimple_stmt_iterator
*gsi
,
2188 enum tree_code code1
,
2189 enum tree_code code2
, tree decl1
,
2190 tree decl2
, int op_type
)
2193 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
2194 gimple new_stmt1
, new_stmt2
;
2195 VEC (tree
, heap
) *vec_tmp
= NULL
;
/* Widening doubles the number of result vectors: each input yields a
   lo half (CODE1/DECL1) and a hi half (CODE2/DECL2).  */
2197 vec_tmp
= VEC_alloc (tree
, heap
, VEC_length (tree
, *vec_oprnds0
) * 2);
2198 FOR_EACH_VEC_ELT (tree
, *vec_oprnds0
, i
, vop0
)
2200 if (op_type
== binary_op
)
2201 vop1
= VEC_index (tree
, *vec_oprnds1
, i
);
2205 /* Generate the two halves of promotion operation. */
2206 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
2207 op_type
, vec_dest
, gsi
, stmt
);
2208 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
2209 op_type
, vec_dest
, gsi
, stmt
);
/* The half may be either a builtin call or a plain assignment; fetch
   the def from the matching lhs accessor.  */
2210 if (is_gimple_call (new_stmt1
))
2212 new_tmp1
= gimple_call_lhs (new_stmt1
);
2213 new_tmp2
= gimple_call_lhs (new_stmt2
);
2217 new_tmp1
= gimple_assign_lhs (new_stmt1
);
2218 new_tmp2
= gimple_assign_lhs (new_stmt2
);
2221 /* Store the results for the next step. */
2222 VEC_quick_push (tree
, vec_tmp
, new_tmp1
);
2223 VEC_quick_push (tree
, vec_tmp
, new_tmp2
);
/* Hand the widened results back through *VEC_OPRNDS0.  */
2226 VEC_free (tree
, heap
, *vec_oprnds0
);
2227 *vec_oprnds0
= vec_tmp
;
2231 /* Check if STMT performs a conversion operation, that can be vectorized.
2232 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2233 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2234 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2237 vectorizable_conversion (gimple stmt
, gimple_stmt_iterator
*gsi
,
2238 gimple
*vec_stmt
, slp_tree slp_node
)
2242 tree op0
, op1
= NULL_TREE
;
2243 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2244 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2245 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2246 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
2247 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
2248 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
2252 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2253 gimple new_stmt
= NULL
;
2254 stmt_vec_info prev_stmt_info
;
2257 tree vectype_out
, vectype_in
;
2259 tree lhs_type
, rhs_type
;
2260 enum { NARROW
, NONE
, WIDEN
} modifier
;
2261 VEC (tree
,heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
2263 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2264 int multi_step_cvt
= 0;
2265 VEC (tree
, heap
) *vec_dsts
= NULL
, *interm_types
= NULL
;
2266 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
2268 enum machine_mode rhs_mode
;
2269 unsigned short fltsz
;
2271 /* Is STMT a vectorizable conversion? */
2273 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2276 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2279 if (!is_gimple_assign (stmt
))
2282 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
2285 code
= gimple_assign_rhs_code (stmt
);
2286 if (!CONVERT_EXPR_CODE_P (code
)
2287 && code
!= FIX_TRUNC_EXPR
2288 && code
!= FLOAT_EXPR
2289 && code
!= WIDEN_MULT_EXPR
2290 && code
!= WIDEN_LSHIFT_EXPR
)
2293 op_type
= TREE_CODE_LENGTH (code
);
2295 /* Check types of lhs and rhs. */
2296 scalar_dest
= gimple_assign_lhs (stmt
);
2297 lhs_type
= TREE_TYPE (scalar_dest
);
2298 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2300 op0
= gimple_assign_rhs1 (stmt
);
2301 rhs_type
= TREE_TYPE (op0
);
2303 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
2304 && !((INTEGRAL_TYPE_P (lhs_type
)
2305 && INTEGRAL_TYPE_P (rhs_type
))
2306 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
2307 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
2310 if ((INTEGRAL_TYPE_P (lhs_type
)
2311 && (TYPE_PRECISION (lhs_type
)
2312 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
2313 || (INTEGRAL_TYPE_P (rhs_type
)
2314 && (TYPE_PRECISION (rhs_type
)
2315 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
)))))
2317 if (vect_print_dump_info (REPORT_DETAILS
))
2319 "type conversion to/from bit-precision unsupported.");
2323 /* Check the operands of the operation. */
2324 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
2325 &def_stmt
, &def
, &dt
[0], &vectype_in
))
2327 if (vect_print_dump_info (REPORT_DETAILS
))
2328 fprintf (vect_dump
, "use not simple.");
2331 if (op_type
== binary_op
)
2335 op1
= gimple_assign_rhs2 (stmt
);
2336 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
2337 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2339 if (CONSTANT_CLASS_P (op0
))
2340 ok
= vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
,
2341 &def_stmt
, &def
, &dt
[1], &vectype_in
);
2343 ok
= vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
2348 if (vect_print_dump_info (REPORT_DETAILS
))
2349 fprintf (vect_dump
, "use not simple.");
2354 /* If op0 is an external or constant defs use a vector type of
2355 the same size as the output vector type. */
2357 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2359 gcc_assert (vectype_in
);
2362 if (vect_print_dump_info (REPORT_DETAILS
))
2364 fprintf (vect_dump
, "no vectype for scalar type ");
2365 print_generic_expr (vect_dump
, rhs_type
, TDF_SLIM
);
2371 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2372 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2373 if (nunits_in
< nunits_out
)
2375 else if (nunits_out
== nunits_in
)
2380 /* Multiple types in SLP are handled by creating the appropriate number of
2381 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2383 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2385 else if (modifier
== NARROW
)
2386 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2388 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2390 /* Sanity check: make sure that at least one copy of the vectorized stmt
2391 needs to be generated. */
2392 gcc_assert (ncopies
>= 1);
2394 /* Supportable by target? */
2398 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
2400 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
2405 if (vect_print_dump_info (REPORT_DETAILS
))
2406 fprintf (vect_dump
, "conversion not supported by target.");
2410 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
2411 &code1
, &code2
, &multi_step_cvt
,
2414 /* Binary widening operation can only be supported directly by the
2416 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
2420 if (code
!= FLOAT_EXPR
2421 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
2422 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
2425 rhs_mode
= TYPE_MODE (rhs_type
);
2426 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
2427 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
2428 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
2429 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
2432 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
2433 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
2434 if (cvt_type
== NULL_TREE
)
2437 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
2439 if (!supportable_convert_operation (code
, vectype_out
,
2440 cvt_type
, &decl1
, &codecvt1
))
2443 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
2444 cvt_type
, &codecvt1
,
2445 &codecvt2
, &multi_step_cvt
,
2449 gcc_assert (multi_step_cvt
== 0);
2451 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
2452 vectype_in
, &code1
, &code2
,
2453 &multi_step_cvt
, &interm_types
))
2457 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
2460 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
2461 codecvt2
= ERROR_MARK
;
2465 VEC_safe_push (tree
, heap
, interm_types
, cvt_type
);
2466 cvt_type
= NULL_TREE
;
2471 gcc_assert (op_type
== unary_op
);
2472 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
2473 &code1
, &multi_step_cvt
,
2477 if (code
!= FIX_TRUNC_EXPR
2478 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
2479 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
2482 rhs_mode
= TYPE_MODE (rhs_type
);
2484 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
2485 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
2486 if (cvt_type
== NULL_TREE
)
2488 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
2491 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
2492 &code1
, &multi_step_cvt
,
2501 if (!vec_stmt
) /* transformation not required. */
2503 if (vect_print_dump_info (REPORT_DETAILS
))
2504 fprintf (vect_dump
, "=== vectorizable_conversion ===");
2505 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
2507 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
2508 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2510 else if (modifier
== NARROW
)
2512 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
2513 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
2517 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
2518 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
2520 VEC_free (tree
, heap
, interm_types
);
2525 if (vect_print_dump_info (REPORT_DETAILS
))
2526 fprintf (vect_dump
, "transform conversion. ncopies = %d.", ncopies
);
2528 if (op_type
== binary_op
)
2530 if (CONSTANT_CLASS_P (op0
))
2531 op0
= fold_convert (TREE_TYPE (op1
), op0
);
2532 else if (CONSTANT_CLASS_P (op1
))
2533 op1
= fold_convert (TREE_TYPE (op0
), op1
);
2536 /* In case of multi-step conversion, we first generate conversion operations
2537 to the intermediate types, and then from that types to the final one.
2538 We create vector destinations for the intermediate type (TYPES) received
2539 from supportable_*_operation, and store them in the correct order
2540 for future use in vect_create_vectorized_*_stmts (). */
2541 vec_dsts
= VEC_alloc (tree
, heap
, multi_step_cvt
+ 1);
2542 vec_dest
= vect_create_destination_var (scalar_dest
,
2543 (cvt_type
&& modifier
== WIDEN
)
2544 ? cvt_type
: vectype_out
);
2545 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
2549 for (i
= VEC_length (tree
, interm_types
) - 1;
2550 VEC_iterate (tree
, interm_types
, i
, intermediate_type
); i
--)
2552 vec_dest
= vect_create_destination_var (scalar_dest
,
2554 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
2559 vec_dest
= vect_create_destination_var (scalar_dest
,
2561 ? vectype_out
: cvt_type
);
2565 if (modifier
== NONE
)
2566 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
2567 else if (modifier
== WIDEN
)
2569 vec_oprnds0
= VEC_alloc (tree
, heap
,
2571 ? vect_pow2 (multi_step_cvt
) : 1));
2572 if (op_type
== binary_op
)
2573 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
2576 vec_oprnds0
= VEC_alloc (tree
, heap
,
2578 ? vect_pow2 (multi_step_cvt
) : 1));
2580 else if (code
== WIDEN_LSHIFT_EXPR
)
2581 vec_oprnds1
= VEC_alloc (tree
, heap
, slp_node
->vec_stmts_size
);
2584 prev_stmt_info
= NULL
;
2588 for (j
= 0; j
< ncopies
; j
++)
2591 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
2594 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
2596 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
2598 /* Arguments are ready, create the new vector stmt. */
2599 if (code1
== CALL_EXPR
)
2601 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2602 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2603 gimple_call_set_lhs (new_stmt
, new_temp
);
2607 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
2608 new_stmt
= gimple_build_assign_with_ops (code1
, vec_dest
,
2610 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2611 gimple_assign_set_lhs (new_stmt
, new_temp
);
2614 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2616 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
2621 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2623 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2624 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2629 /* In case the vectorization factor (VF) is bigger than the number
2630 of elements that we can fit in a vectype (nunits), we have to
2631 generate more than one vector stmt - i.e - we need to "unroll"
2632 the vector stmt by a factor VF/nunits. */
2633 for (j
= 0; j
< ncopies
; j
++)
2640 if (code
== WIDEN_LSHIFT_EXPR
)
2645 /* Store vec_oprnd1 for every vector stmt to be created
2646 for SLP_NODE. We check during the analysis that all
2647 the shift arguments are the same. */
2648 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
2649 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
2651 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
2655 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
2656 &vec_oprnds1
, slp_node
, -1);
2660 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
2661 VEC_quick_push (tree
, vec_oprnds0
, vec_oprnd0
);
2662 if (op_type
== binary_op
)
2664 if (code
== WIDEN_LSHIFT_EXPR
)
2667 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
,
2669 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
2675 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
2676 VEC_truncate (tree
, vec_oprnds0
, 0);
2677 VEC_quick_push (tree
, vec_oprnds0
, vec_oprnd0
);
2678 if (op_type
== binary_op
)
2680 if (code
== WIDEN_LSHIFT_EXPR
)
2683 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
2685 VEC_truncate (tree
, vec_oprnds1
, 0);
2686 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
2690 /* Arguments are ready. Create the new vector stmts. */
2691 for (i
= multi_step_cvt
; i
>= 0; i
--)
2693 tree this_dest
= VEC_index (tree
, vec_dsts
, i
);
2694 enum tree_code c1
= code1
, c2
= code2
;
2695 if (i
== 0 && codecvt2
!= ERROR_MARK
)
2700 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
2702 stmt
, this_dest
, gsi
,
2703 c1
, c2
, decl1
, decl2
,
2707 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
2711 if (codecvt1
== CALL_EXPR
)
2713 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2714 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2715 gimple_call_set_lhs (new_stmt
, new_temp
);
2719 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
2720 new_temp
= make_ssa_name (vec_dest
, NULL
);
2721 new_stmt
= gimple_build_assign_with_ops (codecvt1
,
2726 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2729 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
2732 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
2736 if (!prev_stmt_info
)
2737 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2739 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2740 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2745 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2749 /* In case the vectorization factor (VF) is bigger than the number
2750 of elements that we can fit in a vectype (nunits), we have to
2751 generate more than one vector stmt - i.e - we need to "unroll"
2752 the vector stmt by a factor VF/nunits. */
2753 for (j
= 0; j
< ncopies
; j
++)
2757 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
2761 VEC_truncate (tree
, vec_oprnds0
, 0);
2762 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
2763 vect_pow2 (multi_step_cvt
) - 1);
2766 /* Arguments are ready. Create the new vector stmts. */
2768 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
2770 if (codecvt1
== CALL_EXPR
)
2772 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2773 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2774 gimple_call_set_lhs (new_stmt
, new_temp
);
2778 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
2779 new_temp
= make_ssa_name (vec_dest
, NULL
);
2780 new_stmt
= gimple_build_assign_with_ops (codecvt1
, new_temp
,
2784 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2785 VEC_replace (tree
, vec_oprnds0
, i
, new_temp
);
2788 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
2789 stmt
, vec_dsts
, gsi
,
2794 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2798 VEC_free (tree
, heap
, vec_oprnds0
);
2799 VEC_free (tree
, heap
, vec_oprnds1
);
2800 VEC_free (tree
, heap
, vec_dsts
);
2801 VEC_free (tree
, heap
, interm_types
);
2807 /* Function vectorizable_assignment.
2809 Check if STMT performs an assignment (copy) that can be vectorized.
2810 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2811 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2812 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2815 vectorizable_assignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
2816 gimple
*vec_stmt
, slp_tree slp_node
)
2821 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2822 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2823 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2827 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2828 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2831 VEC(tree
,heap
) *vec_oprnds
= NULL
;
2833 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2834 gimple new_stmt
= NULL
;
2835 stmt_vec_info prev_stmt_info
= NULL
;
2836 enum tree_code code
;
2839 /* Multiple types in SLP are handled by creating the appropriate number of
2840 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2842 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2845 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2847 gcc_assert (ncopies
>= 1);
2849 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2852 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2855 /* Is vectorizable assignment? */
2856 if (!is_gimple_assign (stmt
))
2859 scalar_dest
= gimple_assign_lhs (stmt
);
2860 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
2863 code
= gimple_assign_rhs_code (stmt
);
2864 if (gimple_assign_single_p (stmt
)
2865 || code
== PAREN_EXPR
2866 || CONVERT_EXPR_CODE_P (code
))
2867 op
= gimple_assign_rhs1 (stmt
);
2871 if (code
== VIEW_CONVERT_EXPR
)
2872 op
= TREE_OPERAND (op
, 0);
2874 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2875 &def_stmt
, &def
, &dt
[0], &vectype_in
))
2877 if (vect_print_dump_info (REPORT_DETAILS
))
2878 fprintf (vect_dump
, "use not simple.");
2882 /* We can handle NOP_EXPR conversions that do not change the number
2883 of elements or the vector size. */
2884 if ((CONVERT_EXPR_CODE_P (code
)
2885 || code
== VIEW_CONVERT_EXPR
)
2887 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
2888 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
2889 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
2892 /* We do not handle bit-precision changes. */
2893 if ((CONVERT_EXPR_CODE_P (code
)
2894 || code
== VIEW_CONVERT_EXPR
)
2895 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
2896 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
2897 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
2898 || ((TYPE_PRECISION (TREE_TYPE (op
))
2899 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
2900 /* But a conversion that does not change the bit-pattern is ok. */
2901 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
2902 > TYPE_PRECISION (TREE_TYPE (op
)))
2903 && TYPE_UNSIGNED (TREE_TYPE (op
))))
2905 if (vect_print_dump_info (REPORT_DETAILS
))
2906 fprintf (vect_dump
, "type conversion to/from bit-precision "
2911 if (!vec_stmt
) /* transformation not required. */
2913 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
2914 if (vect_print_dump_info (REPORT_DETAILS
))
2915 fprintf (vect_dump
, "=== vectorizable_assignment ===");
2916 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2921 if (vect_print_dump_info (REPORT_DETAILS
))
2922 fprintf (vect_dump
, "transform assignment.");
2925 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2928 for (j
= 0; j
< ncopies
; j
++)
2932 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
2934 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
2936 /* Arguments are ready. create the new vector stmt. */
2937 FOR_EACH_VEC_ELT (tree
, vec_oprnds
, i
, vop
)
2939 if (CONVERT_EXPR_CODE_P (code
)
2940 || code
== VIEW_CONVERT_EXPR
)
2941 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
2942 new_stmt
= gimple_build_assign (vec_dest
, vop
);
2943 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2944 gimple_assign_set_lhs (new_stmt
, new_temp
);
2945 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2947 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
2954 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2956 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2958 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2961 VEC_free (tree
, heap
, vec_oprnds
);
2966 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2967 either as shift by a scalar or by a vector. */
2970 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
2973 enum machine_mode vec_mode
;
2978 vectype
= get_vectype_for_scalar_type (scalar_type
);
2982 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
2984 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
2986 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
2988 || (optab_handler (optab
, TYPE_MODE (vectype
))
2989 == CODE_FOR_nothing
))
2993 vec_mode
= TYPE_MODE (vectype
);
2994 icode
= (int) optab_handler (optab
, vec_mode
);
2995 if (icode
== CODE_FOR_nothing
)
3002 /* Function vectorizable_shift.
3004 Check if STMT performs a shift operation that can be vectorized.
3005 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3006 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3007 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3010 vectorizable_shift (gimple stmt
, gimple_stmt_iterator
*gsi
,
3011 gimple
*vec_stmt
, slp_tree slp_node
)
3015 tree op0
, op1
= NULL
;
3016 tree vec_oprnd1
= NULL_TREE
;
3017 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3019 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3020 enum tree_code code
;
3021 enum machine_mode vec_mode
;
3025 enum machine_mode optab_op2_mode
;
3028 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3029 gimple new_stmt
= NULL
;
3030 stmt_vec_info prev_stmt_info
;
3037 VEC (tree
, heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
3040 bool scalar_shift_arg
= true;
3041 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3044 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3047 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3050 /* Is STMT a vectorizable binary/unary operation? */
3051 if (!is_gimple_assign (stmt
))
3054 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3057 code
= gimple_assign_rhs_code (stmt
);
3059 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
3060 || code
== RROTATE_EXPR
))
3063 scalar_dest
= gimple_assign_lhs (stmt
);
3064 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3065 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3066 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
3068 if (vect_print_dump_info (REPORT_DETAILS
))
3069 fprintf (vect_dump
, "bit-precision shifts not supported.");
3073 op0
= gimple_assign_rhs1 (stmt
);
3074 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3075 &def_stmt
, &def
, &dt
[0], &vectype
))
3077 if (vect_print_dump_info (REPORT_DETAILS
))
3078 fprintf (vect_dump
, "use not simple.");
3081 /* If op0 is an external or constant def use a vector type with
3082 the same size as the output vector type. */
3084 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
3086 gcc_assert (vectype
);
3089 if (vect_print_dump_info (REPORT_DETAILS
))
3091 fprintf (vect_dump
, "no vectype for scalar type ");
3092 print_generic_expr (vect_dump
, TREE_TYPE (op0
), TDF_SLIM
);
3098 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3099 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3100 if (nunits_out
!= nunits_in
)
3103 op1
= gimple_assign_rhs2 (stmt
);
3104 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3105 &def
, &dt
[1], &op1_vectype
))
3107 if (vect_print_dump_info (REPORT_DETAILS
))
3108 fprintf (vect_dump
, "use not simple.");
3113 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3117 /* Multiple types in SLP are handled by creating the appropriate number of
3118 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3120 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3123 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3125 gcc_assert (ncopies
>= 1);
3127 /* Determine whether the shift amount is a vector, or scalar. If the
3128 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3130 if (dt
[1] == vect_internal_def
&& !slp_node
)
3131 scalar_shift_arg
= false;
3132 else if (dt
[1] == vect_constant_def
3133 || dt
[1] == vect_external_def
3134 || dt
[1] == vect_internal_def
)
3136 /* In SLP, need to check whether the shift count is the same,
3137 in loops if it is a constant or invariant, it is always
3141 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3144 FOR_EACH_VEC_ELT (gimple
, stmts
, k
, slpstmt
)
3145 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
3146 scalar_shift_arg
= false;
3151 if (vect_print_dump_info (REPORT_DETAILS
))
3152 fprintf (vect_dump
, "operand mode requires invariant argument.");
3156 /* Vector shifted by vector. */
3157 if (!scalar_shift_arg
)
3159 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3160 if (vect_print_dump_info (REPORT_DETAILS
))
3161 fprintf (vect_dump
, "vector/vector shift/rotate found.");
3163 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
3164 if (op1_vectype
== NULL_TREE
3165 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
3167 if (vect_print_dump_info (REPORT_DETAILS
))
3168 fprintf (vect_dump
, "unusable type for last operand in"
3169 " vector/vector shift/rotate.");
3173 /* See if the machine has a vector shifted by scalar insn and if not
3174 then see if it has a vector shifted by vector insn. */
3177 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
3179 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
3181 if (vect_print_dump_info (REPORT_DETAILS
))
3182 fprintf (vect_dump
, "vector/scalar shift/rotate found.");
3186 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3188 && (optab_handler (optab
, TYPE_MODE (vectype
))
3189 != CODE_FOR_nothing
))
3191 scalar_shift_arg
= false;
3193 if (vect_print_dump_info (REPORT_DETAILS
))
3194 fprintf (vect_dump
, "vector/vector shift/rotate found.");
3196 /* Unlike the other binary operators, shifts/rotates have
3197 the rhs being int, instead of the same type as the lhs,
3198 so make sure the scalar is the right type if we are
3199 dealing with vectors of long long/long/short/char. */
3200 if (dt
[1] == vect_constant_def
)
3201 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
3202 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
3206 && TYPE_MODE (TREE_TYPE (vectype
))
3207 != TYPE_MODE (TREE_TYPE (op1
)))
3209 if (vect_print_dump_info (REPORT_DETAILS
))
3210 fprintf (vect_dump
, "unusable type for last operand in"
3211 " vector/vector shift/rotate.");
3214 if (vec_stmt
&& !slp_node
)
3216 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
3217 op1
= vect_init_vector (stmt
, op1
,
3218 TREE_TYPE (vectype
), NULL
);
3225 /* Supportable by target? */
3228 if (vect_print_dump_info (REPORT_DETAILS
))
3229 fprintf (vect_dump
, "no optab.");
3232 vec_mode
= TYPE_MODE (vectype
);
3233 icode
= (int) optab_handler (optab
, vec_mode
);
3234 if (icode
== CODE_FOR_nothing
)
3236 if (vect_print_dump_info (REPORT_DETAILS
))
3237 fprintf (vect_dump
, "op not supported by target.");
3238 /* Check only during analysis. */
3239 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
3240 || (vf
< vect_min_worthwhile_factor (code
)
3243 if (vect_print_dump_info (REPORT_DETAILS
))
3244 fprintf (vect_dump
, "proceeding using word mode.");
3247 /* Worthwhile without SIMD support? Check only during analysis. */
3248 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
3249 && vf
< vect_min_worthwhile_factor (code
)
3252 if (vect_print_dump_info (REPORT_DETAILS
))
3253 fprintf (vect_dump
, "not worthwhile without SIMD support.");
3257 if (!vec_stmt
) /* transformation not required. */
3259 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
3260 if (vect_print_dump_info (REPORT_DETAILS
))
3261 fprintf (vect_dump
, "=== vectorizable_shift ===");
3262 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3268 if (vect_print_dump_info (REPORT_DETAILS
))
3269 fprintf (vect_dump
, "transform binary/unary operation.");
3272 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3274 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3275 created in the previous stages of the recursion, so no allocation is
3276 needed, except for the case of shift with scalar shift argument. In that
3277 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3278 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3279 In case of loop-based vectorization we allocate VECs of size 1. We
3280 allocate VEC_OPRNDS1 only in case of binary operation. */
3283 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
3284 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
3286 else if (scalar_shift_arg
)
3287 vec_oprnds1
= VEC_alloc (tree
, heap
, slp_node
->vec_stmts_size
);
3289 prev_stmt_info
= NULL
;
3290 for (j
= 0; j
< ncopies
; j
++)
3295 if (scalar_shift_arg
)
3297 /* Vector shl and shr insn patterns can be defined with scalar
3298 operand 2 (shift operand). In this case, use constant or loop
3299 invariant op1 directly, without extending it to vector mode
3301 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
3302 if (!VECTOR_MODE_P (optab_op2_mode
))
3304 if (vect_print_dump_info (REPORT_DETAILS
))
3305 fprintf (vect_dump
, "operand 1 using scalar mode.");
3307 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
3310 /* Store vec_oprnd1 for every vector stmt to be created
3311 for SLP_NODE. We check during the analysis that all
3312 the shift arguments are the same.
3313 TODO: Allow different constants for different vector
3314 stmts generated for an SLP instance. */
3315 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3316 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
3321 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3322 (a special case for certain kind of vector shifts); otherwise,
3323 operand 1 should be of a vector type (the usual case). */
3325 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3328 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
3332 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
3334 /* Arguments are ready. Create the new vector stmt. */
3335 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
3337 vop1
= VEC_index (tree
, vec_oprnds1
, i
);
3338 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
3339 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3340 gimple_assign_set_lhs (new_stmt
, new_temp
);
3341 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3343 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
3350 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3352 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3353 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3356 VEC_free (tree
, heap
, vec_oprnds0
);
3357 VEC_free (tree
, heap
, vec_oprnds1
);
3363 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
,
3364 gimple_stmt_iterator
*);
3367 /* Function vectorizable_operation.
3369 Check if STMT performs a binary, unary or ternary operation that can
3371 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3372 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3373 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3376 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
3377 gimple
*vec_stmt
, slp_tree slp_node
)
3381 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
3382 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3384 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3385 enum tree_code code
;
3386 enum machine_mode vec_mode
;
3393 enum vect_def_type dt
[3]
3394 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
3395 gimple new_stmt
= NULL
;
3396 stmt_vec_info prev_stmt_info
;
3402 VEC(tree
,heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
, *vec_oprnds2
= NULL
;
3403 tree vop0
, vop1
, vop2
;
3404 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3407 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3410 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3413 /* Is STMT a vectorizable binary/unary operation? */
3414 if (!is_gimple_assign (stmt
))
3417 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3420 code
= gimple_assign_rhs_code (stmt
);
3422 /* For pointer addition, we should use the normal plus for
3423 the vector addition. */
3424 if (code
== POINTER_PLUS_EXPR
)
3427 /* Support only unary or binary operations. */
3428 op_type
= TREE_CODE_LENGTH (code
);
3429 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
3431 if (vect_print_dump_info (REPORT_DETAILS
))
3432 fprintf (vect_dump
, "num. args = %d (not unary/binary/ternary op).",
3437 scalar_dest
= gimple_assign_lhs (stmt
);
3438 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3440 /* Most operations cannot handle bit-precision types without extra
3442 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3443 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
3444 /* Exception are bitwise binary operations. */
3445 && code
!= BIT_IOR_EXPR
3446 && code
!= BIT_XOR_EXPR
3447 && code
!= BIT_AND_EXPR
)
3449 if (vect_print_dump_info (REPORT_DETAILS
))
3450 fprintf (vect_dump
, "bit-precision arithmetic not supported.");
3454 op0
= gimple_assign_rhs1 (stmt
);
3455 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3456 &def_stmt
, &def
, &dt
[0], &vectype
))
3458 if (vect_print_dump_info (REPORT_DETAILS
))
3459 fprintf (vect_dump
, "use not simple.");
3462 /* If op0 is an external or constant def use a vector type with
3463 the same size as the output vector type. */
3465 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
3467 gcc_assert (vectype
);
3470 if (vect_print_dump_info (REPORT_DETAILS
))
3472 fprintf (vect_dump
, "no vectype for scalar type ");
3473 print_generic_expr (vect_dump
, TREE_TYPE (op0
), TDF_SLIM
);
3479 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3480 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3481 if (nunits_out
!= nunits_in
)
3484 if (op_type
== binary_op
|| op_type
== ternary_op
)
3486 op1
= gimple_assign_rhs2 (stmt
);
3487 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3490 if (vect_print_dump_info (REPORT_DETAILS
))
3491 fprintf (vect_dump
, "use not simple.");
3495 if (op_type
== ternary_op
)
3497 op2
= gimple_assign_rhs3 (stmt
);
3498 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3501 if (vect_print_dump_info (REPORT_DETAILS
))
3502 fprintf (vect_dump
, "use not simple.");
3508 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3512 /* Multiple types in SLP are handled by creating the appropriate number of
3513 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3515 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3518 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3520 gcc_assert (ncopies
>= 1);
3522 /* Shifts are handled in vectorizable_shift (). */
3523 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
3524 || code
== RROTATE_EXPR
)
3527 /* Supportable by target? */
3529 vec_mode
= TYPE_MODE (vectype
);
3530 if (code
== MULT_HIGHPART_EXPR
)
3532 if (can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
)))
3533 icode
= LAST_INSN_CODE
;
3535 icode
= CODE_FOR_nothing
;
3539 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
3542 if (vect_print_dump_info (REPORT_DETAILS
))
3543 fprintf (vect_dump
, "no optab.");
3546 icode
= (int) optab_handler (optab
, vec_mode
);
3549 if (icode
== CODE_FOR_nothing
)
3551 if (vect_print_dump_info (REPORT_DETAILS
))
3552 fprintf (vect_dump
, "op not supported by target.");
3553 /* Check only during analysis. */
3554 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
3555 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
3557 if (vect_print_dump_info (REPORT_DETAILS
))
3558 fprintf (vect_dump
, "proceeding using word mode.");
3561 /* Worthwhile without SIMD support? Check only during analysis. */
3562 if (!VECTOR_MODE_P (vec_mode
)
3564 && vf
< vect_min_worthwhile_factor (code
))
3566 if (vect_print_dump_info (REPORT_DETAILS
))
3567 fprintf (vect_dump
, "not worthwhile without SIMD support.");
3571 if (!vec_stmt
) /* transformation not required. */
3573 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
3574 if (vect_print_dump_info (REPORT_DETAILS
))
3575 fprintf (vect_dump
, "=== vectorizable_operation ===");
3576 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3582 if (vect_print_dump_info (REPORT_DETAILS
))
3583 fprintf (vect_dump
, "transform binary/unary operation.");
3586 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3588 /* In case the vectorization factor (VF) is bigger than the number
3589 of elements that we can fit in a vectype (nunits), we have to generate
3590 more than one vector stmt - i.e - we need to "unroll" the
3591 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3592 from one copy of the vector stmt to the next, in the field
3593 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3594 stages to find the correct vector defs to be used when vectorizing
3595 stmts that use the defs of the current stmt. The example below
3596 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3597 we need to create 4 vectorized stmts):
3599 before vectorization:
3600 RELATED_STMT VEC_STMT
3604 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3606 RELATED_STMT VEC_STMT
3607 VS1_0: vx0 = memref0 VS1_1 -
3608 VS1_1: vx1 = memref1 VS1_2 -
3609 VS1_2: vx2 = memref2 VS1_3 -
3610 VS1_3: vx3 = memref3 - -
3611 S1: x = load - VS1_0
3614 step2: vectorize stmt S2 (done here):
3615 To vectorize stmt S2 we first need to find the relevant vector
3616 def for the first operand 'x'. This is, as usual, obtained from
3617 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3618 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3619 relevant vector def 'vx0'. Having found 'vx0' we can generate
3620 the vector stmt VS2_0, and as usual, record it in the
3621 STMT_VINFO_VEC_STMT of stmt S2.
3622 When creating the second copy (VS2_1), we obtain the relevant vector
3623 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3624 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3625 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3626 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3627 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3628 chain of stmts and pointers:
3629 RELATED_STMT VEC_STMT
3630 VS1_0: vx0 = memref0 VS1_1 -
3631 VS1_1: vx1 = memref1 VS1_2 -
3632 VS1_2: vx2 = memref2 VS1_3 -
3633 VS1_3: vx3 = memref3 - -
3634 S1: x = load - VS1_0
3635 VS2_0: vz0 = vx0 + v1 VS2_1 -
3636 VS2_1: vz1 = vx1 + v1 VS2_2 -
3637 VS2_2: vz2 = vx2 + v1 VS2_3 -
3638 VS2_3: vz3 = vx3 + v1 - -
3639 S2: z = x + 1 - VS2_0 */
3641 prev_stmt_info
= NULL
;
3642 for (j
= 0; j
< ncopies
; j
++)
3647 if (op_type
== binary_op
|| op_type
== ternary_op
)
3648 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
3651 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3653 if (op_type
== ternary_op
)
3655 vec_oprnds2
= VEC_alloc (tree
, heap
, 1);
3656 VEC_quick_push (tree
, vec_oprnds2
,
3657 vect_get_vec_def_for_operand (op2
, stmt
, NULL
));
3662 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
3663 if (op_type
== ternary_op
)
3665 tree vec_oprnd
= VEC_pop (tree
, vec_oprnds2
);
3666 VEC_quick_push (tree
, vec_oprnds2
,
3667 vect_get_vec_def_for_stmt_copy (dt
[2],
3672 /* Arguments are ready. Create the new vector stmt. */
3673 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
3675 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
3676 ? VEC_index (tree
, vec_oprnds1
, i
) : NULL_TREE
);
3677 vop2
= ((op_type
== ternary_op
)
3678 ? VEC_index (tree
, vec_oprnds2
, i
) : NULL_TREE
);
3679 new_stmt
= gimple_build_assign_with_ops3 (code
, vec_dest
,
3681 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3682 gimple_assign_set_lhs (new_stmt
, new_temp
);
3683 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3685 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
3692 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3694 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3695 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3698 VEC_free (tree
, heap
, vec_oprnds0
);
3700 VEC_free (tree
, heap
, vec_oprnds1
);
3702 VEC_free (tree
, heap
, vec_oprnds2
);
/* NOTE(review): this region appears to be a lossy, line-split extraction of
   GCC's tree-vect-stmts.c.  The numbers embedded in the text below ("3717",
   "3723", ...) are the ORIGINAL file's line numbers; the jumps between them
   show that many original lines (closing braces, `return false;` bodies,
   some declarations) were dropped by the extraction.  Only comments are
   added here; every code token is left byte-identical.  Recover the full
   function from the upstream file before attempting any code change.  */
3708 /* Function vectorizable_store.
3710 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3712 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3713 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3714 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3717 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
/* Local state: the scalar data-ref being stored, its vector type, and the
   loop/basic-block vectorization contexts it belongs to.  */
3723 tree vec_oprnd
= NULL_TREE
;
3724 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3725 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
3726 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3728 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3729 struct loop
*loop
= NULL
;
3730 enum machine_mode vec_mode
;
3732 enum dr_alignment_support alignment_support_scheme
;
3735 enum vect_def_type dt
;
3736 stmt_vec_info prev_stmt_info
= NULL
;
3737 tree dataref_ptr
= NULL_TREE
;
3738 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3741 gimple next_stmt
, first_stmt
= NULL
;
3742 bool grouped_store
= false;
3743 bool store_lanes_p
= false;
3744 unsigned int group_size
, i
;
/* DR_CHAIN / OPRNDS / RESULT_CHAIN collect per-group vector defs; see the
   long explanatory comment further down (original lines 3989-3995).  */
3745 VEC(tree
,heap
) *dr_chain
= NULL
, *oprnds
= NULL
, *result_chain
= NULL
;
3747 VEC(tree
,heap
) *vec_oprnds
= NULL
;
3748 bool slp
= (slp_node
!= NULL
);
3749 unsigned int vec_num
;
3750 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3754 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3756 /* Multiple types in SLP are handled by creating the appropriate number of
3757 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
/* NCOPIES = vectorization factor / units per vector; 1 for pure SLP.  */
3759 if (slp
|| PURE_SLP_STMT (stmt_info
))
3762 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3764 gcc_assert (ncopies
>= 1);
3766 /* FORNOW. This restriction should be relaxed. */
3767 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
3769 if (vect_print_dump_info (REPORT_DETAILS
))
3770 fprintf (vect_dump
, "multiple types in nested loop.");
/* Early-out guards: the stmt must be relevant, an internal def, and a
   plain GIMPLE assignment.  (The `return false;` bodies of these guards
   are among the lines lost in extraction.)  */
3774 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3777 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3780 /* Is vectorizable store? */
3782 if (!is_gimple_assign (stmt
))
/* The LHS must be a memory reference (after stripping a pattern-stmt
   VIEW_CONVERT_EXPR wrapper), otherwise this is not a store.  */
3785 scalar_dest
= gimple_assign_lhs (stmt
);
3786 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
3787 && is_pattern_stmt_p (stmt_info
))
3788 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
3789 if (TREE_CODE (scalar_dest
) != ARRAY_REF
3790 && TREE_CODE (scalar_dest
) != INDIRECT_REF
3791 && TREE_CODE (scalar_dest
) != COMPONENT_REF
3792 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
3793 && TREE_CODE (scalar_dest
) != REALPART_EXPR
3794 && TREE_CODE (scalar_dest
) != MEM_REF
)
/* The stored value (RHS) must have a "simple" vectorizable def.  */
3797 gcc_assert (gimple_assign_single_p (stmt
));
3798 op
= gimple_assign_rhs1 (stmt
);
3799 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3802 if (vect_print_dump_info (REPORT_DETAILS
))
3803 fprintf (vect_dump
, "use not simple.");
3807 elem_type
= TREE_TYPE (vectype
);
3808 vec_mode
= TYPE_MODE (vectype
);
3810 /* FORNOW. In some cases can vectorize even if data-type not supported
3811 (e.g. - array initialization with 0). */
3812 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
3815 if (!STMT_VINFO_DATA_REF (stmt_info
))
/* Negative-step stores are rejected here (unlike loads, which handle
   the reverse case).  */
3818 if (tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
3819 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
3820 size_zero_node
) < 0)
3822 if (vect_print_dump_info (REPORT_DETAILS
))
3823 fprintf (vect_dump
, "negative step for store.");
/* Grouped (interleaved) store: decide between store-lanes and a
   permute-based strategy, and validate every member of the group.  */
3827 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3829 grouped_store
= true;
3830 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
3831 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
3833 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3834 if (vect_store_lanes_supported (vectype
, group_size
))
3835 store_lanes_p
= true;
3836 else if (!vect_grouped_store_supported (vectype
, group_size
))
3840 if (first_stmt
== stmt
)
3842 /* STMT is the leader of the group. Check the operands of all the
3843 stmts of the group. */
3844 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
3847 gcc_assert (gimple_assign_single_p (next_stmt
));
3848 op
= gimple_assign_rhs1 (next_stmt
);
3849 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
3850 &def_stmt
, &def
, &dt
))
3852 if (vect_print_dump_info (REPORT_DETAILS
))
3853 fprintf (vect_dump
, "use not simple.");
3856 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
/* Analysis phase only: record the stmt kind and its cost, then return
   (transformation happens on a later call with VEC_STMT set).  */
3861 if (!vec_stmt
) /* transformation not required. */
3863 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
3864 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
/* ---- Transformation phase starts here. ---- */
3873 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3874 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
/* Count group members seen so far; the whole group is emitted only when
   its last member is reached (see assert + check below).  */
3876 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
3879 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
3881 /* We vectorize all the stmts of the interleaving group when we
3882 reach the last stmt in the group. */
3883 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
3884 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
/* SLP path: one vector stmt per SLP def; no extra permutation needed.  */
3893 grouped_store
= false;
3894 /* VEC_NUM is the number of vect stmts to be created for this
3896 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3897 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
3898 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3899 op
= gimple_assign_rhs1 (first_stmt
);
3902 /* VEC_NUM is the number of vect stmts to be created for this
3904 vec_num
= group_size
;
3910 group_size
= vec_num
= 1;
3913 if (vect_print_dump_info (REPORT_DETAILS
))
3914 fprintf (vect_dump
, "transform store. ncopies = %d",ncopies
);
3916 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
3917 oprnds
= VEC_alloc (tree
, heap
, group_size
);
3919 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
3920 gcc_assert (alignment_support_scheme
);
3921 /* Targets with store-lane instructions must not require explicit
3923 gcc_assert (!store_lanes_p
3924 || alignment_support_scheme
== dr_aligned
3925 || alignment_support_scheme
== dr_unaligned_supported
);
/* Store-lanes writes an array of VEC_NUM vectors in one go; otherwise
   each vector stmt writes one VECTYPE-sized chunk.  */
3928 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
3930 aggr_type
= vectype
;
3932 /* In case the vectorization factor (VF) is bigger than the number
3933 of elements that we can fit in a vectype (nunits), we have to generate
3934 more than one vector stmt - i.e - we need to "unroll" the
3935 vector stmt by a factor VF/nunits. For more details see documentation in
3936 vect_get_vec_def_for_copy_stmt. */
3938 /* In case of interleaving (non-unit grouped access):
3945 We create vectorized stores starting from base address (the access of the
3946 first stmt in the chain (S2 in the above example), when the last store stmt
3947 of the chain (S4) is reached:
3950 VS2: &base + vec_size*1 = vx0
3951 VS3: &base + vec_size*2 = vx1
3952 VS4: &base + vec_size*3 = vx3
3954 Then permutation statements are generated:
3956 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
3957 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
3960 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3961 (the order of the data-refs in the output of vect_permute_store_chain
3962 corresponds to the order of scalar stmts in the interleaving chain - see
3963 the documentation of vect_permute_store_chain()).
3965 In case of both multiple types and interleaving, above vector stores and
3966 permutation stmts are created for every copy. The result vector stmts are
3967 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3968 STMT_VINFO_RELATED_STMT for the next copies.
/* Main emission loop: one iteration per vector copy.  */
3971 prev_stmt_info
= NULL
;
3972 for (j
= 0; j
< ncopies
; j
++)
/* First iteration (j == 0): gather initial vector defs and create the
   data-ref pointer.  */
3981 /* Get vectorized arguments for SLP_NODE. */
3982 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
3983 NULL
, slp_node
, -1);
3985 vec_oprnd
= VEC_index (tree
, vec_oprnds
, 0);
3989 /* For interleaved stores we collect vectorized defs for all the
3990 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3991 used as an input to vect_permute_store_chain(), and OPRNDS as
3992 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3994 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
3995 OPRNDS are of size 1. */
3996 next_stmt
= first_stmt
;
3997 for (i
= 0; i
< group_size
; i
++)
3999 /* Since gaps are not supported for interleaved stores,
4000 GROUP_SIZE is the exact number of stmts in the chain.
4001 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4002 there is no interleaving, GROUP_SIZE is 1, and only one
4003 iteration of the loop will be executed. */
4004 gcc_assert (next_stmt
4005 && gimple_assign_single_p (next_stmt
));
4006 op
= gimple_assign_rhs1 (next_stmt
);
4008 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
4010 VEC_quick_push(tree
, dr_chain
, vec_oprnd
);
4011 VEC_quick_push(tree
, oprnds
, vec_oprnd
);
4012 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4016 /* We should have catched mismatched types earlier. */
4017 gcc_assert (useless_type_conversion_p (vectype
,
4018 TREE_TYPE (vec_oprnd
)));
4019 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, aggr_type
, NULL
,
4020 NULL_TREE
, &dummy
, gsi
,
4021 &ptr_incr
, false, &inv_p
);
4022 gcc_assert (bb_vinfo
|| !inv_p
);
/* Subsequent iterations (j > 0): advance each def to the next copy and
   bump the data-ref pointer by one aggregate.  */
4026 /* For interleaved stores we created vectorized defs for all the
4027 defs stored in OPRNDS in the previous iteration (previous copy).
4028 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4029 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4031 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4032 OPRNDS are of size 1. */
4033 for (i
= 0; i
< group_size
; i
++)
4035 op
= VEC_index (tree
, oprnds
, i
);
4036 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4038 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
4039 VEC_replace(tree
, dr_chain
, i
, vec_oprnd
);
4040 VEC_replace(tree
, oprnds
, i
, vec_oprnd
);
4042 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
4043 TYPE_SIZE_UNIT (aggr_type
));
/* Store-lanes path: pack the group's vectors into an array object and
   emit a single IFN_STORE_LANES internal call.  */
4050 /* Combine all the vectors into an array. */
4051 vec_array
= create_vector_array (vectype
, vec_num
);
4052 for (i
= 0; i
< vec_num
; i
++)
4054 vec_oprnd
= VEC_index (tree
, dr_chain
, i
);
4055 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
4059 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4060 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
4061 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
4062 gimple_call_set_lhs (new_stmt
, data_ref
);
4063 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Permute path: interleave the group's vectors, then emit one MEM_REF
   store per resulting vector.  */
4070 result_chain
= VEC_alloc (tree
, heap
, group_size
);
4072 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
4076 next_stmt
= first_stmt
;
4077 for (i
= 0; i
< vec_num
; i
++)
4079 unsigned align
, misalign
;
4082 /* Bump the vector pointer. */
4083 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
4087 vec_oprnd
= VEC_index (tree
, vec_oprnds
, i
);
4088 else if (grouped_store
)
4089 /* For grouped stores vectorized defs are interleaved in
4090 vect_permute_store_chain(). */
4091 vec_oprnd
= VEC_index (tree
, result_chain
, i
);
4093 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
4094 build_int_cst (reference_alias_ptr_type
4095 (DR_REF (first_dr
)), 0));
/* Encode what is known about alignment on the MEM_REF: full vector
   alignment, element alignment (unknown misalignment), or element
   alignment with a known misalignment value.  */
4096 align
= TYPE_ALIGN_UNIT (vectype
);
4097 if (aligned_access_p (first_dr
))
4099 else if (DR_MISALIGNMENT (first_dr
) == -1)
4101 TREE_TYPE (data_ref
)
4102 = build_aligned_type (TREE_TYPE (data_ref
),
4103 TYPE_ALIGN (elem_type
));
4104 align
= TYPE_ALIGN_UNIT (elem_type
);
4109 TREE_TYPE (data_ref
)
4110 = build_aligned_type (TREE_TYPE (data_ref
),
4111 TYPE_ALIGN (elem_type
));
4112 misalign
= DR_MISALIGNMENT (first_dr
);
4114 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
4117 /* Arguments are ready. Create the new vector stmt. */
4118 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
4119 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4124 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
/* Chain the copies: first copy goes in STMT_VINFO_VEC_STMT, later ones
   hang off STMT_VINFO_RELATED_STMT of the previous copy.  */
4132 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4134 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4135 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Release the temporary operand vectors.  */
4139 VEC_free (tree
, heap
, dr_chain
);
4140 VEC_free (tree
, heap
, oprnds
);
4142 VEC_free (tree
, heap
, result_chain
);
4144 VEC_free (tree
, heap
, vec_oprnds
);
/* NOTE(review): lossy line-split extraction -- embedded numbers ("4154",
   "4156", ...) are the original file's line numbers, and some original
   lines (return type, braces, `return NULL`/`return mask_vec`, the int
   declarations of i/nunits) are missing.  Comments only added here.  */
4149 /* Given a vector type VECTYPE and permutation SEL returns
4150 the VECTOR_CST mask that implements the permutation of the
4151 vector elements. If that is impossible to do, returns NULL. */
4154 vect_gen_perm_mask (tree vectype
, unsigned char *sel
)
4156 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
4159 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
/* Bail out (returns NULL per the header comment) if the target cannot
   perform this constant permutation on VECTYPE's mode.  */
4161 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
/* Mask elements are integers of the same width as VECTYPE's elements.  */
4164 mask_elt_type
= lang_hooks
.types
.type_for_mode
4165 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
4166 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
/* Materialize SEL[0..nunits-1] as a VECTOR_CST of mask_type.  */
4168 mask_elts
= XALLOCAVEC (tree
, nunits
);
4169 for (i
= nunits
- 1; i
>= 0; i
--)
4170 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
4171 mask_vec
= build_vector (mask_type
, mask_elts
);
/* NOTE(review): lossy line-split extraction -- the return-type line,
   braces and the i/nunits/sel declarations are missing from this text.
   Comments only added here; code tokens unchanged.  */
4176 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4177 reversal of the vector elements. If that is impossible to do,
4181 perm_mask_for_reverse (tree vectype
)
4186 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4187 sel
= XALLOCAVEC (unsigned char, nunits
);
/* Build the element-reversing selector {nunits-1, ..., 1, 0}.  */
4189 for (i
= 0; i
< nunits
; ++i
)
4190 sel
[i
] = nunits
- 1 - i
;
/* Delegate mask construction (and the target-support check) to
   vect_gen_perm_mask; NULL propagates back if unsupported.  */
4192 return vect_gen_perm_mask (vectype
, sel
);
/* NOTE(review): lossy line-split extraction -- the return-type line, the
   braces, the perm_stmt declaration, part of the VEC_PERM_EXPR operand
   list and the final `return data_ref;` are missing from this text.
   Comments only added here; code tokens unchanged.  */
4195 /* Given a vector variable X and Y, that was generated for the scalar
4196 STMT, generate instructions to permute the vector elements of X and Y
4197 using permutation mask MASK_VEC, insert them at *GSI and return the
4198 permuted vector variable. */
4201 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple stmt
,
4202 gimple_stmt_iterator
*gsi
)
4204 tree vectype
= TREE_TYPE (x
);
4205 tree perm_dest
, data_ref
;
/* Fresh SSA destination named after STMT's scalar LHS.  */
4208 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
4209 data_ref
= make_ssa_name (perm_dest
, NULL
);
4211 /* Generate the permute statement. */
4212 perm_stmt
= gimple_build_assign_with_ops3 (VEC_PERM_EXPR
, data_ref
,
/* Insert the permute at *GSI via the common vectorizer helper.  */
4214 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
4219 /* vectorizable_load.
4221 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4223 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4224 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4225 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4228 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
4229 slp_tree slp_node
, slp_instance slp_node_instance
)
4232 tree vec_dest
= NULL
;
4233 tree data_ref
= NULL
;
4234 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4235 stmt_vec_info prev_stmt_info
;
4236 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4237 struct loop
*loop
= NULL
;
4238 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
4239 bool nested_in_vect_loop
= false;
4240 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
4241 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4244 enum machine_mode mode
;
4245 gimple new_stmt
= NULL
;
4247 enum dr_alignment_support alignment_support_scheme
;
4248 tree dataref_ptr
= NULL_TREE
;
4250 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4252 int i
, j
, group_size
;
4253 tree msq
= NULL_TREE
, lsq
;
4254 tree offset
= NULL_TREE
;
4255 tree realignment_token
= NULL_TREE
;
4257 VEC(tree
,heap
) *dr_chain
= NULL
;
4258 bool grouped_load
= false;
4259 bool load_lanes_p
= false;
4262 bool negative
= false;
4263 bool compute_in_loop
= false;
4264 struct loop
*at_loop
;
4266 bool slp
= (slp_node
!= NULL
);
4267 bool slp_perm
= false;
4268 enum tree_code code
;
4269 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4272 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
4273 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
4274 tree stride_base
, stride_step
;
4275 int gather_scale
= 1;
4276 enum vect_def_type gather_dt
= vect_unknown_def_type
;
4280 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4281 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
4282 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4287 /* Multiple types in SLP are handled by creating the appropriate number of
4288 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4290 if (slp
|| PURE_SLP_STMT (stmt_info
))
4293 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4295 gcc_assert (ncopies
>= 1);
4297 /* FORNOW. This restriction should be relaxed. */
4298 if (nested_in_vect_loop
&& ncopies
> 1)
4300 if (vect_print_dump_info (REPORT_DETAILS
))
4301 fprintf (vect_dump
, "multiple types in nested loop.");
4305 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4308 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4311 /* Is vectorizable load? */
4312 if (!is_gimple_assign (stmt
))
4315 scalar_dest
= gimple_assign_lhs (stmt
);
4316 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4319 code
= gimple_assign_rhs_code (stmt
);
4320 if (code
!= ARRAY_REF
4321 && code
!= INDIRECT_REF
4322 && code
!= COMPONENT_REF
4323 && code
!= IMAGPART_EXPR
4324 && code
!= REALPART_EXPR
4326 && TREE_CODE_CLASS (code
) != tcc_declaration
)
4329 if (!STMT_VINFO_DATA_REF (stmt_info
))
4332 elem_type
= TREE_TYPE (vectype
);
4333 mode
= TYPE_MODE (vectype
);
4335 /* FORNOW. In some cases can vectorize even if data-type not supported
4336 (e.g. - data copies). */
4337 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
4339 if (vect_print_dump_info (REPORT_DETAILS
))
4340 fprintf (vect_dump
, "Aligned load, but unsupported type.");
4344 /* Check if the load is a part of an interleaving chain. */
4345 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
4347 grouped_load
= true;
4349 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
4351 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4352 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
4354 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4355 if (vect_load_lanes_supported (vectype
, group_size
))
4356 load_lanes_p
= true;
4357 else if (!vect_grouped_load_supported (vectype
, group_size
))
4363 if (STMT_VINFO_GATHER_P (stmt_info
))
4367 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
4368 &gather_off
, &gather_scale
);
4369 gcc_assert (gather_decl
);
4370 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
4371 &def_stmt
, &def
, &gather_dt
,
4372 &gather_off_vectype
))
4374 if (vect_print_dump_info (REPORT_DETAILS
))
4375 fprintf (vect_dump
, "gather index use not simple.");
4379 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4381 if (!vect_check_strided_load (stmt
, loop_vinfo
,
4382 &stride_base
, &stride_step
))
4387 negative
= tree_int_cst_compare (nested_in_vect_loop
4388 ? STMT_VINFO_DR_STEP (stmt_info
)
4390 size_zero_node
) < 0;
4391 if (negative
&& ncopies
> 1)
4393 if (vect_print_dump_info (REPORT_DETAILS
))
4394 fprintf (vect_dump
, "multiple types with negative step.");
4400 gcc_assert (!grouped_load
);
4401 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
4402 if (alignment_support_scheme
!= dr_aligned
4403 && alignment_support_scheme
!= dr_unaligned_supported
)
4405 if (vect_print_dump_info (REPORT_DETAILS
))
4406 fprintf (vect_dump
, "negative step but alignment required.");
4409 if (!perm_mask_for_reverse (vectype
))
4411 if (vect_print_dump_info (REPORT_DETAILS
))
4412 fprintf (vect_dump
, "negative step and reversing not supported.");
4418 if (!vec_stmt
) /* transformation not required. */
4420 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
4421 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
, NULL
, NULL
, NULL
);
4425 if (vect_print_dump_info (REPORT_DETAILS
))
4426 fprintf (vect_dump
, "transform load. ncopies = %d", ncopies
);
4430 if (STMT_VINFO_GATHER_P (stmt_info
))
4432 tree vec_oprnd0
= NULL_TREE
, op
;
4433 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
4434 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
4435 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
4436 edge pe
= loop_preheader_edge (loop
);
4439 enum { NARROW
, NONE
, WIDEN
} modifier
;
4440 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
4442 if (nunits
== gather_off_nunits
)
4444 else if (nunits
== gather_off_nunits
/ 2)
4446 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
4449 for (i
= 0; i
< gather_off_nunits
; ++i
)
4450 sel
[i
] = i
| nunits
;
4452 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
4453 gcc_assert (perm_mask
!= NULL_TREE
);
4455 else if (nunits
== gather_off_nunits
* 2)
4457 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
4460 for (i
= 0; i
< nunits
; ++i
)
4461 sel
[i
] = i
< gather_off_nunits
4462 ? i
: i
+ nunits
- gather_off_nunits
;
4464 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
4465 gcc_assert (perm_mask
!= NULL_TREE
);
4471 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
4472 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4473 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4474 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4475 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4476 scaletype
= TREE_VALUE (arglist
);
4477 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
4478 && types_compatible_p (srctype
, masktype
));
4480 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4482 ptr
= fold_convert (ptrtype
, gather_base
);
4483 if (!is_gimple_min_invariant (ptr
))
4485 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
4486 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
4487 gcc_assert (!new_bb
);
4490 /* Currently we support only unconditional gather loads,
4491 so mask should be all ones. */
4492 if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
4493 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
4494 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
4498 for (j
= 0; j
< 6; ++j
)
4500 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
4501 mask
= build_real (TREE_TYPE (masktype
), r
);
4505 mask
= build_vector_from_val (masktype
, mask
);
4506 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
4508 scale
= build_int_cst (scaletype
, gather_scale
);
4510 prev_stmt_info
= NULL
;
4511 for (j
= 0; j
< ncopies
; ++j
)
4513 if (modifier
== WIDEN
&& (j
& 1))
4514 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
4515 perm_mask
, stmt
, gsi
);
4518 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
4521 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
4523 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
4525 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
4526 == TYPE_VECTOR_SUBPARTS (idxtype
));
4527 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
4528 var
= make_ssa_name (var
, NULL
);
4529 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
4531 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
4533 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4538 = gimple_build_call (gather_decl
, 5, mask
, ptr
, op
, mask
, scale
);
4540 if (!useless_type_conversion_p (vectype
, rettype
))
4542 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
4543 == TYPE_VECTOR_SUBPARTS (rettype
));
4544 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
4545 op
= make_ssa_name (var
, new_stmt
);
4546 gimple_call_set_lhs (new_stmt
, op
);
4547 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4548 var
= make_ssa_name (vec_dest
, NULL
);
4549 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
4551 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
4556 var
= make_ssa_name (vec_dest
, new_stmt
);
4557 gimple_call_set_lhs (new_stmt
, var
);
4560 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4562 if (modifier
== NARROW
)
4569 var
= permute_vec_elements (prev_res
, var
,
4570 perm_mask
, stmt
, gsi
);
4571 new_stmt
= SSA_NAME_DEF_STMT (var
);
4574 if (prev_stmt_info
== NULL
)
4575 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4577 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4578 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4582 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4584 gimple_stmt_iterator incr_gsi
;
4588 tree ref
= DR_REF (dr
);
4591 VEC(constructor_elt
, gc
) *v
= NULL
;
4592 gimple_seq stmts
= NULL
;
4594 gcc_assert (stride_base
&& stride_step
);
4596 /* For a load with loop-invariant (but other than power-of-2)
4597 stride (i.e. not a grouped access) like so:
4599 for (i = 0; i < n; i += stride)
4602 we generate a new induction variable and new accesses to
4603 form a new vector (or vectors, depending on ncopies):
4605 for (j = 0; ; j += VF*stride)
4607 tmp2 = array[j + stride];
4609 vectemp = {tmp1, tmp2, ...}
4612 ivstep
= stride_step
;
4613 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
4614 build_int_cst (TREE_TYPE (ivstep
), vf
));
4616 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
4618 create_iv (stride_base
, ivstep
, NULL
,
4619 loop
, &incr_gsi
, insert_after
,
4621 incr
= gsi_stmt (incr_gsi
);
4622 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
4624 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
4626 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
4628 prev_stmt_info
= NULL
;
4629 running_off
= offvar
;
4630 for (j
= 0; j
< ncopies
; j
++)
4634 v
= VEC_alloc (constructor_elt
, gc
, nunits
);
4635 for (i
= 0; i
< nunits
; i
++)
4637 tree newref
, newoff
;
4639 if (TREE_CODE (ref
) == ARRAY_REF
)
4640 newref
= build4 (ARRAY_REF
, TREE_TYPE (ref
),
4641 unshare_expr (TREE_OPERAND (ref
, 0)),
4643 NULL_TREE
, NULL_TREE
);
4645 newref
= build2 (MEM_REF
, TREE_TYPE (ref
),
4647 TREE_OPERAND (ref
, 1));
4649 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
4652 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
4653 newoff
= copy_ssa_name (running_off
, NULL
);
4654 if (POINTER_TYPE_P (TREE_TYPE (newoff
)))
4655 incr
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, newoff
,
4656 running_off
, stride_step
);
4658 incr
= gimple_build_assign_with_ops (PLUS_EXPR
, newoff
,
4659 running_off
, stride_step
);
4660 vect_finish_stmt_generation (stmt
, incr
, gsi
);
4662 running_off
= newoff
;
4665 vec_inv
= build_constructor (vectype
, v
);
4666 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
4667 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
4670 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4672 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4673 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4680 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4682 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance
)
4683 && first_stmt
!= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0))
4684 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
4686 /* Check if the chain of loads is already vectorized. */
4687 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
)))
4689 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4692 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4693 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4695 /* VEC_NUM is the number of vect stmts to be created for this group. */
4698 grouped_load
= false;
4699 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
4700 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance
))
4704 vec_num
= group_size
;
4710 group_size
= vec_num
= 1;
4713 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
4714 gcc_assert (alignment_support_scheme
);
4715 /* Targets with load-lane instructions must not require explicit
4717 gcc_assert (!load_lanes_p
4718 || alignment_support_scheme
== dr_aligned
4719 || alignment_support_scheme
== dr_unaligned_supported
);
4721 /* In case the vectorization factor (VF) is bigger than the number
4722 of elements that we can fit in a vectype (nunits), we have to generate
4723 more than one vector stmt - i.e - we need to "unroll" the
4724 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4725 from one copy of the vector stmt to the next, in the field
4726 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4727 stages to find the correct vector defs to be used when vectorizing
4728 stmts that use the defs of the current stmt. The example below
4729 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4730 need to create 4 vectorized stmts):
4732 before vectorization:
4733 RELATED_STMT VEC_STMT
4737 step 1: vectorize stmt S1:
4738 We first create the vector stmt VS1_0, and, as usual, record a
4739 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4740 Next, we create the vector stmt VS1_1, and record a pointer to
4741 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4742 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4744 RELATED_STMT VEC_STMT
4745 VS1_0: vx0 = memref0 VS1_1 -
4746 VS1_1: vx1 = memref1 VS1_2 -
4747 VS1_2: vx2 = memref2 VS1_3 -
4748 VS1_3: vx3 = memref3 - -
4749 S1: x = load - VS1_0
4752 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4753 information we recorded in RELATED_STMT field is used to vectorize
4756 /* In case of interleaving (non-unit grouped access):
4763 Vectorized loads are created in the order of memory accesses
4764 starting from the access of the first stmt of the chain:
4767 VS2: vx1 = &base + vec_size*1
4768 VS3: vx3 = &base + vec_size*2
4769 VS4: vx4 = &base + vec_size*3
4771 Then permutation statements are generated:
4773 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4774 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4777 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4778 (the order of the data-refs in the output of vect_permute_load_chain
4779 corresponds to the order of scalar stmts in the interleaving chain - see
4780 the documentation of vect_permute_load_chain()).
4781 The generation of permutation stmts and recording them in
4782 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4784 In case of both multiple types and interleaving, the vector loads and
4785 permutation stmts above are created for every copy. The result vector
4786 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4787 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4789 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4790 on a target that supports unaligned accesses (dr_unaligned_supported)
4791 we generate the following code:
4795 p = p + indx * vectype_size;
4800 Otherwise, the data reference is potentially unaligned on a target that
4801 does not support unaligned accesses (dr_explicit_realign_optimized) -
4802 then generate the following code, in which the data in each iteration is
4803 obtained by two vector loads, one from the previous iteration, and one
4804 from the current iteration:
4806 msq_init = *(floor(p1))
4807 p2 = initial_addr + VS - 1;
4808 realignment_token = call target_builtin;
4811 p2 = p2 + indx * vectype_size
4813 vec_dest = realign_load (msq, lsq, realignment_token)
4818 /* If the misalignment remains the same throughout the execution of the
4819 loop, we can create the init_addr and permutation mask at the loop
4820 preheader. Otherwise, it needs to be created inside the loop.
4821 This can only occur when vectorizing memory accesses in the inner-loop
4822 nested within an outer-loop that is being vectorized. */
4824 if (nested_in_vect_loop
4825 && (TREE_INT_CST_LOW (DR_STEP (dr
))
4826 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
4828 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
4829 compute_in_loop
= true;
4832 if ((alignment_support_scheme
== dr_explicit_realign_optimized
4833 || alignment_support_scheme
== dr_explicit_realign
)
4834 && !compute_in_loop
)
4836 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
4837 alignment_support_scheme
, NULL_TREE
,
4839 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
4841 phi
= SSA_NAME_DEF_STMT (msq
);
4842 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
4849 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
4852 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
4854 aggr_type
= vectype
;
4856 prev_stmt_info
= NULL
;
4857 for (j
= 0; j
< ncopies
; j
++)
4859 /* 1. Create the vector or array pointer update chain. */
4861 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
4862 offset
, &dummy
, gsi
,
4863 &ptr_incr
, false, &inv_p
);
4865 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
4866 TYPE_SIZE_UNIT (aggr_type
));
4868 if (grouped_load
|| slp_perm
)
4869 dr_chain
= VEC_alloc (tree
, heap
, vec_num
);
4875 vec_array
= create_vector_array (vectype
, vec_num
);
4878 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4879 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
4880 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
4881 gimple_call_set_lhs (new_stmt
, vec_array
);
4882 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4884 /* Extract each vector into an SSA_NAME. */
4885 for (i
= 0; i
< vec_num
; i
++)
4887 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
4889 VEC_quick_push (tree
, dr_chain
, new_temp
);
4892 /* Record the mapping between SSA_NAMEs and statements. */
4893 vect_record_grouped_load_vectors (stmt
, dr_chain
);
4897 for (i
= 0; i
< vec_num
; i
++)
4900 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
4903 /* 2. Create the vector-load in the loop. */
4904 switch (alignment_support_scheme
)
4907 case dr_unaligned_supported
:
4909 unsigned int align
, misalign
;
4912 = build2 (MEM_REF
, vectype
, dataref_ptr
,
4913 build_int_cst (reference_alias_ptr_type
4914 (DR_REF (first_dr
)), 0));
4915 align
= TYPE_ALIGN_UNIT (vectype
);
4916 if (alignment_support_scheme
== dr_aligned
)
4918 gcc_assert (aligned_access_p (first_dr
));
4921 else if (DR_MISALIGNMENT (first_dr
) == -1)
4923 TREE_TYPE (data_ref
)
4924 = build_aligned_type (TREE_TYPE (data_ref
),
4925 TYPE_ALIGN (elem_type
));
4926 align
= TYPE_ALIGN_UNIT (elem_type
);
4931 TREE_TYPE (data_ref
)
4932 = build_aligned_type (TREE_TYPE (data_ref
),
4933 TYPE_ALIGN (elem_type
));
4934 misalign
= DR_MISALIGNMENT (first_dr
);
4936 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
4940 case dr_explicit_realign
:
4945 vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
4947 if (compute_in_loop
)
4948 msq
= vect_setup_realignment (first_stmt
, gsi
,
4950 dr_explicit_realign
,
4953 ptr
= copy_ssa_name (dataref_ptr
, NULL
);
4954 new_stmt
= gimple_build_assign_with_ops
4955 (BIT_AND_EXPR
, ptr
, dataref_ptr
,
4957 (TREE_TYPE (dataref_ptr
),
4958 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
4959 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4961 = build2 (MEM_REF
, vectype
, ptr
,
4962 build_int_cst (reference_alias_ptr_type
4963 (DR_REF (first_dr
)), 0));
4964 vec_dest
= vect_create_destination_var (scalar_dest
,
4966 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
4967 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4968 gimple_assign_set_lhs (new_stmt
, new_temp
);
4969 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
4970 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
4971 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4974 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
4975 TYPE_SIZE_UNIT (elem_type
));
4976 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
4977 new_stmt
= gimple_build_assign_with_ops
4978 (BIT_AND_EXPR
, NULL_TREE
, ptr
,
4981 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
4982 ptr
= copy_ssa_name (dataref_ptr
, new_stmt
);
4983 gimple_assign_set_lhs (new_stmt
, ptr
);
4984 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4986 = build2 (MEM_REF
, vectype
, ptr
,
4987 build_int_cst (reference_alias_ptr_type
4988 (DR_REF (first_dr
)), 0));
4991 case dr_explicit_realign_optimized
:
4992 new_temp
= copy_ssa_name (dataref_ptr
, NULL
);
4993 new_stmt
= gimple_build_assign_with_ops
4994 (BIT_AND_EXPR
, new_temp
, dataref_ptr
,
4996 (TREE_TYPE (dataref_ptr
),
4997 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
4998 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5000 = build2 (MEM_REF
, vectype
, new_temp
,
5001 build_int_cst (reference_alias_ptr_type
5002 (DR_REF (first_dr
)), 0));
5007 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5008 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
5009 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5010 gimple_assign_set_lhs (new_stmt
, new_temp
);
5011 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5013 /* 3. Handle explicit realignment if necessary/supported.
5015 vec_dest = realign_load (msq, lsq, realignment_token) */
5016 if (alignment_support_scheme
== dr_explicit_realign_optimized
5017 || alignment_support_scheme
== dr_explicit_realign
)
5019 lsq
= gimple_assign_lhs (new_stmt
);
5020 if (!realignment_token
)
5021 realignment_token
= dataref_ptr
;
5022 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5024 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR
,
5027 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5028 gimple_assign_set_lhs (new_stmt
, new_temp
);
5029 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5031 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
5034 if (i
== vec_num
- 1 && j
== ncopies
- 1)
5035 add_phi_arg (phi
, lsq
,
5036 loop_latch_edge (containing_loop
),
5042 /* 4. Handle invariant-load. */
5043 if (inv_p
&& !bb_vinfo
)
5045 gimple_stmt_iterator gsi2
= *gsi
;
5046 gcc_assert (!grouped_load
);
5048 new_temp
= vect_init_vector (stmt
, scalar_dest
,
5050 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5055 tree perm_mask
= perm_mask_for_reverse (vectype
);
5056 new_temp
= permute_vec_elements (new_temp
, new_temp
,
5057 perm_mask
, stmt
, gsi
);
5058 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5061 /* Collect vector loads and later create their permutation in
5062 vect_transform_grouped_load (). */
5063 if (grouped_load
|| slp_perm
)
5064 VEC_quick_push (tree
, dr_chain
, new_temp
);
5066 /* Store vector loads in the corresponding SLP_NODE. */
5067 if (slp
&& !slp_perm
)
5068 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
5073 if (slp
&& !slp_perm
)
5078 if (!vect_transform_slp_perm_load (stmt
, dr_chain
, gsi
, vf
,
5079 slp_node_instance
, false))
5081 VEC_free (tree
, heap
, dr_chain
);
5090 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
5091 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5096 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5098 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5099 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5103 VEC_free (tree
, heap
, dr_chain
);
5109 /* Function vect_is_simple_cond.
5112 LOOP - the loop that is being vectorized.
5113 COND - Condition that is checked for simple use.
5116 *COMP_VECTYPE - the vector type for the comparison.
5118 Returns whether a COND can be vectorized. Checks whether
5119 condition operands are supportable using vec_is_simple_use. */
5122 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
5123 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
5127 enum vect_def_type dt
;
5128 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
5130 if (!COMPARISON_CLASS_P (cond
))
5133 lhs
= TREE_OPERAND (cond
, 0);
5134 rhs
= TREE_OPERAND (cond
, 1);
5136 if (TREE_CODE (lhs
) == SSA_NAME
)
5138 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
5139 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
5140 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
5143 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
5144 && TREE_CODE (lhs
) != FIXED_CST
)
5147 if (TREE_CODE (rhs
) == SSA_NAME
)
5149 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
5150 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
5151 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
5154 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
5155 && TREE_CODE (rhs
) != FIXED_CST
)
5158 *comp_vectype
= vectype1
? vectype1
: vectype2
;
5162 /* vectorizable_condition.
5164 Check if STMT is conditional modify expression that can be vectorized.
5165 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5166 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5169 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
5170 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
5171 else caluse if it is 2).
5173 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5176 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
5177 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
5180 tree scalar_dest
= NULL_TREE
;
5181 tree vec_dest
= NULL_TREE
;
5182 tree cond_expr
, then_clause
, else_clause
;
5183 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5184 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5185 tree comp_vectype
= NULL_TREE
;
5186 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
5187 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
5188 tree vec_compare
, vec_cond_expr
;
5190 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5192 enum vect_def_type dt
, dts
[4];
5193 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5195 enum tree_code code
;
5196 stmt_vec_info prev_stmt_info
= NULL
;
5198 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5199 VEC (tree
, heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
5200 VEC (tree
, heap
) *vec_oprnds2
= NULL
, *vec_oprnds3
= NULL
;
5202 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
5205 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5207 gcc_assert (ncopies
>= 1);
5208 if (reduc_index
&& ncopies
> 1)
5209 return false; /* FORNOW */
5211 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
5214 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5217 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5218 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
5222 /* FORNOW: not yet supported. */
5223 if (STMT_VINFO_LIVE_P (stmt_info
))
5225 if (vect_print_dump_info (REPORT_DETAILS
))
5226 fprintf (vect_dump
, "value used after loop.");
5230 /* Is vectorizable conditional operation? */
5231 if (!is_gimple_assign (stmt
))
5234 code
= gimple_assign_rhs_code (stmt
);
5236 if (code
!= COND_EXPR
)
5239 cond_expr
= gimple_assign_rhs1 (stmt
);
5240 then_clause
= gimple_assign_rhs2 (stmt
);
5241 else_clause
= gimple_assign_rhs3 (stmt
);
5243 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
5248 if (TREE_CODE (then_clause
) == SSA_NAME
)
5250 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
5251 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5252 &then_def_stmt
, &def
, &dt
))
5255 else if (TREE_CODE (then_clause
) != INTEGER_CST
5256 && TREE_CODE (then_clause
) != REAL_CST
5257 && TREE_CODE (then_clause
) != FIXED_CST
)
5260 if (TREE_CODE (else_clause
) == SSA_NAME
)
5262 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
5263 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5264 &else_def_stmt
, &def
, &dt
))
5267 else if (TREE_CODE (else_clause
) != INTEGER_CST
5268 && TREE_CODE (else_clause
) != REAL_CST
5269 && TREE_CODE (else_clause
) != FIXED_CST
)
5274 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
5275 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
5282 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
5283 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
5284 vec_oprnds2
= VEC_alloc (tree
, heap
, 1);
5285 vec_oprnds3
= VEC_alloc (tree
, heap
, 1);
5289 scalar_dest
= gimple_assign_lhs (stmt
);
5290 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5292 /* Handle cond expr. */
5293 for (j
= 0; j
< ncopies
; j
++)
5295 gimple new_stmt
= NULL
;
5300 VEC (tree
, heap
) *ops
= VEC_alloc (tree
, heap
, 4);
5301 VEC (slp_void_p
, heap
) *vec_defs
;
5303 vec_defs
= VEC_alloc (slp_void_p
, heap
, 4);
5304 VEC_safe_push (tree
, heap
, ops
, TREE_OPERAND (cond_expr
, 0));
5305 VEC_safe_push (tree
, heap
, ops
, TREE_OPERAND (cond_expr
, 1));
5306 VEC_safe_push (tree
, heap
, ops
, then_clause
);
5307 VEC_safe_push (tree
, heap
, ops
, else_clause
);
5308 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
5309 vec_oprnds3
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5310 vec_oprnds2
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5311 vec_oprnds1
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5312 vec_oprnds0
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5314 VEC_free (tree
, heap
, ops
);
5315 VEC_free (slp_void_p
, heap
, vec_defs
);
5321 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
5323 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
5324 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
5327 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
5329 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
5330 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
5331 if (reduc_index
== 1)
5332 vec_then_clause
= reduc_def
;
5335 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
5337 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
5338 NULL
, >emp
, &def
, &dts
[2]);
5340 if (reduc_index
== 2)
5341 vec_else_clause
= reduc_def
;
5344 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
5346 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
5347 NULL
, >emp
, &def
, &dts
[3]);
5353 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
5354 VEC_pop (tree
, vec_oprnds0
));
5355 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
5356 VEC_pop (tree
, vec_oprnds1
));
5357 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
5358 VEC_pop (tree
, vec_oprnds2
));
5359 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
5360 VEC_pop (tree
, vec_oprnds3
));
5365 VEC_quick_push (tree
, vec_oprnds0
, vec_cond_lhs
);
5366 VEC_quick_push (tree
, vec_oprnds1
, vec_cond_rhs
);
5367 VEC_quick_push (tree
, vec_oprnds2
, vec_then_clause
);
5368 VEC_quick_push (tree
, vec_oprnds3
, vec_else_clause
);
5371 /* Arguments are ready. Create the new vector stmt. */
5372 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vec_cond_lhs
)
5374 vec_cond_rhs
= VEC_index (tree
, vec_oprnds1
, i
);
5375 vec_then_clause
= VEC_index (tree
, vec_oprnds2
, i
);
5376 vec_else_clause
= VEC_index (tree
, vec_oprnds3
, i
);
5378 vec_compare
= build2 (TREE_CODE (cond_expr
), vectype
,
5379 vec_cond_lhs
, vec_cond_rhs
);
5380 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
5381 vec_compare
, vec_then_clause
, vec_else_clause
);
5383 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
5384 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5385 gimple_assign_set_lhs (new_stmt
, new_temp
);
5386 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5388 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
5395 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5397 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5399 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5402 VEC_free (tree
, heap
, vec_oprnds0
);
5403 VEC_free (tree
, heap
, vec_oprnds1
);
5404 VEC_free (tree
, heap
, vec_oprnds2
);
5405 VEC_free (tree
, heap
, vec_oprnds3
);
5411 /* Make sure the statement is vectorizable. */
5414 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
5416 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5417 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5418 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
5420 tree scalar_type
, vectype
;
5421 gimple pattern_stmt
;
5422 gimple_seq pattern_def_seq
;
5424 if (vect_print_dump_info (REPORT_DETAILS
))
5426 fprintf (vect_dump
, "==> examining statement: ");
5427 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5430 if (gimple_has_volatile_ops (stmt
))
5432 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
5433 fprintf (vect_dump
, "not vectorized: stmt has volatile operands");
5438 /* Skip stmts that do not need to be vectorized. In loops this is expected
5440 - the COND_EXPR which is the loop exit condition
5441 - any LABEL_EXPRs in the loop
5442 - computations that are used only for array indexing or loop control.
5443 In basic blocks we only analyze statements that are a part of some SLP
5444 instance, therefore, all the statements are relevant.
5446 Pattern statement needs to be analyzed instead of the original statement
5447 if the original statement is not relevant. Otherwise, we analyze both
5448 statements. In basic blocks we are called from some SLP instance
5449 traversal, don't analyze pattern stmts instead, the pattern stmts
5450 already will be part of SLP instance. */
5452 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
5453 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5454 && !STMT_VINFO_LIVE_P (stmt_info
))
5456 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5458 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5459 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5461 /* Analyze PATTERN_STMT instead of the original stmt. */
5462 stmt
= pattern_stmt
;
5463 stmt_info
= vinfo_for_stmt (pattern_stmt
);
5464 if (vect_print_dump_info (REPORT_DETAILS
))
5466 fprintf (vect_dump
, "==> examining pattern statement: ");
5467 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5472 if (vect_print_dump_info (REPORT_DETAILS
))
5473 fprintf (vect_dump
, "irrelevant.");
5478 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5481 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5482 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5484 /* Analyze PATTERN_STMT too. */
5485 if (vect_print_dump_info (REPORT_DETAILS
))
5487 fprintf (vect_dump
, "==> examining pattern statement: ");
5488 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5491 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
5495 if (is_pattern_stmt_p (stmt_info
)
5497 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
5499 gimple_stmt_iterator si
;
5501 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
5503 gimple pattern_def_stmt
= gsi_stmt (si
);
5504 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
5505 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
5507 /* Analyze def stmt of STMT if it's a pattern stmt. */
5508 if (vect_print_dump_info (REPORT_DETAILS
))
5510 fprintf (vect_dump
, "==> examining pattern def statement: ");
5511 print_gimple_stmt (vect_dump
, pattern_def_stmt
, 0, TDF_SLIM
);
5514 if (!vect_analyze_stmt (pattern_def_stmt
,
5515 need_to_vectorize
, node
))
5521 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
5523 case vect_internal_def
:
5526 case vect_reduction_def
:
5527 case vect_nested_cycle
:
5528 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
5529 || relevance
== vect_used_in_outer_by_reduction
5530 || relevance
== vect_unused_in_scope
));
5533 case vect_induction_def
:
5534 case vect_constant_def
:
5535 case vect_external_def
:
5536 case vect_unknown_def_type
:
5543 gcc_assert (PURE_SLP_STMT (stmt_info
));
5545 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
5546 if (vect_print_dump_info (REPORT_DETAILS
))
5548 fprintf (vect_dump
, "get vectype for scalar type: ");
5549 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
5552 vectype
= get_vectype_for_scalar_type (scalar_type
);
5555 if (vect_print_dump_info (REPORT_DETAILS
))
5557 fprintf (vect_dump
, "not SLPed: unsupported data-type ");
5558 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
5563 if (vect_print_dump_info (REPORT_DETAILS
))
5565 fprintf (vect_dump
, "vectype: ");
5566 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
5569 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
5572 if (STMT_VINFO_RELEVANT_P (stmt_info
))
5574 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
5575 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
5576 *need_to_vectorize
= true;
5581 && (STMT_VINFO_RELEVANT_P (stmt_info
)
5582 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
5583 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
5584 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
5585 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
5586 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
5587 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
5588 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
5589 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
5590 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
5591 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
5595 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, node
)
5596 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
5597 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
5598 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
5599 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
5600 || vectorizable_call (stmt
, NULL
, NULL
, node
)
5601 || vectorizable_store (stmt
, NULL
, NULL
, node
)
5602 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
5607 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
5609 fprintf (vect_dump
, "not vectorized: relevant stmt not ");
5610 fprintf (vect_dump
, "supported: ");
5611 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5620 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5621 need extra handling, except for vectorizable reductions. */
5622 if (STMT_VINFO_LIVE_P (stmt_info
)
5623 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
5624 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
5628 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
5630 fprintf (vect_dump
, "not vectorized: live stmt not ");
5631 fprintf (vect_dump
, "supported: ");
5632 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5642 /* Function vect_transform_stmt.
5644 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5647 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
5648 bool *grouped_store
, slp_tree slp_node
,
5649 slp_instance slp_node_instance
)
5651 bool is_store
= false;
5652 gimple vec_stmt
= NULL
;
5653 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5656 switch (STMT_VINFO_TYPE (stmt_info
))
5658 case type_demotion_vec_info_type
:
5659 case type_promotion_vec_info_type
:
5660 case type_conversion_vec_info_type
:
5661 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
5665 case induc_vec_info_type
:
5666 gcc_assert (!slp_node
);
5667 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
5671 case shift_vec_info_type
:
5672 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
5676 case op_vec_info_type
:
5677 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
5681 case assignment_vec_info_type
:
5682 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
5686 case load_vec_info_type
:
5687 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
5692 case store_vec_info_type
:
5693 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
5695 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
5697 /* In case of interleaving, the whole chain is vectorized when the
5698 last store in the chain is reached. Store stmts before the last
5699 one are skipped, and there vec_stmt_info shouldn't be freed
5701 *grouped_store
= true;
5702 if (STMT_VINFO_VEC_STMT (stmt_info
))
5709 case condition_vec_info_type
:
5710 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
5714 case call_vec_info_type
:
5715 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
5716 stmt
= gsi_stmt (*gsi
);
5719 case reduc_vec_info_type
:
5720 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
5725 if (!STMT_VINFO_LIVE_P (stmt_info
))
5727 if (vect_print_dump_info (REPORT_DETAILS
))
5728 fprintf (vect_dump
, "stmt not supported.");
5733 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5734 is being vectorized, but outside the immediately enclosing loop. */
5736 && STMT_VINFO_LOOP_VINFO (stmt_info
)
5737 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5738 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
5739 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
5740 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
5741 || STMT_VINFO_RELEVANT (stmt_info
) ==
5742 vect_used_in_outer_by_reduction
))
5744 struct loop
*innerloop
= LOOP_VINFO_LOOP (
5745 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
5746 imm_use_iterator imm_iter
;
5747 use_operand_p use_p
;
5751 if (vect_print_dump_info (REPORT_DETAILS
))
5752 fprintf (vect_dump
, "Record the vdef for outer-loop vectorization.");
5754 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
5755 (to be used when vectorizing outer-loop stmts that use the DEF of
5757 if (gimple_code (stmt
) == GIMPLE_PHI
)
5758 scalar_dest
= PHI_RESULT (stmt
);
5760 scalar_dest
= gimple_assign_lhs (stmt
);
5762 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
5764 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
5766 exit_phi
= USE_STMT (use_p
);
5767 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
5772 /* Handle stmts whose DEF is used outside the loop-nest that is
5773 being vectorized. */
5774 if (STMT_VINFO_LIVE_P (stmt_info
)
5775 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
5777 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
5782 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
5788 /* Remove a group of stores (for SLP or interleaving), free their
/* NOTE(review): the comment above is truncated by extraction; it originally
   continued onto further lines.  Intended contract: walk the interleaving
   chain starting at FIRST_STMT via GROUP_NEXT_ELEMENT, freeing each
   statement's stmt_vec_info and removing the statement from the IL.  */
5792 vect_remove_stores (gimple first_stmt
)
/* NOTE(review): the return-type line and opening brace are missing from
   this fragment; presumably the function returns void — confirm against
   the original file.  */
5794 gimple next
= first_stmt
;
5796 gimple_stmt_iterator next_si
;
/* NOTE(review): the declaration of TMP and the loop header iterating the
   GROUP_NEXT_ELEMENT chain appear to be missing from this fragment.  */
5800 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
5802 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
/* For a pattern statement, the statement actually present in the IL is
   the related (original) statement — remove that one instead.  */
5803 if (is_pattern_stmt_p (stmt_info
))
5804 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
5805 /* Free the attached stmt_vec_info and remove the stmt. */
5806 next_si
= gsi_for_stmt (next
);
/* Drop the virtual definition before physically removing the statement,
   then release its SSA defs and the vectorizer bookkeeping.  */
5807 unlink_stmt_vdef (next
);
5808 gsi_remove (&next_si
, true);
5809 release_defs (next
);
5810 free_stmt_vec_info (next
);
5816 /* Function new_stmt_vec_info.
5818 Create and initialize a new stmt_vec_info struct for STMT. */
/* Allocates a zeroed _stmt_vec_info and fills in explicit defaults for
   every field; ownership of the returned struct passes to the caller
   (freed later via free_stmt_vec_info).  */
5821 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
5822 bb_vec_info bb_vinfo
)
/* NOTE(review): the return-type line, opening brace, and the declaration
   of RES are missing from this fragment.  */
5825 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
/* Basic identification: statement, and the loop/bb context being
   vectorized (exactly one of LOOP_VINFO/BB_VINFO is normally non-NULL).  */
5827 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
5828 STMT_VINFO_STMT (res
) = stmt
;
5829 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
5830 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
5831 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
5832 STMT_VINFO_LIVE_P (res
) = false;
5833 STMT_VINFO_VECTYPE (res
) = NULL
;
5834 STMT_VINFO_VEC_STMT (res
) = NULL
;
5835 STMT_VINFO_VECTORIZABLE (res
) = true;
5836 STMT_VINFO_IN_PATTERN_P (res
) = false;
5837 STMT_VINFO_RELATED_STMT (res
) = NULL
;
5838 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
5839 STMT_VINFO_DATA_REF (res
) = NULL
;
/* Data-reference analysis results start out unknown.  */
5841 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
5842 STMT_VINFO_DR_OFFSET (res
) = NULL
;
5843 STMT_VINFO_DR_INIT (res
) = NULL
;
5844 STMT_VINFO_DR_STEP (res
) = NULL
;
5845 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
/* A PHI in a loop-header block may be an induction/reduction whose kind
   is not yet known; everything else defaults to an internal def.  */
5847 if (gimple_code (stmt
) == GIMPLE_PHI
5848 && is_loop_header_bb_p (gimple_bb (stmt
)))
5849 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
5851 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
/* SLP and interleaving-group bookkeeping defaults.  */
5853 STMT_VINFO_SAME_ALIGN_REFS (res
) = NULL
;
5854 STMT_SLP_TYPE (res
) = loop_vect
;
5855 GROUP_FIRST_ELEMENT (res
) = NULL
;
5856 GROUP_NEXT_ELEMENT (res
) = NULL
;
5857 GROUP_SIZE (res
) = 0;
5858 GROUP_STORE_COUNT (res
) = 0;
5859 GROUP_GAP (res
) = 0;
5860 GROUP_SAME_DR_STMT (res
) = NULL
;
5861 GROUP_READ_WRITE_DEPENDENCE (res
) = false;
/* NOTE(review): the trailing "return res;" and closing brace are missing
   from this fragment.  */
5867 /* Create a hash table for stmt_vec_info. */
/* Allocate the global stmt_vec_info_vec (initial capacity 50 entries,
   heap-allocated).  Asserts it is not already allocated, so this must be
   paired with free_stmt_vec_info_vec between uses.  */
5870 init_stmt_vec_info_vec (void)
5872 gcc_assert (!stmt_vec_info_vec
);
5873 stmt_vec_info_vec
= VEC_alloc (vec_void_p
, heap
, 50);
5877 /* Free hash table for stmt_vec_info. */
/* Release the global stmt_vec_info_vec allocated by
   init_stmt_vec_info_vec.  Asserts it is currently allocated.  */
5880 free_stmt_vec_info_vec (void)
5882 gcc_assert (stmt_vec_info_vec
);
5883 VEC_free (vec_void_p
, heap
, stmt_vec_info_vec
);
5887 /* Free stmt vectorization related info. */
/* Release the stmt_vec_info attached to STMT, including any pattern
   statement info created during pattern recognition, and clear STMT's
   vinfo slot.  */
5890 free_stmt_vec_info (gimple stmt
)
5892 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5897 /* Check if this statement has a related "pattern stmt"
5898 (introduced by the vectorizer during the pattern recognition
5899 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
5901 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
5903 stmt_vec_info patt_info
5904 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
))
;
/* First free the infos of any statements in the pattern definition
   sequence, then the pattern statement's own info (recursively).  */
5907 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
5910 gimple_stmt_iterator si
;
5911 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
5912 free_stmt_vec_info (gsi_stmt (si
));
5914 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info
));
/* Free the same-alignment refs vector and detach the info from STMT.
   NOTE(review): the line freeing STMT_INFO itself appears to be missing
   from this fragment.  */
5918 VEC_free (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmt_info
));
5919 set_vinfo_for_stmt (stmt
, NULL
);
5924 /* Function get_vectype_for_scalar_type_and_size.
5926 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
/* NOTE(review): this header comment is truncated; it originally continued
   (". . . by the target", with NULL_TREE returned on failure — confirm
   against the original file).  */
5930 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
5932 enum machine_mode inner_mode
= TYPE_MODE (scalar_type
);
5933 enum machine_mode simd_mode
;
5934 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
/* Only integral and floating-point component modes are handled.  */
5941 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
5942 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
5945 /* We can't build a vector type of elements with alignment bigger than
5947 if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
5950 /* For vector types of elements whose mode precision doesn't
5951 match their types precision we use a element type of mode
5952 precision. The vectorization routines will have to make sure
5953 they support the proper result truncation/extension.
5954 We also make sure to build vector types with INTEGER_TYPE
5955 component type only. */
5956 if (INTEGRAL_TYPE_P (scalar_type
)
5957 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
5958 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
5959 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
5960 TYPE_UNSIGNED (scalar_type
));
5962 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5963 When the component mode passes the above test simply use a type
5964 corresponding to that mode. The theory is that any use that
5965 would cause problems with this will disable vectorization anyway. */
5966 if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
5967 && !INTEGRAL_TYPE_P (scalar_type
)
5968 && !POINTER_TYPE_P (scalar_type
))
5969 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
5971 /* If no size was supplied use the mode the target prefers. Otherwise
5972 lookup a vector mode of the specified size. */
5974 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
5976 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
/* Number of vector lanes: vector bytes divided by component bytes.
   NOTE(review): the declarations of NUNITS and VECTYPE, and the early
   returns between the branches above, are missing from this fragment.  */
5977 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
5981 vectype
= build_vector_type (scalar_type
, nunits
);
/* Dump-file diagnostics for the chosen vector type.  */
5982 if (vect_print_dump_info (REPORT_DETAILS
))
5984 fprintf (vect_dump
, "get vectype with %d units of type ", nunits
);
5985 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
5991 if (vect_print_dump_info (REPORT_DETAILS
))
5993 fprintf (vect_dump
, "vectype: ");
5994 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
/* Reject the result if the target cannot represent it as a vector (or
   integer) mode.  */
5997 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
5998 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
6000 if (vect_print_dump_info (REPORT_DETAILS
))
6001 fprintf (vect_dump
, "mode not supported by target.");
/* Vector size (in bytes) currently in use for vectorization; 0 until the
   first successful type lookup latches the target's preferred size.  */
6008 unsigned int current_vector_size
;
6010 /* Function get_vectype_for_scalar_type.
6012 Returns the vector type corresponding to SCALAR_TYPE as supported
/* Convenience wrapper: look up the vector type using the current vector
   size, and record that size the first time a type is found.  */
6016 get_vectype_for_scalar_type (tree scalar_type
)
6019 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
6020 current_vector_size
);
/* NOTE(review): the first half of this condition (presumably a non-NULL
   check on VECTYPE) is missing from this fragment.  */
6022 && current_vector_size
== 0)
6023 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
6027 /* Function get_same_sized_vectype
6029 Returns a vector type corresponding to SCALAR_TYPE of size
6030 VECTOR_TYPE if supported by the target. */
/* Simply delegates, passing VECTOR_TYPE's mode size as the requested
   vector size in bytes.  */
6033 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
6035 return get_vectype_for_scalar_type_and_size
6036 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
6039 /* Function vect_is_simple_use.
/* Input (NOTE(review): the "Input:" header line itself is missing from
   this fragment):  */
6042 LOOP_VINFO - the vect info of the loop that is being vectorized.
6043 BB_VINFO - the vect info of the basic block that is being vectorized.
6044 OPERAND - operand of STMT in the loop or bb.
6045 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6047 Returns whether a stmt with OPERAND can be vectorized.
6048 For loops, supportable operands are constants, loop invariants, and operands
6049 that are defined by the current iteration of the loop. Unsupportable
6050 operands are those that are defined by a previous iteration of the loop (as
6051 is the case in reduction/induction computations).
6052 For basic blocks, supportable operands are constants and bb invariants.
6053 For now, operands defined outside the basic block are not supported. */
6056 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
6057 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
6058 tree
*def
, enum vect_def_type
*dt
)
6061 stmt_vec_info stmt_vinfo
;
6062 struct loop
*loop
= NULL
;
6065 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6070 if (vect_print_dump_info (REPORT_DETAILS
))
6072 fprintf (vect_dump
, "vect_is_simple_use: operand ");
6073 print_generic_expr (vect_dump
, operand
, TDF_SLIM
);
/* Constants and loop/bb invariants are always usable; classify and
   return.  NOTE(review): the "return true;" lines after each of these
   classifications are missing from this fragment.  */
6076 if (CONSTANT_CLASS_P (operand
))
6078 *dt
= vect_constant_def
;
6082 if (is_gimple_min_invariant (operand
))
6085 *dt
= vect_external_def
;
/* Look through PAREN_EXPR (a non-associatable copy) to the real
   operand.  */
6089 if (TREE_CODE (operand
) == PAREN_EXPR
)
6091 if (vect_print_dump_info (REPORT_DETAILS
))
6092 fprintf (vect_dump
, "non-associatable copy.");
6093 operand
= TREE_OPERAND (operand
, 0);
6096 if (TREE_CODE (operand
) != SSA_NAME
)
6098 if (vect_print_dump_info (REPORT_DETAILS
))
6099 fprintf (vect_dump
, "not ssa-name.");
6103 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
6104 if (*def_stmt
== NULL
)
6106 if (vect_print_dump_info (REPORT_DETAILS
))
6107 fprintf (vect_dump
, "no def_stmt.");
6111 if (vect_print_dump_info (REPORT_DETAILS
))
6113 fprintf (vect_dump
, "def_stmt: ");
6114 print_gimple_stmt (vect_dump
, *def_stmt
, 0, TDF_SLIM
);
6117 /* Empty stmt is expected only in case of a function argument.
6118 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
6119 if (gimple_nop_p (*def_stmt
))
6122 *dt
= vect_external_def
;
/* A def outside the region being vectorized (outside the loop, outside
   the bb, or a PHI in bb vectorization) is treated as external;
   otherwise take the def type recorded during analysis.  */
6126 bb
= gimple_bb (*def_stmt
);
6128 if ((loop
&& !flow_bb_inside_loop_p (loop
, bb
))
6129 || (!loop
&& bb
!= BB_VINFO_BB (bb_vinfo
))
6130 || (!loop
&& gimple_code (*def_stmt
) == GIMPLE_PHI
))
6131 *dt
= vect_external_def
;
6134 stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
6135 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
/* NOTE(review): the connective between these two conditions (an "||"
   with a parenthesized sub-condition, per the trailing "))") is missing
   from this fragment — as written the "&&" chain would be
   unsatisfiable.  */
6138 if (*dt
== vect_unknown_def_type
6140 && *dt
== vect_double_reduction_def
6141 && gimple_code (stmt
) != GIMPLE_PHI
))
6143 if (vect_print_dump_info (REPORT_DETAILS
))
6144 fprintf (vect_dump
, "Unsupported pattern.");
6148 if (vect_print_dump_info (REPORT_DETAILS
))
6149 fprintf (vect_dump
, "type of def: %d.",*dt
);
/* Extract *DEF from the defining statement.  NOTE(review): the case
   labels (GIMPLE_PHI / GIMPLE_ASSIGN / GIMPLE_CALL / default) and break
   statements of this switch are missing from this fragment.  */
6151 switch (gimple_code (*def_stmt
))
6154 *def
= gimple_phi_result (*def_stmt
);
6158 *def
= gimple_assign_lhs (*def_stmt
);
6162 *def
= gimple_call_lhs (*def_stmt
);
6167 if (vect_print_dump_info (REPORT_DETAILS
))
6168 fprintf (vect_dump
, "unsupported defining stmt: ");
6175 /* Function vect_is_simple_use_1.
6177 Same as vect_is_simple_use but also determines the vector operand
6178 type of OPERAND and stores it to *VECTYPE. If the definition of
6179 OPERAND is vect_uninitialized_def, vect_constant_def or
6180 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6181 is responsible to compute the best suited vector type for the
/* (Comment truncated by extraction — presumably ". . . use in the stmt".)  */
6185 vect_is_simple_use_1 (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
6186 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
6187 tree
*def
, enum vect_def_type
*dt
, tree
*vectype
)
/* Delegate classification; NOTE(review): the remaining call arguments
   and the "return false;" body of this guard are missing from this
   fragment.  */
6189 if (!vect_is_simple_use (operand
, stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
6193 /* Now get a vector type if the def is internal, otherwise supply
6194 NULL_TREE and leave it up to the caller to figure out a proper
6195 type for the use stmt. */
6196 if (*dt
== vect_internal_def
6197 || *dt
== vect_induction_def
6198 || *dt
== vect_reduction_def
6199 || *dt
== vect_double_reduction_def
6200 || *dt
== vect_nested_cycle
)
6202 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
/* An irrelevant, non-live statement that was replaced by a pattern
   carries its vectype on the related pattern statement.  */
6204 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
6205 && !STMT_VINFO_RELEVANT (stmt_info
)
6206 && !STMT_VINFO_LIVE_P (stmt_info
))
6207 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
6209 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6210 gcc_assert (*vectype
!= NULL_TREE
);
6212 else if (*dt
== vect_uninitialized_def
6213 || *dt
== vect_constant_def
6214 || *dt
== vect_external_def
)
6215 *vectype
= NULL_TREE
;
6223 /* Function supportable_widening_operation
6225 Check whether an operation represented by the code CODE is a
6226 widening operation that is supported by the target platform in
6227 vector form (i.e., when operating on arguments of type VECTYPE_IN
6228 producing a result of type VECTYPE_OUT).
6230 Widening operations we currently support are NOP (CONVERT), FLOAT
6231 and WIDEN_MULT. This function checks if these operations are supported
6232 by the target platform either directly (via vector tree-codes), or via
/* (Comment truncated by extraction — presumably ". . . target builtins.
   Output:" preceded the list below.)  */
6236 - CODE1 and CODE2 are codes of vector operations to be used when
6237 vectorizing the operation, if available.
6238 - MULTI_STEP_CVT determines the number of required intermediate steps in
6239 case of multi-step conversion (like char->short->int - in that case
6240 MULTI_STEP_CVT will be 1).
6241 - INTERM_TYPES contains the intermediate type required to perform the
6242 widening operation (short in the above example). */
6245 supportable_widening_operation (enum tree_code code
, gimple stmt
,
6246 tree vectype_out
, tree vectype_in
,
6247 enum tree_code
*code1
, enum tree_code
*code2
,
6248 int *multi_step_cvt
,
6249 VEC (tree
, heap
) **interm_types
)
6251 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6252 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6253 struct loop
*vect_loop
= NULL
;
6254 enum machine_mode vec_mode
;
6255 enum insn_code icode1
, icode2
;
6256 optab optab1
, optab2
;
6257 tree vectype
= vectype_in
;
6258 tree wide_vectype
= vectype_out
;
6259 enum tree_code c1
, c2
;
6261 tree prev_type
, intermediate_type
;
6262 enum machine_mode intermediate_mode
, prev_mode
;
6263 optab optab3
, optab4
;
6265 *multi_step_cvt
= 0;
6267 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
/* Map CODE to a pair of vector tree codes C1/C2 producing the low and
   high halves of the widened result.  NOTE(review): the enclosing
   "switch (code)" line and the break statements between cases are
   missing from this fragment.  */
6271 case WIDEN_MULT_EXPR
:
6272 /* The result of a vectorized widening operation usually requires
6273 two vectors (because the widened results do not fit into one vector).
6274 The generated vector results would normally be expected to be
6275 generated in the same order as in the original scalar computation,
6276 i.e. if 8 results are generated in each vector iteration, they are
6277 to be organized as follows:
6278 vect1: [res1,res2,res3,res4],
6279 vect2: [res5,res6,res7,res8].
6281 However, in the special case that the result of the widening
6282 operation is used in a reduction computation only, the order doesn't
6283 matter (because when vectorizing a reduction we change the order of
6284 the computation). Some targets can take advantage of this and
6285 generate more efficient code. For example, targets like Altivec,
6286 that support widen_mult using a sequence of {mult_even,mult_odd}
6287 generate the following vectors:
6288 vect1: [res1,res3,res5,res7],
6289 vect2: [res2,res4,res6,res8].
6291 When vectorizing outer-loops, we execute the inner-loop sequentially
6292 (each vectorized inner-loop iteration contributes to VF outer-loop
6293 iterations in parallel). We therefore don't allow to change the
6294 order of the computation in the inner-loop during outer-loop
6296 /* TODO: Another case in which order doesn't *really* matter is when we
6297 widen and then contract again, e.g. (short)((int)x * y >> 8).
6298 Normally, pack_trunc performs an even/odd permute, whereas the
6299 repack from an even/odd expansion would be an interleave, which
6300 would be significantly simpler for e.g. AVX2. */
6301 /* In any case, in order to avoid duplicating the code below, recurse
6302 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6303 are properly set up for the caller. If we fail, we'll continue with
6304 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
/* NOTE(review): the first conjunct of this condition (presumably the
   "if (vect_loop" line) is missing from this fragment.  */
6306 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
6307 && !nested_in_vect_loop_p (vect_loop
, stmt
)
6308 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
6309 stmt
, vectype_out
, vectype_in
,
6310 code1
, code2
, multi_step_cvt
,
6313 c1
= VEC_WIDEN_MULT_LO_EXPR
;
6314 c2
= VEC_WIDEN_MULT_HI_EXPR
;
6317 case VEC_WIDEN_MULT_EVEN_EXPR
:
6318 /* Support the recursion induced just above. */
6319 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
6320 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
6323 case WIDEN_LSHIFT_EXPR
:
6324 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
6325 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
/* NOTE(review): the CASE_CONVERT and FLOAT_EXPR case labels preceding
   the next two assignments are missing from this fragment.  */
6329 c1
= VEC_UNPACK_LO_EXPR
;
6330 c2
= VEC_UNPACK_HI_EXPR
;
6334 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
6335 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
6338 case FIX_TRUNC_EXPR
:
6339 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6340 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6341 computing the operation. */
/* On big-endian targets the LO/HI pair is swapped (except for the
   even/odd variant, which is endian-neutral).  NOTE(review): the swap's
   remaining two assignments after CTMP are missing from this
   fragment.  */
6348 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
6350 enum tree_code ctmp
= c1
;
6355 if (code
== FIX_TRUNC_EXPR
)
6357 /* The signedness is determined from output operand. */
6358 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
6359 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
6363 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
6364 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
6367 if (!optab1
|| !optab2
)
6370 vec_mode
= TYPE_MODE (vectype
);
6371 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
6372 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
/* Single-step case: both insns already produce the wide vector mode.  */
6378 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
6379 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
6382 /* Check if it's a multi-step conversion that can be done using intermediate
6385 prev_type
= vectype
;
6386 prev_mode
= vec_mode
;
6388 if (!CONVERT_EXPR_CODE_P (code
))
6391 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6392 intermediate steps in promotion sequence. We try
6393 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
6395 *interm_types
= VEC_alloc (tree
, heap
, MAX_INTERM_CVT_STEPS
);
6396 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
/* Each step widens to the mode produced by the previous step's insn,
   then requires both LO/HI optabs on that intermediate type.
   NOTE(review): the "intermediate_type" assignment target line before
   the "= lang_hooks" continuation is missing from this fragment.  */
6398 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
6400 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
6401 TYPE_UNSIGNED (prev_type
));
6402 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
6403 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
6405 if (!optab3
|| !optab4
6406 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
6407 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
6408 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
6409 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
6410 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
6411 == CODE_FOR_nothing
)
6412 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
6413 == CODE_FOR_nothing
))
6416 VEC_quick_push (tree
, *interm_types
, intermediate_type
);
6417 (*multi_step_cvt
)++;
/* Success: the last step reaches the wide vector mode.  */
6419 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
6420 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
6423 prev_type
= intermediate_type
;
6424 prev_mode
= intermediate_mode
;
/* All steps exhausted without reaching the wide type: clean up.  */
6427 VEC_free (tree
, heap
, *interm_types
);
6432 /* Function supportable_narrowing_operation
6434 Check whether an operation represented by the code CODE is a
6435 narrowing operation that is supported by the target platform in
6436 vector form (i.e., when operating on arguments of type VECTYPE_IN
6437 and producing a result of type VECTYPE_OUT).
6439 Narrowing operations we currently support are NOP (CONVERT) and
6440 FIX_TRUNC. This function checks if these operations are supported by
6441 the target platform directly via vector tree-codes.
6444 - CODE1 is the code of a vector operation to be used when
6445 vectorizing the operation, if available.
6446 - MULTI_STEP_CVT determines the number of required intermediate steps in
6447 case of multi-step conversion (like int->short->char - in that case
6448 MULTI_STEP_CVT will be 1).
6449 - INTERM_TYPES contains the intermediate type required to perform the
6450 narrowing operation (short in the above example). */
6453 supportable_narrowing_operation (enum tree_code code
,
6454 tree vectype_out
, tree vectype_in
,
6455 enum tree_code
*code1
, int *multi_step_cvt
,
6456 VEC (tree
, heap
) **interm_types
)
6458 enum machine_mode vec_mode
;
6459 enum insn_code icode1
;
6460 optab optab1
, interm_optab
;
6461 tree vectype
= vectype_in
;
6462 tree narrow_vectype
= vectype_out
;
6464 tree intermediate_type
;
6465 enum machine_mode intermediate_mode
, prev_mode
;
6469 *multi_step_cvt
= 0;
6473 c1
= VEC_PACK_TRUNC_EXPR
;
6476 case FIX_TRUNC_EXPR
:
6477 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
6481 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6482 tree code and optabs used for computing the operation. */
6489 if (code
== FIX_TRUNC_EXPR
)
6490 /* The signedness is determined from output operand. */
6491 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
6493 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
6498 vec_mode
= TYPE_MODE (vectype
);
6499 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
6504 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
6507 /* Check if it's a multi-step conversion that can be done using intermediate
6509 prev_mode
= vec_mode
;
6510 if (code
== FIX_TRUNC_EXPR
)
6511 uns
= TYPE_UNSIGNED (vectype_out
);
6513 uns
= TYPE_UNSIGNED (vectype
);
6515 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6516 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6517 costly than signed. */
6518 if (code
== FIX_TRUNC_EXPR
&& uns
)
6520 enum insn_code icode2
;
6523 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
6525 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
6526 if (interm_optab
!= unknown_optab
6527 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
6528 && insn_data
[icode1
].operand
[0].mode
6529 == insn_data
[icode2
].operand
[0].mode
)
6532 optab1
= interm_optab
;
6537 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6538 intermediate steps in promotion sequence. We try
6539 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6540 *interm_types
= VEC_alloc (tree
, heap
, MAX_INTERM_CVT_STEPS
);
6541 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
6543 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
6545 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
6547 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
6550 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
6551 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
6552 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
6553 == CODE_FOR_nothing
))
6556 VEC_quick_push (tree
, *interm_types
, intermediate_type
);
6557 (*multi_step_cvt
)++;
6559 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
6562 prev_mode
= intermediate_mode
;
6563 optab1
= interm_optab
;
6566 VEC_free (tree
, heap
, *interm_types
);