1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
35 #include "recog.h" /* FIXME: for insn_data */
37 #include "diagnostic-core.h"
38 #include "tree-vectorizer.h"
41 /* For lang_hooks.types.type_for_mode. */
42 #include "langhooks.h"
44 /* Return the vectorized type for the given statement. */
47 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
49 return STMT_VINFO_VECTYPE (stmt_info
);
52 /* Return TRUE iff the given statement is in an inner loop relative to
53 the loop being vectorized. */
55 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
57 gimple stmt
= STMT_VINFO_STMT (stmt_info
);
58 basic_block bb
= gimple_bb (stmt
);
59 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
65 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
67 return (bb
->loop_father
== loop
->inner
);
70 /* Record the cost of a statement, either by directly informing the
71 target model or by saving it in a vector for later processing.
72 Return a preliminary estimate of the statement's cost. */
75 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
76 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
77 int misalign
, enum vect_cost_model_location where
)
81 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
82 add_stmt_info_to_vec (body_cost_vec
, count
, kind
,
83 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
86 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
91 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
92 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
93 void *target_cost_data
;
96 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
98 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
100 return add_stmt_cost (target_cost_data
, count
, kind
, stmt_info
,
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
108 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
110 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
120 read_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
121 tree array
, unsigned HOST_WIDE_INT n
)
123 tree vect_type
, vect
, vect_name
, array_ref
;
126 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
127 vect_type
= TREE_TYPE (TREE_TYPE (array
));
128 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
129 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
130 build_int_cst (size_type_node
, n
),
131 NULL_TREE
, NULL_TREE
);
133 new_stmt
= gimple_build_assign (vect
, array_ref
);
134 vect_name
= make_ssa_name (vect
, new_stmt
);
135 gimple_assign_set_lhs (new_stmt
, vect_name
);
136 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
146 write_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
147 tree array
, unsigned HOST_WIDE_INT n
)
152 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
153 build_int_cst (size_type_node
, n
),
154 NULL_TREE
, NULL_TREE
);
156 new_stmt
= gimple_build_assign (array_ref
, vect
);
157 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
165 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
167 tree mem_ref
, alias_ptr_type
;
169 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
170 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
171 /* Arrays have the same alignment as their type. */
172 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
176 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
178 /* Function vect_mark_relevant.
180 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
183 vect_mark_relevant (VEC(gimple
,heap
) **worklist
, gimple stmt
,
184 enum vect_relevant relevant
, bool live_p
,
185 bool used_in_pattern
)
187 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
188 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
189 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
192 if (vect_print_dump_info (REPORT_DETAILS
))
193 fprintf (vect_dump
, "mark relevant %d, live %d.", relevant
, live_p
);
195 /* If this stmt is an original stmt in a pattern, we might need to mark its
196 related pattern stmt instead of the original stmt. However, such stmts
197 may have their own uses that are not in any pattern, in such cases the
198 stmt itself should be marked. */
199 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
202 if (!used_in_pattern
)
204 imm_use_iterator imm_iter
;
208 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
209 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
211 if (is_gimple_assign (stmt
))
212 lhs
= gimple_assign_lhs (stmt
);
214 lhs
= gimple_call_lhs (stmt
);
216 /* This use is out of pattern use, if LHS has other uses that are
217 pattern uses, we should mark the stmt itself, and not the pattern
219 if (TREE_CODE (lhs
) == SSA_NAME
)
220 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
222 if (is_gimple_debug (USE_STMT (use_p
)))
224 use_stmt
= USE_STMT (use_p
);
226 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
229 if (vinfo_for_stmt (use_stmt
)
230 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
240 /* This is the last stmt in a sequence that was detected as a
241 pattern that can potentially be vectorized. Don't mark the stmt
242 as relevant/live because it's not going to be vectorized.
243 Instead mark the pattern-stmt that replaces it. */
245 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
247 if (vect_print_dump_info (REPORT_DETAILS
))
248 fprintf (vect_dump
, "last stmt in pattern. don't mark"
250 stmt_info
= vinfo_for_stmt (pattern_stmt
);
251 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
252 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
253 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
258 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
259 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
260 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
262 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
263 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
265 if (vect_print_dump_info (REPORT_DETAILS
))
266 fprintf (vect_dump
, "already marked relevant/live.");
270 VEC_safe_push (gimple
, heap
, *worklist
, stmt
);
274 /* Function vect_stmt_relevant_p.
276 Return true if STMT in loop that is represented by LOOP_VINFO is
277 "relevant for vectorization".
279 A stmt is considered "relevant for vectorization" if:
280 - it has uses outside the loop.
281 - it has vdefs (it alters memory).
282 - control stmts in the loop (except for the exit condition).
284 CHECKME: what other side effects would the vectorizer allow? */
287 vect_stmt_relevant_p (gimple stmt
, loop_vec_info loop_vinfo
,
288 enum vect_relevant
*relevant
, bool *live_p
)
290 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
292 imm_use_iterator imm_iter
;
296 *relevant
= vect_unused_in_scope
;
299 /* cond stmt other than loop exit cond. */
300 if (is_ctrl_stmt (stmt
)
301 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
302 != loop_exit_ctrl_vec_info_type
)
303 *relevant
= vect_used_in_scope
;
305 /* changing memory. */
306 if (gimple_code (stmt
) != GIMPLE_PHI
)
307 if (gimple_vdef (stmt
))
309 if (vect_print_dump_info (REPORT_DETAILS
))
310 fprintf (vect_dump
, "vec_stmt_relevant_p: stmt has vdefs.");
311 *relevant
= vect_used_in_scope
;
314 /* uses outside the loop. */
315 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
317 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
319 basic_block bb
= gimple_bb (USE_STMT (use_p
));
320 if (!flow_bb_inside_loop_p (loop
, bb
))
322 if (vect_print_dump_info (REPORT_DETAILS
))
323 fprintf (vect_dump
, "vec_stmt_relevant_p: used out of loop.");
325 if (is_gimple_debug (USE_STMT (use_p
)))
328 /* We expect all such uses to be in the loop exit phis
329 (because of loop closed form) */
330 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
331 gcc_assert (bb
== single_exit (loop
)->dest
);
338 return (*live_p
|| *relevant
);
342 /* Function exist_non_indexing_operands_for_use_p
344 USE is one of the uses attached to STMT. Check if USE is
345 used in STMT for anything other than indexing an array. */
348 exist_non_indexing_operands_for_use_p (tree use
, gimple stmt
)
351 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
353 /* USE corresponds to some operand in STMT. If there is no data
354 reference in STMT, then any operand that corresponds to USE
355 is not indexing an array. */
356 if (!STMT_VINFO_DATA_REF (stmt_info
))
359 /* STMT has a data_ref. FORNOW this means that its of one of
363 (This should have been verified in analyze_data_refs).
365 'var' in the second case corresponds to a def, not a use,
366 so USE cannot correspond to any operands that are not used
369 Therefore, all we need to check is if STMT falls into the
370 first case, and whether var corresponds to USE. */
372 if (!gimple_assign_copy_p (stmt
))
374 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
376 operand
= gimple_assign_rhs1 (stmt
);
377 if (TREE_CODE (operand
) != SSA_NAME
)
388 Function process_use.
391 - a USE in STMT in a loop represented by LOOP_VINFO
392 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
393 that defined USE. This is done by calling mark_relevant and passing it
394 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
395 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
399 Generally, LIVE_P and RELEVANT are used to define the liveness and
400 relevance info of the DEF_STMT of this USE:
401 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
402 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
404 - case 1: If USE is used only for address computations (e.g. array indexing),
405 which does not need to be directly vectorized, then the liveness/relevance
406 of the respective DEF_STMT is left unchanged.
407 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
408 skip DEF_STMT cause it had already been processed.
409 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
410 be modified accordingly.
412 Return true if everything is as expected. Return false otherwise. */
415 process_use (gimple stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
416 enum vect_relevant relevant
, VEC(gimple
,heap
) **worklist
,
419 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
420 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
421 stmt_vec_info dstmt_vinfo
;
422 basic_block bb
, def_bb
;
425 enum vect_def_type dt
;
427 /* case 1: we are only interested in uses that need to be vectorized. Uses
428 that are used for address computation are not considered relevant. */
429 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
432 if (!vect_is_simple_use (use
, stmt
, loop_vinfo
, NULL
, &def_stmt
, &def
, &dt
))
434 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
435 fprintf (vect_dump
, "not vectorized: unsupported use in stmt.");
439 if (!def_stmt
|| gimple_nop_p (def_stmt
))
442 def_bb
= gimple_bb (def_stmt
);
443 if (!flow_bb_inside_loop_p (loop
, def_bb
))
445 if (vect_print_dump_info (REPORT_DETAILS
))
446 fprintf (vect_dump
, "def_stmt is out of loop.");
450 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
451 DEF_STMT must have already been processed, because this should be the
452 only way that STMT, which is a reduction-phi, was put in the worklist,
453 as there should be no other uses for DEF_STMT in the loop. So we just
454 check that everything is as expected, and we are done. */
455 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
456 bb
= gimple_bb (stmt
);
457 if (gimple_code (stmt
) == GIMPLE_PHI
458 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
459 && gimple_code (def_stmt
) != GIMPLE_PHI
460 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
461 && bb
->loop_father
== def_bb
->loop_father
)
463 if (vect_print_dump_info (REPORT_DETAILS
))
464 fprintf (vect_dump
, "reduc-stmt defining reduc-phi in the same nest.");
465 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
466 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
467 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
468 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
469 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
473 /* case 3a: outer-loop stmt defining an inner-loop stmt:
474 outer-loop-header-bb:
480 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
482 if (vect_print_dump_info (REPORT_DETAILS
))
483 fprintf (vect_dump
, "outer-loop def-stmt defining inner-loop stmt.");
487 case vect_unused_in_scope
:
488 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
489 vect_used_in_scope
: vect_unused_in_scope
;
492 case vect_used_in_outer_by_reduction
:
493 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
494 relevant
= vect_used_by_reduction
;
497 case vect_used_in_outer
:
498 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
499 relevant
= vect_used_in_scope
;
502 case vect_used_in_scope
:
510 /* case 3b: inner-loop stmt defining an outer-loop stmt:
511 outer-loop-header-bb:
515 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
517 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
519 if (vect_print_dump_info (REPORT_DETAILS
))
520 fprintf (vect_dump
, "inner-loop def-stmt defining outer-loop stmt.");
524 case vect_unused_in_scope
:
525 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
526 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
527 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
530 case vect_used_by_reduction
:
531 relevant
= vect_used_in_outer_by_reduction
;
534 case vect_used_in_scope
:
535 relevant
= vect_used_in_outer
;
543 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
,
544 is_pattern_stmt_p (stmt_vinfo
));
549 /* Function vect_mark_stmts_to_be_vectorized.
551 Not all stmts in the loop need to be vectorized. For example:
560 Stmt 1 and 3 do not need to be vectorized, because loop control and
561 addressing of vectorized data-refs are handled differently.
563 This pass detects such stmts. */
566 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
568 VEC(gimple
,heap
) *worklist
;
569 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
570 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
571 unsigned int nbbs
= loop
->num_nodes
;
572 gimple_stmt_iterator si
;
575 stmt_vec_info stmt_vinfo
;
579 enum vect_relevant relevant
, tmp_relevant
;
580 enum vect_def_type def_type
;
582 if (vect_print_dump_info (REPORT_DETAILS
))
583 fprintf (vect_dump
, "=== vect_mark_stmts_to_be_vectorized ===");
585 worklist
= VEC_alloc (gimple
, heap
, 64);
587 /* 1. Init worklist. */
588 for (i
= 0; i
< nbbs
; i
++)
591 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
594 if (vect_print_dump_info (REPORT_DETAILS
))
596 fprintf (vect_dump
, "init: phi relevant? ");
597 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
600 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
601 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
, false);
603 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
605 stmt
= gsi_stmt (si
);
606 if (vect_print_dump_info (REPORT_DETAILS
))
608 fprintf (vect_dump
, "init: stmt relevant? ");
609 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
612 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
613 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
, false);
617 /* 2. Process_worklist */
618 while (VEC_length (gimple
, worklist
) > 0)
623 stmt
= VEC_pop (gimple
, worklist
);
624 if (vect_print_dump_info (REPORT_DETAILS
))
626 fprintf (vect_dump
, "worklist: examine stmt: ");
627 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
630 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
631 (DEF_STMT) as relevant/irrelevant and live/dead according to the
632 liveness and relevance properties of STMT. */
633 stmt_vinfo
= vinfo_for_stmt (stmt
);
634 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
635 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
637 /* Generally, the liveness and relevance properties of STMT are
638 propagated as is to the DEF_STMTs of its USEs:
639 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
640 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
642 One exception is when STMT has been identified as defining a reduction
643 variable; in this case we set the liveness/relevance as follows:
645 relevant = vect_used_by_reduction
646 This is because we distinguish between two kinds of relevant stmts -
647 those that are used by a reduction computation, and those that are
648 (also) used by a regular computation. This allows us later on to
649 identify stmts that are used solely by a reduction, and therefore the
650 order of the results that they produce does not have to be kept. */
652 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
653 tmp_relevant
= relevant
;
656 case vect_reduction_def
:
657 switch (tmp_relevant
)
659 case vect_unused_in_scope
:
660 relevant
= vect_used_by_reduction
;
663 case vect_used_by_reduction
:
664 if (gimple_code (stmt
) == GIMPLE_PHI
)
669 if (vect_print_dump_info (REPORT_DETAILS
))
670 fprintf (vect_dump
, "unsupported use of reduction.");
672 VEC_free (gimple
, heap
, worklist
);
679 case vect_nested_cycle
:
680 if (tmp_relevant
!= vect_unused_in_scope
681 && tmp_relevant
!= vect_used_in_outer_by_reduction
682 && tmp_relevant
!= vect_used_in_outer
)
684 if (vect_print_dump_info (REPORT_DETAILS
))
685 fprintf (vect_dump
, "unsupported use of nested cycle.");
687 VEC_free (gimple
, heap
, worklist
);
694 case vect_double_reduction_def
:
695 if (tmp_relevant
!= vect_unused_in_scope
696 && tmp_relevant
!= vect_used_by_reduction
)
698 if (vect_print_dump_info (REPORT_DETAILS
))
699 fprintf (vect_dump
, "unsupported use of double reduction.");
701 VEC_free (gimple
, heap
, worklist
);
712 if (is_pattern_stmt_p (stmt_vinfo
))
714 /* Pattern statements are not inserted into the code, so
715 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
716 have to scan the RHS or function arguments instead. */
717 if (is_gimple_assign (stmt
))
719 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
720 tree op
= gimple_assign_rhs1 (stmt
);
723 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
725 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
726 live_p
, relevant
, &worklist
, false)
727 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
728 live_p
, relevant
, &worklist
, false))
730 VEC_free (gimple
, heap
, worklist
);
735 for (; i
< gimple_num_ops (stmt
); i
++)
737 op
= gimple_op (stmt
, i
);
738 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
741 VEC_free (gimple
, heap
, worklist
);
746 else if (is_gimple_call (stmt
))
748 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
750 tree arg
= gimple_call_arg (stmt
, i
);
751 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
754 VEC_free (gimple
, heap
, worklist
);
761 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
763 tree op
= USE_FROM_PTR (use_p
);
764 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
767 VEC_free (gimple
, heap
, worklist
);
772 if (STMT_VINFO_GATHER_P (stmt_vinfo
))
775 tree decl
= vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
777 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
780 VEC_free (gimple
, heap
, worklist
);
784 } /* while worklist */
786 VEC_free (gimple
, heap
, worklist
);
791 /* Function vect_model_simple_cost.
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
798 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
799 enum vect_def_type
*dt
,
800 stmt_vector_for_cost
*prologue_cost_vec
,
801 stmt_vector_for_cost
*body_cost_vec
)
804 int inside_cost
= 0, prologue_cost
= 0;
806 /* The SLP costs were already calculated during SLP tree build. */
807 if (PURE_SLP_STMT (stmt_info
))
810 /* FORNOW: Assuming maximum 2 args per stmts. */
811 for (i
= 0; i
< 2; i
++)
812 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
813 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
814 stmt_info
, 0, vect_prologue
);
816 /* Pass the inside-of-loop statements to the target-specific cost model. */
817 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
818 stmt_info
, 0, vect_body
);
820 if (vect_print_dump_info (REPORT_COST
))
821 fprintf (vect_dump
, "vect_model_simple_cost: inside_cost = %d, "
822 "prologue_cost = %d .", inside_cost
, prologue_cost
);
826 /* Model cost for type demotion and promotion operations. PWR is normally
827 zero for single-step promotions and demotions. It will be one if
828 two-step promotion/demotion is required, and so on. Each additional
829 step doubles the number of instructions required. */
832 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
833 enum vect_def_type
*dt
, int pwr
)
836 int inside_cost
= 0, prologue_cost
= 0;
837 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
838 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
839 void *target_cost_data
;
841 /* The SLP costs were already calculated during SLP tree build. */
842 if (PURE_SLP_STMT (stmt_info
))
846 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
848 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
850 for (i
= 0; i
< pwr
+ 1; i
++)
852 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
854 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
855 vec_promote_demote
, stmt_info
, 0,
859 /* FORNOW: Assuming maximum 2 args per stmts. */
860 for (i
= 0; i
< 2; i
++)
861 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
862 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
863 stmt_info
, 0, vect_prologue
);
865 if (vect_print_dump_info (REPORT_COST
))
866 fprintf (vect_dump
, "vect_model_promotion_demotion_cost: inside_cost = %d, "
867 "prologue_cost = %d .", inside_cost
, prologue_cost
);
870 /* Function vect_cost_group_size
872 For grouped load or store, return the group_size only if it is the first
873 load or store of a group, else return 1. This ensures that group size is
874 only returned once per group. */
877 vect_cost_group_size (stmt_vec_info stmt_info
)
879 gimple first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
881 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
882 return GROUP_SIZE (stmt_info
);
888 /* Function vect_model_store_cost
890 Models cost for stores. In the case of grouped accesses, one access
891 has the overhead of the grouped access attributed to it. */
894 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
895 bool store_lanes_p
, enum vect_def_type dt
,
897 stmt_vector_for_cost
*prologue_cost_vec
,
898 stmt_vector_for_cost
*body_cost_vec
)
901 unsigned int inside_cost
= 0, prologue_cost
= 0;
902 struct data_reference
*first_dr
;
905 /* The SLP costs were already calculated during SLP tree build. */
906 if (PURE_SLP_STMT (stmt_info
))
909 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
910 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
911 stmt_info
, 0, vect_prologue
);
913 /* Grouped access? */
914 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
918 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
923 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
924 group_size
= vect_cost_group_size (stmt_info
);
927 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
929 /* Not a grouped access. */
933 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
940 if (!store_lanes_p
&& group_size
> 1)
942 /* Uses a high and low interleave operation for each needed permute. */
944 int nstmts
= ncopies
* exact_log2 (group_size
) * group_size
;
945 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
946 stmt_info
, 0, vect_body
);
948 if (vect_print_dump_info (REPORT_COST
))
949 fprintf (vect_dump
, "vect_model_store_cost: strided group_size = %d .",
953 /* Costs of the stores. */
954 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, body_cost_vec
);
956 if (vect_print_dump_info (REPORT_COST
))
957 fprintf (vect_dump
, "vect_model_store_cost: inside_cost = %d, "
958 "prologue_cost = %d .", inside_cost
, prologue_cost
);
962 /* Calculate cost of DR's memory access. */
964 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
965 unsigned int *inside_cost
,
966 stmt_vector_for_cost
*body_cost_vec
)
968 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
969 gimple stmt
= DR_STMT (dr
);
970 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
972 switch (alignment_support_scheme
)
976 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
977 vector_store
, stmt_info
, 0,
980 if (vect_print_dump_info (REPORT_COST
))
981 fprintf (vect_dump
, "vect_model_store_cost: aligned.");
986 case dr_unaligned_supported
:
988 /* Here, we assign an additional cost for the unaligned store. */
989 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
990 unaligned_store
, stmt_info
,
991 DR_MISALIGNMENT (dr
), vect_body
);
993 if (vect_print_dump_info (REPORT_COST
))
994 fprintf (vect_dump
, "vect_model_store_cost: unaligned supported by "
1000 case dr_unaligned_unsupported
:
1002 *inside_cost
= VECT_MAX_COST
;
1004 if (vect_print_dump_info (REPORT_COST
))
1005 fprintf (vect_dump
, "vect_model_store_cost: unsupported access.");
1016 /* Function vect_model_load_cost
1018 Models cost for loads. In the case of grouped accesses, the last access
1019 has the overhead of the grouped access attributed to it. Since unaligned
1020 accesses are supported for loads, we also account for the costs of the
1021 access scheme chosen. */
1024 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1025 bool load_lanes_p
, slp_tree slp_node
,
1026 stmt_vector_for_cost
*prologue_cost_vec
,
1027 stmt_vector_for_cost
*body_cost_vec
)
1031 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1032 unsigned int inside_cost
= 0, prologue_cost
= 0;
1034 /* The SLP costs were already calculated during SLP tree build. */
1035 if (PURE_SLP_STMT (stmt_info
))
1038 /* Grouped accesses? */
1039 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1040 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1042 group_size
= vect_cost_group_size (stmt_info
);
1043 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1045 /* Not a grouped access. */
1052 /* We assume that the cost of a single load-lanes instruction is
1053 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1054 access is instead being provided by a load-and-permute operation,
1055 include the cost of the permutes. */
1056 if (!load_lanes_p
&& group_size
> 1)
1058 /* Uses an even and odd extract operations for each needed permute. */
1059 int nstmts
= ncopies
* exact_log2 (group_size
) * group_size
;
1060 inside_cost
+= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1061 stmt_info
, 0, vect_body
);
1063 if (vect_print_dump_info (REPORT_COST
))
1064 fprintf (vect_dump
, "vect_model_load_cost: strided group_size = %d .",
1068 /* The loads themselves. */
1069 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1071 /* N scalar loads plus gathering them into a vector. */
1072 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1073 inside_cost
+= record_stmt_cost (body_cost_vec
,
1074 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1075 scalar_load
, stmt_info
, 0, vect_body
);
1076 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1077 stmt_info
, 0, vect_body
);
1080 vect_get_load_cost (first_dr
, ncopies
,
1081 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1082 || group_size
> 1 || slp_node
),
1083 &inside_cost
, &prologue_cost
,
1084 prologue_cost_vec
, body_cost_vec
, true);
1086 if (vect_print_dump_info (REPORT_COST
))
1087 fprintf (vect_dump
, "vect_model_load_cost: inside_cost = %d, "
1088 "prologue_cost = %d .", inside_cost
, prologue_cost
);
/* NOTE(review): this chunk is a lossily line-wrapped extraction of GCC's
   tree-vect-stmts.c -- statements are split across physical lines, the
   original file's line numbers are embedded in the text, and several
   original lines (braces, the dr_aligned case label, returns) are missing.
   Code is kept byte-identical; only comments were added.  */
1092 /* Calculate cost of DR's memory access. */
/* Models the cost of a vectorized load of data reference DR, replicated
   NCOPIES times, splitting the cost into *INSIDE_COST (loop body) and
   *PROLOGUE_COST.  Costs are recorded via record_stmt_cost into
   BODY_COST_VEC / PROLOGUE_COST_VEC; prologue costs are only recorded when
   RECORD_PROLOGUE_COSTS is set.  */
1094 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1095 bool add_realign_cost
, unsigned int *inside_cost
,
1096 unsigned int *prologue_cost
,
1097 stmt_vector_for_cost
*prologue_cost_vec
,
1098 stmt_vector_for_cost
*body_cost_vec
,
1099 bool record_prologue_costs
)
1101 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1102 gimple stmt
= DR_STMT (dr
);
1103 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* Dispatch on how the target supports this access's alignment.  The
   dr_aligned case label was lost by the extraction; the first cost below
   is the aligned-load case.  */
1105 switch (alignment_support_scheme
)
/* Aligned: one vector_load per copy.  */
1109 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1110 stmt_info
, 0, vect_body
);
1112 if (vect_print_dump_info (REPORT_COST
))
1113 fprintf (vect_dump
, "vect_model_load_cost: aligned.");
1117 case dr_unaligned_supported
:
1119 /* Here, we assign an additional cost for the unaligned load. */
/* The misalignment amount is passed so the target can price it.  */
1120 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1121 unaligned_load
, stmt_info
,
1122 DR_MISALIGNMENT (dr
), vect_body
);
1124 if (vect_print_dump_info (REPORT_COST
))
1125 fprintf (vect_dump
, "vect_model_load_cost: unaligned supported by "
1130 case dr_explicit_realign
:
/* Explicit realignment: two loads plus a permute per copy.  */
1132 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1133 vector_load
, stmt_info
, 0, vect_body
);
1134 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1135 vec_perm
, stmt_info
, 0, vect_body
);
1137 /* FIXME: If the misalignment remains fixed across the iterations of
1138 the containing loop, the following cost should be added to the
/* Extra mask-building statement when the target provides a
   mask-for-load builtin.  */
1140 if (targetm
.vectorize
.builtin_mask_for_load
)
1141 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1142 stmt_info
, 0, vect_body
);
1144 if (vect_print_dump_info (REPORT_COST
))
1145 fprintf (vect_dump
, "vect_model_load_cost: explicit realign");
1149 case dr_explicit_realign_optimized
:
1151 if (vect_print_dump_info (REPORT_COST
))
1152 fprintf (vect_dump
, "vect_model_load_cost: unaligned software "
1155 /* Unaligned software pipeline has a load of an address, an initial
1156 load, and possibly a mask operation to "prime" the loop. However,
1157 if this is an access in a group of loads, which provide grouped
1158 access, then the above cost should only be considered for one
1159 access in the group. Inside the loop, there is a load op
1160 and a realignment op. */
/* Prologue (priming) costs, recorded only once per group.  */
1162 if (add_realign_cost
&& record_prologue_costs
)
1164 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1165 vector_stmt
, stmt_info
,
1167 if (targetm
.vectorize
.builtin_mask_for_load
)
1168 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1169 vector_stmt
, stmt_info
,
/* Steady-state body: one load plus one realigning permute per copy.  */
1173 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1174 stmt_info
, 0, vect_body
);
1175 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1176 stmt_info
, 0, vect_body
);
1178 if (vect_print_dump_info (REPORT_COST
))
1180 "vect_model_load_cost: explicit realign optimized");
1185 case dr_unaligned_unsupported
:
/* Unsupported: make this access prohibitively expensive so the
   vectorizer rejects it.  */
1187 *inside_cost
= VECT_MAX_COST
;
1189 if (vect_print_dump_info (REPORT_COST
))
1190 fprintf (vect_dump
, "vect_model_load_cost: unsupported access.");
1200 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1201 the loop preheader for the vectorized stmt STMT. */
/* NOTE(review): fragmented extraction -- several original lines (the
   static void header, braces, the if (gsi) branch) are missing.  Code kept
   verbatim; comments only.  When GSI is available the stmt is emitted via
   vect_finish_stmt_generation; otherwise it goes to the loop preheader
   edge (loop vectorization) or after the labels of the BB being
   SLP-vectorized (basic-block vectorization).  */
1204 vect_init_vector_1 (gimple stmt
, gimple new_stmt
, gimple_stmt_iterator
*gsi
)
1207 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1210 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1211 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1215 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
/* NOTE(review): presumably for nested loops the outer loop's preheader is
   chosen here; the branch selecting the loop was dropped by extraction.  */
1219 if (nested_in_vect_loop_p (loop
, stmt
))
/* Insert on the preheader edge; the insertion must not split the edge
   (no new basic block), which the assert below checks.  */
1222 pe
= loop_preheader_edge (loop
);
1223 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1224 gcc_assert (!new_bb
);
/* Basic-block (SLP) vectorization path: insert right after the BB's
   labels.  */
1228 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1230 gimple_stmt_iterator gsi_bb_start
;
1232 gcc_assert (bb_vinfo
);
1233 bb
= BB_VINFO_BB (bb_vinfo
);
1234 gsi_bb_start
= gsi_after_labels (bb
);
1235 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1239 if (vect_print_dump_info (REPORT_DETAILS
))
1241 fprintf (vect_dump
, "created new init_stmt: ");
1242 print_gimple_stmt (vect_dump
, new_stmt
, 0, TDF_SLIM
);
1246 /* Function vect_init_vector.
1248 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1249 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1250 vector type a vector with all elements equal to VAL is created first.
1251 Place the initialization at BSI if it is not NULL. Otherwise, place the
1252 initialization at the loop preheader.
1253 Return the DEF of INIT_STMT.
1254 It will be used in the vectorization of STMT. */
/* NOTE(review): fragmented extraction -- local declarations, braces and
   the final return are missing from this view.  Code kept verbatim;
   comments only.  */
1257 vect_init_vector (gimple stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
/* VAL is a scalar but TYPE is a vector: first coerce VAL to the vector's
   element type, then splat it.  */
1264 if (TREE_CODE (type
) == VECTOR_TYPE
1265 && TREE_CODE (TREE_TYPE (val
)) != VECTOR_TYPE
)
1267 if (!types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
/* Constants can be converted by folding a VIEW_CONVERT_EXPR ...  */
1269 if (CONSTANT_CLASS_P (val
))
1270 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (type
), val
);
/* ... non-constants need an explicit conversion stmt emitted via
   vect_init_vector_1.  */
1273 new_temp
= make_ssa_name (TREE_TYPE (type
), NULL
);
1274 init_stmt
= gimple_build_assign_with_ops (NOP_EXPR
,
1277 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
/* Build {val, val, ..., val} of type TYPE.  */
1281 val
= build_vector_from_val (type
, val
);
/* Materialize the (possibly vector) value into a fresh "cst_" variable
   and return its SSA def.  */
1284 new_var
= vect_get_new_vect_var (type
, vect_simple_var
, "cst_");
1285 init_stmt
= gimple_build_assign (new_var
, val
);
1286 new_temp
= make_ssa_name (new_var
, init_stmt
);
1287 gimple_assign_set_lhs (init_stmt
, new_temp
);
1288 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1289 vec_oprnd
= gimple_assign_lhs (init_stmt
);
1294 /* Function vect_get_vec_def_for_operand.
1296 OP is an operand in STMT. This function returns a (vector) def that will be
1297 used in the vectorized stmt for STMT.
1299 In the case that OP is an SSA_NAME which is defined in the loop, then
1300 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1302 In case OP is an invariant or constant, a new stmt that creates a vector def
1303 needs to be introduced. */
/* NOTE(review): fragmented extraction -- the function header line, the
   switch statement, several returns, the default case and braces are
   missing.  Code kept verbatim; comments only.  The visible structure
   dispatches on the vect_def_type of OP.  */
1306 vect_get_vec_def_for_operand (tree op
, gimple stmt
, tree
*scalar_def
)
1311 stmt_vec_info def_stmt_info
= NULL
;
1312 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1313 unsigned int nunits
;
1314 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1316 enum vect_def_type dt
;
1320 if (vect_print_dump_info (REPORT_DETAILS
))
1322 fprintf (vect_dump
, "vect_get_vec_def_for_operand: ");
1323 print_generic_expr (vect_dump
, op
, TDF_SLIM
);
/* Classify OP; the vectorizer's analysis phase guarantees this
   succeeds, hence the assert.  */
1326 is_simple_use
= vect_is_simple_use (op
, stmt
, loop_vinfo
, NULL
,
1327 &def_stmt
, &def
, &dt
);
1328 gcc_assert (is_simple_use
);
1329 if (vect_print_dump_info (REPORT_DETAILS
))
1333 fprintf (vect_dump
, "def = ");
1334 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
1338 fprintf (vect_dump
, " def_stmt = ");
1339 print_gimple_stmt (vect_dump
, def_stmt
, 0, TDF_SLIM
);
1345 /* Case 1: operand is a constant. */
1346 case vect_constant_def
:
/* Splat the constant into a fresh vector via vect_init_vector.  */
1348 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1349 gcc_assert (vector_type
);
1350 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1355 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1356 if (vect_print_dump_info (REPORT_DETAILS
))
1357 fprintf (vect_dump
, "Create vector_cst. nunits = %d", nunits
);
1359 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1362 /* Case 2: operand is defined outside the loop - loop invariant. */
1363 case vect_external_def
:
/* Same splat, but keyed off the type of DEF rather than OP.  */
1365 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1366 gcc_assert (vector_type
);
1371 /* Create 'vec_inv = {inv,inv,..,inv}' */
1372 if (vect_print_dump_info (REPORT_DETAILS
))
1373 fprintf (vect_dump
, "Create vector_inv.");
1375 return vect_init_vector (stmt
, def
, vector_type
, NULL
);
1378 /* Case 3: operand is defined inside the loop. */
1379 case vect_internal_def
:
1382 *scalar_def
= NULL
/* FIXME tuples: def_stmt*/;
1384 /* Get the def from the vectorized stmt. */
1385 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1387 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1388 /* Get vectorized pattern statement. */
/* If the def stmt was replaced by a pattern stmt and is itself not
   relevant, the vector def lives on the related pattern stmt.  */
1390 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1391 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1392 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1393 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1394 gcc_assert (vec_stmt
);
/* Extract the LHS according to the kind of defining stmt.  */
1395 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1396 vec_oprnd
= PHI_RESULT (vec_stmt
);
1397 else if (is_gimple_call (vec_stmt
))
1398 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1400 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1404 /* Case 4: operand is defined by a loop header phi - reduction */
1405 case vect_reduction_def
:
1406 case vect_double_reduction_def
:
1407 case vect_nested_cycle
:
1411 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1412 loop
= (gimple_bb (def_stmt
))->loop_father
;
1414 /* Get the def before the loop */
1415 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
1416 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
1419 /* Case 5: operand is defined by loop-header phi - induction. */
1420 case vect_induction_def
:
1422 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1424 /* Get the def from the vectorized stmt. */
1425 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1426 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1427 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1428 vec_oprnd
= PHI_RESULT (vec_stmt
);
1430 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1440 /* Function vect_get_vec_def_for_stmt_copy
1442 Return a vector-def for an operand. This function is used when the
1443 vectorized stmt to be created (by the caller to this function) is a "copy"
1444 created in case the vectorized result cannot fit in one vector, and several
1445 copies of the vector-stmt are required. In this case the vector-def is
1446 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1447 of the stmt that defines VEC_OPRND.
1448 DT is the type of the vector def VEC_OPRND.
1451 In case the vectorization factor (VF) is bigger than the number
1452 of elements that can fit in a vectype (nunits), we have to generate
1453 more than one vector stmt to vectorize the scalar stmt. This situation
1454 arises when there are multiple data-types operated upon in the loop; the
1455 smallest data-type determines the VF, and as a result, when vectorizing
1456 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1457 vector stmt (each computing a vector of 'nunits' results, and together
1458 computing 'VF' results in each iteration). This function is called when
1459 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1460 which VF=16 and nunits=4, so the number of copies required is 4):
1462 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1464 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1465 VS1.1: vx.1 = memref1 VS1.2
1466 VS1.2: vx.2 = memref2 VS1.3
1467 VS1.3: vx.3 = memref3
1469 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1470 VSnew.1: vz1 = vx.1 + ... VSnew.2
1471 VSnew.2: vz2 = vx.2 + ... VSnew.3
1472 VSnew.3: vz3 = vx.3 + ...
1474 The vectorization of S1 is explained in vectorizable_load.
1475 The vectorization of S2:
1476 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1477 the function 'vect_get_vec_def_for_operand' is called to
1478 get the relevant vector-def for each operand of S2. For operand x it
1479 returns the vector-def 'vx.0'.
1481 To create the remaining copies of the vector-stmt (VSnew.j), this
1482 function is called to get the relevant vector-def for each operand. It is
1483 obtained from the respective VS1.j stmt, which is recorded in the
1484 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1486 For example, to obtain the vector-def 'vx.1' in order to create the
1487 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1488 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1489 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1490 and return its def ('vx.1').
1491 Overall, to create the above sequence this function will be called 3 times:
1492 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1493 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1494 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
/* NOTE(review): fragmented extraction -- the early return for the
   invariant/constant case and the final return are missing.  Code kept
   verbatim; comments only.  */
1497 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1499 gimple vec_stmt_for_operand
;
1500 stmt_vec_info def_stmt_info
;
1502 /* Do nothing; can reuse same def. */
/* Invariants and constants are the same for every copy.  */
1503 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
/* Walk from the def of VEC_OPRND to the next copy in the
   STMT_VINFO_RELATED_STMT chain and return that copy's LHS.  */
1506 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1507 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1508 gcc_assert (def_stmt_info
);
1509 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1510 gcc_assert (vec_stmt_for_operand
);
1511 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1512 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1513 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1515 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1520 /* Get vectorized definitions for the operands to create a copy of an original
1521 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
/* NOTE(review): fragmented extraction -- code kept verbatim, comments only.
   Pops the most recent vector def from each operand vector, advances it to
   the next copy via vect_get_vec_def_for_stmt_copy, and pushes it back.
   VEC_OPRNDS1 is optional (may be NULL or empty).  */
1524 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1525 VEC(tree
,heap
) **vec_oprnds0
,
1526 VEC(tree
,heap
) **vec_oprnds1
)
/* First operand: replace top-of-vector def with the next-copy def.  */
1528 tree vec_oprnd
= VEC_pop (tree
, *vec_oprnds0
);
1530 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1531 VEC_quick_push (tree
, *vec_oprnds0
, vec_oprnd
);
/* Second operand, if present, is handled the same way with dt[1].  */
1533 if (vec_oprnds1
&& *vec_oprnds1
)
1535 vec_oprnd
= VEC_pop (tree
, *vec_oprnds1
);
1536 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1537 VEC_quick_push (tree
, *vec_oprnds1
, vec_oprnd
);
1542 /* Get vectorized definitions for OP0 and OP1.
1543 REDUC_INDEX is the index of reduction operand in case of reduction,
1544 and -1 otherwise. */
/* NOTE(review): fragmented extraction -- the if (slp_node) split and
   several braces are missing.  Code kept verbatim; comments only.
   SLP path: collect defs for all operands at once via vect_get_slp_defs.
   Non-SLP path: one def per operand via vect_get_vec_def_for_operand.  */
1547 vect_get_vec_defs (tree op0
, tree op1
, gimple stmt
,
1548 VEC (tree
, heap
) **vec_oprnds0
,
1549 VEC (tree
, heap
) **vec_oprnds1
,
1550 slp_tree slp_node
, int reduc_index
)
/* SLP path: build a temporary ops vector of the 1 or 2 operands.  */
1554 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1555 VEC (tree
, heap
) *ops
= VEC_alloc (tree
, heap
, nops
);
1556 VEC (slp_void_p
, heap
) *vec_defs
= VEC_alloc (slp_void_p
, heap
, nops
);
1558 VEC_quick_push (tree
, ops
, op0
);
1560 VEC_quick_push (tree
, ops
, op1
);
1562 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
/* Unpack the per-operand def vectors out of the slp_void_p vector.  */
1564 *vec_oprnds0
= (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 0);
1566 *vec_oprnds1
= (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 1);
1568 VEC_free (tree
, heap
, ops
);
1569 VEC_free (slp_void_p
, heap
, vec_defs
);
/* Non-SLP path: a single vector def per operand.  */
1575 *vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
1576 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1577 VEC_quick_push (tree
, *vec_oprnds0
, vec_oprnd
);
1581 *vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
1582 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
1583 VEC_quick_push (tree
, *vec_oprnds1
, vec_oprnd
);
1589 /* Function vect_finish_stmt_generation.
1591 Insert a new stmt. */
/* NOTE(review): fragmented extraction -- braces and parts of the
   set_vinfo_for_stmt call are missing.  Code kept verbatim; comments only.
   Inserts VEC_STMT before *GSI, creating its stmt_vec_info and, when VEC_STMT
   is a store inserted before a stmt carrying virtual operands, patching the
   virtual SSA chain in place to avoid a renamer run.  */
1594 vect_finish_stmt_generation (gimple stmt
, gimple vec_stmt
,
1595 gimple_stmt_iterator
*gsi
)
1597 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1598 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1599 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1601 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
/* If the insertion point has virtual operands, thread VEC_STMT into the
   virtual use-def chain manually.  */
1603 if (!gsi_end_p (*gsi
)
1604 && gimple_has_mem_ops (vec_stmt
))
1606 gimple at_stmt
= gsi_stmt (*gsi
);
1607 tree vuse
= gimple_vuse (at_stmt
);
1608 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1610 tree vdef
= gimple_vdef (at_stmt
);
1611 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1612 /* If we have an SSA vuse and insert a store, update virtual
1613 SSA form to avoid triggering the renamer. Do so only
1614 if we can easily see all uses - which is what almost always
1615 happens with the way vectorized stmts are inserted. */
1616 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1617 && ((is_gimple_assign (vec_stmt
)
1618 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1619 || (is_gimple_call (vec_stmt
)
1620 && !(gimple_call_flags (vec_stmt
)
1621 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
/* VEC_STMT is a store: give it a fresh vdef and make the following
   stmt consume it.  */
1623 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1624 gimple_set_vdef (vec_stmt
, new_vdef
);
1625 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1629 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
/* Register vectorizer bookkeeping for the new stmt.  */
1631 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, loop_vinfo
,
1634 if (vect_print_dump_info (REPORT_DETAILS
))
1636 fprintf (vect_dump
, "add new stmt: ");
1637 print_gimple_stmt (vect_dump
, vec_stmt
, 0, TDF_SLIM
);
/* Inherit the scalar stmt's source location for diagnostics/debug.  */
1640 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1643 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1644 a function declaration if the target has a vectorized version
1645 of the function, or NULL_TREE if the function cannot be vectorized. */
/* NOTE(review): fragmented extraction -- the return type line, early
   "return NULL_TREE" lines and braces are missing.  Code kept verbatim;
   comments only.  Delegates the actual lookup to the target hook
   builtin_vectorized_function.  */
1648 vectorizable_function (gimple call
, tree vectype_out
, tree vectype_in
)
1650 tree fndecl
= gimple_call_fndecl (call
);
1652 /* We only handle functions that do not read or clobber memory -- i.e.
1653 const or novops ones. */
1654 if (!(gimple_call_flags (call
) & (ECF_CONST
| ECF_NOVOPS
)))
/* Only known built-in function decls can have a target vector form.  */
1658 || TREE_CODE (fndecl
) != FUNCTION_DECL
1659 || !DECL_BUILT_IN (fndecl
))
1662 return targetm
.vectorize
.builtin_vectorized_function (fndecl
, vectype_out
,
1666 /* Function vectorizable_call.
1668 Check if STMT performs a function call that can be vectorized.
1669 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1670 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1671 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): heavily fragmented extraction -- many original lines
   (returns, braces, several declarations, the switch over MODIFIER) are
   missing.  Code kept verbatim; comments only.  Structure: analysis
   (guards, argument checking, modifier computation, builtin lookup) then
   transformation (NONE and NARROW cases), then scalar-call neutering.  */
1674 vectorizable_call (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
1680 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
1681 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
1682 tree vectype_out
, vectype_in
;
1685 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1686 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1687 tree fndecl
, new_temp
, def
, rhs_type
;
1689 enum vect_def_type dt
[3]
1690 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
1691 gimple new_stmt
= NULL
;
1693 VEC(tree
, heap
) *vargs
= NULL
;
1694 enum { NARROW
, NONE
, WIDEN
} modifier
;
/* Guards: the stmt must be relevant, internal, a call with an SSA lhs,
   and must not throw internally.  */
1698 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
1701 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1704 /* Is STMT a vectorizable call? */
1705 if (!is_gimple_call (stmt
))
1708 if (TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
1711 if (stmt_can_throw_internal (stmt
))
1714 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
1716 /* Process function arguments. */
1717 rhs_type
= NULL_TREE
;
1718 vectype_in
= NULL_TREE
;
1719 nargs
= gimple_call_num_args (stmt
);
1721 /* Bail out if the function has more than three arguments, we do not have
1722 interesting builtin functions to vectorize with more than two arguments
1723 except for fma. No arguments is also not good. */
1724 if (nargs
== 0 || nargs
> 3)
/* Validate each argument: same scalar type, simple use, and a
   consistent input vector type.  */
1727 for (i
= 0; i
< nargs
; i
++)
1731 op
= gimple_call_arg (stmt
, i
);
1733 /* We can only handle calls with arguments of the same type. */
1735 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
1737 if (vect_print_dump_info (REPORT_DETAILS
))
1738 fprintf (vect_dump
, "argument types differ.");
1742 rhs_type
= TREE_TYPE (op
);
1744 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
1745 &def_stmt
, &def
, &dt
[i
], &opvectype
))
1747 if (vect_print_dump_info (REPORT_DETAILS
))
1748 fprintf (vect_dump
, "use not simple.");
1753 vectype_in
= opvectype
;
1755 && opvectype
!= vectype_in
)
1757 if (vect_print_dump_info (REPORT_DETAILS
))
1758 fprintf (vect_dump
, "argument vector types differ.");
1762 /* If all arguments are external or constant defs use a vector type with
1763 the same size as the output vector type. */
1765 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
1767 gcc_assert (vectype_in
);
1770 if (vect_print_dump_info (REPORT_DETAILS
))
1772 fprintf (vect_dump
, "no vectype for scalar type ");
1773 print_generic_expr (vect_dump
, rhs_type
, TDF_SLIM
);
/* Compute the widen/narrow MODIFIER from the in/out subparts ratio
   (the assignments to MODIFIER were dropped by the extraction).  */
1780 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
1781 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
1782 if (nunits_in
== nunits_out
/ 2)
1784 else if (nunits_out
== nunits_in
)
1786 else if (nunits_out
== nunits_in
/ 2)
1791 /* For now, we only vectorize functions if a target specific builtin
1792 is available. TODO -- in some cases, it might be profitable to
1793 insert the calls for pieces of the vector, in order to be able
1794 to vectorize other operations in the loop. */
1795 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
1796 if (fndecl
== NULL_TREE
)
1798 if (vect_print_dump_info (REPORT_DETAILS
))
1799 fprintf (vect_dump
, "function is not vectorizable.");
1804 gcc_assert (!gimple_vuse (stmt
));
/* Number of vector stmts per scalar stmt: 1 for SLP, otherwise
   VF divided by the relevant subparts count.  */
1806 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
1808 else if (modifier
== NARROW
)
1809 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
1811 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
1813 /* Sanity check: make sure that at least one copy of the vectorized stmt
1814 needs to be generated. */
1815 gcc_assert (ncopies
>= 1);
/* Analysis-only invocation: record the decision and cost, no codegen.  */
1817 if (!vec_stmt
) /* transformation not required. */
1819 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1820 if (vect_print_dump_info (REPORT_DETAILS
))
1821 fprintf (vect_dump
, "=== vectorizable_call ===");
1822 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
/* Transformation phase starts here.  */
1828 if (vect_print_dump_info (REPORT_DETAILS
))
1829 fprintf (vect_dump
, "transform call.");
1832 scalar_dest
= gimple_call_lhs (stmt
);
1833 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
1835 prev_stmt_info
= NULL
;
/* MODIFIER == NONE case: one vector call per copy, one vector arg per
   scalar arg.  */
1839 for (j
= 0; j
< ncopies
; ++j
)
1841 /* Build argument list for the vectorized call. */
1843 vargs
= VEC_alloc (tree
, heap
, nargs
);
1845 VEC_truncate (tree
, vargs
, 0);
/* SLP sub-path: fetch all defs at once.  */
1849 VEC (slp_void_p
, heap
) *vec_defs
1850 = VEC_alloc (slp_void_p
, heap
, nargs
);
1851 VEC (tree
, heap
) *vec_oprnds0
;
1853 for (i
= 0; i
< nargs
; i
++)
1854 VEC_quick_push (tree
, vargs
, gimple_call_arg (stmt
, i
));
1855 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
1857 = (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 0);
1859 /* Arguments are ready. Create the new vector stmt. */
1860 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vec_oprnd0
)
1863 for (k
= 0; k
< nargs
; k
++)
1865 VEC (tree
, heap
) *vec_oprndsk
1866 = (VEC (tree
, heap
) *)
1867 VEC_index (slp_void_p
, vec_defs
, k
);
1868 VEC_replace (tree
, vargs
, k
,
1869 VEC_index (tree
, vec_oprndsk
, i
));
1871 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1872 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1873 gimple_call_set_lhs (new_stmt
, new_temp
);
1874 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1875 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
/* Release the per-operand def vectors built for SLP.  */
1879 for (i
= 0; i
< nargs
; i
++)
1881 VEC (tree
, heap
) *vec_oprndsi
1882 = (VEC (tree
, heap
) *)
1883 VEC_index (slp_void_p
, vec_defs
, i
);
1884 VEC_free (tree
, heap
, vec_oprndsi
);
1886 VEC_free (slp_void_p
, heap
, vec_defs
);
/* Non-SLP sub-path: first copy uses the operand defs, later copies
   chain through vect_get_vec_def_for_stmt_copy.  */
1890 for (i
= 0; i
< nargs
; i
++)
1892 op
= gimple_call_arg (stmt
, i
);
1895 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1898 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
1900 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1903 VEC_quick_push (tree
, vargs
, vec_oprnd0
);
1906 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1907 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1908 gimple_call_set_lhs (new_stmt
, new_temp
);
1909 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
/* Chain the copies through STMT_VINFO_RELATED_STMT.  */
1912 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
1914 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
1916 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* MODIFIER == NARROW case: each vector call consumes two input vectors
   per scalar argument.  */
1922 for (j
= 0; j
< ncopies
; ++j
)
1924 /* Build argument list for the vectorized call. */
1926 vargs
= VEC_alloc (tree
, heap
, nargs
* 2);
1928 VEC_truncate (tree
, vargs
, 0);
1932 VEC (slp_void_p
, heap
) *vec_defs
1933 = VEC_alloc (slp_void_p
, heap
, nargs
);
1934 VEC (tree
, heap
) *vec_oprnds0
;
1936 for (i
= 0; i
< nargs
; i
++)
1937 VEC_quick_push (tree
, vargs
, gimple_call_arg (stmt
, i
));
1938 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
1940 = (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 0);
1942 /* Arguments are ready. Create the new vector stmt. */
/* Consume defs in pairs (i, i+1) for narrowing.  */
1943 for (i
= 0; VEC_iterate (tree
, vec_oprnds0
, i
, vec_oprnd0
);
1947 VEC_truncate (tree
, vargs
, 0);
1948 for (k
= 0; k
< nargs
; k
++)
1950 VEC (tree
, heap
) *vec_oprndsk
1951 = (VEC (tree
, heap
) *)
1952 VEC_index (slp_void_p
, vec_defs
, k
);
1953 VEC_quick_push (tree
, vargs
,
1954 VEC_index (tree
, vec_oprndsk
, i
));
1955 VEC_quick_push (tree
, vargs
,
1956 VEC_index (tree
, vec_oprndsk
, i
+ 1));
1958 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1959 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1960 gimple_call_set_lhs (new_stmt
, new_temp
);
1961 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1962 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
1966 for (i
= 0; i
< nargs
; i
++)
1968 VEC (tree
, heap
) *vec_oprndsi
1969 = (VEC (tree
, heap
) *)
1970 VEC_index (slp_void_p
, vec_defs
, i
);
1971 VEC_free (tree
, heap
, vec_oprndsi
);
1973 VEC_free (slp_void_p
, heap
, vec_defs
);
/* Non-SLP narrowing: each scalar arg yields two chained vector defs.  */
1977 for (i
= 0; i
< nargs
; i
++)
1979 op
= gimple_call_arg (stmt
, i
);
1983 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1985 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1989 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
1991 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
1993 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1996 VEC_quick_push (tree
, vargs
, vec_oprnd0
);
1997 VEC_quick_push (tree
, vargs
, vec_oprnd1
);
2000 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2001 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2002 gimple_call_set_lhs (new_stmt
, new_temp
);
2003 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2006 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2008 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2010 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2013 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
/* WIDEN case falls here; no target implements it yet.  */
2018 /* No current target implements this case. */
2022 VEC_free (tree
, heap
, vargs
);
2024 /* Update the exception handling table with the vector stmt if necessary. */
2025 if (maybe_clean_or_replace_eh_stmt (stmt
, *vec_stmt
))
2026 gimple_purge_dead_eh_edges (gimple_bb (stmt
));
2028 /* The call in STMT might prevent it from being removed in dce.
2029 We however cannot remove it here, due to the way the ssa name
2030 it defines is mapped to the new definition. So just replace
2031 rhs of the statement with something harmless. */
2036 type
= TREE_TYPE (scalar_dest
);
2037 if (is_pattern_stmt_p (stmt_info
))
2038 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2040 lhs
= gimple_call_lhs (stmt
);
/* Neuter the scalar call: replace it with "lhs = 0" of matching type.  */
2041 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2042 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2043 set_vinfo_for_stmt (stmt
, NULL
);
2044 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2045 gsi_replace (gsi
, new_stmt
, false);
2046 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
2052 /* Function vect_gen_widened_results_half
2054 Create a vector stmt whose code, type, number of arguments, and result
2055 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
2056 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2057 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2058 needs to be created (DECL is a function-decl of a target-builtin).
2059 STMT is the original scalar stmt that we are vectorizing. */
/* NOTE(review): fragmented extraction -- the return type, DECL parameter
   line, braces and the final return are missing.  Code kept verbatim;
   comments only.  */
2062 vect_gen_widened_results_half (enum tree_code code
,
2064 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
2065 tree vec_dest
, gimple_stmt_iterator
*gsi
,
2071 /* Generate half of the widened result: */
2072 if (code
== CALL_EXPR
)
2074 /* Target specific support */
/* Build a 1- or 2-argument call to the target builtin DECL.  */
2075 if (op_type
== binary_op
)
2076 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
2078 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
2079 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2080 gimple_call_set_lhs (new_stmt
, new_temp
);
2084 /* Generic support */
/* Build a plain assignment with tree code CODE instead of a call.  */
2085 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
2086 if (op_type
!= binary_op
)
2088 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vec_oprnd0
,
2090 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2091 gimple_assign_set_lhs (new_stmt
, new_temp
);
2093 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2099 /* Get vectorized definitions for loop-based vectorization. For the first
2100 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2101 scalar operand), and for the rest we get a copy with
2102 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2103 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2104 The vectors are collected into VEC_OPRNDS. */
/* NOTE(review): fragmented extraction -- braces and the update of *OPRND
   between steps are missing.  Code kept verbatim; comments only.  Pushes
   two vector defs per invocation and recurses while MULTI_STEP_CVT > 0.  */
2107 vect_get_loop_based_defs (tree
*oprnd
, gimple stmt
, enum vect_def_type dt
,
2108 VEC (tree
, heap
) **vec_oprnds
, int multi_step_cvt
)
2112 /* Get first vector operand. */
2113 /* All the vector operands except the very first one (that is scalar oprnd)
/* A scalar *OPRND means this is the first call: fetch the initial vector
   def; otherwise continue the copy chain.  */
2115 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
2116 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
2118 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
2120 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
2122 /* Get second vector operand. */
2123 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
2124 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
2128 /* For conversion in multiple steps, continue to get operands
/* Recurse, consuming one conversion step.  */
2131 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
2135 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2136 For multi-step conversions store the resulting vectors and call the function
/* NOTE(review): fragmented extraction -- the "recursively" tail of the
   comment, the return type, braces and several conditionals are missing.
   Code kept verbatim; comments only.  Pairs up entries of *VEC_OPRNDS,
   emits one demotion stmt per pair, and either records the results
   (last step) or recurses with VEC_PACK_TRUNC_EXPR for multi-step
   conversions.  */
2140 vect_create_vectorized_demotion_stmts (VEC (tree
, heap
) **vec_oprnds
,
2141 int multi_step_cvt
, gimple stmt
,
2142 VEC (tree
, heap
) *vec_dsts
,
2143 gimple_stmt_iterator
*gsi
,
2144 slp_tree slp_node
, enum tree_code code
,
2145 stmt_vec_info
*prev_stmt_info
)
2148 tree vop0
, vop1
, new_tmp
, vec_dest
;
2150 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* The destination type for this step sits on top of VEC_DSTS.  */
2152 vec_dest
= VEC_pop (tree
, vec_dsts
);
2154 for (i
= 0; i
< VEC_length (tree
, *vec_oprnds
); i
+= 2)
2156 /* Create demotion operation. */
2157 vop0
= VEC_index (tree
, *vec_oprnds
, i
);
2158 vop1
= VEC_index (tree
, *vec_oprnds
, i
+ 1);
2159 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
2160 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
2161 gimple_assign_set_lhs (new_stmt
, new_tmp
);
2162 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2165 /* Store the resulting vector for next recursive call. */
/* Compact in place: result of pair (i, i+1) goes to slot i/2.  */
2166 VEC_replace (tree
, *vec_oprnds
, i
/2, new_tmp
);
2169 /* This is the last step of the conversion sequence. Store the
2170 vectors in SLP_NODE or in vector info of the scalar statement
2171 (or in STMT_VINFO_RELATED_STMT chain). */
2173 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
2176 if (!*prev_stmt_info
)
2177 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2179 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
2181 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2186 /* For multi-step demotion operations we first generate demotion operations
2187 from the source type to the intermediate types, and then combine the
2188 results (stored in VEC_OPRNDS) in demotion operation to the destination
2192 /* At each level of recursion we have half of the operands we had at the
/* Shrink the operand vector to the compacted half and recurse with the
   packing code.  */
2194 VEC_truncate (tree
, *vec_oprnds
, (i
+1)/2);
2195 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
2196 stmt
, vec_dsts
, gsi
, slp_node
,
2197 VEC_PACK_TRUNC_EXPR
,
/* Restore VEC_DSTS for the caller (pop is undone here).  */
2201 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
2205 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2206 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2207 the resulting vectors and call the function recursively. */
/* NOTE(review): fragmented extraction -- the return type, braces and some
   lines are missing.  Code kept verbatim; comments only.  For each input
   vector, two half-width promotion stmts (CODE1/CODE2, or calls to
   DECL1/DECL2) are emitted; results replace *VEC_OPRNDS0 at twice the
   length.  */
2210 vect_create_vectorized_promotion_stmts (VEC (tree
, heap
) **vec_oprnds0
,
2211 VEC (tree
, heap
) **vec_oprnds1
,
2212 gimple stmt
, tree vec_dest
,
2213 gimple_stmt_iterator
*gsi
,
2214 enum tree_code code1
,
2215 enum tree_code code2
, tree decl1
,
2216 tree decl2
, int op_type
)
2219 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
2220 gimple new_stmt1
, new_stmt2
;
2221 VEC (tree
, heap
) *vec_tmp
= NULL
;
/* Each input vector produces two output vectors (low/high halves).  */
2223 vec_tmp
= VEC_alloc (tree
, heap
, VEC_length (tree
, *vec_oprnds0
) * 2);
2224 FOR_EACH_VEC_ELT (tree
, *vec_oprnds0
, i
, vop0
)
2226 if (op_type
== binary_op
)
2227 vop1
= VEC_index (tree
, *vec_oprnds1
, i
);
2231 /* Generate the two halves of promotion operation. */
2232 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
2233 op_type
, vec_dest
, gsi
, stmt
);
2234 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
2235 op_type
, vec_dest
, gsi
, stmt
);
/* The halves may come back as calls or assignments; fetch the LHS
   accordingly.  */
2236 if (is_gimple_call (new_stmt1
))
2238 new_tmp1
= gimple_call_lhs (new_stmt1
);
2239 new_tmp2
= gimple_call_lhs (new_stmt2
);
2243 new_tmp1
= gimple_assign_lhs (new_stmt1
);
2244 new_tmp2
= gimple_assign_lhs (new_stmt2
);
2247 /* Store the results for the next step. */
2248 VEC_quick_push (tree
, vec_tmp
, new_tmp1
);
2249 VEC_quick_push (tree
, vec_tmp
, new_tmp2
);
/* Hand the doubled result vector back through *VEC_OPRNDS0.  */
2252 VEC_free (tree
, heap
, *vec_oprnds0
);
2253 *vec_oprnds0
= vec_tmp
;
2257 /* Check if STMT performs a conversion operation, that can be vectorized.
2258 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2259 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2260 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2263 vectorizable_conversion (gimple stmt
, gimple_stmt_iterator
*gsi
,
2264 gimple
*vec_stmt
, slp_tree slp_node
)
2268 tree op0
, op1
= NULL_TREE
;
2269 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2270 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2271 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2272 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
2273 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
2274 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
2278 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2279 gimple new_stmt
= NULL
;
2280 stmt_vec_info prev_stmt_info
;
2283 tree vectype_out
, vectype_in
;
2285 tree lhs_type
, rhs_type
;
2286 enum { NARROW
, NONE
, WIDEN
} modifier
;
2287 VEC (tree
,heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
2289 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2290 int multi_step_cvt
= 0;
2291 VEC (tree
, heap
) *vec_dsts
= NULL
, *interm_types
= NULL
;
2292 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
2294 enum machine_mode rhs_mode
;
2295 unsigned short fltsz
;
2297 /* Is STMT a vectorizable conversion? */
2299 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2302 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2305 if (!is_gimple_assign (stmt
))
2308 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
2311 code
= gimple_assign_rhs_code (stmt
);
2312 if (!CONVERT_EXPR_CODE_P (code
)
2313 && code
!= FIX_TRUNC_EXPR
2314 && code
!= FLOAT_EXPR
2315 && code
!= WIDEN_MULT_EXPR
2316 && code
!= WIDEN_LSHIFT_EXPR
)
2319 op_type
= TREE_CODE_LENGTH (code
);
2321 /* Check types of lhs and rhs. */
2322 scalar_dest
= gimple_assign_lhs (stmt
);
2323 lhs_type
= TREE_TYPE (scalar_dest
);
2324 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2326 op0
= gimple_assign_rhs1 (stmt
);
2327 rhs_type
= TREE_TYPE (op0
);
2329 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
2330 && !((INTEGRAL_TYPE_P (lhs_type
)
2331 && INTEGRAL_TYPE_P (rhs_type
))
2332 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
2333 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
2336 if ((INTEGRAL_TYPE_P (lhs_type
)
2337 && (TYPE_PRECISION (lhs_type
)
2338 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
2339 || (INTEGRAL_TYPE_P (rhs_type
)
2340 && (TYPE_PRECISION (rhs_type
)
2341 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
)))))
2343 if (vect_print_dump_info (REPORT_DETAILS
))
2345 "type conversion to/from bit-precision unsupported.");
2349 /* Check the operands of the operation. */
2350 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
2351 &def_stmt
, &def
, &dt
[0], &vectype_in
))
2353 if (vect_print_dump_info (REPORT_DETAILS
))
2354 fprintf (vect_dump
, "use not simple.");
2357 if (op_type
== binary_op
)
2361 op1
= gimple_assign_rhs2 (stmt
);
2362 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
2363 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2365 if (CONSTANT_CLASS_P (op0
))
2366 ok
= vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
,
2367 &def_stmt
, &def
, &dt
[1], &vectype_in
);
2369 ok
= vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
2374 if (vect_print_dump_info (REPORT_DETAILS
))
2375 fprintf (vect_dump
, "use not simple.");
2380 /* If op0 is an external or constant defs use a vector type of
2381 the same size as the output vector type. */
2383 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2385 gcc_assert (vectype_in
);
2388 if (vect_print_dump_info (REPORT_DETAILS
))
2390 fprintf (vect_dump
, "no vectype for scalar type ");
2391 print_generic_expr (vect_dump
, rhs_type
, TDF_SLIM
);
2397 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2398 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2399 if (nunits_in
< nunits_out
)
2401 else if (nunits_out
== nunits_in
)
2406 /* Multiple types in SLP are handled by creating the appropriate number of
2407 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2409 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2411 else if (modifier
== NARROW
)
2412 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2414 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2416 /* Sanity check: make sure that at least one copy of the vectorized stmt
2417 needs to be generated. */
2418 gcc_assert (ncopies
>= 1);
2420 /* Supportable by target? */
2424 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
2426 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
2431 if (vect_print_dump_info (REPORT_DETAILS
))
2432 fprintf (vect_dump
, "conversion not supported by target.");
2436 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
2437 &code1
, &code2
, &multi_step_cvt
,
2440 /* Binary widening operation can only be supported directly by the
2442 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
2446 if (code
!= FLOAT_EXPR
2447 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
2448 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
2451 rhs_mode
= TYPE_MODE (rhs_type
);
2452 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
2453 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
2454 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
2455 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
2458 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
2459 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
2460 if (cvt_type
== NULL_TREE
)
2463 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
2465 if (!supportable_convert_operation (code
, vectype_out
,
2466 cvt_type
, &decl1
, &codecvt1
))
2469 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
2470 cvt_type
, &codecvt1
,
2471 &codecvt2
, &multi_step_cvt
,
2475 gcc_assert (multi_step_cvt
== 0);
2477 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
2478 vectype_in
, &code1
, &code2
,
2479 &multi_step_cvt
, &interm_types
))
2483 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
2486 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
2487 codecvt2
= ERROR_MARK
;
2491 VEC_safe_push (tree
, heap
, interm_types
, cvt_type
);
2492 cvt_type
= NULL_TREE
;
2497 gcc_assert (op_type
== unary_op
);
2498 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
2499 &code1
, &multi_step_cvt
,
2503 if (code
!= FIX_TRUNC_EXPR
2504 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
2505 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
2508 rhs_mode
= TYPE_MODE (rhs_type
);
2510 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
2511 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
2512 if (cvt_type
== NULL_TREE
)
2514 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
2517 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
2518 &code1
, &multi_step_cvt
,
2527 if (!vec_stmt
) /* transformation not required. */
2529 if (vect_print_dump_info (REPORT_DETAILS
))
2530 fprintf (vect_dump
, "=== vectorizable_conversion ===");
2531 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
2533 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
2534 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2536 else if (modifier
== NARROW
)
2538 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
2539 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
2543 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
2544 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
2546 VEC_free (tree
, heap
, interm_types
);
2551 if (vect_print_dump_info (REPORT_DETAILS
))
2552 fprintf (vect_dump
, "transform conversion. ncopies = %d.", ncopies
);
2554 if (op_type
== binary_op
)
2556 if (CONSTANT_CLASS_P (op0
))
2557 op0
= fold_convert (TREE_TYPE (op1
), op0
);
2558 else if (CONSTANT_CLASS_P (op1
))
2559 op1
= fold_convert (TREE_TYPE (op0
), op1
);
2562 /* In case of multi-step conversion, we first generate conversion operations
2563 to the intermediate types, and then from that types to the final one.
2564 We create vector destinations for the intermediate type (TYPES) received
2565 from supportable_*_operation, and store them in the correct order
2566 for future use in vect_create_vectorized_*_stmts (). */
2567 vec_dsts
= VEC_alloc (tree
, heap
, multi_step_cvt
+ 1);
2568 vec_dest
= vect_create_destination_var (scalar_dest
,
2569 (cvt_type
&& modifier
== WIDEN
)
2570 ? cvt_type
: vectype_out
);
2571 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
2575 for (i
= VEC_length (tree
, interm_types
) - 1;
2576 VEC_iterate (tree
, interm_types
, i
, intermediate_type
); i
--)
2578 vec_dest
= vect_create_destination_var (scalar_dest
,
2580 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
2585 vec_dest
= vect_create_destination_var (scalar_dest
,
2587 ? vectype_out
: cvt_type
);
2591 if (modifier
== NONE
)
2592 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
2593 else if (modifier
== WIDEN
)
2595 vec_oprnds0
= VEC_alloc (tree
, heap
,
2597 ? vect_pow2 (multi_step_cvt
) : 1));
2598 if (op_type
== binary_op
)
2599 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
2602 vec_oprnds0
= VEC_alloc (tree
, heap
,
2604 ? vect_pow2 (multi_step_cvt
) : 1));
2606 else if (code
== WIDEN_LSHIFT_EXPR
)
2607 vec_oprnds1
= VEC_alloc (tree
, heap
, slp_node
->vec_stmts_size
);
2610 prev_stmt_info
= NULL
;
2614 for (j
= 0; j
< ncopies
; j
++)
2617 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
2620 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
2622 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
2624 /* Arguments are ready, create the new vector stmt. */
2625 if (code1
== CALL_EXPR
)
2627 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2628 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2629 gimple_call_set_lhs (new_stmt
, new_temp
);
2633 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
2634 new_stmt
= gimple_build_assign_with_ops (code1
, vec_dest
,
2636 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2637 gimple_assign_set_lhs (new_stmt
, new_temp
);
2640 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2642 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
2647 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2649 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2650 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2655 /* In case the vectorization factor (VF) is bigger than the number
2656 of elements that we can fit in a vectype (nunits), we have to
2657 generate more than one vector stmt - i.e - we need to "unroll"
2658 the vector stmt by a factor VF/nunits. */
2659 for (j
= 0; j
< ncopies
; j
++)
2666 if (code
== WIDEN_LSHIFT_EXPR
)
2671 /* Store vec_oprnd1 for every vector stmt to be created
2672 for SLP_NODE. We check during the analysis that all
2673 the shift arguments are the same. */
2674 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
2675 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
2677 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
2681 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
2682 &vec_oprnds1
, slp_node
, -1);
2686 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
2687 VEC_quick_push (tree
, vec_oprnds0
, vec_oprnd0
);
2688 if (op_type
== binary_op
)
2690 if (code
== WIDEN_LSHIFT_EXPR
)
2693 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
,
2695 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
2701 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
2702 VEC_truncate (tree
, vec_oprnds0
, 0);
2703 VEC_quick_push (tree
, vec_oprnds0
, vec_oprnd0
);
2704 if (op_type
== binary_op
)
2706 if (code
== WIDEN_LSHIFT_EXPR
)
2709 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
2711 VEC_truncate (tree
, vec_oprnds1
, 0);
2712 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
2716 /* Arguments are ready. Create the new vector stmts. */
2717 for (i
= multi_step_cvt
; i
>= 0; i
--)
2719 tree this_dest
= VEC_index (tree
, vec_dsts
, i
);
2720 enum tree_code c1
= code1
, c2
= code2
;
2721 if (i
== 0 && codecvt2
!= ERROR_MARK
)
2726 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
2728 stmt
, this_dest
, gsi
,
2729 c1
, c2
, decl1
, decl2
,
2733 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
2737 if (codecvt1
== CALL_EXPR
)
2739 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2740 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2741 gimple_call_set_lhs (new_stmt
, new_temp
);
2745 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
2746 new_temp
= make_ssa_name (vec_dest
, NULL
);
2747 new_stmt
= gimple_build_assign_with_ops (codecvt1
,
2752 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2755 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
2758 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
2762 if (!prev_stmt_info
)
2763 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2765 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2766 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2771 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2775 /* In case the vectorization factor (VF) is bigger than the number
2776 of elements that we can fit in a vectype (nunits), we have to
2777 generate more than one vector stmt - i.e - we need to "unroll"
2778 the vector stmt by a factor VF/nunits. */
2779 for (j
= 0; j
< ncopies
; j
++)
2783 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
2787 VEC_truncate (tree
, vec_oprnds0
, 0);
2788 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
2789 vect_pow2 (multi_step_cvt
) - 1);
2792 /* Arguments are ready. Create the new vector stmts. */
2794 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
2796 if (codecvt1
== CALL_EXPR
)
2798 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2799 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2800 gimple_call_set_lhs (new_stmt
, new_temp
);
2804 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
2805 new_temp
= make_ssa_name (vec_dest
, NULL
);
2806 new_stmt
= gimple_build_assign_with_ops (codecvt1
, new_temp
,
2810 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2811 VEC_replace (tree
, vec_oprnds0
, i
, new_temp
);
2814 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
2815 stmt
, vec_dsts
, gsi
,
2820 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2824 VEC_free (tree
, heap
, vec_oprnds0
);
2825 VEC_free (tree
, heap
, vec_oprnds1
);
2826 VEC_free (tree
, heap
, vec_dsts
);
2827 VEC_free (tree
, heap
, interm_types
);
2833 /* Function vectorizable_assignment.
2835 Check if STMT performs an assignment (copy) that can be vectorized.
2836 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2837 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2838 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2841 vectorizable_assignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
2842 gimple
*vec_stmt
, slp_tree slp_node
)
2847 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2848 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2849 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2853 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2854 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2857 VEC(tree
,heap
) *vec_oprnds
= NULL
;
2859 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2860 gimple new_stmt
= NULL
;
2861 stmt_vec_info prev_stmt_info
= NULL
;
2862 enum tree_code code
;
2865 /* Multiple types in SLP are handled by creating the appropriate number of
2866 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2868 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2871 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2873 gcc_assert (ncopies
>= 1);
2875 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2878 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2881 /* Is vectorizable assignment? */
2882 if (!is_gimple_assign (stmt
))
2885 scalar_dest
= gimple_assign_lhs (stmt
);
2886 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
2889 code
= gimple_assign_rhs_code (stmt
);
2890 if (gimple_assign_single_p (stmt
)
2891 || code
== PAREN_EXPR
2892 || CONVERT_EXPR_CODE_P (code
))
2893 op
= gimple_assign_rhs1 (stmt
);
2897 if (code
== VIEW_CONVERT_EXPR
)
2898 op
= TREE_OPERAND (op
, 0);
2900 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2901 &def_stmt
, &def
, &dt
[0], &vectype_in
))
2903 if (vect_print_dump_info (REPORT_DETAILS
))
2904 fprintf (vect_dump
, "use not simple.");
2908 /* We can handle NOP_EXPR conversions that do not change the number
2909 of elements or the vector size. */
2910 if ((CONVERT_EXPR_CODE_P (code
)
2911 || code
== VIEW_CONVERT_EXPR
)
2913 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
2914 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
2915 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
2918 /* We do not handle bit-precision changes. */
2919 if ((CONVERT_EXPR_CODE_P (code
)
2920 || code
== VIEW_CONVERT_EXPR
)
2921 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
2922 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
2923 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
2924 || ((TYPE_PRECISION (TREE_TYPE (op
))
2925 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
2926 /* But a conversion that does not change the bit-pattern is ok. */
2927 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
2928 > TYPE_PRECISION (TREE_TYPE (op
)))
2929 && TYPE_UNSIGNED (TREE_TYPE (op
))))
2931 if (vect_print_dump_info (REPORT_DETAILS
))
2932 fprintf (vect_dump
, "type conversion to/from bit-precision "
2937 if (!vec_stmt
) /* transformation not required. */
2939 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
2940 if (vect_print_dump_info (REPORT_DETAILS
))
2941 fprintf (vect_dump
, "=== vectorizable_assignment ===");
2942 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2947 if (vect_print_dump_info (REPORT_DETAILS
))
2948 fprintf (vect_dump
, "transform assignment.");
2951 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2954 for (j
= 0; j
< ncopies
; j
++)
2958 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
2960 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
2962 /* Arguments are ready. create the new vector stmt. */
2963 FOR_EACH_VEC_ELT (tree
, vec_oprnds
, i
, vop
)
2965 if (CONVERT_EXPR_CODE_P (code
)
2966 || code
== VIEW_CONVERT_EXPR
)
2967 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
2968 new_stmt
= gimple_build_assign (vec_dest
, vop
);
2969 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2970 gimple_assign_set_lhs (new_stmt
, new_temp
);
2971 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2973 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
2980 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2982 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2984 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2987 VEC_free (tree
, heap
, vec_oprnds
);
2992 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2993 either as shift by a scalar or by a vector. */
2996 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
2999 enum machine_mode vec_mode
;
3004 vectype
= get_vectype_for_scalar_type (scalar_type
);
3008 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
3010 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
3012 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3014 || (optab_handler (optab
, TYPE_MODE (vectype
))
3015 == CODE_FOR_nothing
))
3019 vec_mode
= TYPE_MODE (vectype
);
3020 icode
= (int) optab_handler (optab
, vec_mode
);
3021 if (icode
== CODE_FOR_nothing
)
3028 /* Function vectorizable_shift.
3030 Check if STMT performs a shift operation that can be vectorized.
3031 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3032 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3033 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3036 vectorizable_shift (gimple stmt
, gimple_stmt_iterator
*gsi
,
3037 gimple
*vec_stmt
, slp_tree slp_node
)
3041 tree op0
, op1
= NULL
;
3042 tree vec_oprnd1
= NULL_TREE
;
3043 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3045 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3046 enum tree_code code
;
3047 enum machine_mode vec_mode
;
3051 enum machine_mode optab_op2_mode
;
3054 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3055 gimple new_stmt
= NULL
;
3056 stmt_vec_info prev_stmt_info
;
3063 VEC (tree
, heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
3066 bool scalar_shift_arg
= true;
3067 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3070 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3073 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3076 /* Is STMT a vectorizable binary/unary operation? */
3077 if (!is_gimple_assign (stmt
))
3080 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3083 code
= gimple_assign_rhs_code (stmt
);
3085 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
3086 || code
== RROTATE_EXPR
))
3089 scalar_dest
= gimple_assign_lhs (stmt
);
3090 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3091 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3092 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
3094 if (vect_print_dump_info (REPORT_DETAILS
))
3095 fprintf (vect_dump
, "bit-precision shifts not supported.");
3099 op0
= gimple_assign_rhs1 (stmt
);
3100 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3101 &def_stmt
, &def
, &dt
[0], &vectype
))
3103 if (vect_print_dump_info (REPORT_DETAILS
))
3104 fprintf (vect_dump
, "use not simple.");
3107 /* If op0 is an external or constant def use a vector type with
3108 the same size as the output vector type. */
3110 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
3112 gcc_assert (vectype
);
3115 if (vect_print_dump_info (REPORT_DETAILS
))
3117 fprintf (vect_dump
, "no vectype for scalar type ");
3118 print_generic_expr (vect_dump
, TREE_TYPE (op0
), TDF_SLIM
);
3124 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3125 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3126 if (nunits_out
!= nunits_in
)
3129 op1
= gimple_assign_rhs2 (stmt
);
3130 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3131 &def
, &dt
[1], &op1_vectype
))
3133 if (vect_print_dump_info (REPORT_DETAILS
))
3134 fprintf (vect_dump
, "use not simple.");
3139 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3143 /* Multiple types in SLP are handled by creating the appropriate number of
3144 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3146 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3149 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3151 gcc_assert (ncopies
>= 1);
3153 /* Determine whether the shift amount is a vector, or scalar. If the
3154 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3156 if (dt
[1] == vect_internal_def
&& !slp_node
)
3157 scalar_shift_arg
= false;
3158 else if (dt
[1] == vect_constant_def
3159 || dt
[1] == vect_external_def
3160 || dt
[1] == vect_internal_def
)
3162 /* In SLP, need to check whether the shift count is the same,
3163 in loops if it is a constant or invariant, it is always
3167 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3170 FOR_EACH_VEC_ELT (gimple
, stmts
, k
, slpstmt
)
3171 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
3172 scalar_shift_arg
= false;
3177 if (vect_print_dump_info (REPORT_DETAILS
))
3178 fprintf (vect_dump
, "operand mode requires invariant argument.");
3182 /* Vector shifted by vector. */
3183 if (!scalar_shift_arg
)
3185 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3186 if (vect_print_dump_info (REPORT_DETAILS
))
3187 fprintf (vect_dump
, "vector/vector shift/rotate found.");
3189 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
3190 if (op1_vectype
== NULL_TREE
3191 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
3193 if (vect_print_dump_info (REPORT_DETAILS
))
3194 fprintf (vect_dump
, "unusable type for last operand in"
3195 " vector/vector shift/rotate.");
3199 /* See if the machine has a vector shifted by scalar insn and if not
3200 then see if it has a vector shifted by vector insn. */
3203 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
3205 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
3207 if (vect_print_dump_info (REPORT_DETAILS
))
3208 fprintf (vect_dump
, "vector/scalar shift/rotate found.");
3212 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3214 && (optab_handler (optab
, TYPE_MODE (vectype
))
3215 != CODE_FOR_nothing
))
3217 scalar_shift_arg
= false;
3219 if (vect_print_dump_info (REPORT_DETAILS
))
3220 fprintf (vect_dump
, "vector/vector shift/rotate found.");
3222 /* Unlike the other binary operators, shifts/rotates have
3223 the rhs being int, instead of the same type as the lhs,
3224 so make sure the scalar is the right type if we are
3225 dealing with vectors of long long/long/short/char. */
3226 if (dt
[1] == vect_constant_def
)
3227 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
3228 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
3232 && TYPE_MODE (TREE_TYPE (vectype
))
3233 != TYPE_MODE (TREE_TYPE (op1
)))
3235 if (vect_print_dump_info (REPORT_DETAILS
))
3236 fprintf (vect_dump
, "unusable type for last operand in"
3237 " vector/vector shift/rotate.");
3240 if (vec_stmt
&& !slp_node
)
3242 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
3243 op1
= vect_init_vector (stmt
, op1
,
3244 TREE_TYPE (vectype
), NULL
);
3251 /* Supportable by target? */
3254 if (vect_print_dump_info (REPORT_DETAILS
))
3255 fprintf (vect_dump
, "no optab.");
3258 vec_mode
= TYPE_MODE (vectype
);
3259 icode
= (int) optab_handler (optab
, vec_mode
);
3260 if (icode
== CODE_FOR_nothing
)
3262 if (vect_print_dump_info (REPORT_DETAILS
))
3263 fprintf (vect_dump
, "op not supported by target.");
3264 /* Check only during analysis. */
3265 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
3266 || (vf
< vect_min_worthwhile_factor (code
)
3269 if (vect_print_dump_info (REPORT_DETAILS
))
3270 fprintf (vect_dump
, "proceeding using word mode.");
3273 /* Worthwhile without SIMD support? Check only during analysis. */
3274 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
3275 && vf
< vect_min_worthwhile_factor (code
)
3278 if (vect_print_dump_info (REPORT_DETAILS
))
3279 fprintf (vect_dump
, "not worthwhile without SIMD support.");
3283 if (!vec_stmt
) /* transformation not required. */
3285 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
3286 if (vect_print_dump_info (REPORT_DETAILS
))
3287 fprintf (vect_dump
, "=== vectorizable_shift ===");
3288 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3294 if (vect_print_dump_info (REPORT_DETAILS
))
3295 fprintf (vect_dump
, "transform binary/unary operation.");
3298 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3300 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3301 created in the previous stages of the recursion, so no allocation is
3302 needed, except for the case of shift with scalar shift argument. In that
3303 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3304 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3305 In case of loop-based vectorization we allocate VECs of size 1. We
3306 allocate VEC_OPRNDS1 only in case of binary operation. */
3309 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
3310 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
3312 else if (scalar_shift_arg
)
3313 vec_oprnds1
= VEC_alloc (tree
, heap
, slp_node
->vec_stmts_size
);
3315 prev_stmt_info
= NULL
;
3316 for (j
= 0; j
< ncopies
; j
++)
3321 if (scalar_shift_arg
)
3323 /* Vector shl and shr insn patterns can be defined with scalar
3324 operand 2 (shift operand). In this case, use constant or loop
3325 invariant op1 directly, without extending it to vector mode
3327 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
3328 if (!VECTOR_MODE_P (optab_op2_mode
))
3330 if (vect_print_dump_info (REPORT_DETAILS
))
3331 fprintf (vect_dump
, "operand 1 using scalar mode.");
3333 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
3336 /* Store vec_oprnd1 for every vector stmt to be created
3337 for SLP_NODE. We check during the analysis that all
3338 the shift arguments are the same.
3339 TODO: Allow different constants for different vector
3340 stmts generated for an SLP instance. */
3341 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3342 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
3347 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3348 (a special case for certain kind of vector shifts); otherwise,
3349 operand 1 should be of a vector type (the usual case). */
3351 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3354 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
3358 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
3360 /* Arguments are ready. Create the new vector stmt. */
3361 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
3363 vop1
= VEC_index (tree
, vec_oprnds1
, i
);
3364 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
3365 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3366 gimple_assign_set_lhs (new_stmt
, new_temp
);
3367 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3369 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
3376 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3378 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3379 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3382 VEC_free (tree
, heap
, vec_oprnds0
);
3383 VEC_free (tree
, heap
, vec_oprnds1
);
/* Forward declaration: builds a VEC_PERM_EXPR permutation of two vector
   operands; defined later in this file.  */
static tree permute_vec_elements (tree, tree, tree, gimple,
				  gimple_stmt_iterator *);
3393 /* Function vectorizable_operation.
3395 Check if STMT performs a binary, unary or ternary operation that can
3397 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3398 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3399 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3402 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
3403 gimple
*vec_stmt
, slp_tree slp_node
)
3407 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
3408 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3410 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3411 enum tree_code code
;
3412 enum machine_mode vec_mode
;
3419 enum vect_def_type dt
[3]
3420 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
3421 gimple new_stmt
= NULL
;
3422 stmt_vec_info prev_stmt_info
;
3428 VEC(tree
,heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
, *vec_oprnds2
= NULL
;
3429 tree vop0
, vop1
, vop2
;
3430 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3433 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3436 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3439 /* Is STMT a vectorizable binary/unary operation? */
3440 if (!is_gimple_assign (stmt
))
3443 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3446 code
= gimple_assign_rhs_code (stmt
);
3448 /* For pointer addition, we should use the normal plus for
3449 the vector addition. */
3450 if (code
== POINTER_PLUS_EXPR
)
3453 /* Support only unary or binary operations. */
3454 op_type
= TREE_CODE_LENGTH (code
);
3455 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
3457 if (vect_print_dump_info (REPORT_DETAILS
))
3458 fprintf (vect_dump
, "num. args = %d (not unary/binary/ternary op).",
3463 scalar_dest
= gimple_assign_lhs (stmt
);
3464 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3466 /* Most operations cannot handle bit-precision types without extra
3468 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3469 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
3470 /* Exception are bitwise binary operations. */
3471 && code
!= BIT_IOR_EXPR
3472 && code
!= BIT_XOR_EXPR
3473 && code
!= BIT_AND_EXPR
)
3475 if (vect_print_dump_info (REPORT_DETAILS
))
3476 fprintf (vect_dump
, "bit-precision arithmetic not supported.");
3480 op0
= gimple_assign_rhs1 (stmt
);
3481 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3482 &def_stmt
, &def
, &dt
[0], &vectype
))
3484 if (vect_print_dump_info (REPORT_DETAILS
))
3485 fprintf (vect_dump
, "use not simple.");
3488 /* If op0 is an external or constant def use a vector type with
3489 the same size as the output vector type. */
3491 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
3493 gcc_assert (vectype
);
3496 if (vect_print_dump_info (REPORT_DETAILS
))
3498 fprintf (vect_dump
, "no vectype for scalar type ");
3499 print_generic_expr (vect_dump
, TREE_TYPE (op0
), TDF_SLIM
);
3505 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3506 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3507 if (nunits_out
!= nunits_in
)
3510 if (op_type
== binary_op
|| op_type
== ternary_op
)
3512 op1
= gimple_assign_rhs2 (stmt
);
3513 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3516 if (vect_print_dump_info (REPORT_DETAILS
))
3517 fprintf (vect_dump
, "use not simple.");
3521 if (op_type
== ternary_op
)
3523 op2
= gimple_assign_rhs3 (stmt
);
3524 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3527 if (vect_print_dump_info (REPORT_DETAILS
))
3528 fprintf (vect_dump
, "use not simple.");
3534 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3538 /* Multiple types in SLP are handled by creating the appropriate number of
3539 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3541 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3544 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3546 gcc_assert (ncopies
>= 1);
3548 /* Shifts are handled in vectorizable_shift (). */
3549 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
3550 || code
== RROTATE_EXPR
)
3553 /* Supportable by target? */
3555 vec_mode
= TYPE_MODE (vectype
);
3556 if (code
== MULT_HIGHPART_EXPR
)
3558 if (can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
)))
3559 icode
= LAST_INSN_CODE
;
3561 icode
= CODE_FOR_nothing
;
3565 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
3568 if (vect_print_dump_info (REPORT_DETAILS
))
3569 fprintf (vect_dump
, "no optab.");
3572 icode
= (int) optab_handler (optab
, vec_mode
);
3575 if (icode
== CODE_FOR_nothing
)
3577 if (vect_print_dump_info (REPORT_DETAILS
))
3578 fprintf (vect_dump
, "op not supported by target.");
3579 /* Check only during analysis. */
3580 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
3581 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
3583 if (vect_print_dump_info (REPORT_DETAILS
))
3584 fprintf (vect_dump
, "proceeding using word mode.");
3587 /* Worthwhile without SIMD support? Check only during analysis. */
3588 if (!VECTOR_MODE_P (vec_mode
)
3590 && vf
< vect_min_worthwhile_factor (code
))
3592 if (vect_print_dump_info (REPORT_DETAILS
))
3593 fprintf (vect_dump
, "not worthwhile without SIMD support.");
3597 if (!vec_stmt
) /* transformation not required. */
3599 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
3600 if (vect_print_dump_info (REPORT_DETAILS
))
3601 fprintf (vect_dump
, "=== vectorizable_operation ===");
3602 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3608 if (vect_print_dump_info (REPORT_DETAILS
))
3609 fprintf (vect_dump
, "transform binary/unary operation.");
3612 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3614 /* In case the vectorization factor (VF) is bigger than the number
3615 of elements that we can fit in a vectype (nunits), we have to generate
3616 more than one vector stmt - i.e - we need to "unroll" the
3617 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3618 from one copy of the vector stmt to the next, in the field
3619 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3620 stages to find the correct vector defs to be used when vectorizing
3621 stmts that use the defs of the current stmt. The example below
3622 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3623 we need to create 4 vectorized stmts):
3625 before vectorization:
3626 RELATED_STMT VEC_STMT
3630 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3632 RELATED_STMT VEC_STMT
3633 VS1_0: vx0 = memref0 VS1_1 -
3634 VS1_1: vx1 = memref1 VS1_2 -
3635 VS1_2: vx2 = memref2 VS1_3 -
3636 VS1_3: vx3 = memref3 - -
3637 S1: x = load - VS1_0
3640 step2: vectorize stmt S2 (done here):
3641 To vectorize stmt S2 we first need to find the relevant vector
3642 def for the first operand 'x'. This is, as usual, obtained from
3643 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3644 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3645 relevant vector def 'vx0'. Having found 'vx0' we can generate
3646 the vector stmt VS2_0, and as usual, record it in the
3647 STMT_VINFO_VEC_STMT of stmt S2.
3648 When creating the second copy (VS2_1), we obtain the relevant vector
3649 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3650 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3651 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3652 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3653 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3654 chain of stmts and pointers:
3655 RELATED_STMT VEC_STMT
3656 VS1_0: vx0 = memref0 VS1_1 -
3657 VS1_1: vx1 = memref1 VS1_2 -
3658 VS1_2: vx2 = memref2 VS1_3 -
3659 VS1_3: vx3 = memref3 - -
3660 S1: x = load - VS1_0
3661 VS2_0: vz0 = vx0 + v1 VS2_1 -
3662 VS2_1: vz1 = vx1 + v1 VS2_2 -
3663 VS2_2: vz2 = vx2 + v1 VS2_3 -
3664 VS2_3: vz3 = vx3 + v1 - -
3665 S2: z = x + 1 - VS2_0 */
3667 prev_stmt_info
= NULL
;
3668 for (j
= 0; j
< ncopies
; j
++)
3673 if (op_type
== binary_op
|| op_type
== ternary_op
)
3674 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
3677 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3679 if (op_type
== ternary_op
)
3681 vec_oprnds2
= VEC_alloc (tree
, heap
, 1);
3682 VEC_quick_push (tree
, vec_oprnds2
,
3683 vect_get_vec_def_for_operand (op2
, stmt
, NULL
));
3688 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
3689 if (op_type
== ternary_op
)
3691 tree vec_oprnd
= VEC_pop (tree
, vec_oprnds2
);
3692 VEC_quick_push (tree
, vec_oprnds2
,
3693 vect_get_vec_def_for_stmt_copy (dt
[2],
3698 /* Arguments are ready. Create the new vector stmt. */
3699 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
3701 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
3702 ? VEC_index (tree
, vec_oprnds1
, i
) : NULL_TREE
);
3703 vop2
= ((op_type
== ternary_op
)
3704 ? VEC_index (tree
, vec_oprnds2
, i
) : NULL_TREE
);
3705 new_stmt
= gimple_build_assign_with_ops3 (code
, vec_dest
,
3707 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3708 gimple_assign_set_lhs (new_stmt
, new_temp
);
3709 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3711 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
3718 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3720 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3721 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3724 VEC_free (tree
, heap
, vec_oprnds0
);
3726 VEC_free (tree
, heap
, vec_oprnds1
);
3728 VEC_free (tree
, heap
, vec_oprnds2
);
3734 /* Function vectorizable_store.
3736 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3738 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3739 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3740 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3743 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
3749 tree vec_oprnd
= NULL_TREE
;
3750 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3751 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
3752 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3754 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3755 struct loop
*loop
= NULL
;
3756 enum machine_mode vec_mode
;
3758 enum dr_alignment_support alignment_support_scheme
;
3761 enum vect_def_type dt
;
3762 stmt_vec_info prev_stmt_info
= NULL
;
3763 tree dataref_ptr
= NULL_TREE
;
3764 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3767 gimple next_stmt
, first_stmt
= NULL
;
3768 bool grouped_store
= false;
3769 bool store_lanes_p
= false;
3770 unsigned int group_size
, i
;
3771 VEC(tree
,heap
) *dr_chain
= NULL
, *oprnds
= NULL
, *result_chain
= NULL
;
3773 VEC(tree
,heap
) *vec_oprnds
= NULL
;
3774 bool slp
= (slp_node
!= NULL
);
3775 unsigned int vec_num
;
3776 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3780 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3782 /* Multiple types in SLP are handled by creating the appropriate number of
3783 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3785 if (slp
|| PURE_SLP_STMT (stmt_info
))
3788 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3790 gcc_assert (ncopies
>= 1);
3792 /* FORNOW. This restriction should be relaxed. */
3793 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
3795 if (vect_print_dump_info (REPORT_DETAILS
))
3796 fprintf (vect_dump
, "multiple types in nested loop.");
3800 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3803 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3806 /* Is vectorizable store? */
3808 if (!is_gimple_assign (stmt
))
3811 scalar_dest
= gimple_assign_lhs (stmt
);
3812 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
3813 && is_pattern_stmt_p (stmt_info
))
3814 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
3815 if (TREE_CODE (scalar_dest
) != ARRAY_REF
3816 && TREE_CODE (scalar_dest
) != INDIRECT_REF
3817 && TREE_CODE (scalar_dest
) != COMPONENT_REF
3818 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
3819 && TREE_CODE (scalar_dest
) != REALPART_EXPR
3820 && TREE_CODE (scalar_dest
) != MEM_REF
)
3823 gcc_assert (gimple_assign_single_p (stmt
));
3824 op
= gimple_assign_rhs1 (stmt
);
3825 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3828 if (vect_print_dump_info (REPORT_DETAILS
))
3829 fprintf (vect_dump
, "use not simple.");
3833 elem_type
= TREE_TYPE (vectype
);
3834 vec_mode
= TYPE_MODE (vectype
);
3836 /* FORNOW. In some cases can vectorize even if data-type not supported
3837 (e.g. - array initialization with 0). */
3838 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
3841 if (!STMT_VINFO_DATA_REF (stmt_info
))
3844 if (tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
3845 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
3846 size_zero_node
) < 0)
3848 if (vect_print_dump_info (REPORT_DETAILS
))
3849 fprintf (vect_dump
, "negative step for store.");
3853 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3855 grouped_store
= true;
3856 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
3857 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
3859 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3860 if (vect_store_lanes_supported (vectype
, group_size
))
3861 store_lanes_p
= true;
3862 else if (!vect_grouped_store_supported (vectype
, group_size
))
3866 if (first_stmt
== stmt
)
3868 /* STMT is the leader of the group. Check the operands of all the
3869 stmts of the group. */
3870 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
3873 gcc_assert (gimple_assign_single_p (next_stmt
));
3874 op
= gimple_assign_rhs1 (next_stmt
);
3875 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
3876 &def_stmt
, &def
, &dt
))
3878 if (vect_print_dump_info (REPORT_DETAILS
))
3879 fprintf (vect_dump
, "use not simple.");
3882 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
3887 if (!vec_stmt
) /* transformation not required. */
3889 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
3890 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
3899 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3900 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3902 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
3905 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
3907 /* We vectorize all the stmts of the interleaving group when we
3908 reach the last stmt in the group. */
3909 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
3910 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
3919 grouped_store
= false;
3920 /* VEC_NUM is the number of vect stmts to be created for this
3922 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3923 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
3924 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3925 op
= gimple_assign_rhs1 (first_stmt
);
3928 /* VEC_NUM is the number of vect stmts to be created for this
3930 vec_num
= group_size
;
3936 group_size
= vec_num
= 1;
3939 if (vect_print_dump_info (REPORT_DETAILS
))
3940 fprintf (vect_dump
, "transform store. ncopies = %d",ncopies
);
3942 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
3943 oprnds
= VEC_alloc (tree
, heap
, group_size
);
3945 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
3946 gcc_assert (alignment_support_scheme
);
3947 /* Targets with store-lane instructions must not require explicit
3949 gcc_assert (!store_lanes_p
3950 || alignment_support_scheme
== dr_aligned
3951 || alignment_support_scheme
== dr_unaligned_supported
);
3954 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
3956 aggr_type
= vectype
;
3958 /* In case the vectorization factor (VF) is bigger than the number
3959 of elements that we can fit in a vectype (nunits), we have to generate
3960 more than one vector stmt - i.e - we need to "unroll" the
3961 vector stmt by a factor VF/nunits. For more details see documentation in
3962 vect_get_vec_def_for_copy_stmt. */
3964 /* In case of interleaving (non-unit grouped access):
3971 We create vectorized stores starting from base address (the access of the
3972 first stmt in the chain (S2 in the above example), when the last store stmt
3973 of the chain (S4) is reached:
3976 VS2: &base + vec_size*1 = vx0
3977 VS3: &base + vec_size*2 = vx1
3978 VS4: &base + vec_size*3 = vx3
3980 Then permutation statements are generated:
3982 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
3983 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
3986 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3987 (the order of the data-refs in the output of vect_permute_store_chain
3988 corresponds to the order of scalar stmts in the interleaving chain - see
3989 the documentation of vect_permute_store_chain()).
3991 In case of both multiple types and interleaving, above vector stores and
3992 permutation stmts are created for every copy. The result vector stmts are
3993 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3994 STMT_VINFO_RELATED_STMT for the next copies.
3997 prev_stmt_info
= NULL
;
3998 for (j
= 0; j
< ncopies
; j
++)
4007 /* Get vectorized arguments for SLP_NODE. */
4008 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
4009 NULL
, slp_node
, -1);
4011 vec_oprnd
= VEC_index (tree
, vec_oprnds
, 0);
4015 /* For interleaved stores we collect vectorized defs for all the
4016 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4017 used as an input to vect_permute_store_chain(), and OPRNDS as
4018 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4020 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4021 OPRNDS are of size 1. */
4022 next_stmt
= first_stmt
;
4023 for (i
= 0; i
< group_size
; i
++)
4025 /* Since gaps are not supported for interleaved stores,
4026 GROUP_SIZE is the exact number of stmts in the chain.
4027 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4028 there is no interleaving, GROUP_SIZE is 1, and only one
4029 iteration of the loop will be executed. */
4030 gcc_assert (next_stmt
4031 && gimple_assign_single_p (next_stmt
));
4032 op
= gimple_assign_rhs1 (next_stmt
);
4034 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
4036 VEC_quick_push(tree
, dr_chain
, vec_oprnd
);
4037 VEC_quick_push(tree
, oprnds
, vec_oprnd
);
4038 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4042 /* We should have catched mismatched types earlier. */
4043 gcc_assert (useless_type_conversion_p (vectype
,
4044 TREE_TYPE (vec_oprnd
)));
4045 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, aggr_type
, NULL
,
4046 NULL_TREE
, &dummy
, gsi
,
4047 &ptr_incr
, false, &inv_p
);
4048 gcc_assert (bb_vinfo
|| !inv_p
);
4052 /* For interleaved stores we created vectorized defs for all the
4053 defs stored in OPRNDS in the previous iteration (previous copy).
4054 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4055 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4057 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4058 OPRNDS are of size 1. */
4059 for (i
= 0; i
< group_size
; i
++)
4061 op
= VEC_index (tree
, oprnds
, i
);
4062 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4064 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
4065 VEC_replace(tree
, dr_chain
, i
, vec_oprnd
);
4066 VEC_replace(tree
, oprnds
, i
, vec_oprnd
);
4068 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
4069 TYPE_SIZE_UNIT (aggr_type
));
4076 /* Combine all the vectors into an array. */
4077 vec_array
= create_vector_array (vectype
, vec_num
);
4078 for (i
= 0; i
< vec_num
; i
++)
4080 vec_oprnd
= VEC_index (tree
, dr_chain
, i
);
4081 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
4085 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4086 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
4087 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
4088 gimple_call_set_lhs (new_stmt
, data_ref
);
4089 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4096 result_chain
= VEC_alloc (tree
, heap
, group_size
);
4098 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
4102 next_stmt
= first_stmt
;
4103 for (i
= 0; i
< vec_num
; i
++)
4105 unsigned align
, misalign
;
4108 /* Bump the vector pointer. */
4109 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
4113 vec_oprnd
= VEC_index (tree
, vec_oprnds
, i
);
4114 else if (grouped_store
)
4115 /* For grouped stores vectorized defs are interleaved in
4116 vect_permute_store_chain(). */
4117 vec_oprnd
= VEC_index (tree
, result_chain
, i
);
4119 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
4120 build_int_cst (reference_alias_ptr_type
4121 (DR_REF (first_dr
)), 0));
4122 align
= TYPE_ALIGN_UNIT (vectype
);
4123 if (aligned_access_p (first_dr
))
4125 else if (DR_MISALIGNMENT (first_dr
) == -1)
4127 TREE_TYPE (data_ref
)
4128 = build_aligned_type (TREE_TYPE (data_ref
),
4129 TYPE_ALIGN (elem_type
));
4130 align
= TYPE_ALIGN_UNIT (elem_type
);
4135 TREE_TYPE (data_ref
)
4136 = build_aligned_type (TREE_TYPE (data_ref
),
4137 TYPE_ALIGN (elem_type
));
4138 misalign
= DR_MISALIGNMENT (first_dr
);
4140 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
4143 /* Arguments are ready. Create the new vector stmt. */
4144 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
4145 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4150 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4158 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4160 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4161 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4165 VEC_free (tree
, heap
, dr_chain
);
4166 VEC_free (tree
, heap
, oprnds
);
4168 VEC_free (tree
, heap
, result_chain
);
4170 VEC_free (tree
, heap
, vec_oprnds
);
4175 /* Given a vector type VECTYPE and permutation SEL returns
4176 the VECTOR_CST mask that implements the permutation of the
4177 vector elements. If that is impossible to do, returns NULL. */
4180 vect_gen_perm_mask (tree vectype
, unsigned char *sel
)
4182 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
4185 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4187 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
4190 mask_elt_type
= lang_hooks
.types
.type_for_mode
4191 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
4192 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
4194 mask_elts
= XALLOCAVEC (tree
, nunits
);
4195 for (i
= nunits
- 1; i
>= 0; i
--)
4196 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
4197 mask_vec
= build_vector (mask_type
, mask_elts
);
4202 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4203 reversal of the vector elements. If that is impossible to do,
4207 perm_mask_for_reverse (tree vectype
)
4212 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4213 sel
= XALLOCAVEC (unsigned char, nunits
);
4215 for (i
= 0; i
< nunits
; ++i
)
4216 sel
[i
] = nunits
- 1 - i
;
4218 return vect_gen_perm_mask (vectype
, sel
);
4221 /* Given a vector variable X and Y, that was generated for the scalar
4222 STMT, generate instructions to permute the vector elements of X and Y
4223 using permutation mask MASK_VEC, insert them at *GSI and return the
4224 permuted vector variable. */
4227 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple stmt
,
4228 gimple_stmt_iterator
*gsi
)
4230 tree vectype
= TREE_TYPE (x
);
4231 tree perm_dest
, data_ref
;
4234 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
4235 data_ref
= make_ssa_name (perm_dest
, NULL
);
4237 /* Generate the permute statement. */
4238 perm_stmt
= gimple_build_assign_with_ops3 (VEC_PERM_EXPR
, data_ref
,
4240 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
4245 /* vectorizable_load.
4247 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4249 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4250 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4251 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4254 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
4255 slp_tree slp_node
, slp_instance slp_node_instance
)
4258 tree vec_dest
= NULL
;
4259 tree data_ref
= NULL
;
4260 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4261 stmt_vec_info prev_stmt_info
;
4262 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4263 struct loop
*loop
= NULL
;
4264 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
4265 bool nested_in_vect_loop
= false;
4266 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
4267 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4270 enum machine_mode mode
;
4271 gimple new_stmt
= NULL
;
4273 enum dr_alignment_support alignment_support_scheme
;
4274 tree dataref_ptr
= NULL_TREE
;
4276 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4278 int i
, j
, group_size
;
4279 tree msq
= NULL_TREE
, lsq
;
4280 tree offset
= NULL_TREE
;
4281 tree realignment_token
= NULL_TREE
;
4283 VEC(tree
,heap
) *dr_chain
= NULL
;
4284 bool grouped_load
= false;
4285 bool load_lanes_p
= false;
4288 bool negative
= false;
4289 bool compute_in_loop
= false;
4290 struct loop
*at_loop
;
4292 bool slp
= (slp_node
!= NULL
);
4293 bool slp_perm
= false;
4294 enum tree_code code
;
4295 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4298 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
4299 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
4300 tree stride_base
, stride_step
;
4301 int gather_scale
= 1;
4302 enum vect_def_type gather_dt
= vect_unknown_def_type
;
4306 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4307 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
4308 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4313 /* Multiple types in SLP are handled by creating the appropriate number of
4314 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4316 if (slp
|| PURE_SLP_STMT (stmt_info
))
4319 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4321 gcc_assert (ncopies
>= 1);
4323 /* FORNOW. This restriction should be relaxed. */
4324 if (nested_in_vect_loop
&& ncopies
> 1)
4326 if (vect_print_dump_info (REPORT_DETAILS
))
4327 fprintf (vect_dump
, "multiple types in nested loop.");
4331 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4334 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4337 /* Is vectorizable load? */
4338 if (!is_gimple_assign (stmt
))
4341 scalar_dest
= gimple_assign_lhs (stmt
);
4342 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4345 code
= gimple_assign_rhs_code (stmt
);
4346 if (code
!= ARRAY_REF
4347 && code
!= INDIRECT_REF
4348 && code
!= COMPONENT_REF
4349 && code
!= IMAGPART_EXPR
4350 && code
!= REALPART_EXPR
4352 && TREE_CODE_CLASS (code
) != tcc_declaration
)
4355 if (!STMT_VINFO_DATA_REF (stmt_info
))
4358 elem_type
= TREE_TYPE (vectype
);
4359 mode
= TYPE_MODE (vectype
);
4361 /* FORNOW. In some cases can vectorize even if data-type not supported
4362 (e.g. - data copies). */
4363 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
4365 if (vect_print_dump_info (REPORT_DETAILS
))
4366 fprintf (vect_dump
, "Aligned load, but unsupported type.");
4370 /* Check if the load is a part of an interleaving chain. */
4371 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
4373 grouped_load
= true;
4375 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
4377 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4378 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
4380 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4381 if (vect_load_lanes_supported (vectype
, group_size
))
4382 load_lanes_p
= true;
4383 else if (!vect_grouped_load_supported (vectype
, group_size
))
4389 if (STMT_VINFO_GATHER_P (stmt_info
))
4393 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
4394 &gather_off
, &gather_scale
);
4395 gcc_assert (gather_decl
);
4396 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
4397 &def_stmt
, &def
, &gather_dt
,
4398 &gather_off_vectype
))
4400 if (vect_print_dump_info (REPORT_DETAILS
))
4401 fprintf (vect_dump
, "gather index use not simple.");
4405 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4407 if (!vect_check_strided_load (stmt
, loop_vinfo
,
4408 &stride_base
, &stride_step
))
4413 negative
= tree_int_cst_compare (nested_in_vect_loop
4414 ? STMT_VINFO_DR_STEP (stmt_info
)
4416 size_zero_node
) < 0;
4417 if (negative
&& ncopies
> 1)
4419 if (vect_print_dump_info (REPORT_DETAILS
))
4420 fprintf (vect_dump
, "multiple types with negative step.");
4426 gcc_assert (!grouped_load
);
4427 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
4428 if (alignment_support_scheme
!= dr_aligned
4429 && alignment_support_scheme
!= dr_unaligned_supported
)
4431 if (vect_print_dump_info (REPORT_DETAILS
))
4432 fprintf (vect_dump
, "negative step but alignment required.");
4435 if (!perm_mask_for_reverse (vectype
))
4437 if (vect_print_dump_info (REPORT_DETAILS
))
4438 fprintf (vect_dump
, "negative step and reversing not supported.");
4444 if (!vec_stmt
) /* transformation not required. */
4446 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
4447 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
, NULL
, NULL
, NULL
);
4451 if (vect_print_dump_info (REPORT_DETAILS
))
4452 fprintf (vect_dump
, "transform load. ncopies = %d", ncopies
);
4456 if (STMT_VINFO_GATHER_P (stmt_info
))
4458 tree vec_oprnd0
= NULL_TREE
, op
;
4459 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
4460 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
4461 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
4462 edge pe
= loop_preheader_edge (loop
);
4465 enum { NARROW
, NONE
, WIDEN
} modifier
;
4466 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
4468 if (nunits
== gather_off_nunits
)
4470 else if (nunits
== gather_off_nunits
/ 2)
4472 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
4475 for (i
= 0; i
< gather_off_nunits
; ++i
)
4476 sel
[i
] = i
| nunits
;
4478 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
4479 gcc_assert (perm_mask
!= NULL_TREE
);
4481 else if (nunits
== gather_off_nunits
* 2)
4483 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
4486 for (i
= 0; i
< nunits
; ++i
)
4487 sel
[i
] = i
< gather_off_nunits
4488 ? i
: i
+ nunits
- gather_off_nunits
;
4490 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
4491 gcc_assert (perm_mask
!= NULL_TREE
);
4497 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
4498 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4499 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4500 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4501 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4502 scaletype
= TREE_VALUE (arglist
);
4503 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
4504 && types_compatible_p (srctype
, masktype
));
4506 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4508 ptr
= fold_convert (ptrtype
, gather_base
);
4509 if (!is_gimple_min_invariant (ptr
))
4511 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
4512 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
4513 gcc_assert (!new_bb
);
4516 /* Currently we support only unconditional gather loads,
4517 so mask should be all ones. */
4518 if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
4519 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
4520 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
4524 for (j
= 0; j
< 6; ++j
)
4526 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
4527 mask
= build_real (TREE_TYPE (masktype
), r
);
4531 mask
= build_vector_from_val (masktype
, mask
);
4532 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
4534 scale
= build_int_cst (scaletype
, gather_scale
);
4536 prev_stmt_info
= NULL
;
4537 for (j
= 0; j
< ncopies
; ++j
)
4539 if (modifier
== WIDEN
&& (j
& 1))
4540 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
4541 perm_mask
, stmt
, gsi
);
4544 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
4547 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
4549 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
4551 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
4552 == TYPE_VECTOR_SUBPARTS (idxtype
));
4553 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
4554 var
= make_ssa_name (var
, NULL
);
4555 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
4557 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
4559 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4564 = gimple_build_call (gather_decl
, 5, mask
, ptr
, op
, mask
, scale
);
4566 if (!useless_type_conversion_p (vectype
, rettype
))
4568 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
4569 == TYPE_VECTOR_SUBPARTS (rettype
));
4570 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
4571 op
= make_ssa_name (var
, new_stmt
);
4572 gimple_call_set_lhs (new_stmt
, op
);
4573 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4574 var
= make_ssa_name (vec_dest
, NULL
);
4575 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
4577 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
4582 var
= make_ssa_name (vec_dest
, new_stmt
);
4583 gimple_call_set_lhs (new_stmt
, var
);
4586 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4588 if (modifier
== NARROW
)
4595 var
= permute_vec_elements (prev_res
, var
,
4596 perm_mask
, stmt
, gsi
);
4597 new_stmt
= SSA_NAME_DEF_STMT (var
);
4600 if (prev_stmt_info
== NULL
)
4601 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4603 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4604 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4608 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4610 gimple_stmt_iterator incr_gsi
;
4614 tree ref
= DR_REF (dr
);
4617 VEC(constructor_elt
, gc
) *v
= NULL
;
4618 gimple_seq stmts
= NULL
;
4620 gcc_assert (stride_base
&& stride_step
);
4622 /* For a load with loop-invariant (but other than power-of-2)
4623 stride (i.e. not a grouped access) like so:
4625 for (i = 0; i < n; i += stride)
4628 we generate a new induction variable and new accesses to
4629 form a new vector (or vectors, depending on ncopies):
4631 for (j = 0; ; j += VF*stride)
4633 tmp2 = array[j + stride];
4635 vectemp = {tmp1, tmp2, ...}
4638 ivstep
= stride_step
;
4639 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
4640 build_int_cst (TREE_TYPE (ivstep
), vf
));
4642 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
4644 create_iv (stride_base
, ivstep
, NULL
,
4645 loop
, &incr_gsi
, insert_after
,
4647 incr
= gsi_stmt (incr_gsi
);
4648 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
4650 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
4652 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
4654 prev_stmt_info
= NULL
;
4655 running_off
= offvar
;
4656 for (j
= 0; j
< ncopies
; j
++)
4660 v
= VEC_alloc (constructor_elt
, gc
, nunits
);
4661 for (i
= 0; i
< nunits
; i
++)
4663 tree newref
, newoff
;
4665 if (TREE_CODE (ref
) == ARRAY_REF
)
4666 newref
= build4 (ARRAY_REF
, TREE_TYPE (ref
),
4667 unshare_expr (TREE_OPERAND (ref
, 0)),
4669 NULL_TREE
, NULL_TREE
);
4671 newref
= build2 (MEM_REF
, TREE_TYPE (ref
),
4673 TREE_OPERAND (ref
, 1));
4675 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
4678 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
4679 newoff
= copy_ssa_name (running_off
, NULL
);
4680 if (POINTER_TYPE_P (TREE_TYPE (newoff
)))
4681 incr
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, newoff
,
4682 running_off
, stride_step
);
4684 incr
= gimple_build_assign_with_ops (PLUS_EXPR
, newoff
,
4685 running_off
, stride_step
);
4686 vect_finish_stmt_generation (stmt
, incr
, gsi
);
4688 running_off
= newoff
;
4691 vec_inv
= build_constructor (vectype
, v
);
4692 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
4693 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
4696 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4698 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4699 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4706 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4708 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance
)
4709 && first_stmt
!= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0))
4710 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
4712 /* Check if the chain of loads is already vectorized. */
4713 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
)))
4715 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4718 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4719 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4721 /* VEC_NUM is the number of vect stmts to be created for this group. */
4724 grouped_load
= false;
4725 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
4726 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance
))
4730 vec_num
= group_size
;
4736 group_size
= vec_num
= 1;
4739 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
4740 gcc_assert (alignment_support_scheme
);
4741 /* Targets with load-lane instructions must not require explicit
4743 gcc_assert (!load_lanes_p
4744 || alignment_support_scheme
== dr_aligned
4745 || alignment_support_scheme
== dr_unaligned_supported
);
4747 /* In case the vectorization factor (VF) is bigger than the number
4748 of elements that we can fit in a vectype (nunits), we have to generate
4749 more than one vector stmt - i.e - we need to "unroll" the
4750 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4751 from one copy of the vector stmt to the next, in the field
4752 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4753 stages to find the correct vector defs to be used when vectorizing
4754 stmts that use the defs of the current stmt. The example below
4755 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4756 need to create 4 vectorized stmts):
4758 before vectorization:
4759 RELATED_STMT VEC_STMT
4763 step 1: vectorize stmt S1:
4764 We first create the vector stmt VS1_0, and, as usual, record a
4765 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4766 Next, we create the vector stmt VS1_1, and record a pointer to
4767 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4768 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4770 RELATED_STMT VEC_STMT
4771 VS1_0: vx0 = memref0 VS1_1 -
4772 VS1_1: vx1 = memref1 VS1_2 -
4773 VS1_2: vx2 = memref2 VS1_3 -
4774 VS1_3: vx3 = memref3 - -
4775 S1: x = load - VS1_0
4778 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4779 information we recorded in RELATED_STMT field is used to vectorize
4782 /* In case of interleaving (non-unit grouped access):
4789 Vectorized loads are created in the order of memory accesses
4790 starting from the access of the first stmt of the chain:
4793 VS2: vx1 = &base + vec_size*1
4794 VS3: vx3 = &base + vec_size*2
4795 VS4: vx4 = &base + vec_size*3
4797 Then permutation statements are generated:
4799 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4800 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4803 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4804 (the order of the data-refs in the output of vect_permute_load_chain
4805 corresponds to the order of scalar stmts in the interleaving chain - see
4806 the documentation of vect_permute_load_chain()).
4807 The generation of permutation stmts and recording them in
4808 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4810 In case of both multiple types and interleaving, the vector loads and
4811 permutation stmts above are created for every copy. The result vector
4812 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4813 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4815 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4816 on a target that supports unaligned accesses (dr_unaligned_supported)
4817 we generate the following code:
4821 p = p + indx * vectype_size;
4826 Otherwise, the data reference is potentially unaligned on a target that
4827 does not support unaligned accesses (dr_explicit_realign_optimized) -
4828 then generate the following code, in which the data in each iteration is
4829 obtained by two vector loads, one from the previous iteration, and one
4830 from the current iteration:
4832 msq_init = *(floor(p1))
4833 p2 = initial_addr + VS - 1;
4834 realignment_token = call target_builtin;
4837 p2 = p2 + indx * vectype_size
4839 vec_dest = realign_load (msq, lsq, realignment_token)
4844 /* If the misalignment remains the same throughout the execution of the
4845 loop, we can create the init_addr and permutation mask at the loop
4846 preheader. Otherwise, it needs to be created inside the loop.
4847 This can only occur when vectorizing memory accesses in the inner-loop
4848 nested within an outer-loop that is being vectorized. */
4850 if (nested_in_vect_loop
4851 && (TREE_INT_CST_LOW (DR_STEP (dr
))
4852 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
4854 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
4855 compute_in_loop
= true;
4858 if ((alignment_support_scheme
== dr_explicit_realign_optimized
4859 || alignment_support_scheme
== dr_explicit_realign
)
4860 && !compute_in_loop
)
4862 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
4863 alignment_support_scheme
, NULL_TREE
,
4865 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
4867 phi
= SSA_NAME_DEF_STMT (msq
);
4868 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
4875 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
4878 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
4880 aggr_type
= vectype
;
4882 prev_stmt_info
= NULL
;
4883 for (j
= 0; j
< ncopies
; j
++)
4885 /* 1. Create the vector or array pointer update chain. */
4887 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
4888 offset
, &dummy
, gsi
,
4889 &ptr_incr
, false, &inv_p
);
4891 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
4892 TYPE_SIZE_UNIT (aggr_type
));
4894 if (grouped_load
|| slp_perm
)
4895 dr_chain
= VEC_alloc (tree
, heap
, vec_num
);
4901 vec_array
= create_vector_array (vectype
, vec_num
);
4904 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4905 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
4906 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
4907 gimple_call_set_lhs (new_stmt
, vec_array
);
4908 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4910 /* Extract each vector into an SSA_NAME. */
4911 for (i
= 0; i
< vec_num
; i
++)
4913 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
4915 VEC_quick_push (tree
, dr_chain
, new_temp
);
4918 /* Record the mapping between SSA_NAMEs and statements. */
4919 vect_record_grouped_load_vectors (stmt
, dr_chain
);
4923 for (i
= 0; i
< vec_num
; i
++)
4926 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
4929 /* 2. Create the vector-load in the loop. */
4930 switch (alignment_support_scheme
)
4933 case dr_unaligned_supported
:
4935 unsigned int align
, misalign
;
4938 = build2 (MEM_REF
, vectype
, dataref_ptr
,
4939 build_int_cst (reference_alias_ptr_type
4940 (DR_REF (first_dr
)), 0));
4941 align
= TYPE_ALIGN_UNIT (vectype
);
4942 if (alignment_support_scheme
== dr_aligned
)
4944 gcc_assert (aligned_access_p (first_dr
));
4947 else if (DR_MISALIGNMENT (first_dr
) == -1)
4949 TREE_TYPE (data_ref
)
4950 = build_aligned_type (TREE_TYPE (data_ref
),
4951 TYPE_ALIGN (elem_type
));
4952 align
= TYPE_ALIGN_UNIT (elem_type
);
4957 TREE_TYPE (data_ref
)
4958 = build_aligned_type (TREE_TYPE (data_ref
),
4959 TYPE_ALIGN (elem_type
));
4960 misalign
= DR_MISALIGNMENT (first_dr
);
4962 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
4966 case dr_explicit_realign
:
4971 vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
4973 if (compute_in_loop
)
4974 msq
= vect_setup_realignment (first_stmt
, gsi
,
4976 dr_explicit_realign
,
4979 ptr
= copy_ssa_name (dataref_ptr
, NULL
);
4980 new_stmt
= gimple_build_assign_with_ops
4981 (BIT_AND_EXPR
, ptr
, dataref_ptr
,
4983 (TREE_TYPE (dataref_ptr
),
4984 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
4985 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4987 = build2 (MEM_REF
, vectype
, ptr
,
4988 build_int_cst (reference_alias_ptr_type
4989 (DR_REF (first_dr
)), 0));
4990 vec_dest
= vect_create_destination_var (scalar_dest
,
4992 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
4993 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4994 gimple_assign_set_lhs (new_stmt
, new_temp
);
4995 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
4996 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
4997 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5000 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
5001 TYPE_SIZE_UNIT (elem_type
));
5002 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
5003 new_stmt
= gimple_build_assign_with_ops
5004 (BIT_AND_EXPR
, NULL_TREE
, ptr
,
5007 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5008 ptr
= copy_ssa_name (dataref_ptr
, new_stmt
);
5009 gimple_assign_set_lhs (new_stmt
, ptr
);
5010 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5012 = build2 (MEM_REF
, vectype
, ptr
,
5013 build_int_cst (reference_alias_ptr_type
5014 (DR_REF (first_dr
)), 0));
5017 case dr_explicit_realign_optimized
:
5018 new_temp
= copy_ssa_name (dataref_ptr
, NULL
);
5019 new_stmt
= gimple_build_assign_with_ops
5020 (BIT_AND_EXPR
, new_temp
, dataref_ptr
,
5022 (TREE_TYPE (dataref_ptr
),
5023 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5024 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5026 = build2 (MEM_REF
, vectype
, new_temp
,
5027 build_int_cst (reference_alias_ptr_type
5028 (DR_REF (first_dr
)), 0));
5033 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5034 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
5035 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5036 gimple_assign_set_lhs (new_stmt
, new_temp
);
5037 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5039 /* 3. Handle explicit realignment if necessary/supported.
5041 vec_dest = realign_load (msq, lsq, realignment_token) */
5042 if (alignment_support_scheme
== dr_explicit_realign_optimized
5043 || alignment_support_scheme
== dr_explicit_realign
)
5045 lsq
= gimple_assign_lhs (new_stmt
);
5046 if (!realignment_token
)
5047 realignment_token
= dataref_ptr
;
5048 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5050 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR
,
5053 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5054 gimple_assign_set_lhs (new_stmt
, new_temp
);
5055 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5057 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
5060 if (i
== vec_num
- 1 && j
== ncopies
- 1)
5061 add_phi_arg (phi
, lsq
,
5062 loop_latch_edge (containing_loop
),
5068 /* 4. Handle invariant-load. */
5069 if (inv_p
&& !bb_vinfo
)
5071 gimple_stmt_iterator gsi2
= *gsi
;
5072 gcc_assert (!grouped_load
);
5074 new_temp
= vect_init_vector (stmt
, scalar_dest
,
5076 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5081 tree perm_mask
= perm_mask_for_reverse (vectype
);
5082 new_temp
= permute_vec_elements (new_temp
, new_temp
,
5083 perm_mask
, stmt
, gsi
);
5084 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5087 /* Collect vector loads and later create their permutation in
5088 vect_transform_grouped_load (). */
5089 if (grouped_load
|| slp_perm
)
5090 VEC_quick_push (tree
, dr_chain
, new_temp
);
5092 /* Store vector loads in the corresponding SLP_NODE. */
5093 if (slp
&& !slp_perm
)
5094 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
5099 if (slp
&& !slp_perm
)
5104 if (!vect_transform_slp_perm_load (stmt
, dr_chain
, gsi
, vf
,
5105 slp_node_instance
, false))
5107 VEC_free (tree
, heap
, dr_chain
);
5116 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
5117 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5122 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5124 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5125 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5129 VEC_free (tree
, heap
, dr_chain
);
5135 /* Function vect_is_simple_cond.
5138 LOOP - the loop that is being vectorized.
5139 COND - Condition that is checked for simple use.
5142 *COMP_VECTYPE - the vector type for the comparison.
5144 Returns whether a COND can be vectorized. Checks whether
5145 condition operands are supportable using vec_is_simple_use. */
5148 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
5149 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
5153 enum vect_def_type dt
;
5154 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
5156 if (!COMPARISON_CLASS_P (cond
))
5159 lhs
= TREE_OPERAND (cond
, 0);
5160 rhs
= TREE_OPERAND (cond
, 1);
5162 if (TREE_CODE (lhs
) == SSA_NAME
)
5164 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
5165 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
5166 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
5169 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
5170 && TREE_CODE (lhs
) != FIXED_CST
)
5173 if (TREE_CODE (rhs
) == SSA_NAME
)
5175 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
5176 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
5177 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
5180 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
5181 && TREE_CODE (rhs
) != FIXED_CST
)
5184 *comp_vectype
= vectype1
? vectype1
: vectype2
;
5188 /* vectorizable_condition.
5190 Check if STMT is conditional modify expression that can be vectorized.
5191 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5192 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5195 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
5196 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
5197 else caluse if it is 2).
5199 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5202 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
5203 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
5206 tree scalar_dest
= NULL_TREE
;
5207 tree vec_dest
= NULL_TREE
;
5208 tree cond_expr
, then_clause
, else_clause
;
5209 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5210 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5211 tree comp_vectype
= NULL_TREE
;
5212 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
5213 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
5214 tree vec_compare
, vec_cond_expr
;
5216 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5218 enum vect_def_type dt
, dts
[4];
5219 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5221 enum tree_code code
;
5222 stmt_vec_info prev_stmt_info
= NULL
;
5224 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5225 VEC (tree
, heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
5226 VEC (tree
, heap
) *vec_oprnds2
= NULL
, *vec_oprnds3
= NULL
;
5228 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
5231 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5233 gcc_assert (ncopies
>= 1);
5234 if (reduc_index
&& ncopies
> 1)
5235 return false; /* FORNOW */
5237 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
5240 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5243 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5244 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
5248 /* FORNOW: not yet supported. */
5249 if (STMT_VINFO_LIVE_P (stmt_info
))
5251 if (vect_print_dump_info (REPORT_DETAILS
))
5252 fprintf (vect_dump
, "value used after loop.");
5256 /* Is vectorizable conditional operation? */
5257 if (!is_gimple_assign (stmt
))
5260 code
= gimple_assign_rhs_code (stmt
);
5262 if (code
!= COND_EXPR
)
5265 cond_expr
= gimple_assign_rhs1 (stmt
);
5266 then_clause
= gimple_assign_rhs2 (stmt
);
5267 else_clause
= gimple_assign_rhs3 (stmt
);
5269 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
5274 if (TREE_CODE (then_clause
) == SSA_NAME
)
5276 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
5277 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5278 &then_def_stmt
, &def
, &dt
))
5281 else if (TREE_CODE (then_clause
) != INTEGER_CST
5282 && TREE_CODE (then_clause
) != REAL_CST
5283 && TREE_CODE (then_clause
) != FIXED_CST
)
5286 if (TREE_CODE (else_clause
) == SSA_NAME
)
5288 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
5289 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5290 &else_def_stmt
, &def
, &dt
))
5293 else if (TREE_CODE (else_clause
) != INTEGER_CST
5294 && TREE_CODE (else_clause
) != REAL_CST
5295 && TREE_CODE (else_clause
) != FIXED_CST
)
5300 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
5301 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
5308 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
5309 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
5310 vec_oprnds2
= VEC_alloc (tree
, heap
, 1);
5311 vec_oprnds3
= VEC_alloc (tree
, heap
, 1);
5315 scalar_dest
= gimple_assign_lhs (stmt
);
5316 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5318 /* Handle cond expr. */
5319 for (j
= 0; j
< ncopies
; j
++)
5321 gimple new_stmt
= NULL
;
5326 VEC (tree
, heap
) *ops
= VEC_alloc (tree
, heap
, 4);
5327 VEC (slp_void_p
, heap
) *vec_defs
;
5329 vec_defs
= VEC_alloc (slp_void_p
, heap
, 4);
5330 VEC_safe_push (tree
, heap
, ops
, TREE_OPERAND (cond_expr
, 0));
5331 VEC_safe_push (tree
, heap
, ops
, TREE_OPERAND (cond_expr
, 1));
5332 VEC_safe_push (tree
, heap
, ops
, then_clause
);
5333 VEC_safe_push (tree
, heap
, ops
, else_clause
);
5334 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
5335 vec_oprnds3
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5336 vec_oprnds2
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5337 vec_oprnds1
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5338 vec_oprnds0
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5340 VEC_free (tree
, heap
, ops
);
5341 VEC_free (slp_void_p
, heap
, vec_defs
);
5347 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
5349 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
5350 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
5353 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
5355 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
5356 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
5357 if (reduc_index
== 1)
5358 vec_then_clause
= reduc_def
;
5361 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
5363 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
5364 NULL
, >emp
, &def
, &dts
[2]);
5366 if (reduc_index
== 2)
5367 vec_else_clause
= reduc_def
;
5370 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
5372 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
5373 NULL
, >emp
, &def
, &dts
[3]);
5379 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
5380 VEC_pop (tree
, vec_oprnds0
));
5381 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
5382 VEC_pop (tree
, vec_oprnds1
));
5383 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
5384 VEC_pop (tree
, vec_oprnds2
));
5385 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
5386 VEC_pop (tree
, vec_oprnds3
));
5391 VEC_quick_push (tree
, vec_oprnds0
, vec_cond_lhs
);
5392 VEC_quick_push (tree
, vec_oprnds1
, vec_cond_rhs
);
5393 VEC_quick_push (tree
, vec_oprnds2
, vec_then_clause
);
5394 VEC_quick_push (tree
, vec_oprnds3
, vec_else_clause
);
5397 /* Arguments are ready. Create the new vector stmt. */
5398 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vec_cond_lhs
)
5400 vec_cond_rhs
= VEC_index (tree
, vec_oprnds1
, i
);
5401 vec_then_clause
= VEC_index (tree
, vec_oprnds2
, i
);
5402 vec_else_clause
= VEC_index (tree
, vec_oprnds3
, i
);
5404 vec_compare
= build2 (TREE_CODE (cond_expr
), vectype
,
5405 vec_cond_lhs
, vec_cond_rhs
);
5406 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
5407 vec_compare
, vec_then_clause
, vec_else_clause
);
5409 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
5410 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5411 gimple_assign_set_lhs (new_stmt
, new_temp
);
5412 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5414 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
5421 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5423 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5425 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5428 VEC_free (tree
, heap
, vec_oprnds0
);
5429 VEC_free (tree
, heap
, vec_oprnds1
);
5430 VEC_free (tree
, heap
, vec_oprnds2
);
5431 VEC_free (tree
, heap
, vec_oprnds3
);
5437 /* Make sure the statement is vectorizable. */
5440 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
5442 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5443 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5444 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
5446 tree scalar_type
, vectype
;
5447 gimple pattern_stmt
;
5448 gimple_seq pattern_def_seq
;
5450 if (vect_print_dump_info (REPORT_DETAILS
))
5452 fprintf (vect_dump
, "==> examining statement: ");
5453 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5456 if (gimple_has_volatile_ops (stmt
))
5458 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
5459 fprintf (vect_dump
, "not vectorized: stmt has volatile operands");
5464 /* Skip stmts that do not need to be vectorized. In loops this is expected
5466 - the COND_EXPR which is the loop exit condition
5467 - any LABEL_EXPRs in the loop
5468 - computations that are used only for array indexing or loop control.
5469 In basic blocks we only analyze statements that are a part of some SLP
5470 instance, therefore, all the statements are relevant.
5472 Pattern statement needs to be analyzed instead of the original statement
5473 if the original statement is not relevant. Otherwise, we analyze both
5474 statements. In basic blocks we are called from some SLP instance
5475 traversal, don't analyze pattern stmts instead, the pattern stmts
5476 already will be part of SLP instance. */
5478 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
5479 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5480 && !STMT_VINFO_LIVE_P (stmt_info
))
5482 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5484 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5485 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5487 /* Analyze PATTERN_STMT instead of the original stmt. */
5488 stmt
= pattern_stmt
;
5489 stmt_info
= vinfo_for_stmt (pattern_stmt
);
5490 if (vect_print_dump_info (REPORT_DETAILS
))
5492 fprintf (vect_dump
, "==> examining pattern statement: ");
5493 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5498 if (vect_print_dump_info (REPORT_DETAILS
))
5499 fprintf (vect_dump
, "irrelevant.");
5504 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5507 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5508 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5510 /* Analyze PATTERN_STMT too. */
5511 if (vect_print_dump_info (REPORT_DETAILS
))
5513 fprintf (vect_dump
, "==> examining pattern statement: ");
5514 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5517 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
5521 if (is_pattern_stmt_p (stmt_info
)
5523 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
5525 gimple_stmt_iterator si
;
5527 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
5529 gimple pattern_def_stmt
= gsi_stmt (si
);
5530 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
5531 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
5533 /* Analyze def stmt of STMT if it's a pattern stmt. */
5534 if (vect_print_dump_info (REPORT_DETAILS
))
5536 fprintf (vect_dump
, "==> examining pattern def statement: ");
5537 print_gimple_stmt (vect_dump
, pattern_def_stmt
, 0, TDF_SLIM
);
5540 if (!vect_analyze_stmt (pattern_def_stmt
,
5541 need_to_vectorize
, node
))
5547 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
5549 case vect_internal_def
:
5552 case vect_reduction_def
:
5553 case vect_nested_cycle
:
5554 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
5555 || relevance
== vect_used_in_outer_by_reduction
5556 || relevance
== vect_unused_in_scope
));
5559 case vect_induction_def
:
5560 case vect_constant_def
:
5561 case vect_external_def
:
5562 case vect_unknown_def_type
:
5569 gcc_assert (PURE_SLP_STMT (stmt_info
));
5571 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
5572 if (vect_print_dump_info (REPORT_DETAILS
))
5574 fprintf (vect_dump
, "get vectype for scalar type: ");
5575 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
5578 vectype
= get_vectype_for_scalar_type (scalar_type
);
5581 if (vect_print_dump_info (REPORT_DETAILS
))
5583 fprintf (vect_dump
, "not SLPed: unsupported data-type ");
5584 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
5589 if (vect_print_dump_info (REPORT_DETAILS
))
5591 fprintf (vect_dump
, "vectype: ");
5592 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
5595 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
5598 if (STMT_VINFO_RELEVANT_P (stmt_info
))
5600 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
5601 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
5602 *need_to_vectorize
= true;
5607 && (STMT_VINFO_RELEVANT_P (stmt_info
)
5608 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
5609 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
5610 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
5611 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
5612 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
5613 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
5614 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
5615 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
5616 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
5617 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
5621 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, node
)
5622 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
5623 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
5624 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
5625 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
5626 || vectorizable_call (stmt
, NULL
, NULL
, node
)
5627 || vectorizable_store (stmt
, NULL
, NULL
, node
)
5628 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
5633 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
5635 fprintf (vect_dump
, "not vectorized: relevant stmt not ");
5636 fprintf (vect_dump
, "supported: ");
5637 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5646 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5647 need extra handling, except for vectorizable reductions. */
5648 if (STMT_VINFO_LIVE_P (stmt_info
)
5649 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
5650 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
5654 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
5656 fprintf (vect_dump
, "not vectorized: live stmt not ");
5657 fprintf (vect_dump
, "supported: ");
5658 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
/* NOTE(review): the embedded original line numbers below (e.g. 5675 -> 5677,
   5687 -> 5691) jump repeatedly, so interior source lines (braces, breaks,
   returns) were lost in extraction.  The code text is kept byte-identical
   rather than reconstructed; do not assume this region compiles as-is.  */
5668 /* Function vect_transform_stmt.
5670 Create a vectorized stmt to replace STMT, and insert it at BSI. */
/* Dispatches on STMT_VINFO_TYPE and calls the matching vectorizable_*
   transform routine; the generated vector stmt is recorded in
   STMT_VINFO_VEC_STMT at the end.  */
5673 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
5674 bool *grouped_store
, slp_tree slp_node
,
5675 slp_instance slp_node_instance
)
5677 bool is_store
= false;
5678 gimple vec_stmt
= NULL
;
5679 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5682 switch (STMT_VINFO_TYPE (stmt_info
))
5684 case type_demotion_vec_info_type
:
5685 case type_promotion_vec_info_type
:
5686 case type_conversion_vec_info_type
:
5687 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
5691 case induc_vec_info_type
:
/* Induction is handled only for loop (non-SLP) vectorization here.  */
5692 gcc_assert (!slp_node
);
5693 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
5697 case shift_vec_info_type
:
5698 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
5702 case op_vec_info_type
:
5703 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
5707 case assignment_vec_info_type
:
5708 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
5712 case load_vec_info_type
:
5713 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
5718 case store_vec_info_type
:
5719 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
5721 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
5723 /* In case of interleaving, the whole chain is vectorized when the
5724 last store in the chain is reached. Store stmts before the last
5725 one are skipped, and there vec_stmt_info shouldn't be freed
5727 *grouped_store
= true;
5728 if (STMT_VINFO_VEC_STMT (stmt_info
))
5735 case condition_vec_info_type
:
5736 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
5740 case call_vec_info_type
:
5741 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
/* vectorizable_call may have replaced the stmt at *GSI; re-fetch it.  */
5742 stmt
= gsi_stmt (*gsi
);
5745 case reduc_vec_info_type
:
5746 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
5751 if (!STMT_VINFO_LIVE_P (stmt_info
))
5753 if (vect_print_dump_info (REPORT_DETAILS
))
5754 fprintf (vect_dump
, "stmt not supported.");
5759 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5760 is being vectorized, but outside the immediately enclosing loop. */
5762 && STMT_VINFO_LOOP_VINFO (stmt_info
)
5763 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5764 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
5765 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
5766 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
5767 || STMT_VINFO_RELEVANT (stmt_info
) ==
5768 vect_used_in_outer_by_reduction
))
5770 struct loop
*innerloop
= LOOP_VINFO_LOOP (
5771 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
5772 imm_use_iterator imm_iter
;
5773 use_operand_p use_p
;
5777 if (vect_print_dump_info (REPORT_DETAILS
))
5778 fprintf (vect_dump
, "Record the vdef for outer-loop vectorization.");
5780 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
5781 (to be used when vectorizing outer-loop stmts that use the DEF of
5783 if (gimple_code (stmt
) == GIMPLE_PHI
)
5784 scalar_dest
= PHI_RESULT (stmt
);
5786 scalar_dest
= gimple_assign_lhs (stmt
);
/* Walk all immediate uses of the scalar def; a use outside the inner
   loop identifies the loop-exit phi where the vec_stmt is recorded.  */
5788 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
5790 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
5792 exit_phi
= USE_STMT (use_p
);
5793 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
5798 /* Handle stmts whose DEF is used outside the loop-nest that is
5799 being vectorized. */
5800 if (STMT_VINFO_LIVE_P (stmt_info
)
5801 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
5803 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
5808 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
/* NOTE(review): interior lines are missing here too (numbering jumps
   5814 -> 5818, 5820 -> 5822, 5826 -> 5828); the loop header that walks
   the chain via TMP is not visible.  Code kept byte-identical.  */
5814 /* Remove a group of stores (for SLP or interleaving), free their
/* Walks the grouped-store chain starting at FIRST_STMT, and for each
   element: unlinks its virtual def, removes the stmt from its gsi,
   releases its SSA defs and frees its stmt_vec_info.  */
5818 vect_remove_stores (gimple first_stmt
)
5820 gimple next
= first_stmt
;
5822 gimple_stmt_iterator next_si
;
5826 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
5828 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
/* For pattern stmts, the stmt actually present in the IL is the
   related (original) stmt -- remove that one.  */
5829 if (is_pattern_stmt_p (stmt_info
))
5830 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
5831 /* Free the attached stmt_vec_info and remove the stmt. */
5832 next_si
= gsi_for_stmt (next
);
5833 unlink_stmt_vdef (next
);
5834 gsi_remove (&next_si
, true);
5835 release_defs (next
);
5836 free_stmt_vec_info (next
);
/* NOTE(review): numbering jumps (5848 -> 5851, 5887 -> end) indicate the
   opening brace and the trailing "return res;" were lost in extraction.
   Code kept byte-identical.  */
5842 /* Function new_stmt_vec_info.
5844 Create and initialize a new stmt_vec_info struct for STMT. */
/* Allocates a zeroed _stmt_vec_info via xcalloc and then explicitly sets
   every field to its documented default; the caller owns the result
   (freed by free_stmt_vec_info).  */
5847 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
5848 bb_vec_info bb_vinfo
)
5851 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
5853 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
5854 STMT_VINFO_STMT (res
) = stmt
;
5855 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
5856 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
5857 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
5858 STMT_VINFO_LIVE_P (res
) = false;
5859 STMT_VINFO_VECTYPE (res
) = NULL
;
5860 STMT_VINFO_VEC_STMT (res
) = NULL
;
5861 STMT_VINFO_VECTORIZABLE (res
) = true;
5862 STMT_VINFO_IN_PATTERN_P (res
) = false;
5863 STMT_VINFO_RELATED_STMT (res
) = NULL
;
5864 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
5865 STMT_VINFO_DATA_REF (res
) = NULL
;
/* Data-reference analysis fields start out unknown.  */
5867 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
5868 STMT_VINFO_DR_OFFSET (res
) = NULL
;
5869 STMT_VINFO_DR_INIT (res
) = NULL
;
5870 STMT_VINFO_DR_STEP (res
) = NULL
;
5871 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
/* PHIs in loop headers may be inductions/reductions -- classified later;
   everything else defaults to an ordinary internal def.  */
5873 if (gimple_code (stmt
) == GIMPLE_PHI
5874 && is_loop_header_bb_p (gimple_bb (stmt
)))
5875 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
5877 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
5879 STMT_VINFO_SAME_ALIGN_REFS (res
) = NULL
;
5880 STMT_SLP_TYPE (res
) = loop_vect
;
5881 GROUP_FIRST_ELEMENT (res
) = NULL
;
5882 GROUP_NEXT_ELEMENT (res
) = NULL
;
5883 GROUP_SIZE (res
) = 0;
5884 GROUP_STORE_COUNT (res
) = 0;
5885 GROUP_GAP (res
) = 0;
5886 GROUP_SAME_DR_STMT (res
) = NULL
;
5887 GROUP_READ_WRITE_DEPENDENCE (res
) = false;
5893 /* Create a hash table for stmt_vec_info. */
/* (Despite the comment above, this allocates a VEC of 50 slots, not a
   hash table; asserts it is created at most once.)  */
5896 init_stmt_vec_info_vec (void)
5898 gcc_assert (!stmt_vec_info_vec
);
5899 stmt_vec_info_vec
= VEC_alloc (vec_void_p
, heap
, 50);
5903 /* Free hash table for stmt_vec_info. */
/* Counterpart of init_stmt_vec_info_vec: asserts the vector exists and
   releases it back to the heap.  */
5906 free_stmt_vec_info_vec (void)
5908 gcc_assert (stmt_vec_info_vec
);
5909 VEC_free (vec_void_p
, heap
, stmt_vec_info_vec
);
/* NOTE(review): numbering jumps (5918 -> 5923, 5930 -> 5933, 5940 -> 5944)
   show guard conditions and braces were lost in extraction; code kept
   byte-identical.  */
5913 /* Free stmt vectorization related info. */
/* Frees STMT's stmt_vec_info, recursing first into any related pattern
   stmt and its pattern definition sequence, then clears the back-pointer
   with set_vinfo_for_stmt (stmt, NULL).  */
5916 free_stmt_vec_info (gimple stmt
)
5918 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
)
;
5923 /* Check if this statement has a related "pattern stmt"
5924 (introduced by the vectorizer during the pattern recognition
5925 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
5927 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
5929 stmt_vec_info patt_info
5930 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
5933 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
5936 gimple_stmt_iterator si
;
5937 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
5938 free_stmt_vec_info (gsi_stmt (si
));
5940 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info
));
5944 VEC_free (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmt_info
));
5945 set_vinfo_for_stmt (stmt
, NULL
);
/* NOTE(review): numbering jumps (5960 -> 5967, 5998 -> 6000, 6003 -> 6007,
   6011 -> 6017 etc.) show early-return bodies, braces and the final return
   were lost in extraction; code kept byte-identical.  */
5950 /* Function get_vectype_for_scalar_type_and_size.
5952 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
/* SIZE is the requested vector size in bytes; a SIZE of zero selects the
   target's preferred SIMD mode (see the branch on preferred_simd_mode
   vs. mode_for_vector below).  */
5956 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
5958 enum machine_mode inner_mode
= TYPE_MODE (scalar_type
);
5959 enum machine_mode simd_mode
;
5960 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
/* Only integral and floating-point component modes are vectorizable.  */
5967 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
5968 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
5971 /* We can't build a vector type of elements with alignment bigger than
5973 if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
5976 /* For vector types of elements whose mode precision doesn't
5977 match their types precision we use a element type of mode
5978 precision. The vectorization routines will have to make sure
5979 they support the proper result truncation/extension.
5980 We also make sure to build vector types with INTEGER_TYPE
5981 component type only. */
5982 if (INTEGRAL_TYPE_P (scalar_type
)
5983 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
5984 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
5985 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
5986 TYPE_UNSIGNED (scalar_type
));
5988 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5989 When the component mode passes the above test simply use a type
5990 corresponding to that mode. The theory is that any use that
5991 would cause problems with this will disable vectorization anyway. */
5992 if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
5993 && !INTEGRAL_TYPE_P (scalar_type
)
5994 && !POINTER_TYPE_P (scalar_type
))
5995 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
5997 /* If no size was supplied use the mode the target prefers. Otherwise
5998 lookup a vector mode of the specified size. */
6000 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
6002 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
6003 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
6007 vectype
= build_vector_type (scalar_type
, nunits
);
6008 if (vect_print_dump_info (REPORT_DETAILS
))
6010 fprintf (vect_dump
, "get vectype with %d units of type ", nunits
);
6011 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
6017 if (vect_print_dump_info (REPORT_DETAILS
))
6019 fprintf (vect_dump
, "vectype: ");
6020 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
/* Reject vectypes whose mode the target cannot represent as a vector
   (or at least as a wide integer).  */
6023 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
6024 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
6026 if (vect_print_dump_info (REPORT_DETAILS
))
6027 fprintf (vect_dump
, "mode not supported by target.");
/* Vector size (in bytes) currently in use for this compilation.
   NOTE(review): judging from get_vectype_for_scalar_type below, a value
   of 0 appears to mean "not yet chosen" -- it is then latched from the
   first vectype's mode size; confirm against callers.  */
6034 unsigned int current_vector_size
;
/* NOTE(review): numbering jumps (6046 -> 6048) show the condition guarding
   the current_vector_size latch was partially lost; code kept
   byte-identical.  */
6036 /* Function get_vectype_for_scalar_type.
6038 Returns the vector type corresponding to SCALAR_TYPE as supported
/* Convenience wrapper: queries get_vectype_for_scalar_type_and_size with
   the global current_vector_size, and latches that global from the
   resulting vectype when it is still 0.  */
6042 get_vectype_for_scalar_type (tree scalar_type
)
6045 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
6046 current_vector_size
);
6048 && current_vector_size
== 0)
6049 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
6053 /* Function get_same_sized_vectype
6055 Returns a vector type corresponding to SCALAR_TYPE of size
6056 VECTOR_TYPE if supported by the target. */
/* Thin wrapper: forwards to get_vectype_for_scalar_type_and_size using
   VECTOR_TYPE's mode size as the requested byte size.  */
6059 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
6061 return get_vectype_for_scalar_type_and_size
6062 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
/* NOTE(review): numbering jumps throughout (6091 -> 6096, 6104 -> 6108,
   6157 -> 6160, 6180 -> 6184 etc.) show returns, braces and case labels
   were lost in extraction; code kept byte-identical.  */
6065 /* Function vect_is_simple_use.
6068 LOOP_VINFO - the vect info of the loop that is being vectorized.
6069 BB_VINFO - the vect info of the basic block that is being vectorized.
6070 OPERAND - operand of STMT in the loop or bb.
6071 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6073 Returns whether a stmt with OPERAND can be vectorized.
6074 For loops, supportable operands are constants, loop invariants, and operands
6075 that are defined by the current iteration of the loop. Unsupportable
6076 operands are those that are defined by a previous iteration of the loop (as
6077 is the case in reduction/induction computations).
6078 For basic blocks, supportable operands are constants and bb invariants.
6079 For now, operands defined outside the basic block are not supported. */
6082 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
6083 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
6084 tree
*def
, enum vect_def_type
*dt
)
6087 stmt_vec_info stmt_vinfo
;
6088 struct loop
*loop
= NULL
;
6091 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6096 if (vect_print_dump_info (REPORT_DETAILS
))
6098 fprintf (vect_dump
, "vect_is_simple_use: operand ");
6099 print_generic_expr (vect_dump
, operand
, TDF_SLIM
);
/* Constants and invariants need no defining stmt inside the region.  */
6102 if (CONSTANT_CLASS_P (operand
))
6104 *dt
= vect_constant_def
;
6108 if (is_gimple_min_invariant (operand
))
6111 *dt
= vect_external_def
;
/* Look through PAREN_EXPR, which only inhibits reassociation.  */
6115 if (TREE_CODE (operand
) == PAREN_EXPR
)
6117 if (vect_print_dump_info (REPORT_DETAILS
))
6118 fprintf (vect_dump
, "non-associatable copy.");
6119 operand
= TREE_OPERAND (operand
, 0);
6122 if (TREE_CODE (operand
) != SSA_NAME
)
6124 if (vect_print_dump_info (REPORT_DETAILS
))
6125 fprintf (vect_dump
, "not ssa-name.");
6129 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
6130 if (*def_stmt
== NULL
)
6132 if (vect_print_dump_info (REPORT_DETAILS
))
6133 fprintf (vect_dump
, "no def_stmt.");
6137 if (vect_print_dump_info (REPORT_DETAILS
))
6139 fprintf (vect_dump
, "def_stmt: ");
6140 print_gimple_stmt (vect_dump
, *def_stmt
, 0, TDF_SLIM
);
6143 /* Empty stmt is expected only in case of a function argument.
6144 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
6145 if (gimple_nop_p (*def_stmt
))
6148 *dt
= vect_external_def
;
/* Defs from outside the region being vectorized are external.  */
6152 bb
= gimple_bb (*def_stmt
);
6154 if ((loop
&& !flow_bb_inside_loop_p (loop
, bb
))
6155 || (!loop
&& bb
!= BB_VINFO_BB (bb_vinfo
))
6156 || (!loop
&& gimple_code (*def_stmt
) == GIMPLE_PHI
))
6157 *dt
= vect_external_def
;
6160 stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
6161 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
6164 if (*dt
== vect_unknown_def_type
6166 && *dt
== vect_double_reduction_def
6167 && gimple_code (stmt
) != GIMPLE_PHI
))
6169 if (vect_print_dump_info (REPORT_DETAILS
))
6170 fprintf (vect_dump
, "Unsupported pattern.");
6174 if (vect_print_dump_info (REPORT_DETAILS
))
6175 fprintf (vect_dump
, "type of def: %d.",*dt
);
/* Extract the scalar def (*DEF) from the defining stmt.  */
6177 switch (gimple_code (*def_stmt
))
6180 *def
= gimple_phi_result (*def_stmt
);
6184 *def
= gimple_assign_lhs (*def_stmt
);
6188 *def
= gimple_call_lhs (*def_stmt
);
6193 if (vect_print_dump_info (REPORT_DETAILS
))
6194 fprintf (vect_dump
, "unsupported defining stmt: ");
/* NOTE(review): numbering jumps (6215 -> 6219, 6236 -> 6238, 6241 -> end)
   show early returns and the final return were lost in extraction; code
   kept byte-identical.  */
6201 /* Function vect_is_simple_use_1.
6203 Same as vect_is_simple_use but also determines the vector operand
6204 type of OPERAND and stores it to *VECTYPE. If the definition of
6205 OPERAND is vect_uninitialized_def, vect_constant_def or
6206 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6207 is responsible to compute the best suited vector type for the
6211 vect_is_simple_use_1 (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
6212 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
6213 tree
*def
, enum vect_def_type
*dt
, tree
*vectype
)
6215 if (!vect_is_simple_use (operand
, stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
6219 /* Now get a vector type if the def is internal, otherwise supply
6220 NULL_TREE and leave it up to the caller to figure out a proper
6221 type for the use stmt. */
6222 if (*dt
== vect_internal_def
6223 || *dt
== vect_induction_def
6224 || *dt
== vect_reduction_def
6225 || *dt
== vect_double_reduction_def
6226 || *dt
== vect_nested_cycle
)
6228 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
/* For an irrelevant, non-live pattern-replaced def, the vectype lives
   on the related pattern stmt instead.  */
6230 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
6231 && !STMT_VINFO_RELEVANT (stmt_info
)
6232 && !STMT_VINFO_LIVE_P (stmt_info
))
6233 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
6235 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6236 gcc_assert (*vectype
!= NULL_TREE
);
6238 else if (*dt
== vect_uninitialized_def
6239 || *dt
== vect_constant_def
6240 || *dt
== vect_external_def
)
6241 *vectype
= NULL_TREE
;
/* NOTE(review): numbering jumps throughout (6293 -> 6297, 6336 -> 6339,
   6367 -> 6374, 6376 -> 6381, 6398 -> 6404, 6439 -> 6442 etc.) show case
   labels, breaks, braces, the c1/c2 swap body and returns were lost in
   extraction; code kept byte-identical.  */
6249 /* Function supportable_widening_operation
6251 Check whether an operation represented by the code CODE is a
6252 widening operation that is supported by the target platform in
6253 vector form (i.e., when operating on arguments of type VECTYPE_IN
6254 producing a result of type VECTYPE_OUT).
6256 Widening operations we currently support are NOP (CONVERT), FLOAT
6257 and WIDEN_MULT. This function checks if these operations are supported
6258 by the target platform either directly (via vector tree-codes), or via
6262 - CODE1 and CODE2 are codes of vector operations to be used when
6263 vectorizing the operation, if available.
6264 - MULTI_STEP_CVT determines the number of required intermediate steps in
6265 case of multi-step conversion (like char->short->int - in that case
6266 MULTI_STEP_CVT will be 1).
6267 - INTERM_TYPES contains the intermediate type required to perform the
6268 widening operation (short in the above example). */
6271 supportable_widening_operation (enum tree_code code
, gimple stmt
,
6272 tree vectype_out
, tree vectype_in
,
6273 enum tree_code
*code1
, enum tree_code
*code2
,
6274 int *multi_step_cvt
,
6275 VEC (tree
, heap
) **interm_types
)
6277 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6278 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6279 struct loop
*vect_loop
= NULL
;
6280 enum machine_mode vec_mode
;
6281 enum insn_code icode1
, icode2
;
6282 optab optab1
, optab2
;
6283 tree vectype
= vectype_in
;
6284 tree wide_vectype
= vectype_out
;
6285 enum tree_code c1
, c2
;
6287 tree prev_type
, intermediate_type
;
6288 enum machine_mode intermediate_mode
, prev_mode
;
6289 optab optab3
, optab4
;
6291 *multi_step_cvt
= 0;
6293 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
/* Map the scalar widening code to the pair of vector codes (c1, c2).  */
6297 case WIDEN_MULT_EXPR
:
6298 /* The result of a vectorized widening operation usually requires
6299 two vectors (because the widened results do not fit into one vector).
6300 The generated vector results would normally be expected to be
6301 generated in the same order as in the original scalar computation,
6302 i.e. if 8 results are generated in each vector iteration, they are
6303 to be organized as follows:
6304 vect1: [res1,res2,res3,res4],
6305 vect2: [res5,res6,res7,res8].
6307 However, in the special case that the result of the widening
6308 operation is used in a reduction computation only, the order doesn't
6309 matter (because when vectorizing a reduction we change the order of
6310 the computation). Some targets can take advantage of this and
6311 generate more efficient code. For example, targets like Altivec,
6312 that support widen_mult using a sequence of {mult_even,mult_odd}
6313 generate the following vectors:
6314 vect1: [res1,res3,res5,res7],
6315 vect2: [res2,res4,res6,res8].
6317 When vectorizing outer-loops, we execute the inner-loop sequentially
6318 (each vectorized inner-loop iteration contributes to VF outer-loop
6319 iterations in parallel). We therefore don't allow to change the
6320 order of the computation in the inner-loop during outer-loop
6322 /* TODO: Another case in which order doesn't *really* matter is when we
6323 widen and then contract again, e.g. (short)((int)x * y >> 8).
6324 Normally, pack_trunc performs an even/odd permute, whereas the
6325 repack from an even/odd expansion would be an interleave, which
6326 would be significantly simpler for e.g. AVX2. */
6327 /* In any case, in order to avoid duplicating the code below, recurse
6328 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6329 are properly set up for the caller. If we fail, we'll continue with
6330 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6332 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
6333 && !nested_in_vect_loop_p (vect_loop
, stmt
)
6334 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
6335 stmt
, vectype_out
, vectype_in
,
6336 code1
, code2
, multi_step_cvt
,
6339 c1
= VEC_WIDEN_MULT_LO_EXPR
;
6340 c2
= VEC_WIDEN_MULT_HI_EXPR
;
6343 case VEC_WIDEN_MULT_EVEN_EXPR
:
6344 /* Support the recursion induced just above. */
6345 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
6346 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
6349 case WIDEN_LSHIFT_EXPR
:
6350 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
6351 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
6355 c1
= VEC_UNPACK_LO_EXPR
;
6356 c2
= VEC_UNPACK_HI_EXPR
;
6360 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
6361 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
6364 case FIX_TRUNC_EXPR
:
6365 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6366 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6367 computing the operation. */
/* On big-endian targets the LO/HI pair is swapped (body of the swap is
   among the lines lost in extraction).  */
6374 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
6376 enum tree_code ctmp
= c1
;
6381 if (code
== FIX_TRUNC_EXPR
)
6383 /* The signedness is determined from output operand. */
6384 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
6385 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
6389 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
6390 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
6393 if (!optab1
|| !optab2
)
6396 vec_mode
= TYPE_MODE (vectype
);
6397 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
6398 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
/* Direct support: both insns produce the wide vector mode.  */
6404 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
6405 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
6408 /* Check if it's a multi-step conversion that can be done using intermediate
6411 prev_type
= vectype
;
6412 prev_mode
= vec_mode
;
6414 if (!CONVERT_EXPR_CODE_P (code
))
6417 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6418 intermediate steps in promotion sequence. We try
6419 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
6421 *interm_types
= VEC_alloc (tree
, heap
, MAX_INTERM_CVT_STEPS
);
6422 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
6424 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
6426 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
6427 TYPE_UNSIGNED (prev_type
));
6428 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
6429 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
/* Each step must be supported from PREV_MODE into INTERMEDIATE_MODE and
   onward from INTERMEDIATE_MODE, for both the LO and HI halves.  */
6431 if (!optab3
|| !optab4
6432 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
6433 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
6434 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
6435 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
6436 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
6437 == CODE_FOR_nothing
)
6438 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
6439 == CODE_FOR_nothing
))
6442 VEC_quick_push (tree
, *interm_types
, intermediate_type
);
6443 (*multi_step_cvt
)++;
6445 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
6446 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
6449 prev_type
= intermediate_type
;
6450 prev_mode
= intermediate_mode
;
/* All intermediate attempts failed: release the collected types.  */
6453 VEC_free (tree
, heap
, *interm_types
);
6458 /* Function supportable_narrowing_operation
6460 Check whether an operation represented by the code CODE is a
6461 narrowing operation that is supported by the target platform in
6462 vector form (i.e., when operating on arguments of type VECTYPE_IN
6463 and producing a result of type VECTYPE_OUT).
6465 Narrowing operations we currently support are NOP (CONVERT) and
6466 FIX_TRUNC. This function checks if these operations are supported by
6467 the target platform directly via vector tree-codes.
6470 - CODE1 is the code of a vector operation to be used when
6471 vectorizing the operation, if available.
6472 - MULTI_STEP_CVT determines the number of required intermediate steps in
6473 case of multi-step conversion (like int->short->char - in that case
6474 MULTI_STEP_CVT will be 1).
6475 - INTERM_TYPES contains the intermediate type required to perform the
6476 narrowing operation (short in the above example). */
6479 supportable_narrowing_operation (enum tree_code code
,
6480 tree vectype_out
, tree vectype_in
,
6481 enum tree_code
*code1
, int *multi_step_cvt
,
6482 VEC (tree
, heap
) **interm_types
)
6484 enum machine_mode vec_mode
;
6485 enum insn_code icode1
;
6486 optab optab1
, interm_optab
;
6487 tree vectype
= vectype_in
;
6488 tree narrow_vectype
= vectype_out
;
6490 tree intermediate_type
;
6491 enum machine_mode intermediate_mode
, prev_mode
;
6495 *multi_step_cvt
= 0;
6499 c1
= VEC_PACK_TRUNC_EXPR
;
6502 case FIX_TRUNC_EXPR
:
6503 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
6507 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6508 tree code and optabs used for computing the operation. */
6515 if (code
== FIX_TRUNC_EXPR
)
6516 /* The signedness is determined from output operand. */
6517 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
6519 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
6524 vec_mode
= TYPE_MODE (vectype
);
6525 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
6530 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
6533 /* Check if it's a multi-step conversion that can be done using intermediate
6535 prev_mode
= vec_mode
;
6536 if (code
== FIX_TRUNC_EXPR
)
6537 uns
= TYPE_UNSIGNED (vectype_out
);
6539 uns
= TYPE_UNSIGNED (vectype
);
6541 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6542 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6543 costly than signed. */
6544 if (code
== FIX_TRUNC_EXPR
&& uns
)
6546 enum insn_code icode2
;
6549 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
6551 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
6552 if (interm_optab
!= unknown_optab
6553 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
6554 && insn_data
[icode1
].operand
[0].mode
6555 == insn_data
[icode2
].operand
[0].mode
)
6558 optab1
= interm_optab
;
6563 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6564 intermediate steps in promotion sequence. We try
6565 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6566 *interm_types
= VEC_alloc (tree
, heap
, MAX_INTERM_CVT_STEPS
);
6567 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
6569 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
6571 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
6573 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
6576 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
6577 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
6578 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
6579 == CODE_FOR_nothing
))
6582 VEC_quick_push (tree
, *interm_types
, intermediate_type
);
6583 (*multi_step_cvt
)++;
6585 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
6588 prev_mode
= intermediate_mode
;
6589 optab1
= interm_optab
;
6592 VEC_free (tree
, heap
, *interm_types
);