1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
35 #include "recog.h" /* FIXME: for insn_data */
37 #include "diagnostic-core.h"
38 #include "tree-vectorizer.h"
41 /* For lang_hooks.types.type_for_mode. */
42 #include "langhooks.h"
44 /* Return the vectorized type for the given statement. */
47 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
49 return STMT_VINFO_VECTYPE (stmt_info
);
52 /* Return TRUE iff the given statement is in an inner loop relative to
53 the loop being vectorized. */
55 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
57 gimple stmt
= STMT_VINFO_STMT (stmt_info
);
58 basic_block bb
= gimple_bb (stmt
);
59 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
65 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
67 return (bb
->loop_father
== loop
->inner
);
70 /* Record the cost of a statement, either by directly informing the
71 target model or by saving it in a vector for later processing.
72 Return a preliminary estimate of the statement's cost. */
75 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
76 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
77 int misalign
, enum vect_cost_model_location where
)
81 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
82 add_stmt_info_to_vec (body_cost_vec
, count
, kind
,
83 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
86 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
91 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
92 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
93 void *target_cost_data
;
96 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
98 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
100 return add_stmt_cost (target_cost_data
, count
, kind
, stmt_info
,
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
108 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
110 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
120 read_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
121 tree array
, unsigned HOST_WIDE_INT n
)
123 tree vect_type
, vect
, vect_name
, array_ref
;
126 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
127 vect_type
= TREE_TYPE (TREE_TYPE (array
));
128 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
129 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
130 build_int_cst (size_type_node
, n
),
131 NULL_TREE
, NULL_TREE
);
133 new_stmt
= gimple_build_assign (vect
, array_ref
);
134 vect_name
= make_ssa_name (vect
, new_stmt
);
135 gimple_assign_set_lhs (new_stmt
, vect_name
);
136 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
146 write_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
147 tree array
, unsigned HOST_WIDE_INT n
)
152 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
153 build_int_cst (size_type_node
, n
),
154 NULL_TREE
, NULL_TREE
);
156 new_stmt
= gimple_build_assign (array_ref
, vect
);
157 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
165 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
167 tree mem_ref
, alias_ptr_type
;
169 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
170 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
171 /* Arrays have the same alignment as their type. */
172 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
176 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
178 /* Function vect_mark_relevant.
180 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
183 vect_mark_relevant (VEC(gimple
,heap
) **worklist
, gimple stmt
,
184 enum vect_relevant relevant
, bool live_p
,
185 bool used_in_pattern
)
187 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
188 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
189 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
192 if (vect_print_dump_info (REPORT_DETAILS
))
193 fprintf (vect_dump
, "mark relevant %d, live %d.", relevant
, live_p
);
195 /* If this stmt is an original stmt in a pattern, we might need to mark its
196 related pattern stmt instead of the original stmt. However, such stmts
197 may have their own uses that are not in any pattern, in such cases the
198 stmt itself should be marked. */
199 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
202 if (!used_in_pattern
)
204 imm_use_iterator imm_iter
;
208 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
209 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
211 if (is_gimple_assign (stmt
))
212 lhs
= gimple_assign_lhs (stmt
);
214 lhs
= gimple_call_lhs (stmt
);
216 /* This use is out of pattern use, if LHS has other uses that are
217 pattern uses, we should mark the stmt itself, and not the pattern
219 if (TREE_CODE (lhs
) == SSA_NAME
)
220 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
222 if (is_gimple_debug (USE_STMT (use_p
)))
224 use_stmt
= USE_STMT (use_p
);
226 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
229 if (vinfo_for_stmt (use_stmt
)
230 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
240 /* This is the last stmt in a sequence that was detected as a
241 pattern that can potentially be vectorized. Don't mark the stmt
242 as relevant/live because it's not going to be vectorized.
243 Instead mark the pattern-stmt that replaces it. */
245 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
247 if (vect_print_dump_info (REPORT_DETAILS
))
248 fprintf (vect_dump
, "last stmt in pattern. don't mark"
250 stmt_info
= vinfo_for_stmt (pattern_stmt
);
251 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
252 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
253 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
258 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
259 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
260 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
262 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
263 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
265 if (vect_print_dump_info (REPORT_DETAILS
))
266 fprintf (vect_dump
, "already marked relevant/live.");
270 VEC_safe_push (gimple
, heap
, *worklist
, stmt
);
274 /* Function vect_stmt_relevant_p.
276 Return true if STMT in loop that is represented by LOOP_VINFO is
277 "relevant for vectorization".
279 A stmt is considered "relevant for vectorization" if:
280 - it has uses outside the loop.
281 - it has vdefs (it alters memory).
282 - control stmts in the loop (except for the exit condition).
284 CHECKME: what other side effects would the vectorizer allow? */
287 vect_stmt_relevant_p (gimple stmt
, loop_vec_info loop_vinfo
,
288 enum vect_relevant
*relevant
, bool *live_p
)
290 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
292 imm_use_iterator imm_iter
;
296 *relevant
= vect_unused_in_scope
;
299 /* cond stmt other than loop exit cond. */
300 if (is_ctrl_stmt (stmt
)
301 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
302 != loop_exit_ctrl_vec_info_type
)
303 *relevant
= vect_used_in_scope
;
305 /* changing memory. */
306 if (gimple_code (stmt
) != GIMPLE_PHI
)
307 if (gimple_vdef (stmt
))
309 if (vect_print_dump_info (REPORT_DETAILS
))
310 fprintf (vect_dump
, "vec_stmt_relevant_p: stmt has vdefs.");
311 *relevant
= vect_used_in_scope
;
314 /* uses outside the loop. */
315 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
317 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
319 basic_block bb
= gimple_bb (USE_STMT (use_p
));
320 if (!flow_bb_inside_loop_p (loop
, bb
))
322 if (vect_print_dump_info (REPORT_DETAILS
))
323 fprintf (vect_dump
, "vec_stmt_relevant_p: used out of loop.");
325 if (is_gimple_debug (USE_STMT (use_p
)))
328 /* We expect all such uses to be in the loop exit phis
329 (because of loop closed form) */
330 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
331 gcc_assert (bb
== single_exit (loop
)->dest
);
338 return (*live_p
|| *relevant
);
342 /* Function exist_non_indexing_operands_for_use_p
344 USE is one of the uses attached to STMT. Check if USE is
345 used in STMT for anything other than indexing an array. */
348 exist_non_indexing_operands_for_use_p (tree use
, gimple stmt
)
351 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
353 /* USE corresponds to some operand in STMT. If there is no data
354 reference in STMT, then any operand that corresponds to USE
355 is not indexing an array. */
356 if (!STMT_VINFO_DATA_REF (stmt_info
))
359 /* STMT has a data_ref. FORNOW this means that its of one of
363 (This should have been verified in analyze_data_refs).
365 'var' in the second case corresponds to a def, not a use,
366 so USE cannot correspond to any operands that are not used
369 Therefore, all we need to check is if STMT falls into the
370 first case, and whether var corresponds to USE. */
372 if (!gimple_assign_copy_p (stmt
))
374 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
376 operand
= gimple_assign_rhs1 (stmt
);
377 if (TREE_CODE (operand
) != SSA_NAME
)
388 Function process_use.
391 - a USE in STMT in a loop represented by LOOP_VINFO
392 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
393 that defined USE. This is done by calling mark_relevant and passing it
394 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
395 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
399 Generally, LIVE_P and RELEVANT are used to define the liveness and
400 relevance info of the DEF_STMT of this USE:
401 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
402 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
404 - case 1: If USE is used only for address computations (e.g. array indexing),
405 which does not need to be directly vectorized, then the liveness/relevance
406 of the respective DEF_STMT is left unchanged.
407 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
408 skip DEF_STMT cause it had already been processed.
409 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
410 be modified accordingly.
412 Return true if everything is as expected. Return false otherwise. */
415 process_use (gimple stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
416 enum vect_relevant relevant
, VEC(gimple
,heap
) **worklist
,
419 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
420 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
421 stmt_vec_info dstmt_vinfo
;
422 basic_block bb
, def_bb
;
425 enum vect_def_type dt
;
427 /* case 1: we are only interested in uses that need to be vectorized. Uses
428 that are used for address computation are not considered relevant. */
429 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
432 if (!vect_is_simple_use (use
, stmt
, loop_vinfo
, NULL
, &def_stmt
, &def
, &dt
))
434 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
435 fprintf (vect_dump
, "not vectorized: unsupported use in stmt.");
439 if (!def_stmt
|| gimple_nop_p (def_stmt
))
442 def_bb
= gimple_bb (def_stmt
);
443 if (!flow_bb_inside_loop_p (loop
, def_bb
))
445 if (vect_print_dump_info (REPORT_DETAILS
))
446 fprintf (vect_dump
, "def_stmt is out of loop.");
450 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
451 DEF_STMT must have already been processed, because this should be the
452 only way that STMT, which is a reduction-phi, was put in the worklist,
453 as there should be no other uses for DEF_STMT in the loop. So we just
454 check that everything is as expected, and we are done. */
455 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
456 bb
= gimple_bb (stmt
);
457 if (gimple_code (stmt
) == GIMPLE_PHI
458 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
459 && gimple_code (def_stmt
) != GIMPLE_PHI
460 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
461 && bb
->loop_father
== def_bb
->loop_father
)
463 if (vect_print_dump_info (REPORT_DETAILS
))
464 fprintf (vect_dump
, "reduc-stmt defining reduc-phi in the same nest.");
465 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
466 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
467 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
468 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
469 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
473 /* case 3a: outer-loop stmt defining an inner-loop stmt:
474 outer-loop-header-bb:
480 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
482 if (vect_print_dump_info (REPORT_DETAILS
))
483 fprintf (vect_dump
, "outer-loop def-stmt defining inner-loop stmt.");
487 case vect_unused_in_scope
:
488 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
489 vect_used_in_scope
: vect_unused_in_scope
;
492 case vect_used_in_outer_by_reduction
:
493 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
494 relevant
= vect_used_by_reduction
;
497 case vect_used_in_outer
:
498 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
499 relevant
= vect_used_in_scope
;
502 case vect_used_in_scope
:
510 /* case 3b: inner-loop stmt defining an outer-loop stmt:
511 outer-loop-header-bb:
515 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
517 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
519 if (vect_print_dump_info (REPORT_DETAILS
))
520 fprintf (vect_dump
, "inner-loop def-stmt defining outer-loop stmt.");
524 case vect_unused_in_scope
:
525 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
526 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
527 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
530 case vect_used_by_reduction
:
531 relevant
= vect_used_in_outer_by_reduction
;
534 case vect_used_in_scope
:
535 relevant
= vect_used_in_outer
;
543 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
,
544 is_pattern_stmt_p (stmt_vinfo
));
549 /* Function vect_mark_stmts_to_be_vectorized.
551 Not all stmts in the loop need to be vectorized. For example:
560 Stmt 1 and 3 do not need to be vectorized, because loop control and
561 addressing of vectorized data-refs are handled differently.
563 This pass detects such stmts. */
566 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
568 VEC(gimple
,heap
) *worklist
;
569 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
570 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
571 unsigned int nbbs
= loop
->num_nodes
;
572 gimple_stmt_iterator si
;
575 stmt_vec_info stmt_vinfo
;
579 enum vect_relevant relevant
, tmp_relevant
;
580 enum vect_def_type def_type
;
582 if (vect_print_dump_info (REPORT_DETAILS
))
583 fprintf (vect_dump
, "=== vect_mark_stmts_to_be_vectorized ===");
585 worklist
= VEC_alloc (gimple
, heap
, 64);
587 /* 1. Init worklist. */
588 for (i
= 0; i
< nbbs
; i
++)
591 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
594 if (vect_print_dump_info (REPORT_DETAILS
))
596 fprintf (vect_dump
, "init: phi relevant? ");
597 print_gimple_stmt (vect_dump
, phi
, 0, TDF_SLIM
);
600 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
601 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
, false);
603 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
605 stmt
= gsi_stmt (si
);
606 if (vect_print_dump_info (REPORT_DETAILS
))
608 fprintf (vect_dump
, "init: stmt relevant? ");
609 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
612 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
613 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
, false);
617 /* 2. Process_worklist */
618 while (VEC_length (gimple
, worklist
) > 0)
623 stmt
= VEC_pop (gimple
, worklist
);
624 if (vect_print_dump_info (REPORT_DETAILS
))
626 fprintf (vect_dump
, "worklist: examine stmt: ");
627 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
630 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
631 (DEF_STMT) as relevant/irrelevant and live/dead according to the
632 liveness and relevance properties of STMT. */
633 stmt_vinfo
= vinfo_for_stmt (stmt
);
634 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
635 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
637 /* Generally, the liveness and relevance properties of STMT are
638 propagated as is to the DEF_STMTs of its USEs:
639 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
640 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
642 One exception is when STMT has been identified as defining a reduction
643 variable; in this case we set the liveness/relevance as follows:
645 relevant = vect_used_by_reduction
646 This is because we distinguish between two kinds of relevant stmts -
647 those that are used by a reduction computation, and those that are
648 (also) used by a regular computation. This allows us later on to
649 identify stmts that are used solely by a reduction, and therefore the
650 order of the results that they produce does not have to be kept. */
652 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
653 tmp_relevant
= relevant
;
656 case vect_reduction_def
:
657 switch (tmp_relevant
)
659 case vect_unused_in_scope
:
660 relevant
= vect_used_by_reduction
;
663 case vect_used_by_reduction
:
664 if (gimple_code (stmt
) == GIMPLE_PHI
)
669 if (vect_print_dump_info (REPORT_DETAILS
))
670 fprintf (vect_dump
, "unsupported use of reduction.");
672 VEC_free (gimple
, heap
, worklist
);
679 case vect_nested_cycle
:
680 if (tmp_relevant
!= vect_unused_in_scope
681 && tmp_relevant
!= vect_used_in_outer_by_reduction
682 && tmp_relevant
!= vect_used_in_outer
)
684 if (vect_print_dump_info (REPORT_DETAILS
))
685 fprintf (vect_dump
, "unsupported use of nested cycle.");
687 VEC_free (gimple
, heap
, worklist
);
694 case vect_double_reduction_def
:
695 if (tmp_relevant
!= vect_unused_in_scope
696 && tmp_relevant
!= vect_used_by_reduction
)
698 if (vect_print_dump_info (REPORT_DETAILS
))
699 fprintf (vect_dump
, "unsupported use of double reduction.");
701 VEC_free (gimple
, heap
, worklist
);
712 if (is_pattern_stmt_p (stmt_vinfo
))
714 /* Pattern statements are not inserted into the code, so
715 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
716 have to scan the RHS or function arguments instead. */
717 if (is_gimple_assign (stmt
))
719 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
720 tree op
= gimple_assign_rhs1 (stmt
);
723 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
725 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
726 live_p
, relevant
, &worklist
, false)
727 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
728 live_p
, relevant
, &worklist
, false))
730 VEC_free (gimple
, heap
, worklist
);
735 for (; i
< gimple_num_ops (stmt
); i
++)
737 op
= gimple_op (stmt
, i
);
738 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
741 VEC_free (gimple
, heap
, worklist
);
746 else if (is_gimple_call (stmt
))
748 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
750 tree arg
= gimple_call_arg (stmt
, i
);
751 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
754 VEC_free (gimple
, heap
, worklist
);
761 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
763 tree op
= USE_FROM_PTR (use_p
);
764 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
767 VEC_free (gimple
, heap
, worklist
);
772 if (STMT_VINFO_GATHER_P (stmt_vinfo
))
775 tree decl
= vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
777 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
780 VEC_free (gimple
, heap
, worklist
);
784 } /* while worklist */
786 VEC_free (gimple
, heap
, worklist
);
791 /* Function vect_model_simple_cost.
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
798 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
799 enum vect_def_type
*dt
,
800 stmt_vector_for_cost
*prologue_cost_vec
,
801 stmt_vector_for_cost
*body_cost_vec
)
804 int inside_cost
= 0, prologue_cost
= 0;
806 /* The SLP costs were already calculated during SLP tree build. */
807 if (PURE_SLP_STMT (stmt_info
))
810 /* FORNOW: Assuming maximum 2 args per stmts. */
811 for (i
= 0; i
< 2; i
++)
812 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
813 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
814 stmt_info
, 0, vect_prologue
);
816 /* Pass the inside-of-loop statements to the target-specific cost model. */
817 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
818 stmt_info
, 0, vect_body
);
820 if (vect_print_dump_info (REPORT_COST
))
821 fprintf (vect_dump
, "vect_model_simple_cost: inside_cost = %d, "
822 "prologue_cost = %d .", inside_cost
, prologue_cost
);
826 /* Model cost for type demotion and promotion operations. PWR is normally
827 zero for single-step promotions and demotions. It will be one if
828 two-step promotion/demotion is required, and so on. Each additional
829 step doubles the number of instructions required. */
832 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
833 enum vect_def_type
*dt
, int pwr
)
836 int inside_cost
= 0, prologue_cost
= 0;
837 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
838 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
839 void *target_cost_data
;
841 /* The SLP costs were already calculated during SLP tree build. */
842 if (PURE_SLP_STMT (stmt_info
))
846 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
848 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
850 for (i
= 0; i
< pwr
+ 1; i
++)
852 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
854 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
855 vec_promote_demote
, stmt_info
, 0,
859 /* FORNOW: Assuming maximum 2 args per stmts. */
860 for (i
= 0; i
< 2; i
++)
861 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
862 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
863 stmt_info
, 0, vect_prologue
);
865 if (vect_print_dump_info (REPORT_COST
))
866 fprintf (vect_dump
, "vect_model_promotion_demotion_cost: inside_cost = %d, "
867 "prologue_cost = %d .", inside_cost
, prologue_cost
);
870 /* Function vect_cost_group_size
872 For grouped load or store, return the group_size only if it is the first
873 load or store of a group, else return 1. This ensures that group size is
874 only returned once per group. */
877 vect_cost_group_size (stmt_vec_info stmt_info
)
879 gimple first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
881 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
882 return GROUP_SIZE (stmt_info
);
888 /* Function vect_model_store_cost
890 Models cost for stores. In the case of grouped accesses, one access
891 has the overhead of the grouped access attributed to it. */
894 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
895 bool store_lanes_p
, enum vect_def_type dt
,
897 stmt_vector_for_cost
*prologue_cost_vec
,
898 stmt_vector_for_cost
*body_cost_vec
)
901 unsigned int inside_cost
= 0, prologue_cost
= 0;
902 struct data_reference
*first_dr
;
905 /* The SLP costs were already calculated during SLP tree build. */
906 if (PURE_SLP_STMT (stmt_info
))
909 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
910 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
911 stmt_info
, 0, vect_prologue
);
913 /* Grouped access? */
914 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
918 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
923 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
924 group_size
= vect_cost_group_size (stmt_info
);
927 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
929 /* Not a grouped access. */
933 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
936 /* We assume that the cost of a single store-lanes instruction is
937 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
938 access is instead being provided by a permute-and-store operation,
939 include the cost of the permutes. */
940 if (!store_lanes_p
&& group_size
> 1)
942 /* Uses a high and low interleave operation for each needed permute. */
944 int nstmts
= ncopies
* exact_log2 (group_size
) * group_size
;
945 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
946 stmt_info
, 0, vect_body
);
948 if (vect_print_dump_info (REPORT_COST
))
949 fprintf (vect_dump
, "vect_model_store_cost: strided group_size = %d .",
953 /* Costs of the stores. */
954 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, body_cost_vec
);
956 if (vect_print_dump_info (REPORT_COST
))
957 fprintf (vect_dump
, "vect_model_store_cost: inside_cost = %d, "
958 "prologue_cost = %d .", inside_cost
, prologue_cost
);
962 /* Calculate cost of DR's memory access. */
964 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
965 unsigned int *inside_cost
,
966 stmt_vector_for_cost
*body_cost_vec
)
968 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
969 gimple stmt
= DR_STMT (dr
);
970 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
972 switch (alignment_support_scheme
)
976 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
977 vector_store
, stmt_info
, 0,
980 if (vect_print_dump_info (REPORT_COST
))
981 fprintf (vect_dump
, "vect_model_store_cost: aligned.");
986 case dr_unaligned_supported
:
988 /* Here, we assign an additional cost for the unaligned store. */
989 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
990 unaligned_store
, stmt_info
,
991 DR_MISALIGNMENT (dr
), vect_body
);
993 if (vect_print_dump_info (REPORT_COST
))
994 fprintf (vect_dump
, "vect_model_store_cost: unaligned supported by "
1000 case dr_unaligned_unsupported
:
1002 *inside_cost
= VECT_MAX_COST
;
1004 if (vect_print_dump_info (REPORT_COST
))
1005 fprintf (vect_dump
, "vect_model_store_cost: unsupported access.");
1016 /* Function vect_model_load_cost
1018 Models cost for loads. In the case of grouped accesses, the last access
1019 has the overhead of the grouped access attributed to it. Since unaligned
1020 accesses are supported for loads, we also account for the costs of the
1021 access scheme chosen. */
1024 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1025 bool load_lanes_p
, slp_tree slp_node
,
1026 stmt_vector_for_cost
*prologue_cost_vec
,
1027 stmt_vector_for_cost
*body_cost_vec
)
1031 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1032 unsigned int inside_cost
= 0, prologue_cost
= 0;
1034 /* The SLP costs were already calculated during SLP tree build. */
1035 if (PURE_SLP_STMT (stmt_info
))
1038 /* Grouped accesses? */
1039 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1040 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1042 group_size
= vect_cost_group_size (stmt_info
);
1043 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1045 /* Not a grouped access. */
1052 /* We assume that the cost of a single load-lanes instruction is
1053 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1054 access is instead being provided by a load-and-permute operation,
1055 include the cost of the permutes. */
1056 if (!load_lanes_p
&& group_size
> 1)
1058 /* Uses an even and odd extract operations for each needed permute. */
1059 int nstmts
= ncopies
* exact_log2 (group_size
) * group_size
;
1060 inside_cost
+= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1061 stmt_info
, 0, vect_body
);
1063 if (vect_print_dump_info (REPORT_COST
))
1064 fprintf (vect_dump
, "vect_model_load_cost: strided group_size = %d .",
1068 /* The loads themselves. */
1069 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1071 /* N scalar loads plus gathering them into a vector. */
1072 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1073 inside_cost
+= record_stmt_cost (body_cost_vec
,
1074 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1075 scalar_load
, stmt_info
, 0, vect_body
);
1076 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1077 stmt_info
, 0, vect_body
);
1080 vect_get_load_cost (first_dr
, ncopies
,
1081 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1082 || group_size
> 1 || slp_node
),
1083 &inside_cost
, &prologue_cost
,
1084 prologue_cost_vec
, body_cost_vec
, true);
1086 if (vect_print_dump_info (REPORT_COST
))
1087 fprintf (vect_dump
, "vect_model_load_cost: inside_cost = %d, "
1088 "prologue_cost = %d .", inside_cost
, prologue_cost
);
1092 /* Calculate cost of DR's memory access. */
1094 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1095 bool add_realign_cost
, unsigned int *inside_cost
,
1096 unsigned int *prologue_cost
,
1097 stmt_vector_for_cost
*prologue_cost_vec
,
1098 stmt_vector_for_cost
*body_cost_vec
,
1099 bool record_prologue_costs
)
1101 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1102 gimple stmt
= DR_STMT (dr
);
1103 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1105 switch (alignment_support_scheme
)
1109 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1110 stmt_info
, 0, vect_body
);
1112 if (vect_print_dump_info (REPORT_COST
))
1113 fprintf (vect_dump
, "vect_model_load_cost: aligned.");
1117 case dr_unaligned_supported
:
1119 /* Here, we assign an additional cost for the unaligned load. */
1120 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1121 unaligned_load
, stmt_info
,
1122 DR_MISALIGNMENT (dr
), vect_body
);
1124 if (vect_print_dump_info (REPORT_COST
))
1125 fprintf (vect_dump
, "vect_model_load_cost: unaligned supported by "
1130 case dr_explicit_realign
:
1132 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1133 vector_load
, stmt_info
, 0, vect_body
);
1134 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1135 vec_perm
, stmt_info
, 0, vect_body
);
1137 /* FIXME: If the misalignment remains fixed across the iterations of
1138 the containing loop, the following cost should be added to the
1140 if (targetm
.vectorize
.builtin_mask_for_load
)
1141 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1142 stmt_info
, 0, vect_body
);
1144 if (vect_print_dump_info (REPORT_COST
))
1145 fprintf (vect_dump
, "vect_model_load_cost: explicit realign");
1149 case dr_explicit_realign_optimized
:
1151 if (vect_print_dump_info (REPORT_COST
))
1152 fprintf (vect_dump
, "vect_model_load_cost: unaligned software "
1155 /* Unaligned software pipeline has a load of an address, an initial
1156 load, and possibly a mask operation to "prime" the loop. However,
1157 if this is an access in a group of loads, which provide grouped
1158 access, then the above cost should only be considered for one
1159 access in the group. Inside the loop, there is a load op
1160 and a realignment op. */
1162 if (add_realign_cost
&& record_prologue_costs
)
1164 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1165 vector_stmt
, stmt_info
,
1167 if (targetm
.vectorize
.builtin_mask_for_load
)
1168 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1169 vector_stmt
, stmt_info
,
1173 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1174 stmt_info
, 0, vect_body
);
1175 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1176 stmt_info
, 0, vect_body
);
1178 if (vect_print_dump_info (REPORT_COST
))
1180 "vect_model_load_cost: explicit realign optimized");
1185 case dr_unaligned_unsupported
:
1187 *inside_cost
= VECT_MAX_COST
;
1189 if (vect_print_dump_info (REPORT_COST
))
1190 fprintf (vect_dump
, "vect_model_load_cost: unsupported access.");
1200 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1201 the loop preheader for the vectorized stmt STMT. */
1204 vect_init_vector_1 (gimple stmt
, gimple new_stmt
, gimple_stmt_iterator
*gsi
)
1207 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1210 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1211 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1215 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1219 if (nested_in_vect_loop_p (loop
, stmt
))
1222 pe
= loop_preheader_edge (loop
);
1223 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1224 gcc_assert (!new_bb
);
1228 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1230 gimple_stmt_iterator gsi_bb_start
;
1232 gcc_assert (bb_vinfo
);
1233 bb
= BB_VINFO_BB (bb_vinfo
);
1234 gsi_bb_start
= gsi_after_labels (bb
);
1235 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1239 if (vect_print_dump_info (REPORT_DETAILS
))
1241 fprintf (vect_dump
, "created new init_stmt: ");
1242 print_gimple_stmt (vect_dump
, new_stmt
, 0, TDF_SLIM
);
1246 /* Function vect_init_vector.
1248 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1249 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1250 vector type a vector with all elements equal to VAL is created first.
1251 Place the initialization at BSI if it is not NULL. Otherwise, place the
1252 initialization at the loop preheader.
1253 Return the DEF of INIT_STMT.
1254 It will be used in the vectorization of STMT. */
1257 vect_init_vector (gimple stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1264 if (TREE_CODE (type
) == VECTOR_TYPE
1265 && TREE_CODE (TREE_TYPE (val
)) != VECTOR_TYPE
)
1267 if (!types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1269 if (CONSTANT_CLASS_P (val
))
1270 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (type
), val
);
1273 new_temp
= make_ssa_name (TREE_TYPE (type
), NULL
);
1274 init_stmt
= gimple_build_assign_with_ops (NOP_EXPR
,
1277 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1281 val
= build_vector_from_val (type
, val
);
1284 new_var
= vect_get_new_vect_var (type
, vect_simple_var
, "cst_");
1285 init_stmt
= gimple_build_assign (new_var
, val
);
1286 new_temp
= make_ssa_name (new_var
, init_stmt
);
1287 gimple_assign_set_lhs (init_stmt
, new_temp
);
1288 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1289 vec_oprnd
= gimple_assign_lhs (init_stmt
);
1294 /* Function vect_get_vec_def_for_operand.
1296 OP is an operand in STMT. This function returns a (vector) def that will be
1297 used in the vectorized stmt for STMT.
1299 In the case that OP is an SSA_NAME which is defined in the loop, then
1300 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1302 In case OP is an invariant or constant, a new stmt that creates a vector def
1303 needs to be introduced. */
1306 vect_get_vec_def_for_operand (tree op
, gimple stmt
, tree
*scalar_def
)
1311 stmt_vec_info def_stmt_info
= NULL
;
1312 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1313 unsigned int nunits
;
1314 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1316 enum vect_def_type dt
;
1320 if (vect_print_dump_info (REPORT_DETAILS
))
1322 fprintf (vect_dump
, "vect_get_vec_def_for_operand: ");
1323 print_generic_expr (vect_dump
, op
, TDF_SLIM
);
1326 is_simple_use
= vect_is_simple_use (op
, stmt
, loop_vinfo
, NULL
,
1327 &def_stmt
, &def
, &dt
);
1328 gcc_assert (is_simple_use
);
1329 if (vect_print_dump_info (REPORT_DETAILS
))
1333 fprintf (vect_dump
, "def = ");
1334 print_generic_expr (vect_dump
, def
, TDF_SLIM
);
1338 fprintf (vect_dump
, " def_stmt = ");
1339 print_gimple_stmt (vect_dump
, def_stmt
, 0, TDF_SLIM
);
1345 /* Case 1: operand is a constant. */
1346 case vect_constant_def
:
1348 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1349 gcc_assert (vector_type
);
1350 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1355 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1356 if (vect_print_dump_info (REPORT_DETAILS
))
1357 fprintf (vect_dump
, "Create vector_cst. nunits = %d", nunits
);
1359 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1362 /* Case 2: operand is defined outside the loop - loop invariant. */
1363 case vect_external_def
:
1365 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1366 gcc_assert (vector_type
);
1371 /* Create 'vec_inv = {inv,inv,..,inv}' */
1372 if (vect_print_dump_info (REPORT_DETAILS
))
1373 fprintf (vect_dump
, "Create vector_inv.");
1375 return vect_init_vector (stmt
, def
, vector_type
, NULL
);
1378 /* Case 3: operand is defined inside the loop. */
1379 case vect_internal_def
:
1382 *scalar_def
= NULL
/* FIXME tuples: def_stmt*/;
1384 /* Get the def from the vectorized stmt. */
1385 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1387 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1388 /* Get vectorized pattern statement. */
1390 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1391 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1392 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1393 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1394 gcc_assert (vec_stmt
);
1395 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1396 vec_oprnd
= PHI_RESULT (vec_stmt
);
1397 else if (is_gimple_call (vec_stmt
))
1398 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1400 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1404 /* Case 4: operand is defined by a loop header phi - reduction */
1405 case vect_reduction_def
:
1406 case vect_double_reduction_def
:
1407 case vect_nested_cycle
:
1411 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1412 loop
= (gimple_bb (def_stmt
))->loop_father
;
1414 /* Get the def before the loop */
1415 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
1416 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
1419 /* Case 5: operand is defined by loop-header phi - induction. */
1420 case vect_induction_def
:
1422 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1424 /* Get the def from the vectorized stmt. */
1425 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1426 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1427 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1428 vec_oprnd
= PHI_RESULT (vec_stmt
);
1430 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1440 /* Function vect_get_vec_def_for_stmt_copy
1442 Return a vector-def for an operand. This function is used when the
1443 vectorized stmt to be created (by the caller to this function) is a "copy"
1444 created in case the vectorized result cannot fit in one vector, and several
1445 copies of the vector-stmt are required. In this case the vector-def is
1446 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1447 of the stmt that defines VEC_OPRND.
1448 DT is the type of the vector def VEC_OPRND.
1451 In case the vectorization factor (VF) is bigger than the number
1452 of elements that can fit in a vectype (nunits), we have to generate
1453 more than one vector stmt to vectorize the scalar stmt. This situation
1454 arises when there are multiple data-types operated upon in the loop; the
1455 smallest data-type determines the VF, and as a result, when vectorizing
1456 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1457 vector stmt (each computing a vector of 'nunits' results, and together
1458 computing 'VF' results in each iteration). This function is called when
1459 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1460 which VF=16 and nunits=4, so the number of copies required is 4):
1462 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1464 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1465 VS1.1: vx.1 = memref1 VS1.2
1466 VS1.2: vx.2 = memref2 VS1.3
1467 VS1.3: vx.3 = memref3
1469 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1470 VSnew.1: vz1 = vx.1 + ... VSnew.2
1471 VSnew.2: vz2 = vx.2 + ... VSnew.3
1472 VSnew.3: vz3 = vx.3 + ...
1474 The vectorization of S1 is explained in vectorizable_load.
1475 The vectorization of S2:
1476 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1477 the function 'vect_get_vec_def_for_operand' is called to
1478 get the relevant vector-def for each operand of S2. For operand x it
1479 returns the vector-def 'vx.0'.
1481 To create the remaining copies of the vector-stmt (VSnew.j), this
1482 function is called to get the relevant vector-def for each operand. It is
1483 obtained from the respective VS1.j stmt, which is recorded in the
1484 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1486 For example, to obtain the vector-def 'vx.1' in order to create the
1487 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1488 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1489 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1490 and return its def ('vx.1').
1491 Overall, to create the above sequence this function will be called 3 times:
1492 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1493 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1494 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1497 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1499 gimple vec_stmt_for_operand
;
1500 stmt_vec_info def_stmt_info
;
1502 /* Do nothing; can reuse same def. */
1503 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1506 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1507 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1508 gcc_assert (def_stmt_info
);
1509 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1510 gcc_assert (vec_stmt_for_operand
);
1511 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1512 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1513 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1515 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1520 /* Get vectorized definitions for the operands to create a copy of an original
1521 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1524 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1525 VEC(tree
,heap
) **vec_oprnds0
,
1526 VEC(tree
,heap
) **vec_oprnds1
)
1528 tree vec_oprnd
= VEC_pop (tree
, *vec_oprnds0
);
1530 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1531 VEC_quick_push (tree
, *vec_oprnds0
, vec_oprnd
);
1533 if (vec_oprnds1
&& *vec_oprnds1
)
1535 vec_oprnd
= VEC_pop (tree
, *vec_oprnds1
);
1536 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1537 VEC_quick_push (tree
, *vec_oprnds1
, vec_oprnd
);
1542 /* Get vectorized definitions for OP0 and OP1.
1543 REDUC_INDEX is the index of reduction operand in case of reduction,
1544 and -1 otherwise. */
1547 vect_get_vec_defs (tree op0
, tree op1
, gimple stmt
,
1548 VEC (tree
, heap
) **vec_oprnds0
,
1549 VEC (tree
, heap
) **vec_oprnds1
,
1550 slp_tree slp_node
, int reduc_index
)
1554 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1555 VEC (tree
, heap
) *ops
= VEC_alloc (tree
, heap
, nops
);
1556 VEC (slp_void_p
, heap
) *vec_defs
= VEC_alloc (slp_void_p
, heap
, nops
);
1558 VEC_quick_push (tree
, ops
, op0
);
1560 VEC_quick_push (tree
, ops
, op1
);
1562 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
1564 *vec_oprnds0
= (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 0);
1566 *vec_oprnds1
= (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 1);
1568 VEC_free (tree
, heap
, ops
);
1569 VEC_free (slp_void_p
, heap
, vec_defs
);
1575 *vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
1576 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1577 VEC_quick_push (tree
, *vec_oprnds0
, vec_oprnd
);
1581 *vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
1582 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
1583 VEC_quick_push (tree
, *vec_oprnds1
, vec_oprnd
);
1589 /* Function vect_finish_stmt_generation.
1591 Insert a new stmt. */
1594 vect_finish_stmt_generation (gimple stmt
, gimple vec_stmt
,
1595 gimple_stmt_iterator
*gsi
)
1597 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1598 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1599 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1601 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1603 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1605 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, loop_vinfo
,
1608 if (vect_print_dump_info (REPORT_DETAILS
))
1610 fprintf (vect_dump
, "add new stmt: ");
1611 print_gimple_stmt (vect_dump
, vec_stmt
, 0, TDF_SLIM
);
1614 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1617 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1618 a function declaration if the target has a vectorized version
1619 of the function, or NULL_TREE if the function cannot be vectorized. */
1622 vectorizable_function (gimple call
, tree vectype_out
, tree vectype_in
)
1624 tree fndecl
= gimple_call_fndecl (call
);
1626 /* We only handle functions that do not read or clobber memory -- i.e.
1627 const or novops ones. */
1628 if (!(gimple_call_flags (call
) & (ECF_CONST
| ECF_NOVOPS
)))
1632 || TREE_CODE (fndecl
) != FUNCTION_DECL
1633 || !DECL_BUILT_IN (fndecl
))
1636 return targetm
.vectorize
.builtin_vectorized_function (fndecl
, vectype_out
,
1640 /* Function vectorizable_call.
1642 Check if STMT performs a function call that can be vectorized.
1643 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1644 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1645 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1648 vectorizable_call (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
1654 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
1655 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
1656 tree vectype_out
, vectype_in
;
1659 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1660 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1661 tree fndecl
, new_temp
, def
, rhs_type
;
1663 enum vect_def_type dt
[3]
1664 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
1665 gimple new_stmt
= NULL
;
1667 VEC(tree
, heap
) *vargs
= NULL
;
1668 enum { NARROW
, NONE
, WIDEN
} modifier
;
1672 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
1675 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1678 /* Is STMT a vectorizable call? */
1679 if (!is_gimple_call (stmt
))
1682 if (TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
1685 if (stmt_can_throw_internal (stmt
))
1688 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
1690 /* Process function arguments. */
1691 rhs_type
= NULL_TREE
;
1692 vectype_in
= NULL_TREE
;
1693 nargs
= gimple_call_num_args (stmt
);
1695 /* Bail out if the function has more than three arguments, we do not have
1696 interesting builtin functions to vectorize with more than two arguments
1697 except for fma. No arguments is also not good. */
1698 if (nargs
== 0 || nargs
> 3)
1701 for (i
= 0; i
< nargs
; i
++)
1705 op
= gimple_call_arg (stmt
, i
);
1707 /* We can only handle calls with arguments of the same type. */
1709 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
1711 if (vect_print_dump_info (REPORT_DETAILS
))
1712 fprintf (vect_dump
, "argument types differ.");
1716 rhs_type
= TREE_TYPE (op
);
1718 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
1719 &def_stmt
, &def
, &dt
[i
], &opvectype
))
1721 if (vect_print_dump_info (REPORT_DETAILS
))
1722 fprintf (vect_dump
, "use not simple.");
1727 vectype_in
= opvectype
;
1729 && opvectype
!= vectype_in
)
1731 if (vect_print_dump_info (REPORT_DETAILS
))
1732 fprintf (vect_dump
, "argument vector types differ.");
1736 /* If all arguments are external or constant defs use a vector type with
1737 the same size as the output vector type. */
1739 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
1741 gcc_assert (vectype_in
);
1744 if (vect_print_dump_info (REPORT_DETAILS
))
1746 fprintf (vect_dump
, "no vectype for scalar type ");
1747 print_generic_expr (vect_dump
, rhs_type
, TDF_SLIM
);
1754 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
1755 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
1756 if (nunits_in
== nunits_out
/ 2)
1758 else if (nunits_out
== nunits_in
)
1760 else if (nunits_out
== nunits_in
/ 2)
1765 /* For now, we only vectorize functions if a target specific builtin
1766 is available. TODO -- in some cases, it might be profitable to
1767 insert the calls for pieces of the vector, in order to be able
1768 to vectorize other operations in the loop. */
1769 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
1770 if (fndecl
== NULL_TREE
)
1772 if (vect_print_dump_info (REPORT_DETAILS
))
1773 fprintf (vect_dump
, "function is not vectorizable.");
1778 gcc_assert (!gimple_vuse (stmt
));
1780 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
1782 else if (modifier
== NARROW
)
1783 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
1785 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
1787 /* Sanity check: make sure that at least one copy of the vectorized stmt
1788 needs to be generated. */
1789 gcc_assert (ncopies
>= 1);
1791 if (!vec_stmt
) /* transformation not required. */
1793 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1794 if (vect_print_dump_info (REPORT_DETAILS
))
1795 fprintf (vect_dump
, "=== vectorizable_call ===");
1796 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
1802 if (vect_print_dump_info (REPORT_DETAILS
))
1803 fprintf (vect_dump
, "transform call.");
1806 scalar_dest
= gimple_call_lhs (stmt
);
1807 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
1809 prev_stmt_info
= NULL
;
1813 for (j
= 0; j
< ncopies
; ++j
)
1815 /* Build argument list for the vectorized call. */
1817 vargs
= VEC_alloc (tree
, heap
, nargs
);
1819 VEC_truncate (tree
, vargs
, 0);
1823 VEC (slp_void_p
, heap
) *vec_defs
1824 = VEC_alloc (slp_void_p
, heap
, nargs
);
1825 VEC (tree
, heap
) *vec_oprnds0
;
1827 for (i
= 0; i
< nargs
; i
++)
1828 VEC_quick_push (tree
, vargs
, gimple_call_arg (stmt
, i
));
1829 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
1831 = (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 0);
1833 /* Arguments are ready. Create the new vector stmt. */
1834 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vec_oprnd0
)
1837 for (k
= 0; k
< nargs
; k
++)
1839 VEC (tree
, heap
) *vec_oprndsk
1840 = (VEC (tree
, heap
) *)
1841 VEC_index (slp_void_p
, vec_defs
, k
);
1842 VEC_replace (tree
, vargs
, k
,
1843 VEC_index (tree
, vec_oprndsk
, i
));
1845 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1846 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1847 gimple_call_set_lhs (new_stmt
, new_temp
);
1848 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1849 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
1853 for (i
= 0; i
< nargs
; i
++)
1855 VEC (tree
, heap
) *vec_oprndsi
1856 = (VEC (tree
, heap
) *)
1857 VEC_index (slp_void_p
, vec_defs
, i
);
1858 VEC_free (tree
, heap
, vec_oprndsi
);
1860 VEC_free (slp_void_p
, heap
, vec_defs
);
1864 for (i
= 0; i
< nargs
; i
++)
1866 op
= gimple_call_arg (stmt
, i
);
1869 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1872 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
1874 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1877 VEC_quick_push (tree
, vargs
, vec_oprnd0
);
1880 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1881 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1882 gimple_call_set_lhs (new_stmt
, new_temp
);
1883 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1886 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
1888 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
1890 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
1896 for (j
= 0; j
< ncopies
; ++j
)
1898 /* Build argument list for the vectorized call. */
1900 vargs
= VEC_alloc (tree
, heap
, nargs
* 2);
1902 VEC_truncate (tree
, vargs
, 0);
1906 VEC (slp_void_p
, heap
) *vec_defs
1907 = VEC_alloc (slp_void_p
, heap
, nargs
);
1908 VEC (tree
, heap
) *vec_oprnds0
;
1910 for (i
= 0; i
< nargs
; i
++)
1911 VEC_quick_push (tree
, vargs
, gimple_call_arg (stmt
, i
));
1912 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
1914 = (VEC (tree
, heap
) *) VEC_index (slp_void_p
, vec_defs
, 0);
1916 /* Arguments are ready. Create the new vector stmt. */
1917 for (i
= 0; VEC_iterate (tree
, vec_oprnds0
, i
, vec_oprnd0
);
1921 VEC_truncate (tree
, vargs
, 0);
1922 for (k
= 0; k
< nargs
; k
++)
1924 VEC (tree
, heap
) *vec_oprndsk
1925 = (VEC (tree
, heap
) *)
1926 VEC_index (slp_void_p
, vec_defs
, k
);
1927 VEC_quick_push (tree
, vargs
,
1928 VEC_index (tree
, vec_oprndsk
, i
));
1929 VEC_quick_push (tree
, vargs
,
1930 VEC_index (tree
, vec_oprndsk
, i
+ 1));
1932 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1933 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1934 gimple_call_set_lhs (new_stmt
, new_temp
);
1935 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1936 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
1940 for (i
= 0; i
< nargs
; i
++)
1942 VEC (tree
, heap
) *vec_oprndsi
1943 = (VEC (tree
, heap
) *)
1944 VEC_index (slp_void_p
, vec_defs
, i
);
1945 VEC_free (tree
, heap
, vec_oprndsi
);
1947 VEC_free (slp_void_p
, heap
, vec_defs
);
1951 for (i
= 0; i
< nargs
; i
++)
1953 op
= gimple_call_arg (stmt
, i
);
1957 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1959 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1963 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
1965 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
1967 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1970 VEC_quick_push (tree
, vargs
, vec_oprnd0
);
1971 VEC_quick_push (tree
, vargs
, vec_oprnd1
);
1974 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1975 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1976 gimple_call_set_lhs (new_stmt
, new_temp
);
1977 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1980 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
1982 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
1984 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
1987 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
1992 /* No current target implements this case. */
1996 VEC_free (tree
, heap
, vargs
);
1998 /* Update the exception handling table with the vector stmt if necessary. */
1999 if (maybe_clean_or_replace_eh_stmt (stmt
, *vec_stmt
))
2000 gimple_purge_dead_eh_edges (gimple_bb (stmt
));
2002 /* The call in STMT might prevent it from being removed in dce.
2003 We however cannot remove it here, due to the way the ssa name
2004 it defines is mapped to the new definition. So just replace
2005 rhs of the statement with something harmless. */
2010 type
= TREE_TYPE (scalar_dest
);
2011 if (is_pattern_stmt_p (stmt_info
))
2012 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2014 lhs
= gimple_call_lhs (stmt
);
2015 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2016 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2017 set_vinfo_for_stmt (stmt
, NULL
);
2018 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2019 gsi_replace (gsi
, new_stmt
, false);
2020 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
2026 /* Function vect_gen_widened_results_half
2028 Create a vector stmt whose code, type, number of arguments, and result
2029 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
2030 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2031 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2032 needs to be created (DECL is a function-decl of a target-builtin).
2033 STMT is the original scalar stmt that we are vectorizing. */
2036 vect_gen_widened_results_half (enum tree_code code
,
2038 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
2039 tree vec_dest
, gimple_stmt_iterator
*gsi
,
2045 /* Generate half of the widened result: */
2046 if (code
== CALL_EXPR
)
2048 /* Target specific support */
2049 if (op_type
== binary_op
)
2050 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
2052 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
2053 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2054 gimple_call_set_lhs (new_stmt
, new_temp
);
2058 /* Generic support */
2059 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
2060 if (op_type
!= binary_op
)
2062 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vec_oprnd0
,
2064 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2065 gimple_assign_set_lhs (new_stmt
, new_temp
);
2067 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2073 /* Get vectorized definitions for loop-based vectorization. For the first
2074 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2075 scalar operand), and for the rest we get a copy with
2076 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2077 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2078 The vectors are collected into VEC_OPRNDS. */
2081 vect_get_loop_based_defs (tree
*oprnd
, gimple stmt
, enum vect_def_type dt
,
2082 VEC (tree
, heap
) **vec_oprnds
, int multi_step_cvt
)
2086 /* Get first vector operand. */
2087 /* All the vector operands except the very first one (that is scalar oprnd)
2089 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
2090 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
2092 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
2094 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
2096 /* Get second vector operand. */
2097 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
2098 VEC_quick_push (tree
, *vec_oprnds
, vec_oprnd
);
2102 /* For conversion in multiple steps, continue to get operands
2105 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
2109 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2110 For multi-step conversions store the resulting vectors and call the function
2114 vect_create_vectorized_demotion_stmts (VEC (tree
, heap
) **vec_oprnds
,
2115 int multi_step_cvt
, gimple stmt
,
2116 VEC (tree
, heap
) *vec_dsts
,
2117 gimple_stmt_iterator
*gsi
,
2118 slp_tree slp_node
, enum tree_code code
,
2119 stmt_vec_info
*prev_stmt_info
)
2122 tree vop0
, vop1
, new_tmp
, vec_dest
;
2124 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2126 vec_dest
= VEC_pop (tree
, vec_dsts
);
2128 for (i
= 0; i
< VEC_length (tree
, *vec_oprnds
); i
+= 2)
2130 /* Create demotion operation. */
2131 vop0
= VEC_index (tree
, *vec_oprnds
, i
);
2132 vop1
= VEC_index (tree
, *vec_oprnds
, i
+ 1);
2133 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
2134 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
2135 gimple_assign_set_lhs (new_stmt
, new_tmp
);
2136 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2139 /* Store the resulting vector for next recursive call. */
2140 VEC_replace (tree
, *vec_oprnds
, i
/2, new_tmp
);
2143 /* This is the last step of the conversion sequence. Store the
2144 vectors in SLP_NODE or in vector info of the scalar statement
2145 (or in STMT_VINFO_RELATED_STMT chain). */
2147 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
2150 if (!*prev_stmt_info
)
2151 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2153 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
2155 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2160 /* For multi-step demotion operations we first generate demotion operations
2161 from the source type to the intermediate types, and then combine the
2162 results (stored in VEC_OPRNDS) in demotion operation to the destination
2166 /* At each level of recursion we have half of the operands we had at the
2168 VEC_truncate (tree
, *vec_oprnds
, (i
+1)/2);
2169 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
2170 stmt
, vec_dsts
, gsi
, slp_node
,
2171 VEC_PACK_TRUNC_EXPR
,
2175 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
2179 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2180 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2181 the resulting vectors and call the function recursively. */
2184 vect_create_vectorized_promotion_stmts (VEC (tree
, heap
) **vec_oprnds0
,
2185 VEC (tree
, heap
) **vec_oprnds1
,
2186 gimple stmt
, tree vec_dest
,
2187 gimple_stmt_iterator
*gsi
,
2188 enum tree_code code1
,
2189 enum tree_code code2
, tree decl1
,
2190 tree decl2
, int op_type
)
2193 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
2194 gimple new_stmt1
, new_stmt2
;
2195 VEC (tree
, heap
) *vec_tmp
= NULL
;
2197 vec_tmp
= VEC_alloc (tree
, heap
, VEC_length (tree
, *vec_oprnds0
) * 2);
2198 FOR_EACH_VEC_ELT (tree
, *vec_oprnds0
, i
, vop0
)
2200 if (op_type
== binary_op
)
2201 vop1
= VEC_index (tree
, *vec_oprnds1
, i
);
2205 /* Generate the two halves of promotion operation. */
2206 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
2207 op_type
, vec_dest
, gsi
, stmt
);
2208 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
2209 op_type
, vec_dest
, gsi
, stmt
);
2210 if (is_gimple_call (new_stmt1
))
2212 new_tmp1
= gimple_call_lhs (new_stmt1
);
2213 new_tmp2
= gimple_call_lhs (new_stmt2
);
2217 new_tmp1
= gimple_assign_lhs (new_stmt1
);
2218 new_tmp2
= gimple_assign_lhs (new_stmt2
);
2221 /* Store the results for the next step. */
2222 VEC_quick_push (tree
, vec_tmp
, new_tmp1
);
2223 VEC_quick_push (tree
, vec_tmp
, new_tmp2
);
2226 VEC_free (tree
, heap
, *vec_oprnds0
);
2227 *vec_oprnds0
= vec_tmp
;
2231 /* Check if STMT performs a conversion operation, that can be vectorized.
2232 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2233 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2234 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2237 vectorizable_conversion (gimple stmt
, gimple_stmt_iterator
*gsi
,
2238 gimple
*vec_stmt
, slp_tree slp_node
)
2242 tree op0
, op1
= NULL_TREE
;
2243 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2244 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2245 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2246 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
2247 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
2248 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
2252 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2253 gimple new_stmt
= NULL
;
2254 stmt_vec_info prev_stmt_info
;
2257 tree vectype_out
, vectype_in
;
2259 tree lhs_type
, rhs_type
;
2260 enum { NARROW
, NONE
, WIDEN
} modifier
;
2261 VEC (tree
,heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
2263 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2264 int multi_step_cvt
= 0;
2265 VEC (tree
, heap
) *vec_dsts
= NULL
, *interm_types
= NULL
;
2266 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
2268 enum machine_mode rhs_mode
;
2269 unsigned short fltsz
;
2271 /* Is STMT a vectorizable conversion? */
2273 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2276 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2279 if (!is_gimple_assign (stmt
))
2282 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
2285 code
= gimple_assign_rhs_code (stmt
);
2286 if (!CONVERT_EXPR_CODE_P (code
)
2287 && code
!= FIX_TRUNC_EXPR
2288 && code
!= FLOAT_EXPR
2289 && code
!= WIDEN_MULT_EXPR
2290 && code
!= WIDEN_LSHIFT_EXPR
)
2293 op_type
= TREE_CODE_LENGTH (code
);
2295 /* Check types of lhs and rhs. */
2296 scalar_dest
= gimple_assign_lhs (stmt
);
2297 lhs_type
= TREE_TYPE (scalar_dest
);
2298 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2300 op0
= gimple_assign_rhs1 (stmt
);
2301 rhs_type
= TREE_TYPE (op0
);
2303 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
2304 && !((INTEGRAL_TYPE_P (lhs_type
)
2305 && INTEGRAL_TYPE_P (rhs_type
))
2306 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
2307 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
2310 if ((INTEGRAL_TYPE_P (lhs_type
)
2311 && (TYPE_PRECISION (lhs_type
)
2312 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
2313 || (INTEGRAL_TYPE_P (rhs_type
)
2314 && (TYPE_PRECISION (rhs_type
)
2315 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
)))))
2317 if (vect_print_dump_info (REPORT_DETAILS
))
2319 "type conversion to/from bit-precision unsupported.");
2323 /* Check the operands of the operation. */
2324 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
2325 &def_stmt
, &def
, &dt
[0], &vectype_in
))
2327 if (vect_print_dump_info (REPORT_DETAILS
))
2328 fprintf (vect_dump
, "use not simple.");
2331 if (op_type
== binary_op
)
2335 op1
= gimple_assign_rhs2 (stmt
);
2336 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
2337 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2339 if (CONSTANT_CLASS_P (op0
))
2340 ok
= vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
,
2341 &def_stmt
, &def
, &dt
[1], &vectype_in
);
2343 ok
= vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
2348 if (vect_print_dump_info (REPORT_DETAILS
))
2349 fprintf (vect_dump
, "use not simple.");
2354 /* If op0 is an external or constant defs use a vector type of
2355 the same size as the output vector type. */
2357 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2359 gcc_assert (vectype_in
);
2362 if (vect_print_dump_info (REPORT_DETAILS
))
2364 fprintf (vect_dump
, "no vectype for scalar type ");
2365 print_generic_expr (vect_dump
, rhs_type
, TDF_SLIM
);
2371 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2372 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2373 if (nunits_in
< nunits_out
)
2375 else if (nunits_out
== nunits_in
)
2380 /* Multiple types in SLP are handled by creating the appropriate number of
2381 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2383 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2385 else if (modifier
== NARROW
)
2386 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2388 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2390 /* Sanity check: make sure that at least one copy of the vectorized stmt
2391 needs to be generated. */
2392 gcc_assert (ncopies
>= 1);
2394 /* Supportable by target? */
2398 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
2400 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
2405 if (vect_print_dump_info (REPORT_DETAILS
))
2406 fprintf (vect_dump
, "conversion not supported by target.");
2410 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
2411 &code1
, &code2
, &multi_step_cvt
,
2414 /* Binary widening operation can only be supported directly by the
2416 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
2420 if (code
!= FLOAT_EXPR
2421 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
2422 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
2425 rhs_mode
= TYPE_MODE (rhs_type
);
2426 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
2427 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
2428 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
2429 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
2432 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
2433 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
2434 if (cvt_type
== NULL_TREE
)
2437 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
2439 if (!supportable_convert_operation (code
, vectype_out
,
2440 cvt_type
, &decl1
, &codecvt1
))
2443 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
2444 cvt_type
, &codecvt1
,
2445 &codecvt2
, &multi_step_cvt
,
2449 gcc_assert (multi_step_cvt
== 0);
2451 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
2452 vectype_in
, &code1
, &code2
,
2453 &multi_step_cvt
, &interm_types
))
2457 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
2460 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
2461 codecvt2
= ERROR_MARK
;
2465 VEC_safe_push (tree
, heap
, interm_types
, cvt_type
);
2466 cvt_type
= NULL_TREE
;
2471 gcc_assert (op_type
== unary_op
);
2472 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
2473 &code1
, &multi_step_cvt
,
2477 if (code
!= FIX_TRUNC_EXPR
2478 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
2479 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
2482 rhs_mode
= TYPE_MODE (rhs_type
);
2484 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
2485 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
2486 if (cvt_type
== NULL_TREE
)
2488 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
2491 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
2492 &code1
, &multi_step_cvt
,
2501 if (!vec_stmt
) /* transformation not required. */
2503 if (vect_print_dump_info (REPORT_DETAILS
))
2504 fprintf (vect_dump
, "=== vectorizable_conversion ===");
2505 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
2507 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
2508 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2510 else if (modifier
== NARROW
)
2512 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
2513 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
2517 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
2518 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
2520 VEC_free (tree
, heap
, interm_types
);
2525 if (vect_print_dump_info (REPORT_DETAILS
))
2526 fprintf (vect_dump
, "transform conversion. ncopies = %d.", ncopies
);
2528 if (op_type
== binary_op
)
2530 if (CONSTANT_CLASS_P (op0
))
2531 op0
= fold_convert (TREE_TYPE (op1
), op0
);
2532 else if (CONSTANT_CLASS_P (op1
))
2533 op1
= fold_convert (TREE_TYPE (op0
), op1
);
2536 /* In case of multi-step conversion, we first generate conversion operations
2537 to the intermediate types, and then from that types to the final one.
2538 We create vector destinations for the intermediate type (TYPES) received
2539 from supportable_*_operation, and store them in the correct order
2540 for future use in vect_create_vectorized_*_stmts (). */
2541 vec_dsts
= VEC_alloc (tree
, heap
, multi_step_cvt
+ 1);
2542 vec_dest
= vect_create_destination_var (scalar_dest
,
2543 (cvt_type
&& modifier
== WIDEN
)
2544 ? cvt_type
: vectype_out
);
2545 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
2549 for (i
= VEC_length (tree
, interm_types
) - 1;
2550 VEC_iterate (tree
, interm_types
, i
, intermediate_type
); i
--)
2552 vec_dest
= vect_create_destination_var (scalar_dest
,
2554 VEC_quick_push (tree
, vec_dsts
, vec_dest
);
2559 vec_dest
= vect_create_destination_var (scalar_dest
,
2561 ? vectype_out
: cvt_type
);
2565 if (modifier
== NONE
)
2566 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
2567 else if (modifier
== WIDEN
)
2569 vec_oprnds0
= VEC_alloc (tree
, heap
,
2571 ? vect_pow2 (multi_step_cvt
) : 1));
2572 if (op_type
== binary_op
)
2573 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
2576 vec_oprnds0
= VEC_alloc (tree
, heap
,
2578 ? vect_pow2 (multi_step_cvt
) : 1));
2580 else if (code
== WIDEN_LSHIFT_EXPR
)
2581 vec_oprnds1
= VEC_alloc (tree
, heap
, slp_node
->vec_stmts_size
);
2584 prev_stmt_info
= NULL
;
2588 for (j
= 0; j
< ncopies
; j
++)
2591 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
2594 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
2596 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
2598 /* Arguments are ready, create the new vector stmt. */
2599 if (code1
== CALL_EXPR
)
2601 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2602 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2603 gimple_call_set_lhs (new_stmt
, new_temp
);
2607 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
2608 new_stmt
= gimple_build_assign_with_ops (code1
, vec_dest
,
2610 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2611 gimple_assign_set_lhs (new_stmt
, new_temp
);
2614 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2616 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
2621 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2623 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2624 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2629 /* In case the vectorization factor (VF) is bigger than the number
2630 of elements that we can fit in a vectype (nunits), we have to
2631 generate more than one vector stmt - i.e - we need to "unroll"
2632 the vector stmt by a factor VF/nunits. */
2633 for (j
= 0; j
< ncopies
; j
++)
2640 if (code
== WIDEN_LSHIFT_EXPR
)
2645 /* Store vec_oprnd1 for every vector stmt to be created
2646 for SLP_NODE. We check during the analysis that all
2647 the shift arguments are the same. */
2648 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
2649 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
2651 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
2655 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
2656 &vec_oprnds1
, slp_node
, -1);
2660 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
2661 VEC_quick_push (tree
, vec_oprnds0
, vec_oprnd0
);
2662 if (op_type
== binary_op
)
2664 if (code
== WIDEN_LSHIFT_EXPR
)
2667 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
,
2669 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
2675 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
2676 VEC_truncate (tree
, vec_oprnds0
, 0);
2677 VEC_quick_push (tree
, vec_oprnds0
, vec_oprnd0
);
2678 if (op_type
== binary_op
)
2680 if (code
== WIDEN_LSHIFT_EXPR
)
2683 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
2685 VEC_truncate (tree
, vec_oprnds1
, 0);
2686 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
2690 /* Arguments are ready. Create the new vector stmts. */
2691 for (i
= multi_step_cvt
; i
>= 0; i
--)
2693 tree this_dest
= VEC_index (tree
, vec_dsts
, i
);
2694 enum tree_code c1
= code1
, c2
= code2
;
2695 if (i
== 0 && codecvt2
!= ERROR_MARK
)
2700 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
2702 stmt
, this_dest
, gsi
,
2703 c1
, c2
, decl1
, decl2
,
2707 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
2711 if (codecvt1
== CALL_EXPR
)
2713 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2714 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2715 gimple_call_set_lhs (new_stmt
, new_temp
);
2719 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
2720 new_temp
= make_ssa_name (vec_dest
, NULL
);
2721 new_stmt
= gimple_build_assign_with_ops (codecvt1
,
2726 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2729 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
2732 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
2736 if (!prev_stmt_info
)
2737 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2739 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2740 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2745 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2749 /* In case the vectorization factor (VF) is bigger than the number
2750 of elements that we can fit in a vectype (nunits), we have to
2751 generate more than one vector stmt - i.e - we need to "unroll"
2752 the vector stmt by a factor VF/nunits. */
2753 for (j
= 0; j
< ncopies
; j
++)
2757 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
2761 VEC_truncate (tree
, vec_oprnds0
, 0);
2762 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
2763 vect_pow2 (multi_step_cvt
) - 1);
2766 /* Arguments are ready. Create the new vector stmts. */
2768 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
2770 if (codecvt1
== CALL_EXPR
)
2772 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2773 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2774 gimple_call_set_lhs (new_stmt
, new_temp
);
2778 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
2779 new_temp
= make_ssa_name (vec_dest
, NULL
);
2780 new_stmt
= gimple_build_assign_with_ops (codecvt1
, new_temp
,
2784 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2785 VEC_replace (tree
, vec_oprnds0
, i
, new_temp
);
2788 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
2789 stmt
, vec_dsts
, gsi
,
2794 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2798 VEC_free (tree
, heap
, vec_oprnds0
);
2799 VEC_free (tree
, heap
, vec_oprnds1
);
2800 VEC_free (tree
, heap
, vec_dsts
);
2801 VEC_free (tree
, heap
, interm_types
);
2807 /* Function vectorizable_assignment.
2809 Check if STMT performs an assignment (copy) that can be vectorized.
2810 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2811 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2812 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2815 vectorizable_assignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
2816 gimple
*vec_stmt
, slp_tree slp_node
)
2821 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2822 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2823 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2827 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2828 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2831 VEC(tree
,heap
) *vec_oprnds
= NULL
;
2833 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2834 gimple new_stmt
= NULL
;
2835 stmt_vec_info prev_stmt_info
= NULL
;
2836 enum tree_code code
;
2839 /* Multiple types in SLP are handled by creating the appropriate number of
2840 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2842 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2845 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2847 gcc_assert (ncopies
>= 1);
2849 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2852 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2855 /* Is vectorizable assignment? */
2856 if (!is_gimple_assign (stmt
))
2859 scalar_dest
= gimple_assign_lhs (stmt
);
2860 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
2863 code
= gimple_assign_rhs_code (stmt
);
2864 if (gimple_assign_single_p (stmt
)
2865 || code
== PAREN_EXPR
2866 || CONVERT_EXPR_CODE_P (code
))
2867 op
= gimple_assign_rhs1 (stmt
);
2871 if (code
== VIEW_CONVERT_EXPR
)
2872 op
= TREE_OPERAND (op
, 0);
2874 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2875 &def_stmt
, &def
, &dt
[0], &vectype_in
))
2877 if (vect_print_dump_info (REPORT_DETAILS
))
2878 fprintf (vect_dump
, "use not simple.");
2882 /* We can handle NOP_EXPR conversions that do not change the number
2883 of elements or the vector size. */
2884 if ((CONVERT_EXPR_CODE_P (code
)
2885 || code
== VIEW_CONVERT_EXPR
)
2887 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
2888 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
2889 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
2892 /* We do not handle bit-precision changes. */
2893 if ((CONVERT_EXPR_CODE_P (code
)
2894 || code
== VIEW_CONVERT_EXPR
)
2895 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
2896 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
2897 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
2898 || ((TYPE_PRECISION (TREE_TYPE (op
))
2899 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
2900 /* But a conversion that does not change the bit-pattern is ok. */
2901 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
2902 > TYPE_PRECISION (TREE_TYPE (op
)))
2903 && TYPE_UNSIGNED (TREE_TYPE (op
))))
2905 if (vect_print_dump_info (REPORT_DETAILS
))
2906 fprintf (vect_dump
, "type conversion to/from bit-precision "
2911 if (!vec_stmt
) /* transformation not required. */
2913 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
2914 if (vect_print_dump_info (REPORT_DETAILS
))
2915 fprintf (vect_dump
, "=== vectorizable_assignment ===");
2916 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2921 if (vect_print_dump_info (REPORT_DETAILS
))
2922 fprintf (vect_dump
, "transform assignment.");
2925 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2928 for (j
= 0; j
< ncopies
; j
++)
2932 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
2934 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
2936 /* Arguments are ready. create the new vector stmt. */
2937 FOR_EACH_VEC_ELT (tree
, vec_oprnds
, i
, vop
)
2939 if (CONVERT_EXPR_CODE_P (code
)
2940 || code
== VIEW_CONVERT_EXPR
)
2941 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
2942 new_stmt
= gimple_build_assign (vec_dest
, vop
);
2943 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2944 gimple_assign_set_lhs (new_stmt
, new_temp
);
2945 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2947 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
2954 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2956 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2958 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2961 VEC_free (tree
, heap
, vec_oprnds
);
2966 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2967 either as shift by a scalar or by a vector. */
2970 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
2973 enum machine_mode vec_mode
;
2978 vectype
= get_vectype_for_scalar_type (scalar_type
);
2982 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
2984 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
2986 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
2988 || (optab_handler (optab
, TYPE_MODE (vectype
))
2989 == CODE_FOR_nothing
))
2993 vec_mode
= TYPE_MODE (vectype
);
2994 icode
= (int) optab_handler (optab
, vec_mode
);
2995 if (icode
== CODE_FOR_nothing
)
3002 /* Function vectorizable_shift.
3004 Check if STMT performs a shift operation that can be vectorized.
3005 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3006 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3007 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3010 vectorizable_shift (gimple stmt
, gimple_stmt_iterator
*gsi
,
3011 gimple
*vec_stmt
, slp_tree slp_node
)
3015 tree op0
, op1
= NULL
;
3016 tree vec_oprnd1
= NULL_TREE
;
3017 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3019 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3020 enum tree_code code
;
3021 enum machine_mode vec_mode
;
3025 enum machine_mode optab_op2_mode
;
3028 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3029 gimple new_stmt
= NULL
;
3030 stmt_vec_info prev_stmt_info
;
3037 VEC (tree
, heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
3040 bool scalar_shift_arg
= true;
3041 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3044 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3047 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3050 /* Is STMT a vectorizable binary/unary operation? */
3051 if (!is_gimple_assign (stmt
))
3054 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3057 code
= gimple_assign_rhs_code (stmt
);
3059 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
3060 || code
== RROTATE_EXPR
))
3063 scalar_dest
= gimple_assign_lhs (stmt
);
3064 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3065 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3066 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
3068 if (vect_print_dump_info (REPORT_DETAILS
))
3069 fprintf (vect_dump
, "bit-precision shifts not supported.");
3073 op0
= gimple_assign_rhs1 (stmt
);
3074 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3075 &def_stmt
, &def
, &dt
[0], &vectype
))
3077 if (vect_print_dump_info (REPORT_DETAILS
))
3078 fprintf (vect_dump
, "use not simple.");
3081 /* If op0 is an external or constant def use a vector type with
3082 the same size as the output vector type. */
3084 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
3086 gcc_assert (vectype
);
3089 if (vect_print_dump_info (REPORT_DETAILS
))
3091 fprintf (vect_dump
, "no vectype for scalar type ");
3092 print_generic_expr (vect_dump
, TREE_TYPE (op0
), TDF_SLIM
);
3098 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3099 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3100 if (nunits_out
!= nunits_in
)
3103 op1
= gimple_assign_rhs2 (stmt
);
3104 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3105 &def
, &dt
[1], &op1_vectype
))
3107 if (vect_print_dump_info (REPORT_DETAILS
))
3108 fprintf (vect_dump
, "use not simple.");
3113 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3117 /* Multiple types in SLP are handled by creating the appropriate number of
3118 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3120 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3123 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3125 gcc_assert (ncopies
>= 1);
3127 /* Determine whether the shift amount is a vector, or scalar. If the
3128 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3130 if (dt
[1] == vect_internal_def
&& !slp_node
)
3131 scalar_shift_arg
= false;
3132 else if (dt
[1] == vect_constant_def
3133 || dt
[1] == vect_external_def
3134 || dt
[1] == vect_internal_def
)
3136 /* In SLP, need to check whether the shift count is the same,
3137 in loops if it is a constant or invariant, it is always
3141 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3144 FOR_EACH_VEC_ELT (gimple
, stmts
, k
, slpstmt
)
3145 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
3146 scalar_shift_arg
= false;
3151 if (vect_print_dump_info (REPORT_DETAILS
))
3152 fprintf (vect_dump
, "operand mode requires invariant argument.");
3156 /* Vector shifted by vector. */
3157 if (!scalar_shift_arg
)
3159 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3160 if (vect_print_dump_info (REPORT_DETAILS
))
3161 fprintf (vect_dump
, "vector/vector shift/rotate found.");
3163 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
3164 if (op1_vectype
== NULL_TREE
3165 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
3167 if (vect_print_dump_info (REPORT_DETAILS
))
3168 fprintf (vect_dump
, "unusable type for last operand in"
3169 " vector/vector shift/rotate.");
3173 /* See if the machine has a vector shifted by scalar insn and if not
3174 then see if it has a vector shifted by vector insn. */
3177 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
3179 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
3181 if (vect_print_dump_info (REPORT_DETAILS
))
3182 fprintf (vect_dump
, "vector/scalar shift/rotate found.");
3186 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3188 && (optab_handler (optab
, TYPE_MODE (vectype
))
3189 != CODE_FOR_nothing
))
3191 scalar_shift_arg
= false;
3193 if (vect_print_dump_info (REPORT_DETAILS
))
3194 fprintf (vect_dump
, "vector/vector shift/rotate found.");
3196 /* Unlike the other binary operators, shifts/rotates have
3197 the rhs being int, instead of the same type as the lhs,
3198 so make sure the scalar is the right type if we are
3199 dealing with vectors of long long/long/short/char. */
3200 if (dt
[1] == vect_constant_def
)
3201 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
3202 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
3206 && TYPE_MODE (TREE_TYPE (vectype
))
3207 != TYPE_MODE (TREE_TYPE (op1
)))
3209 if (vect_print_dump_info (REPORT_DETAILS
))
3210 fprintf (vect_dump
, "unusable type for last operand in"
3211 " vector/vector shift/rotate.");
3214 if (vec_stmt
&& !slp_node
)
3216 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
3217 op1
= vect_init_vector (stmt
, op1
,
3218 TREE_TYPE (vectype
), NULL
);
3225 /* Supportable by target? */
3228 if (vect_print_dump_info (REPORT_DETAILS
))
3229 fprintf (vect_dump
, "no optab.");
3232 vec_mode
= TYPE_MODE (vectype
);
3233 icode
= (int) optab_handler (optab
, vec_mode
);
3234 if (icode
== CODE_FOR_nothing
)
3236 if (vect_print_dump_info (REPORT_DETAILS
))
3237 fprintf (vect_dump
, "op not supported by target.");
3238 /* Check only during analysis. */
3239 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
3240 || (vf
< vect_min_worthwhile_factor (code
)
3243 if (vect_print_dump_info (REPORT_DETAILS
))
3244 fprintf (vect_dump
, "proceeding using word mode.");
3247 /* Worthwhile without SIMD support? Check only during analysis. */
3248 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
3249 && vf
< vect_min_worthwhile_factor (code
)
3252 if (vect_print_dump_info (REPORT_DETAILS
))
3253 fprintf (vect_dump
, "not worthwhile without SIMD support.");
3257 if (!vec_stmt
) /* transformation not required. */
3259 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
3260 if (vect_print_dump_info (REPORT_DETAILS
))
3261 fprintf (vect_dump
, "=== vectorizable_shift ===");
3262 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3268 if (vect_print_dump_info (REPORT_DETAILS
))
3269 fprintf (vect_dump
, "transform binary/unary operation.");
3272 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3274 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3275 created in the previous stages of the recursion, so no allocation is
3276 needed, except for the case of shift with scalar shift argument. In that
3277 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3278 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3279 In case of loop-based vectorization we allocate VECs of size 1. We
3280 allocate VEC_OPRNDS1 only in case of binary operation. */
3283 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
3284 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
3286 else if (scalar_shift_arg
)
3287 vec_oprnds1
= VEC_alloc (tree
, heap
, slp_node
->vec_stmts_size
);
3289 prev_stmt_info
= NULL
;
3290 for (j
= 0; j
< ncopies
; j
++)
3295 if (scalar_shift_arg
)
3297 /* Vector shl and shr insn patterns can be defined with scalar
3298 operand 2 (shift operand). In this case, use constant or loop
3299 invariant op1 directly, without extending it to vector mode
3301 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
3302 if (!VECTOR_MODE_P (optab_op2_mode
))
3304 if (vect_print_dump_info (REPORT_DETAILS
))
3305 fprintf (vect_dump
, "operand 1 using scalar mode.");
3307 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
3310 /* Store vec_oprnd1 for every vector stmt to be created
3311 for SLP_NODE. We check during the analysis that all
3312 the shift arguments are the same.
3313 TODO: Allow different constants for different vector
3314 stmts generated for an SLP instance. */
3315 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3316 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
3321 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3322 (a special case for certain kind of vector shifts); otherwise,
3323 operand 1 should be of a vector type (the usual case). */
3325 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3328 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
3332 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
3334 /* Arguments are ready. Create the new vector stmt. */
3335 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
3337 vop1
= VEC_index (tree
, vec_oprnds1
, i
);
3338 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
3339 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3340 gimple_assign_set_lhs (new_stmt
, new_temp
);
3341 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3343 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
3350 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3352 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3353 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3356 VEC_free (tree
, heap
, vec_oprnds0
);
3357 VEC_free (tree
, heap
, vec_oprnds1
);
3363 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
,
3364 gimple_stmt_iterator
*);
3367 /* Function vectorizable_operation.
3369 Check if STMT performs a binary, unary or ternary operation that can
3371 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3372 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3373 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3376 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
3377 gimple
*vec_stmt
, slp_tree slp_node
)
3381 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
3382 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3384 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3385 enum tree_code code
;
3386 enum machine_mode vec_mode
;
3393 enum vect_def_type dt
[3]
3394 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
3395 gimple new_stmt
= NULL
;
3396 stmt_vec_info prev_stmt_info
;
3402 VEC(tree
,heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
, *vec_oprnds2
= NULL
;
3403 tree vop0
, vop1
, vop2
;
3404 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3407 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3410 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3413 /* Is STMT a vectorizable binary/unary operation? */
3414 if (!is_gimple_assign (stmt
))
3417 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3420 code
= gimple_assign_rhs_code (stmt
);
3422 /* For pointer addition, we should use the normal plus for
3423 the vector addition. */
3424 if (code
== POINTER_PLUS_EXPR
)
3427 /* Support only unary or binary operations. */
3428 op_type
= TREE_CODE_LENGTH (code
);
3429 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
3431 if (vect_print_dump_info (REPORT_DETAILS
))
3432 fprintf (vect_dump
, "num. args = %d (not unary/binary/ternary op).",
3437 scalar_dest
= gimple_assign_lhs (stmt
);
3438 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3440 /* Most operations cannot handle bit-precision types without extra
3442 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3443 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
3444 /* Exception are bitwise binary operations. */
3445 && code
!= BIT_IOR_EXPR
3446 && code
!= BIT_XOR_EXPR
3447 && code
!= BIT_AND_EXPR
)
3449 if (vect_print_dump_info (REPORT_DETAILS
))
3450 fprintf (vect_dump
, "bit-precision arithmetic not supported.");
3454 op0
= gimple_assign_rhs1 (stmt
);
3455 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3456 &def_stmt
, &def
, &dt
[0], &vectype
))
3458 if (vect_print_dump_info (REPORT_DETAILS
))
3459 fprintf (vect_dump
, "use not simple.");
3462 /* If op0 is an external or constant def use a vector type with
3463 the same size as the output vector type. */
3465 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
3467 gcc_assert (vectype
);
3470 if (vect_print_dump_info (REPORT_DETAILS
))
3472 fprintf (vect_dump
, "no vectype for scalar type ");
3473 print_generic_expr (vect_dump
, TREE_TYPE (op0
), TDF_SLIM
);
3479 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3480 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3481 if (nunits_out
!= nunits_in
)
3484 if (op_type
== binary_op
|| op_type
== ternary_op
)
3486 op1
= gimple_assign_rhs2 (stmt
);
3487 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3490 if (vect_print_dump_info (REPORT_DETAILS
))
3491 fprintf (vect_dump
, "use not simple.");
3495 if (op_type
== ternary_op
)
3497 op2
= gimple_assign_rhs3 (stmt
);
3498 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3501 if (vect_print_dump_info (REPORT_DETAILS
))
3502 fprintf (vect_dump
, "use not simple.");
3508 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3512 /* Multiple types in SLP are handled by creating the appropriate number of
3513 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3515 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3518 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3520 gcc_assert (ncopies
>= 1);
3522 /* Shifts are handled in vectorizable_shift (). */
3523 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
3524 || code
== RROTATE_EXPR
)
3527 /* Supportable by target? */
3529 vec_mode
= TYPE_MODE (vectype
);
3530 if (code
== MULT_HIGHPART_EXPR
)
3532 if (can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
)))
3533 icode
= LAST_INSN_CODE
;
3535 icode
= CODE_FOR_nothing
;
3539 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
3542 if (vect_print_dump_info (REPORT_DETAILS
))
3543 fprintf (vect_dump
, "no optab.");
3546 icode
= (int) optab_handler (optab
, vec_mode
);
3549 if (icode
== CODE_FOR_nothing
)
3551 if (vect_print_dump_info (REPORT_DETAILS
))
3552 fprintf (vect_dump
, "op not supported by target.");
3553 /* Check only during analysis. */
3554 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
3555 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
3557 if (vect_print_dump_info (REPORT_DETAILS
))
3558 fprintf (vect_dump
, "proceeding using word mode.");
3561 /* Worthwhile without SIMD support? Check only during analysis. */
3562 if (!VECTOR_MODE_P (vec_mode
)
3564 && vf
< vect_min_worthwhile_factor (code
))
3566 if (vect_print_dump_info (REPORT_DETAILS
))
3567 fprintf (vect_dump
, "not worthwhile without SIMD support.");
3571 if (!vec_stmt
) /* transformation not required. */
3573 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
3574 if (vect_print_dump_info (REPORT_DETAILS
))
3575 fprintf (vect_dump
, "=== vectorizable_operation ===");
3576 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3582 if (vect_print_dump_info (REPORT_DETAILS
))
3583 fprintf (vect_dump
, "transform binary/unary operation.");
3586 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3588 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3589 created in the previous stages of the recursion, so no allocation is
3590 needed, except for the case of shift with scalar shift argument. In that
3591 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3592 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3593 In case of loop-based vectorization we allocate VECs of size 1. We
3594 allocate VEC_OPRNDS1 only in case of binary operation. */
3597 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
3598 if (op_type
== binary_op
|| op_type
== ternary_op
)
3599 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
3600 if (op_type
== ternary_op
)
3601 vec_oprnds2
= VEC_alloc (tree
, heap
, 1);
3604 /* In case the vectorization factor (VF) is bigger than the number
3605 of elements that we can fit in a vectype (nunits), we have to generate
3606 more than one vector stmt - i.e - we need to "unroll" the
3607 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3608 from one copy of the vector stmt to the next, in the field
3609 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3610 stages to find the correct vector defs to be used when vectorizing
3611 stmts that use the defs of the current stmt. The example below
3612 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3613 we need to create 4 vectorized stmts):
3615 before vectorization:
3616 RELATED_STMT VEC_STMT
3620 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3622 RELATED_STMT VEC_STMT
3623 VS1_0: vx0 = memref0 VS1_1 -
3624 VS1_1: vx1 = memref1 VS1_2 -
3625 VS1_2: vx2 = memref2 VS1_3 -
3626 VS1_3: vx3 = memref3 - -
3627 S1: x = load - VS1_0
3630 step2: vectorize stmt S2 (done here):
3631 To vectorize stmt S2 we first need to find the relevant vector
3632 def for the first operand 'x'. This is, as usual, obtained from
3633 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3634 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3635 relevant vector def 'vx0'. Having found 'vx0' we can generate
3636 the vector stmt VS2_0, and as usual, record it in the
3637 STMT_VINFO_VEC_STMT of stmt S2.
3638 When creating the second copy (VS2_1), we obtain the relevant vector
3639 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3640 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3641 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3642 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3643 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3644 chain of stmts and pointers:
3645 RELATED_STMT VEC_STMT
3646 VS1_0: vx0 = memref0 VS1_1 -
3647 VS1_1: vx1 = memref1 VS1_2 -
3648 VS1_2: vx2 = memref2 VS1_3 -
3649 VS1_3: vx3 = memref3 - -
3650 S1: x = load - VS1_0
3651 VS2_0: vz0 = vx0 + v1 VS2_1 -
3652 VS2_1: vz1 = vx1 + v1 VS2_2 -
3653 VS2_2: vz2 = vx2 + v1 VS2_3 -
3654 VS2_3: vz3 = vx3 + v1 - -
3655 S2: z = x + 1 - VS2_0 */
3657 prev_stmt_info
= NULL
;
3658 for (j
= 0; j
< ncopies
; j
++)
3663 if (op_type
== binary_op
|| op_type
== ternary_op
)
3664 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
3667 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3669 if (op_type
== ternary_op
)
3671 vec_oprnds2
= VEC_alloc (tree
, heap
, 1);
3672 VEC_quick_push (tree
, vec_oprnds2
,
3673 vect_get_vec_def_for_operand (op2
, stmt
, NULL
));
3678 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
3679 if (op_type
== ternary_op
)
3681 tree vec_oprnd
= VEC_pop (tree
, vec_oprnds2
);
3682 VEC_quick_push (tree
, vec_oprnds2
,
3683 vect_get_vec_def_for_stmt_copy (dt
[2],
3688 /* Arguments are ready. Create the new vector stmt. */
3689 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
3691 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
3692 ? VEC_index (tree
, vec_oprnds1
, i
) : NULL_TREE
);
3693 vop2
= ((op_type
== ternary_op
)
3694 ? VEC_index (tree
, vec_oprnds2
, i
) : NULL_TREE
);
3695 new_stmt
= gimple_build_assign_with_ops3 (code
, vec_dest
,
3697 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3698 gimple_assign_set_lhs (new_stmt
, new_temp
);
3699 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3701 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
3708 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3710 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3711 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3714 VEC_free (tree
, heap
, vec_oprnds0
);
3716 VEC_free (tree
, heap
, vec_oprnds1
);
3718 VEC_free (tree
, heap
, vec_oprnds2
);
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3733 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
3739 tree vec_oprnd
= NULL_TREE
;
3740 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3741 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
3742 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3744 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3745 struct loop
*loop
= NULL
;
3746 enum machine_mode vec_mode
;
3748 enum dr_alignment_support alignment_support_scheme
;
3751 enum vect_def_type dt
;
3752 stmt_vec_info prev_stmt_info
= NULL
;
3753 tree dataref_ptr
= NULL_TREE
;
3754 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3757 gimple next_stmt
, first_stmt
= NULL
;
3758 bool grouped_store
= false;
3759 bool store_lanes_p
= false;
3760 unsigned int group_size
, i
;
3761 VEC(tree
,heap
) *dr_chain
= NULL
, *oprnds
= NULL
, *result_chain
= NULL
;
3763 VEC(tree
,heap
) *vec_oprnds
= NULL
;
3764 bool slp
= (slp_node
!= NULL
);
3765 unsigned int vec_num
;
3766 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3770 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3772 /* Multiple types in SLP are handled by creating the appropriate number of
3773 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3775 if (slp
|| PURE_SLP_STMT (stmt_info
))
3778 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3780 gcc_assert (ncopies
>= 1);
3782 /* FORNOW. This restriction should be relaxed. */
3783 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
3785 if (vect_print_dump_info (REPORT_DETAILS
))
3786 fprintf (vect_dump
, "multiple types in nested loop.");
3790 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3793 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3796 /* Is vectorizable store? */
3798 if (!is_gimple_assign (stmt
))
3801 scalar_dest
= gimple_assign_lhs (stmt
);
3802 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
3803 && is_pattern_stmt_p (stmt_info
))
3804 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
3805 if (TREE_CODE (scalar_dest
) != ARRAY_REF
3806 && TREE_CODE (scalar_dest
) != INDIRECT_REF
3807 && TREE_CODE (scalar_dest
) != COMPONENT_REF
3808 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
3809 && TREE_CODE (scalar_dest
) != REALPART_EXPR
3810 && TREE_CODE (scalar_dest
) != MEM_REF
)
3813 gcc_assert (gimple_assign_single_p (stmt
));
3814 op
= gimple_assign_rhs1 (stmt
);
3815 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3818 if (vect_print_dump_info (REPORT_DETAILS
))
3819 fprintf (vect_dump
, "use not simple.");
3823 elem_type
= TREE_TYPE (vectype
);
3824 vec_mode
= TYPE_MODE (vectype
);
3826 /* FORNOW. In some cases can vectorize even if data-type not supported
3827 (e.g. - array initialization with 0). */
3828 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
3831 if (!STMT_VINFO_DATA_REF (stmt_info
))
3834 if (tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
3835 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
3836 size_zero_node
) < 0)
3838 if (vect_print_dump_info (REPORT_DETAILS
))
3839 fprintf (vect_dump
, "negative step for store.");
3843 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3845 grouped_store
= true;
3846 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
3847 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
3849 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3850 if (vect_store_lanes_supported (vectype
, group_size
))
3851 store_lanes_p
= true;
3852 else if (!vect_grouped_store_supported (vectype
, group_size
))
3856 if (first_stmt
== stmt
)
3858 /* STMT is the leader of the group. Check the operands of all the
3859 stmts of the group. */
3860 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
3863 gcc_assert (gimple_assign_single_p (next_stmt
));
3864 op
= gimple_assign_rhs1 (next_stmt
);
3865 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
3866 &def_stmt
, &def
, &dt
))
3868 if (vect_print_dump_info (REPORT_DETAILS
))
3869 fprintf (vect_dump
, "use not simple.");
3872 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
3877 if (!vec_stmt
) /* transformation not required. */
3879 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
3880 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
3889 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3890 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3892 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
3895 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
3897 /* We vectorize all the stmts of the interleaving group when we
3898 reach the last stmt in the group. */
3899 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
3900 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
3909 grouped_store
= false;
3910 /* VEC_NUM is the number of vect stmts to be created for this
3912 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3913 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
3914 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3915 op
= gimple_assign_rhs1 (first_stmt
);
3918 /* VEC_NUM is the number of vect stmts to be created for this
3920 vec_num
= group_size
;
3926 group_size
= vec_num
= 1;
3929 if (vect_print_dump_info (REPORT_DETAILS
))
3930 fprintf (vect_dump
, "transform store. ncopies = %d",ncopies
);
3932 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
3933 oprnds
= VEC_alloc (tree
, heap
, group_size
);
3935 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
3936 gcc_assert (alignment_support_scheme
);
3937 /* Targets with store-lane instructions must not require explicit
3939 gcc_assert (!store_lanes_p
3940 || alignment_support_scheme
== dr_aligned
3941 || alignment_support_scheme
== dr_unaligned_supported
);
3944 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
3946 aggr_type
= vectype
;
3948 /* In case the vectorization factor (VF) is bigger than the number
3949 of elements that we can fit in a vectype (nunits), we have to generate
3950 more than one vector stmt - i.e - we need to "unroll" the
3951 vector stmt by a factor VF/nunits. For more details see documentation in
3952 vect_get_vec_def_for_copy_stmt. */
3954 /* In case of interleaving (non-unit grouped access):
3961 We create vectorized stores starting from base address (the access of the
3962 first stmt in the chain (S2 in the above example), when the last store stmt
3963 of the chain (S4) is reached:
3966 VS2: &base + vec_size*1 = vx0
3967 VS3: &base + vec_size*2 = vx1
3968 VS4: &base + vec_size*3 = vx3
3970 Then permutation statements are generated:
3972 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
3973 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
3976 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3977 (the order of the data-refs in the output of vect_permute_store_chain
3978 corresponds to the order of scalar stmts in the interleaving chain - see
3979 the documentation of vect_permute_store_chain()).
3981 In case of both multiple types and interleaving, above vector stores and
3982 permutation stmts are created for every copy. The result vector stmts are
3983 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3984 STMT_VINFO_RELATED_STMT for the next copies.
3987 prev_stmt_info
= NULL
;
3988 for (j
= 0; j
< ncopies
; j
++)
3997 /* Get vectorized arguments for SLP_NODE. */
3998 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
3999 NULL
, slp_node
, -1);
4001 vec_oprnd
= VEC_index (tree
, vec_oprnds
, 0);
4005 /* For interleaved stores we collect vectorized defs for all the
4006 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4007 used as an input to vect_permute_store_chain(), and OPRNDS as
4008 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4010 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4011 OPRNDS are of size 1. */
4012 next_stmt
= first_stmt
;
4013 for (i
= 0; i
< group_size
; i
++)
4015 /* Since gaps are not supported for interleaved stores,
4016 GROUP_SIZE is the exact number of stmts in the chain.
4017 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4018 there is no interleaving, GROUP_SIZE is 1, and only one
4019 iteration of the loop will be executed. */
4020 gcc_assert (next_stmt
4021 && gimple_assign_single_p (next_stmt
));
4022 op
= gimple_assign_rhs1 (next_stmt
);
4024 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
4026 VEC_quick_push(tree
, dr_chain
, vec_oprnd
);
4027 VEC_quick_push(tree
, oprnds
, vec_oprnd
);
4028 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4032 /* We should have catched mismatched types earlier. */
4033 gcc_assert (useless_type_conversion_p (vectype
,
4034 TREE_TYPE (vec_oprnd
)));
4035 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, aggr_type
, NULL
,
4036 NULL_TREE
, &dummy
, gsi
,
4037 &ptr_incr
, false, &inv_p
);
4038 gcc_assert (bb_vinfo
|| !inv_p
);
4042 /* For interleaved stores we created vectorized defs for all the
4043 defs stored in OPRNDS in the previous iteration (previous copy).
4044 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4045 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4047 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4048 OPRNDS are of size 1. */
4049 for (i
= 0; i
< group_size
; i
++)
4051 op
= VEC_index (tree
, oprnds
, i
);
4052 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4054 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
4055 VEC_replace(tree
, dr_chain
, i
, vec_oprnd
);
4056 VEC_replace(tree
, oprnds
, i
, vec_oprnd
);
4058 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
4059 TYPE_SIZE_UNIT (aggr_type
));
4066 /* Combine all the vectors into an array. */
4067 vec_array
= create_vector_array (vectype
, vec_num
);
4068 for (i
= 0; i
< vec_num
; i
++)
4070 vec_oprnd
= VEC_index (tree
, dr_chain
, i
);
4071 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
4075 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4076 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
4077 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
4078 gimple_call_set_lhs (new_stmt
, data_ref
);
4079 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4086 result_chain
= VEC_alloc (tree
, heap
, group_size
);
4088 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
4092 next_stmt
= first_stmt
;
4093 for (i
= 0; i
< vec_num
; i
++)
4095 unsigned align
, misalign
;
4098 /* Bump the vector pointer. */
4099 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
4103 vec_oprnd
= VEC_index (tree
, vec_oprnds
, i
);
4104 else if (grouped_store
)
4105 /* For grouped stores vectorized defs are interleaved in
4106 vect_permute_store_chain(). */
4107 vec_oprnd
= VEC_index (tree
, result_chain
, i
);
4109 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
4110 build_int_cst (reference_alias_ptr_type
4111 (DR_REF (first_dr
)), 0));
4112 align
= TYPE_ALIGN_UNIT (vectype
);
4113 if (aligned_access_p (first_dr
))
4115 else if (DR_MISALIGNMENT (first_dr
) == -1)
4117 TREE_TYPE (data_ref
)
4118 = build_aligned_type (TREE_TYPE (data_ref
),
4119 TYPE_ALIGN (elem_type
));
4120 align
= TYPE_ALIGN_UNIT (elem_type
);
4125 TREE_TYPE (data_ref
)
4126 = build_aligned_type (TREE_TYPE (data_ref
),
4127 TYPE_ALIGN (elem_type
));
4128 misalign
= DR_MISALIGNMENT (first_dr
);
4130 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
4133 /* Arguments are ready. Create the new vector stmt. */
4134 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
4135 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4140 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4148 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4150 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4151 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4155 VEC_free (tree
, heap
, dr_chain
);
4156 VEC_free (tree
, heap
, oprnds
);
4158 VEC_free (tree
, heap
, result_chain
);
4160 VEC_free (tree
, heap
, vec_oprnds
);
4165 /* Given a vector type VECTYPE and permutation SEL returns
4166 the VECTOR_CST mask that implements the permutation of the
4167 vector elements. If that is impossible to do, returns NULL. */
4170 vect_gen_perm_mask (tree vectype
, unsigned char *sel
)
4172 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
4175 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4177 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
4180 mask_elt_type
= lang_hooks
.types
.type_for_mode
4181 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
4182 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
4184 mask_elts
= XALLOCAVEC (tree
, nunits
);
4185 for (i
= nunits
- 1; i
>= 0; i
--)
4186 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
4187 mask_vec
= build_vector (mask_type
, mask_elts
);
/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */
4197 perm_mask_for_reverse (tree vectype
)
4202 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4203 sel
= XALLOCAVEC (unsigned char, nunits
);
4205 for (i
= 0; i
< nunits
; ++i
)
4206 sel
[i
] = nunits
- 1 - i
;
4208 return vect_gen_perm_mask (vectype
, sel
);
4211 /* Given a vector variable X and Y, that was generated for the scalar
4212 STMT, generate instructions to permute the vector elements of X and Y
4213 using permutation mask MASK_VEC, insert them at *GSI and return the
4214 permuted vector variable. */
4217 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple stmt
,
4218 gimple_stmt_iterator
*gsi
)
4220 tree vectype
= TREE_TYPE (x
);
4221 tree perm_dest
, data_ref
;
4224 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
4225 data_ref
= make_ssa_name (perm_dest
, NULL
);
4227 /* Generate the permute statement. */
4228 perm_stmt
= gimple_build_assign_with_ops3 (VEC_PERM_EXPR
, data_ref
,
4230 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4244 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
4245 slp_tree slp_node
, slp_instance slp_node_instance
)
4248 tree vec_dest
= NULL
;
4249 tree data_ref
= NULL
;
4250 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4251 stmt_vec_info prev_stmt_info
;
4252 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4253 struct loop
*loop
= NULL
;
4254 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
4255 bool nested_in_vect_loop
= false;
4256 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
4257 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4260 enum machine_mode mode
;
4261 gimple new_stmt
= NULL
;
4263 enum dr_alignment_support alignment_support_scheme
;
4264 tree dataref_ptr
= NULL_TREE
;
4266 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4268 int i
, j
, group_size
;
4269 tree msq
= NULL_TREE
, lsq
;
4270 tree offset
= NULL_TREE
;
4271 tree realignment_token
= NULL_TREE
;
4273 VEC(tree
,heap
) *dr_chain
= NULL
;
4274 bool grouped_load
= false;
4275 bool load_lanes_p
= false;
4278 bool negative
= false;
4279 bool compute_in_loop
= false;
4280 struct loop
*at_loop
;
4282 bool slp
= (slp_node
!= NULL
);
4283 bool slp_perm
= false;
4284 enum tree_code code
;
4285 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4288 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
4289 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
4290 tree stride_base
, stride_step
;
4291 int gather_scale
= 1;
4292 enum vect_def_type gather_dt
= vect_unknown_def_type
;
4296 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4297 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
4298 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4303 /* Multiple types in SLP are handled by creating the appropriate number of
4304 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4306 if (slp
|| PURE_SLP_STMT (stmt_info
))
4309 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4311 gcc_assert (ncopies
>= 1);
4313 /* FORNOW. This restriction should be relaxed. */
4314 if (nested_in_vect_loop
&& ncopies
> 1)
4316 if (vect_print_dump_info (REPORT_DETAILS
))
4317 fprintf (vect_dump
, "multiple types in nested loop.");
4321 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4324 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4327 /* Is vectorizable load? */
4328 if (!is_gimple_assign (stmt
))
4331 scalar_dest
= gimple_assign_lhs (stmt
);
4332 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4335 code
= gimple_assign_rhs_code (stmt
);
4336 if (code
!= ARRAY_REF
4337 && code
!= INDIRECT_REF
4338 && code
!= COMPONENT_REF
4339 && code
!= IMAGPART_EXPR
4340 && code
!= REALPART_EXPR
4342 && TREE_CODE_CLASS (code
) != tcc_declaration
)
4345 if (!STMT_VINFO_DATA_REF (stmt_info
))
4348 elem_type
= TREE_TYPE (vectype
);
4349 mode
= TYPE_MODE (vectype
);
4351 /* FORNOW. In some cases can vectorize even if data-type not supported
4352 (e.g. - data copies). */
4353 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
4355 if (vect_print_dump_info (REPORT_DETAILS
))
4356 fprintf (vect_dump
, "Aligned load, but unsupported type.");
4360 /* Check if the load is a part of an interleaving chain. */
4361 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
4363 grouped_load
= true;
4365 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
4367 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4368 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
4370 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4371 if (vect_load_lanes_supported (vectype
, group_size
))
4372 load_lanes_p
= true;
4373 else if (!vect_grouped_load_supported (vectype
, group_size
))
4379 if (STMT_VINFO_GATHER_P (stmt_info
))
4383 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
4384 &gather_off
, &gather_scale
);
4385 gcc_assert (gather_decl
);
4386 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
4387 &def_stmt
, &def
, &gather_dt
,
4388 &gather_off_vectype
))
4390 if (vect_print_dump_info (REPORT_DETAILS
))
4391 fprintf (vect_dump
, "gather index use not simple.");
4395 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4397 if (!vect_check_strided_load (stmt
, loop_vinfo
,
4398 &stride_base
, &stride_step
))
4403 negative
= tree_int_cst_compare (nested_in_vect_loop
4404 ? STMT_VINFO_DR_STEP (stmt_info
)
4406 size_zero_node
) < 0;
4407 if (negative
&& ncopies
> 1)
4409 if (vect_print_dump_info (REPORT_DETAILS
))
4410 fprintf (vect_dump
, "multiple types with negative step.");
4416 gcc_assert (!grouped_load
);
4417 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
4418 if (alignment_support_scheme
!= dr_aligned
4419 && alignment_support_scheme
!= dr_unaligned_supported
)
4421 if (vect_print_dump_info (REPORT_DETAILS
))
4422 fprintf (vect_dump
, "negative step but alignment required.");
4425 if (!perm_mask_for_reverse (vectype
))
4427 if (vect_print_dump_info (REPORT_DETAILS
))
4428 fprintf (vect_dump
, "negative step and reversing not supported.");
4434 if (!vec_stmt
) /* transformation not required. */
4436 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
4437 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
, NULL
, NULL
, NULL
);
4441 if (vect_print_dump_info (REPORT_DETAILS
))
4442 fprintf (vect_dump
, "transform load. ncopies = %d", ncopies
);
4446 if (STMT_VINFO_GATHER_P (stmt_info
))
4448 tree vec_oprnd0
= NULL_TREE
, op
;
4449 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
4450 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
4451 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
4452 edge pe
= loop_preheader_edge (loop
);
4455 enum { NARROW
, NONE
, WIDEN
} modifier
;
4456 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
4458 if (nunits
== gather_off_nunits
)
4460 else if (nunits
== gather_off_nunits
/ 2)
4462 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
4465 for (i
= 0; i
< gather_off_nunits
; ++i
)
4466 sel
[i
] = i
| nunits
;
4468 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
4469 gcc_assert (perm_mask
!= NULL_TREE
);
4471 else if (nunits
== gather_off_nunits
* 2)
4473 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
4476 for (i
= 0; i
< nunits
; ++i
)
4477 sel
[i
] = i
< gather_off_nunits
4478 ? i
: i
+ nunits
- gather_off_nunits
;
4480 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
4481 gcc_assert (perm_mask
!= NULL_TREE
);
4487 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
4488 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4489 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4490 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4491 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4492 scaletype
= TREE_VALUE (arglist
);
4493 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
4494 && types_compatible_p (srctype
, masktype
));
4496 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4498 ptr
= fold_convert (ptrtype
, gather_base
);
4499 if (!is_gimple_min_invariant (ptr
))
4501 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
4502 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
4503 gcc_assert (!new_bb
);
4506 /* Currently we support only unconditional gather loads,
4507 so mask should be all ones. */
4508 if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
4509 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
4510 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
4514 for (j
= 0; j
< 6; ++j
)
4516 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
4517 mask
= build_real (TREE_TYPE (masktype
), r
);
4521 mask
= build_vector_from_val (masktype
, mask
);
4522 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
4524 scale
= build_int_cst (scaletype
, gather_scale
);
4526 prev_stmt_info
= NULL
;
4527 for (j
= 0; j
< ncopies
; ++j
)
4529 if (modifier
== WIDEN
&& (j
& 1))
4530 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
4531 perm_mask
, stmt
, gsi
);
4534 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
4537 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
4539 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
4541 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
4542 == TYPE_VECTOR_SUBPARTS (idxtype
));
4543 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
4544 var
= make_ssa_name (var
, NULL
);
4545 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
4547 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
4549 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4554 = gimple_build_call (gather_decl
, 5, mask
, ptr
, op
, mask
, scale
);
4556 if (!useless_type_conversion_p (vectype
, rettype
))
4558 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
4559 == TYPE_VECTOR_SUBPARTS (rettype
));
4560 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
4561 op
= make_ssa_name (var
, new_stmt
);
4562 gimple_call_set_lhs (new_stmt
, op
);
4563 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4564 var
= make_ssa_name (vec_dest
, NULL
);
4565 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
4567 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
4572 var
= make_ssa_name (vec_dest
, new_stmt
);
4573 gimple_call_set_lhs (new_stmt
, var
);
4576 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4578 if (modifier
== NARROW
)
4585 var
= permute_vec_elements (prev_res
, var
,
4586 perm_mask
, stmt
, gsi
);
4587 new_stmt
= SSA_NAME_DEF_STMT (var
);
4590 if (prev_stmt_info
== NULL
)
4591 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4593 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4594 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4598 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4600 gimple_stmt_iterator incr_gsi
;
4604 tree ref
= DR_REF (dr
);
4607 VEC(constructor_elt
, gc
) *v
= NULL
;
4608 gimple_seq stmts
= NULL
;
4610 gcc_assert (stride_base
&& stride_step
);
4612 /* For a load with loop-invariant (but other than power-of-2)
4613 stride (i.e. not a grouped access) like so:
4615 for (i = 0; i < n; i += stride)
4618 we generate a new induction variable and new accesses to
4619 form a new vector (or vectors, depending on ncopies):
4621 for (j = 0; ; j += VF*stride)
4623 tmp2 = array[j + stride];
4625 vectemp = {tmp1, tmp2, ...}
4628 ivstep
= stride_step
;
4629 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
4630 build_int_cst (TREE_TYPE (ivstep
), vf
));
4632 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
4634 create_iv (stride_base
, ivstep
, NULL
,
4635 loop
, &incr_gsi
, insert_after
,
4637 incr
= gsi_stmt (incr_gsi
);
4638 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
4640 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
4642 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
4644 prev_stmt_info
= NULL
;
4645 running_off
= offvar
;
4646 for (j
= 0; j
< ncopies
; j
++)
4650 v
= VEC_alloc (constructor_elt
, gc
, nunits
);
4651 for (i
= 0; i
< nunits
; i
++)
4653 tree newref
, newoff
;
4655 if (TREE_CODE (ref
) == ARRAY_REF
)
4656 newref
= build4 (ARRAY_REF
, TREE_TYPE (ref
),
4657 unshare_expr (TREE_OPERAND (ref
, 0)),
4659 NULL_TREE
, NULL_TREE
);
4661 newref
= build2 (MEM_REF
, TREE_TYPE (ref
),
4663 TREE_OPERAND (ref
, 1));
4665 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
4668 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
4669 newoff
= copy_ssa_name (running_off
, NULL
);
4670 if (POINTER_TYPE_P (TREE_TYPE (newoff
)))
4671 incr
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, newoff
,
4672 running_off
, stride_step
);
4674 incr
= gimple_build_assign_with_ops (PLUS_EXPR
, newoff
,
4675 running_off
, stride_step
);
4676 vect_finish_stmt_generation (stmt
, incr
, gsi
);
4678 running_off
= newoff
;
4681 vec_inv
= build_constructor (vectype
, v
);
4682 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
4683 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
4686 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4688 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4689 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4696 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4698 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance
)
4699 && first_stmt
!= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0))
4700 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
4702 /* Check if the chain of loads is already vectorized. */
4703 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
)))
4705 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4708 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4709 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4711 /* VEC_NUM is the number of vect stmts to be created for this group. */
4714 grouped_load
= false;
4715 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
4716 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance
))
4720 vec_num
= group_size
;
4726 group_size
= vec_num
= 1;
4729 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
4730 gcc_assert (alignment_support_scheme
);
4731 /* Targets with load-lane instructions must not require explicit
4733 gcc_assert (!load_lanes_p
4734 || alignment_support_scheme
== dr_aligned
4735 || alignment_support_scheme
== dr_unaligned_supported
);
4737 /* In case the vectorization factor (VF) is bigger than the number
4738 of elements that we can fit in a vectype (nunits), we have to generate
4739 more than one vector stmt - i.e - we need to "unroll" the
4740 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4741 from one copy of the vector stmt to the next, in the field
4742 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4743 stages to find the correct vector defs to be used when vectorizing
4744 stmts that use the defs of the current stmt. The example below
4745 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4746 need to create 4 vectorized stmts):
4748 before vectorization:
4749 RELATED_STMT VEC_STMT
4753 step 1: vectorize stmt S1:
4754 We first create the vector stmt VS1_0, and, as usual, record a
4755 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4756 Next, we create the vector stmt VS1_1, and record a pointer to
4757 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4758 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4760 RELATED_STMT VEC_STMT
4761 VS1_0: vx0 = memref0 VS1_1 -
4762 VS1_1: vx1 = memref1 VS1_2 -
4763 VS1_2: vx2 = memref2 VS1_3 -
4764 VS1_3: vx3 = memref3 - -
4765 S1: x = load - VS1_0
4768 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4769 information we recorded in RELATED_STMT field is used to vectorize
4772 /* In case of interleaving (non-unit grouped access):
4779 Vectorized loads are created in the order of memory accesses
4780 starting from the access of the first stmt of the chain:
4783 VS2: vx1 = &base + vec_size*1
4784 VS3: vx3 = &base + vec_size*2
4785 VS4: vx4 = &base + vec_size*3
4787 Then permutation statements are generated:
4789 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4790 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4793 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4794 (the order of the data-refs in the output of vect_permute_load_chain
4795 corresponds to the order of scalar stmts in the interleaving chain - see
4796 the documentation of vect_permute_load_chain()).
4797 The generation of permutation stmts and recording them in
4798 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4800 In case of both multiple types and interleaving, the vector loads and
4801 permutation stmts above are created for every copy. The result vector
4802 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4803 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4805 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4806 on a target that supports unaligned accesses (dr_unaligned_supported)
4807 we generate the following code:
4811 p = p + indx * vectype_size;
4816 Otherwise, the data reference is potentially unaligned on a target that
4817 does not support unaligned accesses (dr_explicit_realign_optimized) -
4818 then generate the following code, in which the data in each iteration is
4819 obtained by two vector loads, one from the previous iteration, and one
4820 from the current iteration:
4822 msq_init = *(floor(p1))
4823 p2 = initial_addr + VS - 1;
4824 realignment_token = call target_builtin;
4827 p2 = p2 + indx * vectype_size
4829 vec_dest = realign_load (msq, lsq, realignment_token)
4834 /* If the misalignment remains the same throughout the execution of the
4835 loop, we can create the init_addr and permutation mask at the loop
4836 preheader. Otherwise, it needs to be created inside the loop.
4837 This can only occur when vectorizing memory accesses in the inner-loop
4838 nested within an outer-loop that is being vectorized. */
4840 if (nested_in_vect_loop
4841 && (TREE_INT_CST_LOW (DR_STEP (dr
))
4842 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
4844 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
4845 compute_in_loop
= true;
4848 if ((alignment_support_scheme
== dr_explicit_realign_optimized
4849 || alignment_support_scheme
== dr_explicit_realign
)
4850 && !compute_in_loop
)
4852 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
4853 alignment_support_scheme
, NULL_TREE
,
4855 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
4857 phi
= SSA_NAME_DEF_STMT (msq
);
4858 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
4865 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
4868 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
4870 aggr_type
= vectype
;
4872 prev_stmt_info
= NULL
;
4873 for (j
= 0; j
< ncopies
; j
++)
4875 /* 1. Create the vector or array pointer update chain. */
4877 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
4878 offset
, &dummy
, gsi
,
4879 &ptr_incr
, false, &inv_p
);
4881 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
4882 TYPE_SIZE_UNIT (aggr_type
));
4884 if (grouped_load
|| slp_perm
)
4885 dr_chain
= VEC_alloc (tree
, heap
, vec_num
);
4891 vec_array
= create_vector_array (vectype
, vec_num
);
4894 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4895 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
4896 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
4897 gimple_call_set_lhs (new_stmt
, vec_array
);
4898 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4900 /* Extract each vector into an SSA_NAME. */
4901 for (i
= 0; i
< vec_num
; i
++)
4903 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
4905 VEC_quick_push (tree
, dr_chain
, new_temp
);
4908 /* Record the mapping between SSA_NAMEs and statements. */
4909 vect_record_grouped_load_vectors (stmt
, dr_chain
);
4913 for (i
= 0; i
< vec_num
; i
++)
4916 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
4919 /* 2. Create the vector-load in the loop. */
4920 switch (alignment_support_scheme
)
4923 case dr_unaligned_supported
:
4925 unsigned int align
, misalign
;
4928 = build2 (MEM_REF
, vectype
, dataref_ptr
,
4929 build_int_cst (reference_alias_ptr_type
4930 (DR_REF (first_dr
)), 0));
4931 align
= TYPE_ALIGN_UNIT (vectype
);
4932 if (alignment_support_scheme
== dr_aligned
)
4934 gcc_assert (aligned_access_p (first_dr
));
4937 else if (DR_MISALIGNMENT (first_dr
) == -1)
4939 TREE_TYPE (data_ref
)
4940 = build_aligned_type (TREE_TYPE (data_ref
),
4941 TYPE_ALIGN (elem_type
));
4942 align
= TYPE_ALIGN_UNIT (elem_type
);
4947 TREE_TYPE (data_ref
)
4948 = build_aligned_type (TREE_TYPE (data_ref
),
4949 TYPE_ALIGN (elem_type
));
4950 misalign
= DR_MISALIGNMENT (first_dr
);
4952 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
4956 case dr_explicit_realign
:
4961 vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
4963 if (compute_in_loop
)
4964 msq
= vect_setup_realignment (first_stmt
, gsi
,
4966 dr_explicit_realign
,
4969 ptr
= copy_ssa_name (dataref_ptr
, NULL
);
4970 new_stmt
= gimple_build_assign_with_ops
4971 (BIT_AND_EXPR
, ptr
, dataref_ptr
,
4973 (TREE_TYPE (dataref_ptr
),
4974 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
4975 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4977 = build2 (MEM_REF
, vectype
, ptr
,
4978 build_int_cst (reference_alias_ptr_type
4979 (DR_REF (first_dr
)), 0));
4980 vec_dest
= vect_create_destination_var (scalar_dest
,
4982 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
4983 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4984 gimple_assign_set_lhs (new_stmt
, new_temp
);
4985 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
4986 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
4987 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4990 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
4991 TYPE_SIZE_UNIT (elem_type
));
4992 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
4993 new_stmt
= gimple_build_assign_with_ops
4994 (BIT_AND_EXPR
, NULL_TREE
, ptr
,
4997 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
4998 ptr
= copy_ssa_name (dataref_ptr
, new_stmt
);
4999 gimple_assign_set_lhs (new_stmt
, ptr
);
5000 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5002 = build2 (MEM_REF
, vectype
, ptr
,
5003 build_int_cst (reference_alias_ptr_type
5004 (DR_REF (first_dr
)), 0));
5007 case dr_explicit_realign_optimized
:
5008 new_temp
= copy_ssa_name (dataref_ptr
, NULL
);
5009 new_stmt
= gimple_build_assign_with_ops
5010 (BIT_AND_EXPR
, new_temp
, dataref_ptr
,
5012 (TREE_TYPE (dataref_ptr
),
5013 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5014 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5016 = build2 (MEM_REF
, vectype
, new_temp
,
5017 build_int_cst (reference_alias_ptr_type
5018 (DR_REF (first_dr
)), 0));
5023 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5024 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
5025 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5026 gimple_assign_set_lhs (new_stmt
, new_temp
);
5027 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5029 /* 3. Handle explicit realignment if necessary/supported.
5031 vec_dest = realign_load (msq, lsq, realignment_token) */
5032 if (alignment_support_scheme
== dr_explicit_realign_optimized
5033 || alignment_support_scheme
== dr_explicit_realign
)
5035 lsq
= gimple_assign_lhs (new_stmt
);
5036 if (!realignment_token
)
5037 realignment_token
= dataref_ptr
;
5038 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5040 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR
,
5043 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5044 gimple_assign_set_lhs (new_stmt
, new_temp
);
5045 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5047 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
5050 if (i
== vec_num
- 1 && j
== ncopies
- 1)
5051 add_phi_arg (phi
, lsq
,
5052 loop_latch_edge (containing_loop
),
5058 /* 4. Handle invariant-load. */
5059 if (inv_p
&& !bb_vinfo
)
5061 gimple_stmt_iterator gsi2
= *gsi
;
5062 gcc_assert (!grouped_load
);
5064 new_temp
= vect_init_vector (stmt
, scalar_dest
,
5066 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5071 tree perm_mask
= perm_mask_for_reverse (vectype
);
5072 new_temp
= permute_vec_elements (new_temp
, new_temp
,
5073 perm_mask
, stmt
, gsi
);
5074 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5077 /* Collect vector loads and later create their permutation in
5078 vect_transform_grouped_load (). */
5079 if (grouped_load
|| slp_perm
)
5080 VEC_quick_push (tree
, dr_chain
, new_temp
);
5082 /* Store vector loads in the corresponding SLP_NODE. */
5083 if (slp
&& !slp_perm
)
5084 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
5089 if (slp
&& !slp_perm
)
5094 if (!vect_transform_slp_perm_load (stmt
, dr_chain
, gsi
, vf
,
5095 slp_node_instance
, false))
5097 VEC_free (tree
, heap
, dr_chain
);
5106 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
5107 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5112 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5114 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5115 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5119 VEC_free (tree
, heap
, dr_chain
);
5125 /* Function vect_is_simple_cond.
5128 LOOP - the loop that is being vectorized.
5129 COND - Condition that is checked for simple use.
5132 *COMP_VECTYPE - the vector type for the comparison.
5134 Returns whether a COND can be vectorized. Checks whether
5135 condition operands are supportable using vec_is_simple_use. */
5138 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
5139 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
5143 enum vect_def_type dt
;
5144 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
5146 if (!COMPARISON_CLASS_P (cond
))
5149 lhs
= TREE_OPERAND (cond
, 0);
5150 rhs
= TREE_OPERAND (cond
, 1);
5152 if (TREE_CODE (lhs
) == SSA_NAME
)
5154 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
5155 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
5156 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
5159 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
5160 && TREE_CODE (lhs
) != FIXED_CST
)
5163 if (TREE_CODE (rhs
) == SSA_NAME
)
5165 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
5166 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
5167 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
5170 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
5171 && TREE_CODE (rhs
) != FIXED_CST
)
5174 *comp_vectype
= vectype1
? vectype1
: vectype2
;
5178 /* vectorizable_condition.
5180 Check if STMT is conditional modify expression that can be vectorized.
5181 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5182 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5185 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
5186 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
5187 else caluse if it is 2).
5189 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5192 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
5193 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
5196 tree scalar_dest
= NULL_TREE
;
5197 tree vec_dest
= NULL_TREE
;
5198 tree cond_expr
, then_clause
, else_clause
;
5199 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5200 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5201 tree comp_vectype
= NULL_TREE
;
5202 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
5203 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
5204 tree vec_compare
, vec_cond_expr
;
5206 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5208 enum vect_def_type dt
, dts
[4];
5209 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5211 enum tree_code code
;
5212 stmt_vec_info prev_stmt_info
= NULL
;
5214 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5215 VEC (tree
, heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
5216 VEC (tree
, heap
) *vec_oprnds2
= NULL
, *vec_oprnds3
= NULL
;
5218 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
5221 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5223 gcc_assert (ncopies
>= 1);
5224 if (reduc_index
&& ncopies
> 1)
5225 return false; /* FORNOW */
5227 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
5230 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5233 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5234 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
5238 /* FORNOW: not yet supported. */
5239 if (STMT_VINFO_LIVE_P (stmt_info
))
5241 if (vect_print_dump_info (REPORT_DETAILS
))
5242 fprintf (vect_dump
, "value used after loop.");
5246 /* Is vectorizable conditional operation? */
5247 if (!is_gimple_assign (stmt
))
5250 code
= gimple_assign_rhs_code (stmt
);
5252 if (code
!= COND_EXPR
)
5255 cond_expr
= gimple_assign_rhs1 (stmt
);
5256 then_clause
= gimple_assign_rhs2 (stmt
);
5257 else_clause
= gimple_assign_rhs3 (stmt
);
5259 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
5264 if (TREE_CODE (then_clause
) == SSA_NAME
)
5266 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
5267 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5268 &then_def_stmt
, &def
, &dt
))
5271 else if (TREE_CODE (then_clause
) != INTEGER_CST
5272 && TREE_CODE (then_clause
) != REAL_CST
5273 && TREE_CODE (then_clause
) != FIXED_CST
)
5276 if (TREE_CODE (else_clause
) == SSA_NAME
)
5278 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
5279 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5280 &else_def_stmt
, &def
, &dt
))
5283 else if (TREE_CODE (else_clause
) != INTEGER_CST
5284 && TREE_CODE (else_clause
) != REAL_CST
5285 && TREE_CODE (else_clause
) != FIXED_CST
)
5290 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
5291 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
5298 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
5299 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
5300 vec_oprnds2
= VEC_alloc (tree
, heap
, 1);
5301 vec_oprnds3
= VEC_alloc (tree
, heap
, 1);
5305 scalar_dest
= gimple_assign_lhs (stmt
);
5306 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5308 /* Handle cond expr. */
5309 for (j
= 0; j
< ncopies
; j
++)
5311 gimple new_stmt
= NULL
;
5316 VEC (tree
, heap
) *ops
= VEC_alloc (tree
, heap
, 4);
5317 VEC (slp_void_p
, heap
) *vec_defs
;
5319 vec_defs
= VEC_alloc (slp_void_p
, heap
, 4);
5320 VEC_safe_push (tree
, heap
, ops
, TREE_OPERAND (cond_expr
, 0));
5321 VEC_safe_push (tree
, heap
, ops
, TREE_OPERAND (cond_expr
, 1));
5322 VEC_safe_push (tree
, heap
, ops
, then_clause
);
5323 VEC_safe_push (tree
, heap
, ops
, else_clause
);
5324 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
5325 vec_oprnds3
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5326 vec_oprnds2
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5327 vec_oprnds1
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5328 vec_oprnds0
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5330 VEC_free (tree
, heap
, ops
);
5331 VEC_free (slp_void_p
, heap
, vec_defs
);
5337 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
5339 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
5340 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
5343 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
5345 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
5346 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
5347 if (reduc_index
== 1)
5348 vec_then_clause
= reduc_def
;
5351 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
5353 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
5354 NULL
, >emp
, &def
, &dts
[2]);
5356 if (reduc_index
== 2)
5357 vec_else_clause
= reduc_def
;
5360 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
5362 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
5363 NULL
, >emp
, &def
, &dts
[3]);
5369 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
5370 VEC_pop (tree
, vec_oprnds0
));
5371 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
5372 VEC_pop (tree
, vec_oprnds1
));
5373 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
5374 VEC_pop (tree
, vec_oprnds2
));
5375 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
5376 VEC_pop (tree
, vec_oprnds3
));
5381 VEC_quick_push (tree
, vec_oprnds0
, vec_cond_lhs
);
5382 VEC_quick_push (tree
, vec_oprnds1
, vec_cond_rhs
);
5383 VEC_quick_push (tree
, vec_oprnds2
, vec_then_clause
);
5384 VEC_quick_push (tree
, vec_oprnds3
, vec_else_clause
);
5387 /* Arguments are ready. Create the new vector stmt. */
5388 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vec_cond_lhs
)
5390 vec_cond_rhs
= VEC_index (tree
, vec_oprnds1
, i
);
5391 vec_then_clause
= VEC_index (tree
, vec_oprnds2
, i
);
5392 vec_else_clause
= VEC_index (tree
, vec_oprnds3
, i
);
5394 vec_compare
= build2 (TREE_CODE (cond_expr
), vectype
,
5395 vec_cond_lhs
, vec_cond_rhs
);
5396 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
5397 vec_compare
, vec_then_clause
, vec_else_clause
);
5399 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
5400 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5401 gimple_assign_set_lhs (new_stmt
, new_temp
);
5402 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5404 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
5411 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5413 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5415 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5418 VEC_free (tree
, heap
, vec_oprnds0
);
5419 VEC_free (tree
, heap
, vec_oprnds1
);
5420 VEC_free (tree
, heap
, vec_oprnds2
);
5421 VEC_free (tree
, heap
, vec_oprnds3
);
5427 /* Make sure the statement is vectorizable. */
5430 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
5432 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5433 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5434 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
5436 tree scalar_type
, vectype
;
5437 gimple pattern_stmt
;
5438 gimple_seq pattern_def_seq
;
5440 if (vect_print_dump_info (REPORT_DETAILS
))
5442 fprintf (vect_dump
, "==> examining statement: ");
5443 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5446 if (gimple_has_volatile_ops (stmt
))
5448 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
5449 fprintf (vect_dump
, "not vectorized: stmt has volatile operands");
5454 /* Skip stmts that do not need to be vectorized. In loops this is expected
5456 - the COND_EXPR which is the loop exit condition
5457 - any LABEL_EXPRs in the loop
5458 - computations that are used only for array indexing or loop control.
5459 In basic blocks we only analyze statements that are a part of some SLP
5460 instance, therefore, all the statements are relevant.
5462 Pattern statement needs to be analyzed instead of the original statement
5463 if the original statement is not relevant. Otherwise, we analyze both
5464 statements. In basic blocks we are called from some SLP instance
5465 traversal, don't analyze pattern stmts instead, the pattern stmts
5466 already will be part of SLP instance. */
5468 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
5469 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5470 && !STMT_VINFO_LIVE_P (stmt_info
))
5472 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5474 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5475 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5477 /* Analyze PATTERN_STMT instead of the original stmt. */
5478 stmt
= pattern_stmt
;
5479 stmt_info
= vinfo_for_stmt (pattern_stmt
);
5480 if (vect_print_dump_info (REPORT_DETAILS
))
5482 fprintf (vect_dump
, "==> examining pattern statement: ");
5483 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5488 if (vect_print_dump_info (REPORT_DETAILS
))
5489 fprintf (vect_dump
, "irrelevant.");
5494 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5497 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5498 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5500 /* Analyze PATTERN_STMT too. */
5501 if (vect_print_dump_info (REPORT_DETAILS
))
5503 fprintf (vect_dump
, "==> examining pattern statement: ");
5504 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5507 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
5511 if (is_pattern_stmt_p (stmt_info
)
5513 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
5515 gimple_stmt_iterator si
;
5517 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
5519 gimple pattern_def_stmt
= gsi_stmt (si
);
5520 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
5521 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
5523 /* Analyze def stmt of STMT if it's a pattern stmt. */
5524 if (vect_print_dump_info (REPORT_DETAILS
))
5526 fprintf (vect_dump
, "==> examining pattern def statement: ");
5527 print_gimple_stmt (vect_dump
, pattern_def_stmt
, 0, TDF_SLIM
);
5530 if (!vect_analyze_stmt (pattern_def_stmt
,
5531 need_to_vectorize
, node
))
5537 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
5539 case vect_internal_def
:
5542 case vect_reduction_def
:
5543 case vect_nested_cycle
:
5544 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
5545 || relevance
== vect_used_in_outer_by_reduction
5546 || relevance
== vect_unused_in_scope
));
5549 case vect_induction_def
:
5550 case vect_constant_def
:
5551 case vect_external_def
:
5552 case vect_unknown_def_type
:
5559 gcc_assert (PURE_SLP_STMT (stmt_info
));
5561 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
5562 if (vect_print_dump_info (REPORT_DETAILS
))
5564 fprintf (vect_dump
, "get vectype for scalar type: ");
5565 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
5568 vectype
= get_vectype_for_scalar_type (scalar_type
);
5571 if (vect_print_dump_info (REPORT_DETAILS
))
5573 fprintf (vect_dump
, "not SLPed: unsupported data-type ");
5574 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
5579 if (vect_print_dump_info (REPORT_DETAILS
))
5581 fprintf (vect_dump
, "vectype: ");
5582 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
5585 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
5588 if (STMT_VINFO_RELEVANT_P (stmt_info
))
5590 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
5591 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
5592 *need_to_vectorize
= true;
5597 && (STMT_VINFO_RELEVANT_P (stmt_info
)
5598 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
5599 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
5600 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
5601 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
5602 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
5603 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
5604 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
5605 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
5606 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
5607 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
5611 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, node
)
5612 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
5613 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
5614 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
5615 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
5616 || vectorizable_call (stmt
, NULL
, NULL
, node
)
5617 || vectorizable_store (stmt
, NULL
, NULL
, node
)
5618 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
5623 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
5625 fprintf (vect_dump
, "not vectorized: relevant stmt not ");
5626 fprintf (vect_dump
, "supported: ");
5627 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5636 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5637 need extra handling, except for vectorizable reductions. */
5638 if (STMT_VINFO_LIVE_P (stmt_info
)
5639 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
5640 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
5644 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
5646 fprintf (vect_dump
, "not vectorized: live stmt not ");
5647 fprintf (vect_dump
, "supported: ");
5648 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5658 /* Function vect_transform_stmt.
5660 Create a vectorized stmt to replace STMT, and insert it at BSI. */
/* NOTE(review): this chunk is a lossy extraction -- break statements,
   braces and some conditions are missing; code kept verbatim.  */
5663 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
5664 bool *grouped_store
, slp_tree slp_node
,
5665 slp_instance slp_node_instance
)
5667 bool is_store
= false;
5668 gimple vec_stmt
= NULL
;
5669 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
/* Dispatch on the analyzed stmt kind recorded during vect_analyze_stmt,
   calling the matching vectorizable_* transform routine.  */
5672 switch (STMT_VINFO_TYPE (stmt_info
))
5674 case type_demotion_vec_info_type
:
5675 case type_promotion_vec_info_type
:
5676 case type_conversion_vec_info_type
:
5677 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
5681 case induc_vec_info_type
:
/* Induction is not supported under SLP.  */
5682 gcc_assert (!slp_node
);
5683 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
5687 case shift_vec_info_type
:
5688 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
5692 case op_vec_info_type
:
5693 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
5697 case assignment_vec_info_type
:
5698 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
5702 case load_vec_info_type
:
5703 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
5708 case store_vec_info_type
:
5709 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
5711 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
5713 /* In case of interleaving, the whole chain is vectorized when the
5714 last store in the chain is reached. Store stmts before the last
5715 one are skipped, and there vec_stmt_info shouldn't be freed
5717 *grouped_store
= true;
5718 if (STMT_VINFO_VEC_STMT (stmt_info
))
5725 case condition_vec_info_type
:
5726 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
5730 case call_vec_info_type
:
5731 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
/* The call may have been replaced in place; re-fetch STMT from GSI.  */
5732 stmt
= gsi_stmt (*gsi
);
5735 case reduc_vec_info_type
:
5736 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
5741 if (!STMT_VINFO_LIVE_P (stmt_info
))
5743 if (vect_print_dump_info (REPORT_DETAILS
))
5744 fprintf (vect_dump
, "stmt not supported.");
5749 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5750 is being vectorized, but outside the immediately enclosing loop. */
5752 && STMT_VINFO_LOOP_VINFO (stmt_info
)
5753 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5754 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
5755 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
5756 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
5757 || STMT_VINFO_RELEVANT (stmt_info
) ==
5758 vect_used_in_outer_by_reduction
))
5760 struct loop
*innerloop
= LOOP_VINFO_LOOP (
5761 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
5762 imm_use_iterator imm_iter
;
5763 use_operand_p use_p
;
5767 if (vect_print_dump_info (REPORT_DETAILS
))
5768 fprintf (vect_dump
, "Record the vdef for outer-loop vectorization.");
5770 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5771 (to be used when vectorizing outer-loop stmts that use the DEF of
5773 if (gimple_code (stmt
) == GIMPLE_PHI
)
5774 scalar_dest
= PHI_RESULT (stmt
);
5776 scalar_dest
= gimple_assign_lhs (stmt
);
/* Walk all immediate uses of the scalar def; a use outside the inner
   loop identifies the loop-exit phi where the vectorized def is kept.  */
5778 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
5780 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
5782 exit_phi
= USE_STMT (use_p
);
5783 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
5788 /* Handle stmts whose DEF is used outside the loop-nest that is
5789 being vectorized. */
5790 if (STMT_VINFO_LIVE_P (stmt_info
)
5791 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
5793 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
5798 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
5804 /* Remove a group of stores (for SLP or interleaving), free their
/* (comment above is truncated by extraction -- presumably "... free their
   stmt_vec_info" -- TODO confirm against full source).  Walks the group
   chain from FIRST_STMT, removing each store stmt and its vinfo.  */
5808 vect_remove_stores (gimple first_stmt
)
5810 gimple next
= first_stmt
;
5812 gimple_stmt_iterator next_si
;
5816 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
/* Remember the next element of the group before NEXT is released.  */
5818 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
/* For a pattern stmt, remove the original (related) stmt instead.  */
5819 if (is_pattern_stmt_p (stmt_info
))
5820 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
5821 /* Free the attached stmt_vec_info and remove the stmt. */
5822 next_si
= gsi_for_stmt (next
);
5823 unlink_stmt_vdef (next
);
5824 gsi_remove (&next_si
, true);
5825 release_defs (next
);
5826 free_stmt_vec_info (next
);
5832 /* Function new_stmt_vec_info.
5834 Create and initialize a new stmt_vec_info struct for STMT. */
5837 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
5838 bb_vec_info bb_vinfo
)
/* Allocate zeroed storage, then explicitly set every field to its
   documented default.  */
5841 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
5843 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
5844 STMT_VINFO_STMT (res
) = stmt
;
5845 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
5846 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
5847 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
5848 STMT_VINFO_LIVE_P (res
) = false;
5849 STMT_VINFO_VECTYPE (res
) = NULL
;
5850 STMT_VINFO_VEC_STMT (res
) = NULL
;
5851 STMT_VINFO_VECTORIZABLE (res
) = true;
5852 STMT_VINFO_IN_PATTERN_P (res
) = false;
5853 STMT_VINFO_RELATED_STMT (res
) = NULL
;
5854 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
5855 STMT_VINFO_DATA_REF (res
) = NULL
;
/* Data-reference analysis fields start out unknown.  */
5857 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
5858 STMT_VINFO_DR_OFFSET (res
) = NULL
;
5859 STMT_VINFO_DR_INIT (res
) = NULL
;
5860 STMT_VINFO_DR_STEP (res
) = NULL
;
5861 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
/* A phi in a loop-header block may be an induction/reduction -- its
   def type is only known after further analysis.  */
5863 if (gimple_code (stmt
) == GIMPLE_PHI
5864 && is_loop_header_bb_p (gimple_bb (stmt
)))
5865 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
5867 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
5869 STMT_VINFO_SAME_ALIGN_REFS (res
) = VEC_alloc (dr_p
, heap
, 5);
5870 STMT_SLP_TYPE (res
) = loop_vect
;
/* Interleaving-group bookkeeping starts empty.  */
5871 GROUP_FIRST_ELEMENT (res
) = NULL
;
5872 GROUP_NEXT_ELEMENT (res
) = NULL
;
5873 GROUP_SIZE (res
) = 0;
5874 GROUP_STORE_COUNT (res
) = 0;
5875 GROUP_GAP (res
) = 0;
5876 GROUP_SAME_DR_STMT (res
) = NULL
;
5877 GROUP_READ_WRITE_DEPENDENCE (res
) = false;
5883 /* Create a hash table for stmt_vec_info. */
5886 init_stmt_vec_info_vec (void)
/* Must not be called twice without an intervening free.  */
5888 gcc_assert (!stmt_vec_info_vec
);
5889 stmt_vec_info_vec
= VEC_alloc (vec_void_p
, heap
, 50);
5893 /* Free hash table for stmt_vec_info. */
5896 free_stmt_vec_info_vec (void)
/* The vector must have been created by init_stmt_vec_info_vec.  */
5898 gcc_assert (stmt_vec_info_vec
);
5899 VEC_free (vec_void_p
, heap
, stmt_vec_info_vec
);
5903 /* Free stmt vectorization related info. */
5906 free_stmt_vec_info (gimple stmt
)
5908 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5913 /* Check if this statement has a related "pattern stmt"
5914 (introduced by the vectorizer during the pattern recognition
5915 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
5917 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
5919 stmt_vec_info patt_info
5920 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
/* Recursively free each stmt of the pattern def sequence first.  */
5923 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
5926 gimple_stmt_iterator si
;
5927 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
5928 free_stmt_vec_info (gsi_stmt (si
));
5930 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info
));
/* Release the same-alignment refs vector and clear the mapping.  */
5934 VEC_free (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmt_info
));
5935 set_vinfo_for_stmt (stmt
, NULL
);
5940 /* Function get_vectype_for_scalar_type_and_size.
5942 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
/* (truncated by extraction -- presumably "... by the target"; SIZE == 0
   means use the target's preferred SIMD mode -- see below.)  */
5946 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
5948 enum machine_mode inner_mode
= TYPE_MODE (scalar_type
);
5949 enum machine_mode simd_mode
;
5950 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
/* Only integer and float element modes are vectorizable here.  */
5957 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
5958 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
5961 /* We can't build a vector type of elements with alignment bigger than
5963 if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
5966 /* For vector types of elements whose mode precision doesn't
5967 match their types precision we use a element type of mode
5968 precision. The vectorization routines will have to make sure
5969 they support the proper result truncation/extension.
5970 We also make sure to build vector types with INTEGER_TYPE
5971 component type only. */
5972 if (INTEGRAL_TYPE_P (scalar_type
)
5973 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
5974 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
5975 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
5976 TYPE_UNSIGNED (scalar_type
));
5978 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5979 When the component mode passes the above test simply use a type
5980 corresponding to that mode. The theory is that any use that
5981 would cause problems with this will disable vectorization anyway. */
5982 if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
5983 && !INTEGRAL_TYPE_P (scalar_type
)
5984 && !POINTER_TYPE_P (scalar_type
))
5985 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
5987 /* If no size was supplied use the mode the target prefers. Otherwise
5988 lookup a vector mode of the specified size. */
5990 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
5992 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
5993 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
5997 vectype
= build_vector_type (scalar_type
, nunits
);
5998 if (vect_print_dump_info (REPORT_DETAILS
))
6000 fprintf (vect_dump
, "get vectype with %d units of type ", nunits
);
6001 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
6007 if (vect_print_dump_info (REPORT_DETAILS
))
6009 fprintf (vect_dump
, "vectype: ");
6010 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
/* Reject types whose mode the target cannot represent as a vector (and
   which are not integral either).  */
6013 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
6014 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
6016 if (vect_print_dump_info (REPORT_DETAILS
))
6017 fprintf (vect_dump
, "mode not supported by target.");
/* Vector size (in bytes) currently in use for the whole vectorization
   run; 0 until first set by get_vectype_for_scalar_type below.  */
6024 unsigned int current_vector_size
;
6026 /* Function get_vectype_for_scalar_type.
6028 Returns the vector type corresponding to SCALAR_TYPE as supported
/* Wrapper around get_vectype_for_scalar_type_and_size using (and, on
   first success, latching) CURRENT_VECTOR_SIZE.  */
6032 get_vectype_for_scalar_type (tree scalar_type
)
6035 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
6036 current_vector_size
);
/* First successful lookup fixes the vector size for this run.  */
6038 && current_vector_size
== 0)
6039 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
6043 /* Function get_same_sized_vectype
6045 Returns a vector type corresponding to SCALAR_TYPE of size
6046 VECTOR_TYPE if supported by the target. */
6049 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
/* Delegate, using VECTOR_TYPE's mode size (in bytes) as the target size.  */
6051 return get_vectype_for_scalar_type_and_size
6052 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
6055 /* Function vect_is_simple_use.
6058 LOOP_VINFO - the vect info of the loop that is being vectorized.
6059 BB_VINFO - the vect info of the basic block that is being vectorized.
6060 OPERAND - operand of STMT in the loop or bb.
6061 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6063 Returns whether a stmt with OPERAND can be vectorized.
6064 For loops, supportable operands are constants, loop invariants, and operands
6065 that are defined by the current iteration of the loop. Unsupportable
6066 operands are those that are defined by a previous iteration of the loop (as
6067 is the case in reduction/induction computations).
6068 For basic blocks, supportable operands are constants and bb invariants.
6069 For now, operands defined outside the basic block are not supported. */
6072 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
6073 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
6074 tree
*def
, enum vect_def_type
*dt
)
6077 stmt_vec_info stmt_vinfo
;
6078 struct loop
*loop
= NULL
;
6081 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6086 if (vect_print_dump_info (REPORT_DETAILS
))
6088 fprintf (vect_dump
, "vect_is_simple_use: operand ");
6089 print_generic_expr (vect_dump
, operand
, TDF_SLIM
);
/* Constants and invariants are trivially usable.  */
6092 if (CONSTANT_CLASS_P (operand
))
6094 *dt
= vect_constant_def
;
6098 if (is_gimple_min_invariant (operand
))
6101 *dt
= vect_external_def
;
/* Look through PAREN_EXPR (non-associatable copy) wrappers.  */
6105 if (TREE_CODE (operand
) == PAREN_EXPR
)
6107 if (vect_print_dump_info (REPORT_DETAILS
))
6108 fprintf (vect_dump
, "non-associatable copy.");
6109 operand
= TREE_OPERAND (operand
, 0);
6112 if (TREE_CODE (operand
) != SSA_NAME
)
6114 if (vect_print_dump_info (REPORT_DETAILS
))
6115 fprintf (vect_dump
, "not ssa-name.");
6119 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
6120 if (*def_stmt
== NULL
)
6122 if (vect_print_dump_info (REPORT_DETAILS
))
6123 fprintf (vect_dump
, "no def_stmt.");
6127 if (vect_print_dump_info (REPORT_DETAILS
))
6129 fprintf (vect_dump
, "def_stmt: ");
6130 print_gimple_stmt (vect_dump
, *def_stmt
, 0, TDF_SLIM
);
6133 /* Empty stmt is expected only in case of a function argument.
6134 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
6135 if (gimple_nop_p (*def_stmt
))
6138 *dt
= vect_external_def
;
/* Classify the def as external when defined outside the region
   being vectorized (loop or bb).  */
6142 bb
= gimple_bb (*def_stmt
);
6144 if ((loop
&& !flow_bb_inside_loop_p (loop
, bb
))
6145 || (!loop
&& bb
!= BB_VINFO_BB (bb_vinfo
))
6146 || (!loop
&& gimple_code (*def_stmt
) == GIMPLE_PHI
))
6147 *dt
= vect_external_def
;
6150 stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
6151 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
6154 if (*dt
== vect_unknown_def_type
6156 && *dt
== vect_double_reduction_def
6157 && gimple_code (stmt
) != GIMPLE_PHI
))
6159 if (vect_print_dump_info (REPORT_DETAILS
))
6160 fprintf (vect_dump
, "Unsupported pattern.");
6164 if (vect_print_dump_info (REPORT_DETAILS
))
6165 fprintf (vect_dump
, "type of def: %d.",*dt
);
/* Extract *DEF from the defining stmt according to its gimple code.  */
6167 switch (gimple_code (*def_stmt
))
6170 *def
= gimple_phi_result (*def_stmt
);
6174 *def
= gimple_assign_lhs (*def_stmt
);
6178 *def
= gimple_call_lhs (*def_stmt
);
6183 if (vect_print_dump_info (REPORT_DETAILS
))
6184 fprintf (vect_dump
, "unsupported defining stmt: ");
6191 /* Function vect_is_simple_use_1.
6193 Same as vect_is_simple_use_1 but also determines the vector operand
6194 type of OPERAND and stores it to *VECTYPE. If the definition of
6195 OPERAND is vect_uninitialized_def, vect_constant_def or
6196 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6197 is responsible to compute the best suited vector type for the
/* NOTE(review): the comment's self-reference looks like a typo --
   presumably "Same as vect_is_simple_use" -- confirm against full source.  */
6201 vect_is_simple_use_1 (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
6202 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
6203 tree
*def
, enum vect_def_type
*dt
, tree
*vectype
)
6205 if (!vect_is_simple_use (operand
, stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
6209 /* Now get a vector type if the def is internal, otherwise supply
6210 NULL_TREE and leave it up to the caller to figure out a proper
6211 type for the use stmt. */
6212 if (*dt
== vect_internal_def
6213 || *dt
== vect_induction_def
6214 || *dt
== vect_reduction_def
6215 || *dt
== vect_double_reduction_def
6216 || *dt
== vect_nested_cycle
)
6218 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
/* For an irrelevant/dead pattern original, use the pattern stmt's
   vinfo instead.  */
6220 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
6221 && !STMT_VINFO_RELEVANT (stmt_info
)
6222 && !STMT_VINFO_LIVE_P (stmt_info
))
6223 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
6225 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6226 gcc_assert (*vectype
!= NULL_TREE
);
6228 else if (*dt
== vect_uninitialized_def
6229 || *dt
== vect_constant_def
6230 || *dt
== vect_external_def
)
6231 *vectype
= NULL_TREE
;
6239 /* Function supportable_widening_operation
6241 Check whether an operation represented by the code CODE is a
6242 widening operation that is supported by the target platform in
6243 vector form (i.e., when operating on arguments of type VECTYPE_IN
6244 producing a result of type VECTYPE_OUT).
6246 Widening operations we currently support are NOP (CONVERT), FLOAT
6247 and WIDEN_MULT. This function checks if these operations are supported
6248 by the target platform either directly (via vector tree-codes), or via
6252 - CODE1 and CODE2 are codes of vector operations to be used when
6253 vectorizing the operation, if available.
6254 - MULTI_STEP_CVT determines the number of required intermediate steps in
6255 case of multi-step conversion (like char->short->int - in that case
6256 MULTI_STEP_CVT will be 1).
6257 - INTERM_TYPES contains the intermediate type required to perform the
6258 widening operation (short in the above example). */
6261 supportable_widening_operation (enum tree_code code
, gimple stmt
,
6262 tree vectype_out
, tree vectype_in
,
6263 enum tree_code
*code1
, enum tree_code
*code2
,
6264 int *multi_step_cvt
,
6265 VEC (tree
, heap
) **interm_types
)
6267 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6268 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6269 struct loop
*vect_loop
= NULL
;
6270 enum machine_mode vec_mode
;
6271 enum insn_code icode1
, icode2
;
6272 optab optab1
, optab2
;
6273 tree vectype
= vectype_in
;
6274 tree wide_vectype
= vectype_out
;
6275 enum tree_code c1
, c2
;
6277 tree prev_type
, intermediate_type
;
6278 enum machine_mode intermediate_mode
, prev_mode
;
6279 optab optab3
, optab4
;
6281 *multi_step_cvt
= 0;
6283 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
/* Map CODE to the pair of LO/HI (or EVEN/ODD) vector tree codes.  */
6287 case WIDEN_MULT_EXPR
:
6288 /* The result of a vectorized widening operation usually requires
6289 two vectors (because the widened results do not fit into one vector).
6290 The generated vector results would normally be expected to be
6291 generated in the same order as in the original scalar computation,
6292 i.e. if 8 results are generated in each vector iteration, they are
6293 to be organized as follows:
6294 vect1: [res1,res2,res3,res4],
6295 vect2: [res5,res6,res7,res8].
6297 However, in the special case that the result of the widening
6298 operation is used in a reduction computation only, the order doesn't
6299 matter (because when vectorizing a reduction we change the order of
6300 the computation). Some targets can take advantage of this and
6301 generate more efficient code. For example, targets like Altivec,
6302 that support widen_mult using a sequence of {mult_even,mult_odd}
6303 generate the following vectors:
6304 vect1: [res1,res3,res5,res7],
6305 vect2: [res2,res4,res6,res8].
6307 When vectorizing outer-loops, we execute the inner-loop sequentially
6308 (each vectorized inner-loop iteration contributes to VF outer-loop
6309 iterations in parallel). We therefore don't allow to change the
6310 order of the computation in the inner-loop during outer-loop
6312 /* TODO: Another case in which order doesn't *really* matter is when we
6313 widen and then contract again, e.g. (short)((int)x * y >> 8).
6314 Normally, pack_trunc performs an even/odd permute, whereas the
6315 repack from an even/odd expansion would be an interleave, which
6316 would be significantly simpler for e.g. AVX2. */
6317 /* In any case, in order to avoid duplicating the code below, recurse
6318 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6319 are properly set up for the caller. If we fail, we'll continue with
6320 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6322 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
6323 && !nested_in_vect_loop_p (vect_loop
, stmt
)
6324 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
6325 stmt
, vectype_out
, vectype_in
,
6326 code1
, code2
, multi_step_cvt
,
6329 c1
= VEC_WIDEN_MULT_LO_EXPR
;
6330 c2
= VEC_WIDEN_MULT_HI_EXPR
;
6333 case VEC_WIDEN_MULT_EVEN_EXPR
:
6334 /* Support the recursion induced just above. */
6335 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
6336 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
6339 case WIDEN_LSHIFT_EXPR
:
6340 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
6341 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
6345 c1
= VEC_UNPACK_LO_EXPR
;
6346 c2
= VEC_UNPACK_HI_EXPR
;
6350 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
6351 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
6354 case FIX_TRUNC_EXPR
:
6355 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6356 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6357 computing the operation. */
/* On big-endian targets the LO/HI halves swap roles (except for the
   endian-neutral EVEN/ODD form).  */
6364 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
6366 enum tree_code ctmp
= c1
;
6371 if (code
== FIX_TRUNC_EXPR
)
6373 /* The signedness is determined from output operand. */
6374 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
6375 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
6379 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
6380 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
6383 if (!optab1
|| !optab2
)
6386 vec_mode
= TYPE_MODE (vectype
);
6387 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
6388 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
/* Direct single-step support: result mode matches the wide vectype.  */
6394 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
6395 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
6398 /* Check if it's a multi-step conversion that can be done using intermediate
6401 prev_type
= vectype
;
6402 prev_mode
= vec_mode
;
6404 if (!CONVERT_EXPR_CODE_P (code
))
6407 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6408 intermediate steps in promotion sequence. We try
6409 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
6411 *interm_types
= VEC_alloc (tree
, heap
, MAX_INTERM_CVT_STEPS
);
6412 for (i
= 0; i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
6414 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
6416 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
6417 TYPE_UNSIGNED (prev_type
));
6418 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
6419 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
/* Each step needs both the prev->intermediate and the
   intermediate->next expanders.  */
6421 if (!optab3
|| !optab4
6422 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
6423 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
6424 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
6425 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
6426 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
6427 == CODE_FOR_nothing
)
6428 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
6429 == CODE_FOR_nothing
))
6432 VEC_quick_push (tree
, *interm_types
, intermediate_type
);
6433 (*multi_step_cvt
)++;
6435 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
6436 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
6439 prev_type
= intermediate_type
;
6440 prev_mode
= intermediate_mode
;
/* No chain of intermediate steps reached the wide vectype; clean up.  */
6443 VEC_free (tree
, heap
, *interm_types
);
6448 /* Function supportable_narrowing_operation
6450 Check whether an operation represented by the code CODE is a
6451 narrowing operation that is supported by the target platform in
6452 vector form (i.e., when operating on arguments of type VECTYPE_IN
6453 and producing a result of type VECTYPE_OUT).
6455 Narrowing operations we currently support are NOP (CONVERT) and
6456 FIX_TRUNC. This function checks if these operations are supported by
6457 the target platform directly via vector tree-codes.
6460 - CODE1 is the code of a vector operation to be used when
6461 vectorizing the operation, if available.
6462 - MULTI_STEP_CVT determines the number of required intermediate steps in
6463 case of multi-step conversion (like int->short->char - in that case
6464 MULTI_STEP_CVT will be 1).
6465 - INTERM_TYPES contains the intermediate type required to perform the
6466 narrowing operation (short in the above example). */
6469 supportable_narrowing_operation (enum tree_code code
,
6470 tree vectype_out
, tree vectype_in
,
6471 enum tree_code
*code1
, int *multi_step_cvt
,
6472 VEC (tree
, heap
) **interm_types
)
6474 enum machine_mode vec_mode
;
6475 enum insn_code icode1
;
6476 optab optab1
, interm_optab
;
6477 tree vectype
= vectype_in
;
6478 tree narrow_vectype
= vectype_out
;
6480 tree intermediate_type
;
6481 enum machine_mode intermediate_mode
, prev_mode
;
6485 *multi_step_cvt
= 0;
6489 c1
= VEC_PACK_TRUNC_EXPR
;
6492 case FIX_TRUNC_EXPR
:
6493 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
6497 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6498 tree code and optabs used for computing the operation. */
6505 if (code
== FIX_TRUNC_EXPR
)
6506 /* The signedness is determined from output operand. */
6507 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
6509 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
6514 vec_mode
= TYPE_MODE (vectype
);
6515 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
6520 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
6523 /* Check if it's a multi-step conversion that can be done using intermediate
6525 prev_mode
= vec_mode
;
6526 if (code
== FIX_TRUNC_EXPR
)
6527 uns
= TYPE_UNSIGNED (vectype_out
);
6529 uns
= TYPE_UNSIGNED (vectype
);
6531 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6532 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6533 costly than signed. */
6534 if (code
== FIX_TRUNC_EXPR
&& uns
)
6536 enum insn_code icode2
;
6539 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
6541 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
6542 if (interm_optab
!= unknown_optab
6543 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
6544 && insn_data
[icode1
].operand
[0].mode
6545 == insn_data
[icode2
].operand
[0].mode
)
6548 optab1
= interm_optab
;
6553 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6554 intermediate steps in promotion sequence. We try
6555 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6556 *interm_types
= VEC_alloc (tree
, heap
, MAX_INTERM_CVT_STEPS
);
6557 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
6559 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
6561 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
6563 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
6566 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
6567 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
6568 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
6569 == CODE_FOR_nothing
))
6572 VEC_quick_push (tree
, *interm_types
, intermediate_type
);
6573 (*multi_step_cvt
)++;
6575 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
6578 prev_mode
= intermediate_mode
;
6579 optab1
= interm_optab
;
6582 VEC_free (tree
, heap
, *interm_types
);