/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
/* Return a variable of type ELEM_TYPE[NELEMS].  */

tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT and the vector is associated
   with scalar destination SCALAR_DEST.  */

tree
read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
                   tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT.  */

void
write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
                    tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

tree
create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
{
  tree mem_ref, alias_ptr_type;

  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p,
                    bool used_in_pattern)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
  gimple pattern_stmt;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      if (!used_in_pattern)
        {
          imm_use_iterator imm_iter;
          use_operand_p use_p;
          gimple use_stmt;
          tree lhs;
          loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

          if (is_gimple_assign (stmt))
            lhs = gimple_assign_lhs (stmt);
          else
            lhs = gimple_call_lhs (stmt);

          /* This use is out of pattern use, if LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
          if (TREE_CODE (lhs) == SSA_NAME)
            FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              {
                if (is_gimple_debug (USE_STMT (use_p)))
                  continue;
                use_stmt = USE_STMT (use_p);

                if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                  continue;

                if (vinfo_for_stmt (use_stmt)
                    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
                  break;
              }
        }

      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "last stmt in pattern. don't mark"
                 " relevant/live.");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  imm_use_iterator imm_iter;

  *relevant = vect_unused_in_scope;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
         != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
      {
        basic_block bb = gimple_bb (USE_STMT (use_p));
        if (!flow_bb_inside_loop_p (loop, bb))
          {
            if (vect_print_dump_info (REPORT_DETAILS))
              fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

            if (is_gimple_debug (USE_STMT (use_p)))
              continue;

            /* We expect all such uses to be in the loop exit phis
               (because of loop closed form)  */
            gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
            gcc_assert (bb == single_exit (loop)->dest);
          }
      }

  return (*live_p || *relevant);
}
/* Function exist_non_indexing_operands_for_use_p.

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */
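/* For instance (an illustrative sketch, not taken from the sources): in a
   load like "x_1 = a[i_2]" the use of i_2 only feeds the array index, so
   i_2 itself need not be vectorized, whereas in the copy "x_1 = i_2" the
   same name is a real operand of the stmt.  */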
bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that its of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (!gimple_assign_copy_p (stmt))
    return false;
  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant

   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
     which does not need to be directly vectorized, then the liveness/relevance
     of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
     skip DEF_STMT cause it had already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
     be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
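/* For example (an illustrative sketch): for "a[i_7] = x_3" the use of i_7
   falls under case 1 and leaves the def-stmt of i_7 unmarked, while a use
   whose def-stmt sits in the enclosing outer loop falls under case 3a and
   gets its relevance adjusted for the outer/inner-loop relationship.  */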
bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist,
             bool force)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
      return true;
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):  */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
                      || STMT_VINFO_DEF_TYPE (stmt_vinfo)
                         == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
                      is_pattern_stmt_p (stmt_vinfo));
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:
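     (an illustrative sketch only; the stmt numbering below is hypothetical)

     for i ...
       stmt 1:  i_1 = i_0 + 1         <-- loop control
       stmt 2:  x_2 = b[i_1]          <-- data-ref computation to vectorize
       stmt 3:  p_3 = &b[i_1]         <-- used only for addressing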
   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */
bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant, tmp_relevant;
  enum vect_def_type def_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p, false);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
        }
    }

  /* 2. Process_worklist */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant and live/dead according to the
         liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the liveness/relevance as follows:
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      tmp_relevant = relevant;
      switch (def_type)
        {
          case vect_reduction_def:
            switch (tmp_relevant)
              {
              case vect_unused_in_scope:
                relevant = vect_used_by_reduction;
                break;

              case vect_used_by_reduction:
                if (gimple_code (stmt) == GIMPLE_PHI)
                  break;
                /* fall through */

              default:
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of reduction.");
                VEC_free (gimple, heap, worklist);
                return false;
              }
            break;

          case vect_nested_cycle:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_in_outer_by_reduction
                && tmp_relevant != vect_used_in_outer)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of nested cycle.");
                VEC_free (gimple, heap, worklist);
                return false;
              }
            break;

          case vect_double_reduction_def:
            if (tmp_relevant != vect_unused_in_scope
                && tmp_relevant != vect_used_by_reduction)
              {
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported use of double reduction.");
                VEC_free (gimple, heap, worklist);
                return false;
              }
            break;

          default:
            break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (is_gimple_assign (stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
              tree op = gimple_assign_rhs1 (stmt);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
                                    live_p, relevant, &worklist, false)
                      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
                                       live_p, relevant, &worklist, false))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                  i = 2;
                }
              for (; i < gimple_num_ops (stmt); i++)
                {
                  op = gimple_op (stmt, i);
                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
          else if (is_gimple_call (stmt))
            {
              for (i = 0; i < gimple_call_num_args (stmt); i++)
                {
                  tree arg = gimple_call_arg (stmt, i);
                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
                                    &worklist, false))
                    {
                      VEC_free (gimple, heap, worklist);
                      return false;
                    }
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                              &worklist, false))
              {
                VEC_free (gimple, heap, worklist);
                return false;
              }
          }

      if (STMT_VINFO_GATHER_P (stmt_vinfo))
        {
          tree off;
          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          gcc_assert (decl);
          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
                            &worklist, true))
            {
              VEC_free (gimple, heap, worklist);
              return false;
            }
        }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */
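/* A worked example (a sketch assuming the default cost of 1 per vector_stmt):
   with ncopies == 2 and one constant operand, inside_cost = 2 * 1 = 2, and
   outside_cost = 1 for building the invariant vector outside the loop.  */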
void
vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt, slp_tree slp_node)
{
  int i;
  int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        outside_cost += vect_get_stmt_cost (vector_stmt);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is normally
   zero for single-step promotions and demotions.  It will be one if
   two-step promotion/demotion is required, and so on.  Each additional
   step doubles the number of instructions required.  */
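/* For example (a sketch assuming a per-stmt cost of 1): a two-step
   promotion/demotion (PWR == 1) costs 1 + 2 = 3 vector stmts per copy and a
   three-step one (PWR == 2) costs 1 + 2 + 4 = 7, i.e. each additional step
   doubles the number of instructions required.  */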
void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt, int pwr)
{
  int i, tmp;
  int inside_cost = 0, outside_cost = 0, single_stmt_cost;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  single_stmt_cost = vect_get_stmt_cost (vec_promote_demote);
  for (i = 0; i < pwr + 1; i++)
    {
      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
      inside_cost += vect_pow2 (tmp) * single_stmt_cost;
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    {
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        outside_cost += vect_get_stmt_cost (vector_stmt);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs in STMT_INFO.  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
}
/* Function vect_cost_group_size

   For grouped load or store, return the group_size only if it is the first
   load or store of a group, else return 1.  This ensures that group size is
   only returned once per group.  */

int
vect_cost_group_size (stmt_vec_info stmt_info)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);

  if (first_stmt == STMT_VINFO_STMT (stmt_info))
    return GROUP_SIZE (stmt_info);

  return 1;
}
/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */
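/* For example (a sketch with unit costs): a permute-based grouped store with
   group_size == 4 and ncopies == 1 adds 1 * log2(4) * 4 = 8 interleave
   (vec_perm) operations on top of the cost of the stores themselves.  */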
void
vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
                       bool store_lanes_p, enum vect_def_type dt,
                       slp_tree slp_node)
{
  int group_size;
  gimple first_stmt;
  unsigned int inside_cost = 0, outside_cost = 0;
  struct data_reference *first_dr;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  if (dt == vect_constant_def || dt == vect_external_def)
    outside_cost = vect_get_stmt_cost (scalar_to_vec);

  /* Grouped access?  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (slp_node)
        first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
      else
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          group_size = vect_cost_group_size (stmt_info);
        }

      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = STMT_VINFO_DATA_REF (stmt_info);
    }

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (!store_lanes_p && group_size > 1)
    {
      /* Uses a high and low interleave operation for each needed permute.  */
      inside_cost = ncopies * exact_log2(group_size) * group_size
        * vect_get_stmt_cost (vec_perm);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                 group_size);
    }

  /* Costs of the stores.  */
  vect_get_store_cost (first_dr, ncopies, &inside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (struct data_reference *dr, int ncopies,
                     unsigned int *inside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_store);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: aligned.");

        break;
      }

    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
                                                          vectype, DR_MISALIGNMENT (dr));

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
                   "hardware.");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: unsupported access.");

        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, the last access
   has the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */
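/* For example (a sketch with unit costs): a permute-based grouped load with
   group_size == 4 and ncopies == 1 adds 1 * log2(4) * 4 = 8 extract
   (vec_perm) operations on top of the cost of the loads themselves.  */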
void
vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
                      slp_tree slp_node)
{
  int group_size;
  gimple first_stmt;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
  unsigned int inside_cost = 0, outside_cost = 0;

  /* The SLP costs were already calculated during SLP tree build.  */
  if (PURE_SLP_STMT (stmt_info))
    return;

  /* Grouped accesses?  */
  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
    {
      group_size = vect_cost_group_size (stmt_info);
      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
    }
  /* Not a grouped access.  */
  else
    {
      group_size = 1;
      first_dr = dr;
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (!load_lanes_p && group_size > 1)
    {
      /* Uses an even and odd extract operations for each needed permute.  */
      inside_cost = ncopies * exact_log2(group_size) * group_size
        * vect_get_stmt_cost (vec_perm);

      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
                 group_size);
    }

  /* The loads themselves.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    {
      /* N scalar loads plus gathering them into a vector.  */
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      inside_cost += (vect_get_stmt_cost (scalar_load) * ncopies
                      * TYPE_VECTOR_SUBPARTS (vectype));
      inside_cost += ncopies
        * targetm.vectorize.builtin_vectorization_cost (vec_construct,
                                                        vectype, 0);
    }
  else
    vect_get_load_cost (first_dr, ncopies,
                        ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
                         || group_size > 1 || slp_node),
                        &inside_cost, &outside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
             "outside_cost = %d .", inside_cost, outside_cost);

  /* Set the costs either in STMT_INFO or SLP_NODE (if exists).  */
  stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
  stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (struct data_reference *dr, int ncopies,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *outside_cost)
{
  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);

  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += ncopies * vect_get_stmt_cost (vector_load);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: aligned.");

        break;
      }
    case dr_unaligned_supported:
      {
        gimple stmt = DR_STMT (dr);
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
        tree vectype = STMT_VINFO_VECTYPE (stmt_info);

        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += ncopies
          * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
                                                          vectype, DR_MISALIGNMENT (dr));
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
                   "hardware.");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vec_perm));

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           outside costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += vect_get_stmt_cost (vector_stmt);

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: explicit realign");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
                   "pipelined.");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost)
          {
            *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
            if (targetm.vectorize.builtin_mask_for_load)
              *outside_cost += vect_get_stmt_cost (vector_stmt);
          }

        *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
                                   + vect_get_stmt_cost (vec_perm));

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump,
                   "vect_model_load_cost: explicit realign optimized");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: unsupported access.");

        break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT.  */

static void
vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (stmt, new_stmt, gsi);
  else
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

      if (loop_vinfo)
        {
          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
          basic_block new_bb;
          edge pe;

          if (nested_in_vect_loop_p (loop, stmt))
            loop = loop->inner;

          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        {
          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
          basic_block bb;
          gimple_stmt_iterator gsi_bb_start;

          gcc_assert (bb_vinfo);
          bb = BB_VINFO_BB (bb_vinfo);
          gsi_bb_start = gsi_after_labels (bb);
          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created new init_stmt: ");
      print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
    }
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at BSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT.  */
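/* An illustrative sketch (hypothetical names): for TYPE == V4SI and VAL == 5,
   an INIT_STMT of the form
       cst_.7 = { 5, 5, 5, 5 };
   is emitted at GSI (or in the loop preheader when GSI is NULL) and the
   SSA name cst_.7 is returned for use in the vectorized form of STMT.  */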
tree
vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
{
  tree new_var;
  gimple init_stmt;
  tree vec_oprnd;
  tree new_temp;

  if (TREE_CODE (type) == VECTOR_TYPE
      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
    {
      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          if (CONSTANT_CLASS_P (val))
            val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
          else
            {
              new_var = create_tmp_reg (TREE_TYPE (type), NULL);
              add_referenced_var (new_var);
              init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
                                                        new_var, val,
                                                        NULL_TREE);
              new_temp = make_ssa_name (new_var, init_stmt);
              gimple_assign_set_lhs (init_stmt, new_temp);
              vect_init_vector_1 (stmt, init_stmt, gsi);
              val = new_temp;
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
  add_referenced_var (new_var);
  init_stmt = gimple_build_assign (new_var, val);
  new_temp = make_ssa_name (new_var, init_stmt);
  gimple_assign_set_lhs (init_stmt, new_temp);
  vect_init_vector_1 (stmt, init_stmt, gsi);
  vec_oprnd = gimple_assign_lhs (init_stmt);

  return vec_oprnd;
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT.  This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */
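/* A short illustration (hypothetical names): for "z_5 = x_3 + y_4" where x_3
   is defined inside the loop by an already-vectorized stmt "vx.0 = ...", the
   call for operand x_3 simply returns vx.0, while for a constant or loop
   invariant operand a new "{val, val, ..., val}" initialization is emitted
   via vect_init_vector and its def is returned.  */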
tree
vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
{
  tree vec_oprnd;
  gimple vec_stmt;
  gimple def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  tree def;
  enum vect_def_type dt;
  bool is_simple_use;
  tree vector_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
                                      &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def = ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, " def_stmt = ");
          print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
        gcc_assert (vector_type);
        nunits = TYPE_VECTOR_SUBPARTS (vector_type);

        if (scalar_def)
          *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        return vect_init_vector (stmt, op, vector_type, NULL);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_external_def:
      {
        vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
        gcc_assert (vector_type);

        if (scalar_def)
          *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        return vect_init_vector (stmt, def, vector_type, NULL);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_internal_def:
      {
        if (scalar_def)
          *scalar_def = NULL/* FIXME tuples: def_stmt*/;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);

        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        /* Get vectorized pattern statement.  */
        if (!vec_stmt
            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
            && !STMT_VINFO_RELEVANT (def_stmt_info))
          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
        gcc_assert (vec_stmt);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else if (is_gimple_call (vec_stmt))
          vec_oprnd = gimple_call_lhs (vec_stmt);
        else
          vec_oprnd = gimple_assign_lhs (vec_stmt);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
    case vect_double_reduction_def:
    case vect_nested_cycle:
      {
        struct loop *loop;

        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
        loop = (gimple_bb (def_stmt))->loop_father;

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
      }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        if (gimple_code (vec_stmt) == GIMPLE_PHI)
          vec_oprnd = PHI_RESULT (vec_stmt);
        else
          vec_oprnd = gimple_get_lhs (vec_stmt);
        return vec_oprnd;
      }

    default:
      gcc_unreachable ();
    }
}
/* Function vect_get_vec_def_for_stmt_copy

   Return a vector-def for an operand.  This function is used when the
   vectorized stmt to be created (by the caller to this function) is a "copy"
   created in case the vectorized result cannot fit in one vector, and several
   copies of the vector-stmt are required.  In this case the vector-def is
   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
   of the stmt that defines VEC_OPRND.
   DT is the type of the vector def VEC_OPRND.

   In case the vectorization factor (VF) is bigger than the number
   of elements that can fit in a vectype (nunits), we have to generate
   more than one vector stmt to vectorize the scalar stmt.  This situation
   arises when there are multiple data-types operated upon in the loop; the
   smallest data-type determines the VF, and as a result, when vectorizing
   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
   vector stmt (each computing a vector of 'nunits' results, and together
   computing 'VF' results in each iteration).  This function is called when
   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
   which VF=16 and nunits=4, so the number of copies required is 4):

   scalar stmt:         vectorized into:          STMT_VINFO_RELATED_STMT

   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
                        VS1.1:  vx.1 = memref1      VS1.2
                        VS1.2:  vx.2 = memref2      VS1.3
                        VS1.3:  vx.3 = memref3

   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
                        VSnew.3:  vz3 = vx.3 + ...

   The vectorization of S1 is explained in vectorizable_load.
   The vectorization of S2:
        To create the first vector-stmt out of the 4 copies - VSnew.0 -
   the function 'vect_get_vec_def_for_operand' is called to
   get the relevant vector-def for each operand of S2.  For operand x it
   returns the vector-def 'vx.0'.

        To create the remaining copies of the vector-stmt (VSnew.j), this
   function is called to get the relevant vector-def for each operand.  It is
   obtained from the respective VS1.j stmt, which is recorded in the
   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.

        For example, to obtain the vector-def 'vx.1' in order to create the
   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
   and return its def ('vx.1').
   Overall, to create the above sequence this function will be called 3 times:
        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
tree
vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
{
  gimple vec_stmt_for_operand;
  stmt_vec_info def_stmt_info;

  /* Do nothing; can reuse same def.  */
  if (dt == vect_external_def || dt == vect_constant_def)
    return vec_oprnd;

  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
  gcc_assert (def_stmt_info);
  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
  gcc_assert (vec_stmt_for_operand);
  vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
  else
    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
  return vec_oprnd;
}
/* Get vectorized definitions for the operands to create a copy of an original
   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
                                 VEC(tree,heap) **vec_oprnds0,
                                 VEC(tree,heap) **vec_oprnds1)
{
  tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);

  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

  if (vec_oprnds1 && *vec_oprnds1)
    {
      vec_oprnd = VEC_pop (tree, *vec_oprnds1);
      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
      VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
    }
}
/* Get vectorized definitions for OP0 and OP1.
   REDUC_INDEX is the index of reduction operand in case of reduction,
   and -1 otherwise.  */

void
vect_get_vec_defs (tree op0, tree op1, gimple stmt,
                   VEC (tree, heap) **vec_oprnds0,
                   VEC (tree, heap) **vec_oprnds1,
                   slp_tree slp_node, int reduc_index)
{
  if (slp_node)
    {
      int nops = (op1 == NULL_TREE) ? 1 : 2;
      VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
      VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);

      VEC_quick_push (tree, ops, op0);
      if (op1)
        VEC_quick_push (tree, ops, op1);

      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);

      *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
      if (op1)
        *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);

      VEC_free (tree, heap, ops);
      VEC_free (slp_void_p, heap, vec_defs);
    }
  else
    {
      tree vec_oprnd;

      *vec_oprnds0 = VEC_alloc (tree, heap, 1);
      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
      VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);

      if (op1)
        {
          *vec_oprnds1 = VEC_alloc (tree, heap, 1);
          vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
          VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
        }
    }
}
/* Function vect_finish_stmt_generation.

   Insert a new stmt.  */

void
vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);

  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);

  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
                                                   bb_vinfo));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
    }

  gimple_set_location (vec_stmt, gimple_location (stmt));
}
/* Checks if CALL can be vectorized in type VECTYPE.  Returns
   a function declaration if the target has a vectorized version
   of the function, or NULL_TREE if the function cannot be vectorized.  */

tree
vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
{
  tree fndecl = gimple_call_fndecl (call);

  /* We only handle functions that do not read or clobber memory -- i.e.
     const or novops ones.  */
  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
    return NULL_TREE;

  if (fndecl == NULL_TREE
      || TREE_CODE (fndecl) != FUNCTION_DECL
      || !DECL_BUILT_IN (fndecl))
    return NULL_TREE;

  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
                                                        vectype_in);
}
/* Function vectorizable_call.

   Check if STMT performs a function call that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
bool
vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                   slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype_out, vectype_in;
  int nunits_in;
  int nunits_out;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  tree fndecl, new_temp, def, rhs_type;
  gimple def_stmt;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  int ncopies, j;
  VEC(tree, heap) *vargs = NULL;
  enum { NARROW, NONE, WIDEN } modifier;
  size_t i, nargs;
  tree lhs;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is STMT a vectorizable call?  */
  if (!is_gimple_call (stmt))
    return false;

  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  if (stmt_can_throw_internal (stmt))
    return false;

  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Process function arguments.  */
  rhs_type = NULL_TREE;
  vectype_in = NULL_TREE;
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has more than three arguments, we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  No arguments is also not good.  */
  if (nargs == 0 || nargs > 3)
    return false;

  for (i = 0; i < nargs; i++)
    {
      tree opvectype;

      op = gimple_call_arg (stmt, i);

      /* We can only handle calls with arguments of the same type.  */
      if (rhs_type
          && !types_compatible_p (rhs_type, TREE_TYPE (op)))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument types differ.");
          return false;
        }
      if (!rhs_type)
        rhs_type = TREE_TYPE (op);

      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &dt[i], &opvectype))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }

      if (!vectype_in)
        vectype_in = opvectype;
      else if (opvectype
               && opvectype != vectype_in)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "argument vector types differ.");
          return false;
        }
    }
  /* If all arguments are external or constant defs use a vector type with
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }
      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in == nunits_out / 2)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else if (nunits_out == nunits_in / 2)
    modifier = WIDEN;
  else
    return false;

  /* For now, we only vectorize functions if a target specific builtin
     is available.  TODO -- in some cases, it might be profitable to
     insert the calls for pieces of the vector, in order to be able
     to vectorize other operations in the loop.  */
  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
  if (fndecl == NULL_TREE)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "function is not vectorizable.");
      return false;
    }

  gcc_assert (!gimple_vuse (stmt));

  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_call ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform call.");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  prev_stmt_info = NULL;
  switch (modifier)
    {
    case NONE:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs);
          else
            VEC_truncate (tree, vargs, 0);

          if (slp_node)
            {
              VEC (slp_void_p, heap) *vec_defs
                = VEC_alloc (slp_void_p, heap, nargs);
              VEC (tree, heap) *vec_oprnds0;

              for (i = 0; i < nargs; i++)
                VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0
                = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);

              /* Arguments are ready.  Create the new vector stmt.  */
              FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
                {
                  size_t k;
                  for (k = 0; k < nargs; k++)
                    {
                      VEC (tree, heap) *vec_oprndsk
                        = (VEC (tree, heap) *)
                          VEC_index (slp_void_p, vec_defs, k);
                      VEC_replace (tree, vargs, k,
                                   VEC_index (tree, vec_oprndsk, i));
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                  new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  VEC (tree, heap) *vec_oprndsi
                    = (VEC (tree, heap) *)
                      VEC_index (slp_void_p, vec_defs, i);
                  VEC_free (tree, heap, vec_oprndsi);
                }
              VEC_free (slp_void_p, heap, vec_defs);
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                vec_oprnd0
                  = vect_get_vec_def_for_operand (op, stmt, NULL);
              else
                {
                  vec_oprnd0 = gimple_call_arg (new_stmt, i);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }
      break;

    case NARROW:
      for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
          if (j == 0)
            vargs = VEC_alloc (tree, heap, nargs * 2);
          else
            VEC_truncate (tree, vargs, 0);

          if (slp_node)
            {
              VEC (slp_void_p, heap) *vec_defs
                = VEC_alloc (slp_void_p, heap, nargs);
              VEC (tree, heap) *vec_oprnds0;

              for (i = 0; i < nargs; i++)
                VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
              vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
              vec_oprnds0
                = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);

              /* Arguments are ready.  Create the new vector stmt.  */
              for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
                   i += 2)
                {
                  size_t k;
                  VEC_truncate (tree, vargs, 0);
                  for (k = 0; k < nargs; k++)
                    {
                      VEC (tree, heap) *vec_oprndsk
                        = (VEC (tree, heap) *)
                          VEC_index (slp_void_p, vec_defs, k);
                      VEC_quick_push (tree, vargs,
                                      VEC_index (tree, vec_oprndsk, i));
                      VEC_quick_push (tree, vargs,
                                      VEC_index (tree, vec_oprndsk, i + 1));
                    }
                  new_stmt = gimple_build_call_vec (fndecl, vargs);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                                  new_stmt);
                }

              for (i = 0; i < nargs; i++)
                {
                  VEC (tree, heap) *vec_oprndsi
                    = (VEC (tree, heap) *)
                      VEC_index (slp_void_p, vec_defs, i);
                  VEC_free (tree, heap, vec_oprndsi);
                }
              VEC_free (slp_void_p, heap, vec_defs);
              continue;
            }

          for (i = 0; i < nargs; i++)
            {
              op = gimple_call_arg (stmt, i);
              if (j == 0)
                {
                  vec_oprnd0
                    = vect_get_vec_def_for_operand (op, stmt, NULL);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }
              else
                {
                  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
                  vec_oprnd0
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
                  vec_oprnd1
                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
                }

              VEC_quick_push (tree, vargs, vec_oprnd0);
              VEC_quick_push (tree, vargs, vec_oprnd1);
            }

          new_stmt = gimple_build_call_vec (fndecl, vargs);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);

          if (j == 0)
            STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
          else
            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

          prev_stmt_info = vinfo_for_stmt (new_stmt);
        }

      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
      break;

    case WIDEN:
      /* No current target implements this case.  */
      return false;
    }

  VEC_free (tree, heap, vargs);

  /* Update the exception handling table with the vector stmt if necessary.  */
  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
    gimple_purge_dead_eh_edges (gimple_bb (stmt));

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  type = TREE_TYPE (scalar_dest);
  if (is_pattern_stmt_p (stmt_info))
    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    lhs = gimple_call_lhs (stmt);
  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;

  return true;
}
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT is the original scalar stmt that we are vectorizing.  */

gimple
vect_gen_widened_results_half (enum tree_code code,
                               tree decl,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               gimple stmt)
{
  gimple new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (code == CALL_EXPR)
    {
      /* Target specific support  */
      if (op_type == binary_op)
        new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
      else
        new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);
    }
  else
    {
      /* Generic support */
      gcc_assert (op_type == TREE_CODE_LENGTH (code));
      if (op_type != binary_op)
        vec_oprnd1 = NULL;
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
                                               vec_oprnd1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
    }
  vect_finish_stmt_generation (stmt, new_stmt, gsi);

  return new_stmt;
}
/* Get vectorized definitions for loop-based vectorization.  For the first
   operand we call vect_get_vec_def_for_operand() (with OPRND containing
   scalar operand), and for the rest we get a copy with
   vect_get_vec_def_for_stmt_copy() using the previous vector definition
   (stored in OPRND).  See vect_get_vec_def_for_stmt_copy() for details.
   The vectors are collected into VEC_OPRNDS.  */

void
vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
                          VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
{
  tree vec_oprnd;

  /* Get first vector operand.  */
  /* All the vector operands except the very first one (that is scalar oprnd)
     are stmt copies.  */
  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
  else
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);

  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  /* Get second vector operand.  */
  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
  VEC_quick_push (tree, *vec_oprnds, vec_oprnd);

  *oprnd = vec_oprnd;

  /* For conversion in multiple steps, continue to get operands
     recursively.  */
  if (multi_step_cvt)
    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
}
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  */

static void
vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
                                       int multi_step_cvt, gimple stmt,
                                       VEC (tree, heap) *vec_dsts,
                                       gimple_stmt_iterator *gsi,
                                       slp_tree slp_node, enum tree_code code,
                                       stmt_vec_info *prev_stmt_info)
{
  unsigned int i;
  tree vop0, vop1, new_tmp, vec_dest;
  gimple new_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  vec_dest = VEC_pop (tree, vec_dsts);

  for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
    {
      /* Create demotion operation.  */
      vop0 = VEC_index (tree, *vec_oprnds, i);
      vop1 = VEC_index (tree, *vec_oprnds, i + 1);
      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
      new_tmp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_tmp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (multi_step_cvt)
        /* Store the resulting vector for next recursive call.  */
        VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
      else
        {
          /* This is the last step of the conversion sequence.  Store the
             vectors in SLP_NODE or in vector info of the scalar statement
             (or in STMT_VINFO_RELATED_STMT chain).  */
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
          else
            {
              if (!*prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;

              *prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  /* For multi-step demotion operations we first generate demotion operations
     from the source type to the intermediate types, and then combine the
     results (stored in VEC_OPRNDS) in demotion operation to the destination
     type.  */
  if (multi_step_cvt)
    {
      /* At each level of recursion we have half of the operands we had at the
         previous level.  */
      VEC_truncate (tree, *vec_oprnds, (i+1)/2);
      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
                                             stmt, vec_dsts, gsi, slp_node,
                                             VEC_PACK_TRUNC_EXPR,
                                             prev_stmt_info);
    }
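  /* Restore the vector destination that was popped at function entry so the
     caller sees VEC_DSTS unchanged.  */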
  VEC_quick_push (tree, vec_dsts, vec_dest);
}
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
   the resulting vectors and call the function recursively.  */

static void
vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
                                        VEC (tree, heap) **vec_oprnds1,
                                        gimple stmt, tree vec_dest,
                                        gimple_stmt_iterator *gsi,
                                        enum tree_code code1,
                                        enum tree_code code2, tree decl1,
                                        tree decl2, int op_type)
{
  int i;
  tree vop0, vop1, new_tmp1, new_tmp2;
  gimple new_stmt1, new_stmt2;
  VEC (tree, heap) *vec_tmp = NULL;

  vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
  FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
    {
      if (op_type == binary_op)
        vop1 = VEC_index (tree, *vec_oprnds1, i);
      else
        vop1 = NULL_TREE;

      /* Generate the two halves of promotion operation.  */
      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
                                                 op_type, vec_dest, gsi, stmt);
      if (is_gimple_call (new_stmt1))
        {
          new_tmp1 = gimple_call_lhs (new_stmt1);
          new_tmp2 = gimple_call_lhs (new_stmt2);
        }
      else
        {
          new_tmp1 = gimple_assign_lhs (new_stmt1);
          new_tmp2 = gimple_assign_lhs (new_stmt2);
        }

      /* Store the results for the next step.  */
      VEC_quick_push (tree, vec_tmp, new_tmp1);
      VEC_quick_push (tree, vec_tmp, new_tmp2);
    }
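  /* Hand the widened halves back through VEC_OPRNDS0; a following
     (multi-step) conversion step consumes them as its operands.  */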
  VEC_free (tree, heap, *vec_oprnds0);
  *vec_oprnds0 = vec_tmp;
}
/* Check if STMT performs a conversion operation, that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree op0, op1 = NULL_TREE;
  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info;
  tree vectype_out, vectype_in;
  tree lhs_type, rhs_type;
  enum { NARROW, NONE, WIDEN } modifier;
  VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  int multi_step_cvt = 0;
  VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
  enum machine_mode rhs_mode;
  unsigned short fltsz;

  /* Is STMT a vectorizable conversion?  */

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;
  code = gimple_assign_rhs_code (stmt);
  if (!CONVERT_EXPR_CODE_P (code)
      && code != FIX_TRUNC_EXPR
      && code != FLOAT_EXPR
      && code != WIDEN_MULT_EXPR
      && code != WIDEN_LSHIFT_EXPR)
    return false;

  op_type = TREE_CODE_LENGTH (code);

  /* Check types of lhs and rhs.  */
  scalar_dest = gimple_assign_lhs (stmt);
  lhs_type = TREE_TYPE (scalar_dest);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  op0 = gimple_assign_rhs1 (stmt);
  rhs_type = TREE_TYPE (op0);

  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
      && !((INTEGRAL_TYPE_P (lhs_type)
            && INTEGRAL_TYPE_P (rhs_type))
           || (SCALAR_FLOAT_TYPE_P (lhs_type)
               && SCALAR_FLOAT_TYPE_P (rhs_type))))
    return false;

  if ((INTEGRAL_TYPE_P (lhs_type)
       && (TYPE_PRECISION (lhs_type)
           != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
      || (INTEGRAL_TYPE_P (rhs_type)
          && (TYPE_PRECISION (rhs_type)
              != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "type conversion to/from bit-precision unsupported.");
      return false;
    }

  /* Check the operands of the operation.  */
  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  if (op_type == binary_op)
    {
      bool ok;

      op1 = gimple_assign_rhs2 (stmt);
      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
         OP1.  */
      if (CONSTANT_CLASS_P (op0))
        ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
                                   &def_stmt, &def, &dt[1], &vectype_in);
      else
        ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
                                 &def, &dt[1]);

      if (!ok)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }
    }

  /* If op0 is an external or constant defs use a vector type of
     the same size as the output vector type.  */
  if (!vectype_in)
    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
  if (vec_stmt)
    gcc_assert (vectype_in);
  if (!vectype_in)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vectype for scalar type ");
          print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
        }

      return false;
    }

  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  if (nunits_in < nunits_out)
    modifier = NARROW;
  else if (nunits_out == nunits_in)
    modifier = NONE;
  else
    modifier = WIDEN;
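  /* For a narrowing conversion each result vector is assembled from several
     input vectors, so NCOPIES below is derived from the number of elements
     in the output vector type; otherwise it is derived from the input
     vector type.  */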
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else if (modifier == NARROW)
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);
  /* Supportable by target?  */
  switch (modifier)
    {
    case NONE:
      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
        return false;
      if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))
        break;
      /* FALLTHRU */
    unsupported:
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "conversion not supported by target.");
      return false;

    case WIDEN:
      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                          &decl1, &decl2, &code1, &code2,
                                          &multi_step_cvt, &interm_types))
        {
          /* Binary widening operation can only be supported directly by the
             architecture.  */
          gcc_assert (!(multi_step_cvt && op_type == binary_op));
          break;
        }

      if (code != FLOAT_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
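      /* Search for an intermediate integer mode, doubling its width each
         iteration, through which the int->float conversion can be expressed
         as an integer widening followed by a conversion to float.  */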
      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
           rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
           rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
        {
          cvt_type
            = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
          cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
          if (cvt_type == NULL_TREE)
            goto unsupported;

          if (GET_MODE_SIZE (rhs_mode) == fltsz)
            {
              if (!supportable_convert_operation (code, vectype_out,
                                                  cvt_type, &decl1, &codecvt1))
                goto unsupported;
            }
          else if (!supportable_widening_operation (code, stmt, vectype_out,
                                                    cvt_type, &decl1, &decl2,
                                                    &codecvt1, &codecvt2,
                                                    &multi_step_cvt,
                                                    &interm_types))
            continue;
          else
            gcc_assert (multi_step_cvt == 0);

          if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
                                              vectype_in, NULL, NULL, &code1,
                                              &code2, &multi_step_cvt,
                                              &interm_types))
            break;
        }

      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
        goto unsupported;

      if (GET_MODE_SIZE (rhs_mode) == fltsz)
        codecvt2 = ERROR_MARK;
      else
        {
          multi_step_cvt++;
          VEC_safe_push (tree, heap, interm_types, cvt_type);
          cvt_type = NULL_TREE;
        }
      break;

    case NARROW:
      gcc_assert (op_type == unary_op);
      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      if (code != FIX_TRUNC_EXPR
          || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
              >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
        goto unsupported;

      rhs_mode = TYPE_MODE (rhs_type);
      cvt_type
        = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
      if (cvt_type == NULL_TREE)
        goto unsupported;
      if (!supportable_convert_operation (code, cvt_type, vectype_in,
                                          &decl1, &codecvt1))
        goto unsupported;
      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
                                           &code1, &multi_step_cvt,
                                           &interm_types))
        break;

      goto unsupported;

    default:
      gcc_unreachable ();
    }
  if (!vec_stmt)                /* transformation not required.  */
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_conversion ===");
      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
        {
          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
          vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
        }
      else if (modifier == NARROW)
        {
          STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      else
        {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
        }
      VEC_free (tree, heap, interm_types);
      return true;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);

  if (op_type == binary_op)
    {
      if (CONSTANT_CLASS_P (op0))
        op0 = fold_convert (TREE_TYPE (op1), op0);
      else if (CONSTANT_CLASS_P (op1))
        op1 = fold_convert (TREE_TYPE (op0), op1);
    }

  /* In case of multi-step conversion, we first generate conversion operations
     to the intermediate types, and then from those types to the final one.
     We create vector destinations for the intermediate type (TYPES) received
     from supportable_*_operation, and store them in the correct order
     for future use in vect_create_vectorized_*_stmts ().  */
  vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
  vec_dest = vect_create_destination_var (scalar_dest,
                                          (cvt_type && modifier == WIDEN)
                                          ? cvt_type : vectype_out);
  VEC_quick_push (tree, vec_dsts, vec_dest);

  if (multi_step_cvt)
    for (i = VEC_length (tree, interm_types) - 1;
         VEC_iterate (tree, interm_types, i, intermediate_type); i--)
      {
        vec_dest = vect_create_destination_var (scalar_dest,
                                                intermediate_type);
        VEC_quick_push (tree, vec_dsts, vec_dest);
      }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
                                            modifier == WIDEN
                                            ? vectype_out : cvt_type);
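  /* Allocate room for the vector operands; a multi-step conversion needs
     2^MULTI_STEP_CVT slots (see vect_pow2) rather than one.  */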
  if (modifier == NONE)
    vec_oprnds0 = VEC_alloc (tree, heap, 1);
  else if (modifier == WIDEN)
    {
      vec_oprnds0 = VEC_alloc (tree, heap,
                               (multi_step_cvt
                                ? vect_pow2 (multi_step_cvt) : 1));
      if (op_type == binary_op)
        vec_oprnds1 = VEC_alloc (tree, heap, 1);
    }
  else
    vec_oprnds0 = VEC_alloc (tree, heap,
                             2 * (multi_step_cvt
                                  ? vect_pow2 (multi_step_cvt) : 1));

  else if (code == WIDEN_LSHIFT_EXPR)
    vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      if (j == 0)
        vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
                           -1);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);

      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
        {
          /* Arguments are ready, create the new vector stmt.  */
          if (code1 == CALL_EXPR)
            {
              new_stmt = gimple_build_call (decl1, 1, vop0);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_call_set_lhs (new_stmt, new_temp);
            }
          else
            {
              gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
              new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
                                                       vop0, NULL);
              new_temp = make_ssa_name (vec_dest, new_stmt);
              gimple_assign_set_lhs (new_stmt, new_temp);
            }

          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                            new_stmt);
        }

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to
     generate more than one vector stmt - i.e - we need to "unroll"
     the vector stmt by a factor VF/nunits.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (j == 0)
        {
          if (slp_node)
            {
              if (code == WIDEN_LSHIFT_EXPR)
                {
                  unsigned int k;

                  vec_oprnd1 = op1;
                  /* Store vec_oprnd1 for every vector stmt to be created
                     for SLP_NODE.  We check during the analysis that all
                     the shift arguments are the same.  */
                  for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
                    VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);

                  vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                                     slp_node, -1);
                }
              else
                vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
                                   &vec_oprnds1, slp_node, -1);
            }
          else
            {
              vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
              VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
              if (op_type == binary_op)
                {
                  if (code == WIDEN_LSHIFT_EXPR)
                    vec_oprnd1 = op1;
                  else
                    vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
                                                               NULL);
                  VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
                }
            }
        }
      else
        {
          vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
          VEC_truncate (tree, vec_oprnds0, 0);
          VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
          if (op_type == binary_op)
            {
              if (code == WIDEN_LSHIFT_EXPR)
                vec_oprnd1 = op1;
              else
                vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
                                                             vec_oprnd1);
              VEC_truncate (tree, vec_oprnds1, 0);
              VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
            }
        }
      /* Arguments are ready.  Create the new vector stmts.  */
      for (i = multi_step_cvt; i >= 0; i--)
        {
          tree this_dest = VEC_index (tree, vec_dsts, i);
          enum tree_code c1 = code1, c2 = code2;
          if (i == 0 && codecvt2 != ERROR_MARK)
            {
              c1 = codecvt1;
              c2 = codecvt2;
            }
          vect_create_vectorized_promotion_stmts (&vec_oprnds0,
                                                  &vec_oprnds1,
                                                  stmt, this_dest, gsi,
                                                  c1, c2, decl1, decl2,
                                                  op_type);
        }

      FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
        {
          if (cvt_type)
            {
              if (codecvt1 == CALL_EXPR)
                {
                  new_stmt = gimple_build_call (decl1, 1, vop0);
                  new_temp = make_ssa_name (vec_dest, new_stmt);
                  gimple_call_set_lhs (new_stmt, new_temp);
                }
              else
                {
                  gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                  new_temp = make_ssa_name (vec_dest, NULL);
                  new_stmt = gimple_build_assign_with_ops (codecvt1,
                                                           new_temp,
                                                           vop0, NULL);
                }

              vect_finish_stmt_generation (stmt, new_stmt, gsi);
            }
          else
            new_stmt = SSA_NAME_DEF_STMT (vop0);

          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
                            new_stmt);
          else
            {
              if (!prev_stmt_info)
                STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
              prev_stmt_info = vinfo_for_stmt (new_stmt);
            }
        }
    }

  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to
     generate more than one vector stmt - i.e - we need to "unroll"
     the vector stmt by a factor VF/nunits.  */
  for (j = 0; j < ncopies; j++)
    {
      /* Handle uses.  */
      if (slp_node)
        vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
                           slp_node, -1);
      else
        {
          VEC_truncate (tree, vec_oprnds0, 0);
          vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
                                    vect_pow2 (multi_step_cvt) - 1);
        }

      /* Arguments are ready.  Create the new vector stmts.  */
      if (cvt_type)
        FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
          {
            if (codecvt1 == CALL_EXPR)
              {
                new_stmt = gimple_build_call (decl1, 1, vop0);
                new_temp = make_ssa_name (vec_dest, new_stmt);
                gimple_call_set_lhs (new_stmt, new_temp);
              }
            else
              {
                gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
                new_temp = make_ssa_name (vec_dest, NULL);
                new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
                                                         vop0, NULL);
              }

            vect_finish_stmt_generation (stmt, new_stmt, gsi);
            VEC_replace (tree, vec_oprnds0, i, new_temp);
          }

      vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
                                             stmt, vec_dsts, gsi,
                                             slp_node, code1,
                                             &prev_stmt_info);
    }

  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);

  VEC_free (tree, heap, vec_oprnds0);
  VEC_free (tree, heap, vec_oprnds1);
  VEC_free (tree, heap, vec_dsts);
  VEC_free (tree, heap, interm_types);

  return true;
}
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  VEC(tree,heap) *vec_oprnds = NULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple new_stmt = NULL;
  stmt_vec_info prev_stmt_info = NULL;
  enum tree_code code;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  gcc_assert (ncopies >= 1);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (gimple_assign_single_p (stmt)
      || code == PAREN_EXPR
      || CONVERT_EXPR_CODE_P (code))
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (code == VIEW_CONVERT_EXPR)
    op = TREE_OPERAND (op, 0);

  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                             &def_stmt, &def, &dt[0], &vectype_in))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* We can handle NOP_EXPR conversions that do not change the number
     of elements or the vector size.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && (!vectype_in
          || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
          || (GET_MODE_SIZE (TYPE_MODE (vectype))
              != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
    return false;

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
           != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
          || ((TYPE_PRECISION (TREE_TYPE (op))
               != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
            > TYPE_PRECISION (TREE_TYPE (op)))
           && TYPE_UNSIGNED (TREE_TYPE (op))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "type conversion to/from bit-precision "
                 "unsupported.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_assignment ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform assignment.");

  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  for (j = 0; j < ncopies; j++)
    {
      if (j == 0)
        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
      else
        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);

      /* Arguments are ready.  Create the new vector stmt.  */
      FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
        {
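          /* A conversion that preserves the vector size is expressed as a
             VIEW_CONVERT_EXPR of the operand, so the copy itself needs no
             separate conversion statement.  */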
          if (CONVERT_EXPR_CODE_P (code)
              || code == VIEW_CONVERT_EXPR)
            vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
          new_stmt = gimple_build_assign (vec_dest, vop);
          new_temp = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, new_temp);
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          if (slp_node)
            VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
        }

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  VEC_free (tree, heap, vec_oprnds);

  return true;
}
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  enum machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;
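  /* Prefer a shift by a scalar amount; fall back to a form where the shift
     amount is itself a vector if the scalar form is not supported.  */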
  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
          || (optab_handler (optab, TYPE_MODE (vectype))
              == CODE_FOR_nothing))
        return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}
/* Function vectorizable_shift.

   Check if STMT performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
                    gimple *vec_stmt, slp_tree slp_node)
2943 tree op0
, op1
= NULL
;
2944 tree vec_oprnd1
= NULL_TREE
;
2945 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2947 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2948 enum tree_code code
;
2949 enum machine_mode vec_mode
;
2953 enum machine_mode optab_op2_mode
;
2956 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2957 gimple new_stmt
= NULL
;
2958 stmt_vec_info prev_stmt_info
;
2965 VEC (tree
, heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
2968 bool scalar_shift_arg
= true;
2969 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2972 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2975 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2978 /* Is STMT a vectorizable binary/unary operation? */
2979 if (!is_gimple_assign (stmt
))
2982 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
2985 code
= gimple_assign_rhs_code (stmt
);
2987 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
2988 || code
== RROTATE_EXPR
))
2991 scalar_dest
= gimple_assign_lhs (stmt
);
2992 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2993 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
2994 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
2996 if (vect_print_dump_info (REPORT_DETAILS
))
2997 fprintf (vect_dump
, "bit-precision shifts not supported.");
3001 op0
= gimple_assign_rhs1 (stmt
);
3002 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3003 &def_stmt
, &def
, &dt
[0], &vectype
))
3005 if (vect_print_dump_info (REPORT_DETAILS
))
3006 fprintf (vect_dump
, "use not simple.");
3009 /* If op0 is an external or constant def use a vector type with
3010 the same size as the output vector type. */
3012 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
3014 gcc_assert (vectype
);
3017 if (vect_print_dump_info (REPORT_DETAILS
))
3019 fprintf (vect_dump
, "no vectype for scalar type ");
3020 print_generic_expr (vect_dump
, TREE_TYPE (op0
), TDF_SLIM
);
3026 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3027 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3028 if (nunits_out
!= nunits_in
)
3031 op1
= gimple_assign_rhs2 (stmt
);
3032 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3033 &def
, &dt
[1], &op1_vectype
))
3035 if (vect_print_dump_info (REPORT_DETAILS
))
3036 fprintf (vect_dump
, "use not simple.");
3041 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3045 /* Multiple types in SLP are handled by creating the appropriate number of
3046 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3048 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3051 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3053 gcc_assert (ncopies
>= 1);
3055 /* Determine whether the shift amount is a vector, or scalar. If the
3056 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3058 if (dt
[1] == vect_internal_def
&& !slp_node
)
3059 scalar_shift_arg
= false;
3060 else if (dt
[1] == vect_constant_def
3061 || dt
[1] == vect_external_def
3062 || dt
[1] == vect_internal_def
)
3064 /* In SLP, need to check whether the shift count is the same,
3065 in loops if it is a constant or invariant, it is always
3069 VEC (gimple
, heap
) *stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3072 FOR_EACH_VEC_ELT (gimple
, stmts
, k
, slpstmt
)
3073 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
3074 scalar_shift_arg
= false;
3079 if (vect_print_dump_info (REPORT_DETAILS
))
3080 fprintf (vect_dump
, "operand mode requires invariant argument.");
3084 /* Vector shifted by vector. */
3085 if (!scalar_shift_arg
)
3087 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3088 if (vect_print_dump_info (REPORT_DETAILS
))
3089 fprintf (vect_dump
, "vector/vector shift/rotate found.");
3091 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
3092 if (op1_vectype
== NULL_TREE
3093 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
3095 if (vect_print_dump_info (REPORT_DETAILS
))
3096 fprintf (vect_dump
, "unusable type for last operand in"
3097 " vector/vector shift/rotate.");
3101 /* See if the machine has a vector shifted by scalar insn and if not
3102 then see if it has a vector shifted by vector insn. */
3105 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
3107 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
3109 if (vect_print_dump_info (REPORT_DETAILS
))
3110 fprintf (vect_dump
, "vector/scalar shift/rotate found.");
3114 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3116 && (optab_handler (optab
, TYPE_MODE (vectype
))
3117 != CODE_FOR_nothing
))
3119 scalar_shift_arg
= false;
3121 if (vect_print_dump_info (REPORT_DETAILS
))
3122 fprintf (vect_dump
, "vector/vector shift/rotate found.");
3124 /* Unlike the other binary operators, shifts/rotates have
3125 the rhs being int, instead of the same type as the lhs,
3126 so make sure the scalar is the right type if we are
3127 dealing with vectors of long long/long/short/char. */
3128 if (dt
[1] == vect_constant_def
)
3129 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
3130 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
3134 && TYPE_MODE (TREE_TYPE (vectype
))
3135 != TYPE_MODE (TREE_TYPE (op1
)))
3137 if (vect_print_dump_info (REPORT_DETAILS
))
3138 fprintf (vect_dump
, "unusable type for last operand in"
3139 " vector/vector shift/rotate.");
3142 if (vec_stmt
&& !slp_node
)
3144 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
3145 op1
= vect_init_vector (stmt
, op1
,
3146 TREE_TYPE (vectype
), NULL
);
3153 /* Supportable by target? */
3156 if (vect_print_dump_info (REPORT_DETAILS
))
3157 fprintf (vect_dump
, "no optab.");
3160 vec_mode
= TYPE_MODE (vectype
);
3161 icode
= (int) optab_handler (optab
, vec_mode
);
3162 if (icode
== CODE_FOR_nothing
)
3164 if (vect_print_dump_info (REPORT_DETAILS
))
3165 fprintf (vect_dump
, "op not supported by target.");
3166 /* Check only during analysis. */
3167 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
3168 || (vf
< vect_min_worthwhile_factor (code
)
3171 if (vect_print_dump_info (REPORT_DETAILS
))
3172 fprintf (vect_dump
, "proceeding using word mode.");
3175 /* Worthwhile without SIMD support? Check only during analysis. */
3176 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
3177 && vf
< vect_min_worthwhile_factor (code
)
3180 if (vect_print_dump_info (REPORT_DETAILS
))
3181 fprintf (vect_dump
, "not worthwhile without SIMD support.");
3185 if (!vec_stmt
) /* transformation not required. */
3187 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
3188 if (vect_print_dump_info (REPORT_DETAILS
))
3189 fprintf (vect_dump
, "=== vectorizable_shift ===");
3190 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
);
3196 if (vect_print_dump_info (REPORT_DETAILS
))
3197 fprintf (vect_dump
, "transform binary/unary operation.");
3200 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3202 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3203 created in the previous stages of the recursion, so no allocation is
3204 needed, except for the case of shift with scalar shift argument. In that
3205 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3206 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3207 In case of loop-based vectorization we allocate VECs of size 1. We
3208 allocate VEC_OPRNDS1 only in case of binary operation. */
3211 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
3212 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
3214 else if (scalar_shift_arg
)
3215 vec_oprnds1
= VEC_alloc (tree
, heap
, slp_node
->vec_stmts_size
);
3217 prev_stmt_info
= NULL
;
3218 for (j
= 0; j
< ncopies
; j
++)
3223 if (scalar_shift_arg
)
3225 /* Vector shl and shr insn patterns can be defined with scalar
3226 operand 2 (shift operand). In this case, use constant or loop
3227 invariant op1 directly, without extending it to vector mode
3229 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
3230 if (!VECTOR_MODE_P (optab_op2_mode
))
3232 if (vect_print_dump_info (REPORT_DETAILS
))
3233 fprintf (vect_dump
, "operand 1 using scalar mode.");
3235 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
3238 /* Store vec_oprnd1 for every vector stmt to be created
3239 for SLP_NODE. We check during the analysis that all
3240 the shift arguments are the same.
3241 TODO: Allow different constants for different vector
3242 stmts generated for an SLP instance. */
3243 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3244 VEC_quick_push (tree
, vec_oprnds1
, vec_oprnd1
);
3249 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3250 (a special case for certain kind of vector shifts); otherwise,
3251 operand 1 should be of a vector type (the usual case). */
3253 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3256 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
3260 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
3262 /* Arguments are ready. Create the new vector stmt. */
3263 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
3265 vop1
= VEC_index (tree
, vec_oprnds1
, i
);
3266 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
3267 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3268 gimple_assign_set_lhs (new_stmt
, new_temp
);
3269 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3271 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
3278 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3280 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3281 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3284 VEC_free (tree
, heap
, vec_oprnds0
);
3285 VEC_free (tree
, heap
, vec_oprnds1
);
static tree permute_vec_elements (tree, tree, tree, gimple,
                                  gimple_stmt_iterator *);


/* Function vectorizable_operation.

   Check if STMT performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt, slp_tree slp_node)
3307 tree vec_dest
, vec_dest2
= NULL_TREE
;
3308 tree vec_dest3
= NULL_TREE
, vec_dest4
= NULL_TREE
;
3310 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
3311 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3312 tree vectype
, wide_vectype
= NULL_TREE
;
3313 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3314 enum tree_code code
;
3315 enum machine_mode vec_mode
;
3318 optab optab
, optab2
= NULL
;
3322 enum vect_def_type dt
[3]
3323 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
3324 gimple new_stmt
= NULL
;
3325 stmt_vec_info prev_stmt_info
;
3331 VEC(tree
,heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
, *vec_oprnds2
= NULL
;
3332 tree vop0
, vop1
, vop2
;
3333 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3335 unsigned char *sel
= NULL
;
3336 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
, perm_mask
= NULL_TREE
;
3338 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3341 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3344 /* Is STMT a vectorizable binary/unary operation? */
3345 if (!is_gimple_assign (stmt
))
3348 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3351 code
= gimple_assign_rhs_code (stmt
);
3353 /* For pointer addition, we should use the normal plus for
3354 the vector addition. */
3355 if (code
== POINTER_PLUS_EXPR
)
3358 /* Support only unary or binary operations. */
3359 op_type
= TREE_CODE_LENGTH (code
);
3360 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
3362 if (vect_print_dump_info (REPORT_DETAILS
))
3363 fprintf (vect_dump
, "num. args = %d (not unary/binary/ternary op).",
3368 scalar_dest
= gimple_assign_lhs (stmt
);
3369 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3371 /* Most operations cannot handle bit-precision types without extra
3373 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3374 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
3375 /* Exception are bitwise binary operations. */
3376 && code
!= BIT_IOR_EXPR
3377 && code
!= BIT_XOR_EXPR
3378 && code
!= BIT_AND_EXPR
)
3380 if (vect_print_dump_info (REPORT_DETAILS
))
3381 fprintf (vect_dump
, "bit-precision arithmetic not supported.");
3385 op0
= gimple_assign_rhs1 (stmt
);
3386 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3387 &def_stmt
, &def
, &dt
[0], &vectype
))
3389 if (vect_print_dump_info (REPORT_DETAILS
))
3390 fprintf (vect_dump
, "use not simple.");
3393 /* If op0 is an external or constant def use a vector type with
3394 the same size as the output vector type. */
3396 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
3398 gcc_assert (vectype
);
3401 if (vect_print_dump_info (REPORT_DETAILS
))
3403 fprintf (vect_dump
, "no vectype for scalar type ");
3404 print_generic_expr (vect_dump
, TREE_TYPE (op0
), TDF_SLIM
);
3410 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3411 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3412 if (nunits_out
!= nunits_in
)
3415 if (op_type
== binary_op
|| op_type
== ternary_op
)
3417 op1
= gimple_assign_rhs2 (stmt
);
3418 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3421 if (vect_print_dump_info (REPORT_DETAILS
))
3422 fprintf (vect_dump
, "use not simple.");
3426 if (op_type
== ternary_op
)
3428 op2
= gimple_assign_rhs3 (stmt
);
3429 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3432 if (vect_print_dump_info (REPORT_DETAILS
))
3433 fprintf (vect_dump
, "use not simple.");
3439 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3443 /* Multiple types in SLP are handled by creating the appropriate number of
3444 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3446 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3449 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3451 gcc_assert (ncopies
>= 1);
3453 /* Shifts are handled in vectorizable_shift (). */
3454 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
3455 || code
== RROTATE_EXPR
)
3458 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
3460 /* Supportable by target? */
3461 if (!optab
&& code
!= MULT_HIGHPART_EXPR
)
3463 if (vect_print_dump_info (REPORT_DETAILS
))
3464 fprintf (vect_dump
, "no optab.");
3467 vec_mode
= TYPE_MODE (vectype
);
3468 icode
= optab
? (int) optab_handler (optab
, vec_mode
) : CODE_FOR_nothing
;
3470 if (icode
== CODE_FOR_nothing
3471 && code
== MULT_HIGHPART_EXPR
3472 && VECTOR_MODE_P (vec_mode
)
3473 && BYTES_BIG_ENDIAN
== WORDS_BIG_ENDIAN
)
3475 /* If MULT_HIGHPART_EXPR isn't supported by the backend, see
3476 if we can emit VEC_WIDEN_MULT_{LO,HI}_EXPR followed by VEC_PERM_EXPR
3477 or builtin_mul_widen_{even,odd} followed by VEC_PERM_EXPR. */
3478 unsigned int prec
= TYPE_PRECISION (TREE_TYPE (scalar_dest
));
3479 unsigned int unsignedp
= TYPE_UNSIGNED (TREE_TYPE (scalar_dest
));
3481 = build_nonstandard_integer_type (prec
* 2, unsignedp
);
3483 = get_same_sized_vectype (wide_type
, vectype
);
3485 sel
= XALLOCAVEC (unsigned char, nunits_in
);
3486 if (VECTOR_MODE_P (TYPE_MODE (wide_vectype
))
3487 && GET_MODE_SIZE (TYPE_MODE (wide_vectype
))
3488 == GET_MODE_SIZE (vec_mode
))
3490 if (targetm
.vectorize
.builtin_mul_widen_even
3491 && (decl1
= targetm
.vectorize
.builtin_mul_widen_even (vectype
))
3492 && targetm
.vectorize
.builtin_mul_widen_odd
3493 && (decl2
= targetm
.vectorize
.builtin_mul_widen_odd (vectype
))
3494 && TYPE_MODE (TREE_TYPE (TREE_TYPE (decl1
)))
3495 == TYPE_MODE (wide_vectype
))
3497 for (i
= 0; i
< nunits_in
; i
++)
3498 sel
[i
] = !BYTES_BIG_ENDIAN
+ (i
& ~1)
3499 + ((i
& 1) ? nunits_in
: 0);
3500 if (can_vec_perm_p (vec_mode
, false, sel
))
3503 if (icode
== CODE_FOR_nothing
)
3507 optab
= optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR
,
3508 vectype
, optab_default
);
3509 optab2
= optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR
,
3510 vectype
, optab_default
);
3513 && optab_handler (optab
, vec_mode
) != CODE_FOR_nothing
3514 && optab_handler (optab2
, vec_mode
) != CODE_FOR_nothing
3515 && insn_data
[optab_handler (optab
, vec_mode
)].operand
[0].mode
3516 == TYPE_MODE (wide_vectype
)
3517 && insn_data
[optab_handler (optab2
,
3518 vec_mode
)].operand
[0].mode
3519 == TYPE_MODE (wide_vectype
))
3521 for (i
= 0; i
< nunits_in
; i
++)
3522 sel
[i
] = !BYTES_BIG_ENDIAN
+ 2 * i
;
3523 if (can_vec_perm_p (vec_mode
, false, sel
))
3524 icode
= optab_handler (optab
, vec_mode
);
3528 if (icode
== CODE_FOR_nothing
)
3530 if (optab_for_tree_code (code
, vectype
, optab_default
) == NULL
)
3532 if (vect_print_dump_info (REPORT_DETAILS
))
3533 fprintf (vect_dump
, "no optab.");
3536 wide_vectype
= NULL_TREE
;
3541 if (icode
== CODE_FOR_nothing
)
3543 if (vect_print_dump_info (REPORT_DETAILS
))
3544 fprintf (vect_dump
, "op not supported by target.");
3545 /* Check only during analysis. */
3546 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
3547 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
3549 if (vect_print_dump_info (REPORT_DETAILS
))
3550 fprintf (vect_dump
, "proceeding using word mode.");
3553 /* Worthwhile without SIMD support? Check only during analysis. */
3554 if (!VECTOR_MODE_P (vec_mode
)
3556 && vf
< vect_min_worthwhile_factor (code
))
3558 if (vect_print_dump_info (REPORT_DETAILS
))
3559 fprintf (vect_dump
, "not worthwhile without SIMD support.");
3563 if (!vec_stmt
) /* transformation not required. */
3565 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
3566 if (vect_print_dump_info (REPORT_DETAILS
))
3567 fprintf (vect_dump
, "=== vectorizable_operation ===");
3568 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
);
3574 if (vect_print_dump_info (REPORT_DETAILS
))
3575 fprintf (vect_dump
, "transform binary/unary operation.");
3580 vec_dest
= vect_create_destination_var (scalar_dest
, wide_vectype
);
3581 vec_dest2
= vect_create_destination_var (scalar_dest
, wide_vectype
);
3582 vec_dest3
= vect_create_destination_var (scalar_dest
, vectype
);
3583 vec_dest4
= vect_create_destination_var (scalar_dest
, vectype
);
3584 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
3587 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3589 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3590 created in the previous stages of the recursion, so no allocation is
3591 needed, except for the case of shift with scalar shift argument. In that
3592 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3593 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3594 In case of loop-based vectorization we allocate VECs of size 1. We
3595 allocate VEC_OPRNDS1 only in case of binary operation. */
3598 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
3599 if (op_type
== binary_op
|| op_type
== ternary_op
)
3600 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
3601 if (op_type
== ternary_op
)
3602 vec_oprnds2
= VEC_alloc (tree
, heap
, 1);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
     VS1_0:  vx0 = memref0      VS1_1           -
     VS1_1:  vx1 = memref1      VS1_2           -
     VS1_2:  vx2 = memref2      VS1_3           -
     VS1_3:  vx3 = memref3      -               -
     S1:     x = load           -               VS1_0

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */
3658 prev_stmt_info
= NULL
;
3659 for (j
= 0; j
< ncopies
; j
++)
3664 if (op_type
== binary_op
|| op_type
== ternary_op
)
3665 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
3668 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3670 if (op_type
== ternary_op
)
3672 vec_oprnds2
= VEC_alloc (tree
, heap
, 1);
3673 VEC_quick_push (tree
, vec_oprnds2
,
3674 vect_get_vec_def_for_operand (op2
, stmt
, NULL
));
3679 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
3680 if (op_type
== ternary_op
)
3682 tree vec_oprnd
= VEC_pop (tree
, vec_oprnds2
);
3683 VEC_quick_push (tree
, vec_oprnds2
,
3684 vect_get_vec_def_for_stmt_copy (dt
[2],
3689 /* Arguments are ready. Create the new vector stmt. */
3690 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vop0
)
3692 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
3693 ? VEC_index (tree
, vec_oprnds1
, i
) : NULL_TREE
);
3694 vop2
= ((op_type
== ternary_op
)
3695 ? VEC_index (tree
, vec_oprnds2
, i
) : NULL_TREE
);
3698 tree new_temp2
, vce
;
3700 gcc_assert (code
== MULT_HIGHPART_EXPR
);
3701 if (decl1
!= NULL_TREE
)
3703 new_stmt
= gimple_build_call (decl1
, 2, vop0
, vop1
);
3704 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3705 gimple_call_set_lhs (new_stmt
, new_temp
);
3706 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3708 new_stmt
= gimple_build_call (decl2
, 2, vop0
, vop1
);
3709 new_temp2
= make_ssa_name (vec_dest2
, new_stmt
);
3710 gimple_call_set_lhs (new_stmt
, new_temp2
);
3711 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3715 new_temp
= make_ssa_name (vec_dest
, NULL
);
3717 = gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
3718 ? VEC_WIDEN_MULT_HI_EXPR
3719 : VEC_WIDEN_MULT_LO_EXPR
,
3720 new_temp
, vop0
, vop1
);
3721 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3723 new_temp2
= make_ssa_name (vec_dest2
, NULL
);
3725 = gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
3726 ? VEC_WIDEN_MULT_LO_EXPR
3727 : VEC_WIDEN_MULT_HI_EXPR
,
3728 new_temp2
, vop0
, vop1
);
3729 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3732 vce
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
3733 new_stmt
= gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
,
3736 new_temp
= make_ssa_name (vec_dest3
, new_stmt
);
3737 gimple_assign_set_lhs (new_stmt
, new_temp
);
3738 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3740 vce
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp2
);
3741 new_stmt
= gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
,
3744 new_temp2
= make_ssa_name (vec_dest4
, new_stmt
);
3745 gimple_assign_set_lhs (new_stmt
, new_temp2
);
3746 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3748 new_temp
= permute_vec_elements (new_temp
, new_temp2
,
3749 perm_mask
, stmt
, gsi
);
3750 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
3752 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
3756 new_stmt
= gimple_build_assign_with_ops3 (code
, vec_dest
,
3758 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3759 gimple_assign_set_lhs (new_stmt
, new_temp
);
3760 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3762 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
3769 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3771 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3772 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3775 VEC_free (tree
, heap
, vec_oprnds0
);
3777 VEC_free (tree
, heap
, vec_oprnds1
);
3779 VEC_free (tree
, heap
, vec_oprnds2
);
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
                    slp_tree slp_node)
3800 tree vec_oprnd
= NULL_TREE
;
3801 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3802 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
3803 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3805 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3806 struct loop
*loop
= NULL
;
3807 enum machine_mode vec_mode
;
3809 enum dr_alignment_support alignment_support_scheme
;
3812 enum vect_def_type dt
;
3813 stmt_vec_info prev_stmt_info
= NULL
;
3814 tree dataref_ptr
= NULL_TREE
;
3815 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3818 gimple next_stmt
, first_stmt
= NULL
;
3819 bool grouped_store
= false;
3820 bool store_lanes_p
= false;
3821 unsigned int group_size
, i
;
3822 VEC(tree
,heap
) *dr_chain
= NULL
, *oprnds
= NULL
, *result_chain
= NULL
;
3824 VEC(tree
,heap
) *vec_oprnds
= NULL
;
3825 bool slp
= (slp_node
!= NULL
);
3826 unsigned int vec_num
;
3827 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3831 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3833 /* Multiple types in SLP are handled by creating the appropriate number of
3834 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3836 if (slp
|| PURE_SLP_STMT (stmt_info
))
3839 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3841 gcc_assert (ncopies
>= 1);
3843 /* FORNOW. This restriction should be relaxed. */
3844 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
3846 if (vect_print_dump_info (REPORT_DETAILS
))
3847 fprintf (vect_dump
, "multiple types in nested loop.");
3851 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3854 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3857 /* Is vectorizable store? */
3859 if (!is_gimple_assign (stmt
))
3862 scalar_dest
= gimple_assign_lhs (stmt
);
3863 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
3864 && is_pattern_stmt_p (stmt_info
))
3865 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
3866 if (TREE_CODE (scalar_dest
) != ARRAY_REF
3867 && TREE_CODE (scalar_dest
) != INDIRECT_REF
3868 && TREE_CODE (scalar_dest
) != COMPONENT_REF
3869 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
3870 && TREE_CODE (scalar_dest
) != REALPART_EXPR
3871 && TREE_CODE (scalar_dest
) != MEM_REF
)
3874 gcc_assert (gimple_assign_single_p (stmt
));
3875 op
= gimple_assign_rhs1 (stmt
);
3876 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3879 if (vect_print_dump_info (REPORT_DETAILS
))
3880 fprintf (vect_dump
, "use not simple.");
3884 elem_type
= TREE_TYPE (vectype
);
3885 vec_mode
= TYPE_MODE (vectype
);
  /* FORNOW.  In some cases we can vectorize even if the data type is not
     supported (e.g., array initialization with 0).  */
  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3892 if (!STMT_VINFO_DATA_REF (stmt_info
))
3895 if (tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
3896 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
3897 size_zero_node
) < 0)
3899 if (vect_print_dump_info (REPORT_DETAILS
))
3900 fprintf (vect_dump
, "negative step for store.");
3904 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3906 grouped_store
= true;
3907 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
3908 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
3910 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3911 if (vect_store_lanes_supported (vectype
, group_size
))
3912 store_lanes_p
= true;
3913 else if (!vect_grouped_store_supported (vectype
, group_size
))
3917 if (first_stmt
== stmt
)
3919 /* STMT is the leader of the group. Check the operands of all the
3920 stmts of the group. */
3921 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
3924 gcc_assert (gimple_assign_single_p (next_stmt
));
3925 op
= gimple_assign_rhs1 (next_stmt
);
3926 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
3927 &def_stmt
, &def
, &dt
))
3929 if (vect_print_dump_info (REPORT_DETAILS
))
3930 fprintf (vect_dump
, "use not simple.");
3933 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
3938 if (!vec_stmt
) /* transformation not required. */
3940 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
3941 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
, NULL
);
3949 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3950 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3952 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
3955 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
3957 /* We vectorize all the stmts of the interleaving group when we
3958 reach the last stmt in the group. */
3959 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
3960 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
3969 grouped_store
= false;
3970 /* VEC_NUM is the number of vect stmts to be created for this
3972 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3973 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
3974 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
3975 op
= gimple_assign_rhs1 (first_stmt
);
3978 /* VEC_NUM is the number of vect stmts to be created for this
3980 vec_num
= group_size
;
3986 group_size
= vec_num
= 1;
3989 if (vect_print_dump_info (REPORT_DETAILS
))
3990 fprintf (vect_dump
, "transform store. ncopies = %d",ncopies
);
3992 dr_chain
= VEC_alloc (tree
, heap
, group_size
);
3993 oprnds
= VEC_alloc (tree
, heap
, group_size
);
3995 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
3996 gcc_assert (alignment_support_scheme
);
3997 /* Targets with store-lane instructions must not require explicit
3999 gcc_assert (!store_lanes_p
4000 || alignment_support_scheme
== dr_aligned
4001 || alignment_support_scheme
== dr_unaligned_supported
);
4004 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
4006 aggr_type
= vectype
;
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation in
     vect_get_vec_def_for_copy_stmt.  */

  /* In case of interleaving (non-unit grouped access):

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

     VS2: &base + vec_size*1 = vx0
     VS3: &base + vec_size*2 = vx1
     VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.  */
4047 prev_stmt_info
= NULL
;
4048 for (j
= 0; j
< ncopies
; j
++)
4057 /* Get vectorized arguments for SLP_NODE. */
4058 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
4059 NULL
, slp_node
, -1);
4061 vec_oprnd
= VEC_index (tree
, vec_oprnds
, 0);
4065 /* For interleaved stores we collect vectorized defs for all the
4066 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4067 used as an input to vect_permute_store_chain(), and OPRNDS as
4068 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4070 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4071 OPRNDS are of size 1. */
4072 next_stmt
= first_stmt
;
4073 for (i
= 0; i
< group_size
; i
++)
4075 /* Since gaps are not supported for interleaved stores,
4076 GROUP_SIZE is the exact number of stmts in the chain.
4077 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4078 there is no interleaving, GROUP_SIZE is 1, and only one
4079 iteration of the loop will be executed. */
4080 gcc_assert (next_stmt
4081 && gimple_assign_single_p (next_stmt
));
4082 op
= gimple_assign_rhs1 (next_stmt
);
4084 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
4086 VEC_quick_push(tree
, dr_chain
, vec_oprnd
);
4087 VEC_quick_push(tree
, oprnds
, vec_oprnd
);
4088 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
	  /* We should have caught mismatched types earlier.  */
	  gcc_assert (useless_type_conversion_p (vectype,
						 TREE_TYPE (vec_oprnd)));
4095 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, aggr_type
, NULL
,
4096 NULL_TREE
, &dummy
, gsi
,
4097 &ptr_incr
, false, &inv_p
);
4098 gcc_assert (bb_vinfo
|| !inv_p
);
4102 /* For interleaved stores we created vectorized defs for all the
4103 defs stored in OPRNDS in the previous iteration (previous copy).
4104 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4105 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4107 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4108 OPRNDS are of size 1. */
4109 for (i
= 0; i
< group_size
; i
++)
4111 op
= VEC_index (tree
, oprnds
, i
);
4112 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4114 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
4115 VEC_replace(tree
, dr_chain
, i
, vec_oprnd
);
4116 VEC_replace(tree
, oprnds
, i
, vec_oprnd
);
4118 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
4119 TYPE_SIZE_UNIT (aggr_type
));
4126 /* Combine all the vectors into an array. */
4127 vec_array
= create_vector_array (vectype
, vec_num
);
4128 for (i
= 0; i
< vec_num
; i
++)
4130 vec_oprnd
= VEC_index (tree
, dr_chain
, i
);
4131 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
4135 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4136 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
4137 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
4138 gimple_call_set_lhs (new_stmt
, data_ref
);
4139 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4146 result_chain
= VEC_alloc (tree
, heap
, group_size
);
4148 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
4152 next_stmt
= first_stmt
;
4153 for (i
= 0; i
< vec_num
; i
++)
4155 unsigned align
, misalign
;
4158 /* Bump the vector pointer. */
4159 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
4163 vec_oprnd
= VEC_index (tree
, vec_oprnds
, i
);
4164 else if (grouped_store
)
4165 /* For grouped stores vectorized defs are interleaved in
4166 vect_permute_store_chain(). */
4167 vec_oprnd
= VEC_index (tree
, result_chain
, i
);
4169 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
4170 build_int_cst (reference_alias_ptr_type
4171 (DR_REF (first_dr
)), 0));
4172 align
= TYPE_ALIGN_UNIT (vectype
);
4173 if (aligned_access_p (first_dr
))
4175 else if (DR_MISALIGNMENT (first_dr
) == -1)
4177 TREE_TYPE (data_ref
)
4178 = build_aligned_type (TREE_TYPE (data_ref
),
4179 TYPE_ALIGN (elem_type
));
4180 align
= TYPE_ALIGN_UNIT (elem_type
);
4185 TREE_TYPE (data_ref
)
4186 = build_aligned_type (TREE_TYPE (data_ref
),
4187 TYPE_ALIGN (elem_type
));
4188 misalign
= DR_MISALIGNMENT (first_dr
);
4190 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
4193 /* Arguments are ready. Create the new vector stmt. */
4194 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
4195 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4200 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4208 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4210 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4211 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4215 VEC_free (tree
, heap
, dr_chain
);
4216 VEC_free (tree
, heap
, oprnds
);
4218 VEC_free (tree
, heap
, result_chain
);
4220 VEC_free (tree
, heap
, vec_oprnds
);
/* Given a vector type VECTYPE and permutation SEL returns
   the VECTOR_CST mask that implements the permutation of the
   vector elements.  If that is impossible to do, returns NULL.  */

tree
vect_gen_perm_mask (tree vectype, unsigned char *sel)
{
  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
  int i, nunits;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL;

  mask_elt_type = lang_hooks.types.type_for_mode
		    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_elt_type);

  mask_elts = XALLOCAVEC (tree, nunits);
  for (i = nunits - 1; i >= 0; i--)
    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
  mask_vec = build_vector (mask_type, mask_elts);

  return mask_vec;
}
/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   returns NULL.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  int i, nunits;
  unsigned char *sel;

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  sel = XALLOCAVEC (unsigned char, nunits);

  for (i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;

  return vect_gen_perm_mask (vectype, sel);
}
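/* A worked example (editorial illustration, not from the code above): for a
   four-element vector type such as V4SI, perm_mask_for_reverse builds the
   selector {3, 2, 1, 0}, so a subsequent

       vdest = VEC_PERM_EXPR < vsrc, vsrc, {3, 2, 1, 0} >;

   yields the elements of VSRC in reverse order.  When the target cannot
   perform the permutation (can_vec_perm_p fails), vect_gen_perm_mask
   returns NULL and callers such as the negative-step load handling below
   must give up on the reversal.  */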
/* Given a vector variable X and Y, that was generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

static tree
permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
		      gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  gimple perm_stmt;

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  data_ref = make_ssa_name (perm_dest, NULL);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
					     x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);

  return data_ref;
}
/* vectorizable_load.

   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4304 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
4305 slp_tree slp_node
, slp_instance slp_node_instance
)
4308 tree vec_dest
= NULL
;
4309 tree data_ref
= NULL
;
4310 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4311 stmt_vec_info prev_stmt_info
;
4312 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4313 struct loop
*loop
= NULL
;
4314 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
4315 bool nested_in_vect_loop
= false;
4316 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
4317 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4320 enum machine_mode mode
;
4321 gimple new_stmt
= NULL
;
4323 enum dr_alignment_support alignment_support_scheme
;
4324 tree dataref_ptr
= NULL_TREE
;
4326 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4328 int i
, j
, group_size
;
4329 tree msq
= NULL_TREE
, lsq
;
4330 tree offset
= NULL_TREE
;
4331 tree realignment_token
= NULL_TREE
;
4333 VEC(tree
,heap
) *dr_chain
= NULL
;
4334 bool grouped_load
= false;
4335 bool load_lanes_p
= false;
4338 bool negative
= false;
4339 bool compute_in_loop
= false;
4340 struct loop
*at_loop
;
4342 bool slp
= (slp_node
!= NULL
);
4343 bool slp_perm
= false;
4344 enum tree_code code
;
4345 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4348 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
4349 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
4350 tree stride_base
, stride_step
;
4351 int gather_scale
= 1;
4352 enum vect_def_type gather_dt
= vect_unknown_def_type
;
4356 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4357 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
4358 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
4366 if (slp
|| PURE_SLP_STMT (stmt_info
))
4369 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4371 gcc_assert (ncopies
>= 1);
4373 /* FORNOW. This restriction should be relaxed. */
4374 if (nested_in_vect_loop
&& ncopies
> 1)
4376 if (vect_print_dump_info (REPORT_DETAILS
))
4377 fprintf (vect_dump
, "multiple types in nested loop.");
4381 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4384 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4387 /* Is vectorizable load? */
4388 if (!is_gimple_assign (stmt
))
4391 scalar_dest
= gimple_assign_lhs (stmt
);
4392 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4395 code
= gimple_assign_rhs_code (stmt
);
4396 if (code
!= ARRAY_REF
4397 && code
!= INDIRECT_REF
4398 && code
!= COMPONENT_REF
4399 && code
!= IMAGPART_EXPR
4400 && code
!= REALPART_EXPR
4402 && TREE_CODE_CLASS (code
) != tcc_declaration
)
4405 if (!STMT_VINFO_DATA_REF (stmt_info
))
4408 elem_type
= TREE_TYPE (vectype
);
4409 mode
= TYPE_MODE (vectype
);
4411 /* FORNOW. In some cases can vectorize even if data-type not supported
4412 (e.g. - data copies). */
4413 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
4415 if (vect_print_dump_info (REPORT_DETAILS
))
4416 fprintf (vect_dump
, "Aligned load, but unsupported type.");
4420 /* Check if the load is a part of an interleaving chain. */
4421 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
4423 grouped_load
= true;
4425 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
4427 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4428 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
4430 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4431 if (vect_load_lanes_supported (vectype
, group_size
))
4432 load_lanes_p
= true;
4433 else if (!vect_grouped_load_supported (vectype
, group_size
))
4439 if (STMT_VINFO_GATHER_P (stmt_info
))
4443 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
4444 &gather_off
, &gather_scale
);
4445 gcc_assert (gather_decl
);
4446 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
4447 &def_stmt
, &def
, &gather_dt
,
4448 &gather_off_vectype
))
4450 if (vect_print_dump_info (REPORT_DETAILS
))
4451 fprintf (vect_dump
, "gather index use not simple.");
4455 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4457 if (!vect_check_strided_load (stmt
, loop_vinfo
,
4458 &stride_base
, &stride_step
))
4463 negative
= tree_int_cst_compare (nested_in_vect_loop
4464 ? STMT_VINFO_DR_STEP (stmt_info
)
4466 size_zero_node
) < 0;
4467 if (negative
&& ncopies
> 1)
4469 if (vect_print_dump_info (REPORT_DETAILS
))
4470 fprintf (vect_dump
, "multiple types with negative step.");
4476 gcc_assert (!grouped_load
);
4477 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
4478 if (alignment_support_scheme
!= dr_aligned
4479 && alignment_support_scheme
!= dr_unaligned_supported
)
4481 if (vect_print_dump_info (REPORT_DETAILS
))
4482 fprintf (vect_dump
, "negative step but alignment required.");
4485 if (!perm_mask_for_reverse (vectype
))
4487 if (vect_print_dump_info (REPORT_DETAILS
))
4488 fprintf (vect_dump
, "negative step and reversing not supported.");
4494 if (!vec_stmt
) /* transformation not required. */
4496 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
4497 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
, NULL
);
4501 if (vect_print_dump_info (REPORT_DETAILS
))
4502 fprintf (vect_dump
, "transform load. ncopies = %d", ncopies
);
4506 if (STMT_VINFO_GATHER_P (stmt_info
))
4508 tree vec_oprnd0
= NULL_TREE
, op
;
4509 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
4510 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
4511 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
4512 edge pe
= loop_preheader_edge (loop
);
4515 enum { NARROW
, NONE
, WIDEN
} modifier
;
4516 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
4518 if (nunits
== gather_off_nunits
)
4520 else if (nunits
== gather_off_nunits
/ 2)
4522 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
4525 for (i
= 0; i
< gather_off_nunits
; ++i
)
4526 sel
[i
] = i
| nunits
;
4528 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
4529 gcc_assert (perm_mask
!= NULL_TREE
);
4531 else if (nunits
== gather_off_nunits
* 2)
4533 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
4536 for (i
= 0; i
< nunits
; ++i
)
4537 sel
[i
] = i
< gather_off_nunits
4538 ? i
: i
+ nunits
- gather_off_nunits
;
4540 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
4541 gcc_assert (perm_mask
!= NULL_TREE
);
4547 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
4548 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4549 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4550 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4551 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4552 scaletype
= TREE_VALUE (arglist
);
4553 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
4554 && types_compatible_p (srctype
, masktype
));
4556 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4558 ptr
= fold_convert (ptrtype
, gather_base
);
4559 if (!is_gimple_min_invariant (ptr
))
4561 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
4562 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
4563 gcc_assert (!new_bb
);
4566 /* Currently we support only unconditional gather loads,
4567 so mask should be all ones. */
4568 if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
4569 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
4570 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
4574 for (j
= 0; j
< 6; ++j
)
4576 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
4577 mask
= build_real (TREE_TYPE (masktype
), r
);
4581 mask
= build_vector_from_val (masktype
, mask
);
4582 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
4584 scale
= build_int_cst (scaletype
, gather_scale
);
4586 prev_stmt_info
= NULL
;
4587 for (j
= 0; j
< ncopies
; ++j
)
4589 if (modifier
== WIDEN
&& (j
& 1))
4590 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
4591 perm_mask
, stmt
, gsi
);
4594 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
4597 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
4599 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
4601 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
4602 == TYPE_VECTOR_SUBPARTS (idxtype
));
4603 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
4604 add_referenced_var (var
);
4605 var
= make_ssa_name (var
, NULL
);
4606 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
4608 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
4610 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4615 = gimple_build_call (gather_decl
, 5, mask
, ptr
, op
, mask
, scale
);
4617 if (!useless_type_conversion_p (vectype
, rettype
))
4619 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
4620 == TYPE_VECTOR_SUBPARTS (rettype
));
4621 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
4622 add_referenced_var (var
);
4623 op
= make_ssa_name (var
, new_stmt
);
4624 gimple_call_set_lhs (new_stmt
, op
);
4625 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4626 var
= make_ssa_name (vec_dest
, NULL
);
4627 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
4629 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
4634 var
= make_ssa_name (vec_dest
, new_stmt
);
4635 gimple_call_set_lhs (new_stmt
, var
);
4638 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4640 if (modifier
== NARROW
)
4647 var
= permute_vec_elements (prev_res
, var
,
4648 perm_mask
, stmt
, gsi
);
4649 new_stmt
= SSA_NAME_DEF_STMT (var
);
4652 if (prev_stmt_info
== NULL
)
4653 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4655 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4656 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4660 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4662 gimple_stmt_iterator incr_gsi
;
4666 tree ref
= DR_REF (dr
);
4669 VEC(constructor_elt
, gc
) *v
= NULL
;
4670 gimple_seq stmts
= NULL
;
4672 gcc_assert (stride_base
&& stride_step
);
      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ... = array[i];

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}
	     ...   */
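      /* A concrete instance of the above (editorial sketch; IV and TMP names
	 are hypothetical): with VF = 4 and nunits = 4, one copy of the load
	 is emitted roughly as

	     tmp0 = array[iv];
	     iv1  = iv + stride;
	     tmp1 = array[iv1];
	     iv2  = iv1 + stride;
	     tmp2 = array[iv2];
	     iv3  = iv2 + stride;
	     tmp3 = array[iv3];
	     vectemp = {tmp0, tmp1, tmp2, tmp3};

	 where IV is the new induction variable created below, stepping by
	 VF * stride per loop iteration.  */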
4690 ivstep
= stride_step
;
4691 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
4692 build_int_cst (TREE_TYPE (ivstep
), vf
));
4694 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
4696 create_iv (stride_base
, ivstep
, NULL
,
4697 loop
, &incr_gsi
, insert_after
,
4699 incr
= gsi_stmt (incr_gsi
);
4700 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
4702 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
4704 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
4706 prev_stmt_info
= NULL
;
4707 running_off
= offvar
;
4708 for (j
= 0; j
< ncopies
; j
++)
4712 v
= VEC_alloc (constructor_elt
, gc
, nunits
);
4713 for (i
= 0; i
< nunits
; i
++)
4715 tree newref
, newoff
;
4717 if (TREE_CODE (ref
) == ARRAY_REF
)
4718 newref
= build4 (ARRAY_REF
, TREE_TYPE (ref
),
4719 unshare_expr (TREE_OPERAND (ref
, 0)),
4721 NULL_TREE
, NULL_TREE
);
4723 newref
= build2 (MEM_REF
, TREE_TYPE (ref
),
4725 TREE_OPERAND (ref
, 1));
4727 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
4730 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
4731 newoff
= SSA_NAME_VAR (running_off
);
4732 if (POINTER_TYPE_P (TREE_TYPE (newoff
)))
4733 incr
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, newoff
,
4734 running_off
, stride_step
);
4736 incr
= gimple_build_assign_with_ops (PLUS_EXPR
, newoff
,
4737 running_off
, stride_step
);
4738 newoff
= make_ssa_name (newoff
, incr
);
4739 gimple_assign_set_lhs (incr
, newoff
);
4740 vect_finish_stmt_generation (stmt
, incr
, gsi
);
4742 running_off
= newoff
;
4745 vec_inv
= build_constructor (vectype
, v
);
4746 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
4747 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
4750 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4752 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4753 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4760 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4762 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance
)
4763 && first_stmt
!= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0))
4764 first_stmt
= VEC_index (gimple
, SLP_TREE_SCALAR_STMTS (slp_node
), 0);
4766 /* Check if the chain of loads is already vectorized. */
4767 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
)))
4769 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4772 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4773 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4775 /* VEC_NUM is the number of vect stmts to be created for this group. */
4778 grouped_load
= false;
4779 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
4780 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance
))
4784 vec_num
= group_size
;
4790 group_size
= vec_num
= 1;
4793 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
4794 gcc_assert (alignment_support_scheme
);
  /* Targets with load-lane instructions must not require explicit
     realignment.  */
4797 gcc_assert (!load_lanes_p
4798 || alignment_support_scheme
== dr_aligned
4799 || alignment_support_scheme
== dr_unaligned_supported
);
  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1:
        We first create the vector stmt VS1_0, and, as usual, record a
        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
        Next, we create the vector stmt VS1_1, and record a pointer to
        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
        stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     See in documentation in vect_get_vec_def_for_stmt_copy for how the
     information we recorded in RELATED_STMT field is used to vectorize
     stmt S2.  */
  /* In case of interleaving (non-unit grouped access):

       S1:  x2 = &base + 2
       S2:  x0 = &base
       S3:  x1 = &base + 1
       S4:  x3 = &base + 3

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

       VS1: vx0 = &base
       VS2: vx1 = &base + vec_size*1
       VS3: vx3 = &base + vec_size*2
       VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

       VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
       VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
       ...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
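  /* An illustrative source-level example (editorial; the array names are
     hypothetical): a loop such as

	 for (i = 0; i < n; i++)
	   {
	     a[i] = in[2*i];        // S1
	     b[i] = in[2*i + 1];    // S2
	   }

     gives a load group of size 2.  With 4-element vectors, each copy loads
     two adjacent vectors starting at &in[2*i] and de-interleaves them with
     even/odd VEC_PERM_EXPRs as described above, one result feeding the uses
     of S1 and the other the uses of S2.  */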
  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

	 p = initial_addr;
	 indx = 0;
	 loop {
	   p = p + indx * vectype_size;
	   vec_dest = *(p);
	   indx = indx + 1;
	 }

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

	 msq_init = *(floor(p1))
	 p2 = initial_addr + VS - 1;
	 realignment_token = call target_builtin;
	 indx = 0;
	 loop {
	   p2 = p2 + indx * vectype_size
	   lsq = *(floor(p2))
	   vec_dest = realign_load (msq, lsq, realignment_token)
	   indx = indx + 1;
	   msq = lsq;
	 }   */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
4904 if (nested_in_vect_loop
4905 && (TREE_INT_CST_LOW (DR_STEP (dr
))
4906 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
4908 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
4909 compute_in_loop
= true;
4912 if ((alignment_support_scheme
== dr_explicit_realign_optimized
4913 || alignment_support_scheme
== dr_explicit_realign
)
4914 && !compute_in_loop
)
4916 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
4917 alignment_support_scheme
, NULL_TREE
,
4919 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
4921 phi
= SSA_NAME_DEF_STMT (msq
);
4922 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
4929 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
4932 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
4934 aggr_type
= vectype
;
4936 prev_stmt_info
= NULL
;
4937 for (j
= 0; j
< ncopies
; j
++)
4939 /* 1. Create the vector or array pointer update chain. */
4941 dataref_ptr
= vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
4942 offset
, &dummy
, gsi
,
4943 &ptr_incr
, false, &inv_p
);
4945 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
4946 TYPE_SIZE_UNIT (aggr_type
));
4948 if (grouped_load
|| slp_perm
)
4949 dr_chain
= VEC_alloc (tree
, heap
, vec_num
);
4955 vec_array
= create_vector_array (vectype
, vec_num
);
4958 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4959 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
4960 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
4961 gimple_call_set_lhs (new_stmt
, vec_array
);
4962 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4964 /* Extract each vector into an SSA_NAME. */
4965 for (i
= 0; i
< vec_num
; i
++)
4967 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
4969 VEC_quick_push (tree
, dr_chain
, new_temp
);
4972 /* Record the mapping between SSA_NAMEs and statements. */
4973 vect_record_grouped_load_vectors (stmt
, dr_chain
);
4977 for (i
= 0; i
< vec_num
; i
++)
4980 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
4983 /* 2. Create the vector-load in the loop. */
4984 switch (alignment_support_scheme
)
4987 case dr_unaligned_supported
:
4989 unsigned int align
, misalign
;
4992 = build2 (MEM_REF
, vectype
, dataref_ptr
,
4993 build_int_cst (reference_alias_ptr_type
4994 (DR_REF (first_dr
)), 0));
4995 align
= TYPE_ALIGN_UNIT (vectype
);
4996 if (alignment_support_scheme
== dr_aligned
)
4998 gcc_assert (aligned_access_p (first_dr
));
5001 else if (DR_MISALIGNMENT (first_dr
) == -1)
5003 TREE_TYPE (data_ref
)
5004 = build_aligned_type (TREE_TYPE (data_ref
),
5005 TYPE_ALIGN (elem_type
));
5006 align
= TYPE_ALIGN_UNIT (elem_type
);
5011 TREE_TYPE (data_ref
)
5012 = build_aligned_type (TREE_TYPE (data_ref
),
5013 TYPE_ALIGN (elem_type
));
5014 misalign
= DR_MISALIGNMENT (first_dr
);
5016 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
5020 case dr_explicit_realign
:
5025 vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
5027 if (compute_in_loop
)
5028 msq
= vect_setup_realignment (first_stmt
, gsi
,
5030 dr_explicit_realign
,
5033 new_stmt
= gimple_build_assign_with_ops
5034 (BIT_AND_EXPR
, NULL_TREE
, dataref_ptr
,
5036 (TREE_TYPE (dataref_ptr
),
5037 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5038 ptr
= make_ssa_name (SSA_NAME_VAR (dataref_ptr
), new_stmt
);
5039 gimple_assign_set_lhs (new_stmt
, ptr
);
5040 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5042 = build2 (MEM_REF
, vectype
, ptr
,
5043 build_int_cst (reference_alias_ptr_type
5044 (DR_REF (first_dr
)), 0));
5045 vec_dest
= vect_create_destination_var (scalar_dest
,
5047 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
5048 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5049 gimple_assign_set_lhs (new_stmt
, new_temp
);
5050 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
5051 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
5052 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5055 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
5056 TYPE_SIZE_UNIT (elem_type
));
5057 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
5058 new_stmt
= gimple_build_assign_with_ops
5059 (BIT_AND_EXPR
, NULL_TREE
, ptr
,
5062 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5063 ptr
= make_ssa_name (SSA_NAME_VAR (dataref_ptr
), new_stmt
);
5064 gimple_assign_set_lhs (new_stmt
, ptr
);
5065 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5067 = build2 (MEM_REF
, vectype
, ptr
,
5068 build_int_cst (reference_alias_ptr_type
5069 (DR_REF (first_dr
)), 0));
5072 case dr_explicit_realign_optimized
:
5073 new_stmt
= gimple_build_assign_with_ops
5074 (BIT_AND_EXPR
, NULL_TREE
, dataref_ptr
,
5076 (TREE_TYPE (dataref_ptr
),
5077 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5078 new_temp
= make_ssa_name (SSA_NAME_VAR (dataref_ptr
),
5080 gimple_assign_set_lhs (new_stmt
, new_temp
);
5081 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5083 = build2 (MEM_REF
, vectype
, new_temp
,
5084 build_int_cst (reference_alias_ptr_type
5085 (DR_REF (first_dr
)), 0));
5090 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5091 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
5092 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5093 gimple_assign_set_lhs (new_stmt
, new_temp
);
5094 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5096 /* 3. Handle explicit realignment if necessary/supported.
5098 vec_dest = realign_load (msq, lsq, realignment_token) */
5099 if (alignment_support_scheme
== dr_explicit_realign_optimized
5100 || alignment_support_scheme
== dr_explicit_realign
)
5102 lsq
= gimple_assign_lhs (new_stmt
);
5103 if (!realignment_token
)
5104 realignment_token
= dataref_ptr
;
5105 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5107 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR
,
5110 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5111 gimple_assign_set_lhs (new_stmt
, new_temp
);
5112 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5114 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
5117 if (i
== vec_num
- 1 && j
== ncopies
- 1)
5118 add_phi_arg (phi
, lsq
,
5119 loop_latch_edge (containing_loop
),
5125 /* 4. Handle invariant-load. */
5126 if (inv_p
&& !bb_vinfo
)
5128 gimple_stmt_iterator gsi2
= *gsi
;
5129 gcc_assert (!grouped_load
);
5131 new_temp
= vect_init_vector (stmt
, scalar_dest
,
5133 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5138 tree perm_mask
= perm_mask_for_reverse (vectype
);
5139 new_temp
= permute_vec_elements (new_temp
, new_temp
,
5140 perm_mask
, stmt
, gsi
);
5141 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5144 /* Collect vector loads and later create their permutation in
5145 vect_transform_grouped_load (). */
5146 if (grouped_load
|| slp_perm
)
5147 VEC_quick_push (tree
, dr_chain
, new_temp
);
5149 /* Store vector loads in the corresponding SLP_NODE. */
5150 if (slp
&& !slp_perm
)
5151 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
),
5156 if (slp
&& !slp_perm
)
5161 if (!vect_transform_slp_perm_load (stmt
, dr_chain
, gsi
, vf
,
5162 slp_node_instance
, false))
5164 VEC_free (tree
, heap
, dr_chain
);
5173 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
5174 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5179 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5181 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5182 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5186 VEC_free (tree
, heap
, dr_chain
);
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
5205 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
5206 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
5210 enum vect_def_type dt
;
5211 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
5213 if (!COMPARISON_CLASS_P (cond
))
5216 lhs
= TREE_OPERAND (cond
, 0);
5217 rhs
= TREE_OPERAND (cond
, 1);
5219 if (TREE_CODE (lhs
) == SSA_NAME
)
5221 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
5222 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
5223 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
5226 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
5227 && TREE_CODE (lhs
) != FIXED_CST
)
5230 if (TREE_CODE (rhs
) == SSA_NAME
)
5232 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
5233 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
5234 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
5237 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
5238 && TREE_CODE (rhs
) != FIXED_CST
)
5241 *comp_vectype
= vectype1
? vectype1
: vectype2
;
5245 /* vectorizable_condition.
5247 Check if STMT is conditional modify expression that can be vectorized.
5248 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5249 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).

   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
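/* An illustrative sketch of the transformation (editorial; the variable
   names are hypothetical): a scalar statement

       x = a < b ? c : d;

   is replaced by a vector conditional such as

       vcmp = VA < VB;
       vx   = VEC_COND_EXPR < vcmp, VC, VD >;

   where the comparison operands need a common vector type (COMP_VECTYPE)
   and the target must support the corresponding vcond pattern, which is
   what expand_vec_cond_expr_p checks during analysis.  */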
5259 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
5260 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
5263 tree scalar_dest
= NULL_TREE
;
5264 tree vec_dest
= NULL_TREE
;
5265 tree cond_expr
, then_clause
, else_clause
;
5266 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5267 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5268 tree comp_vectype
= NULL_TREE
;
5269 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
5270 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
5271 tree vec_compare
, vec_cond_expr
;
5273 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5275 enum vect_def_type dt
, dts
[4];
5276 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5278 enum tree_code code
;
5279 stmt_vec_info prev_stmt_info
= NULL
;
5281 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5282 VEC (tree
, heap
) *vec_oprnds0
= NULL
, *vec_oprnds1
= NULL
;
5283 VEC (tree
, heap
) *vec_oprnds2
= NULL
, *vec_oprnds3
= NULL
;
5285 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
5288 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5290 gcc_assert (ncopies
>= 1);
5291 if (reduc_index
&& ncopies
> 1)
5292 return false; /* FORNOW */
5294 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
5297 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5300 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5301 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
5305 /* FORNOW: not yet supported. */
5306 if (STMT_VINFO_LIVE_P (stmt_info
))
5308 if (vect_print_dump_info (REPORT_DETAILS
))
5309 fprintf (vect_dump
, "value used after loop.");
5313 /* Is vectorizable conditional operation? */
5314 if (!is_gimple_assign (stmt
))
5317 code
= gimple_assign_rhs_code (stmt
);
5319 if (code
!= COND_EXPR
)
5322 cond_expr
= gimple_assign_rhs1 (stmt
);
5323 then_clause
= gimple_assign_rhs2 (stmt
);
5324 else_clause
= gimple_assign_rhs3 (stmt
);
5326 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
5331 if (TREE_CODE (then_clause
) == SSA_NAME
)
5333 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
5334 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5335 &then_def_stmt
, &def
, &dt
))
5338 else if (TREE_CODE (then_clause
) != INTEGER_CST
5339 && TREE_CODE (then_clause
) != REAL_CST
5340 && TREE_CODE (then_clause
) != FIXED_CST
)
5343 if (TREE_CODE (else_clause
) == SSA_NAME
)
5345 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
5346 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5347 &else_def_stmt
, &def
, &dt
))
5350 else if (TREE_CODE (else_clause
) != INTEGER_CST
5351 && TREE_CODE (else_clause
) != REAL_CST
5352 && TREE_CODE (else_clause
) != FIXED_CST
)
5357 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
5358 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
5365 vec_oprnds0
= VEC_alloc (tree
, heap
, 1);
5366 vec_oprnds1
= VEC_alloc (tree
, heap
, 1);
5367 vec_oprnds2
= VEC_alloc (tree
, heap
, 1);
5368 vec_oprnds3
= VEC_alloc (tree
, heap
, 1);
5372 scalar_dest
= gimple_assign_lhs (stmt
);
5373 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5375 /* Handle cond expr. */
5376 for (j
= 0; j
< ncopies
; j
++)
5378 gimple new_stmt
= NULL
;
5383 VEC (tree
, heap
) *ops
= VEC_alloc (tree
, heap
, 4);
5384 VEC (slp_void_p
, heap
) *vec_defs
;
5386 vec_defs
= VEC_alloc (slp_void_p
, heap
, 4);
5387 VEC_safe_push (tree
, heap
, ops
, TREE_OPERAND (cond_expr
, 0));
5388 VEC_safe_push (tree
, heap
, ops
, TREE_OPERAND (cond_expr
, 1));
5389 VEC_safe_push (tree
, heap
, ops
, then_clause
);
5390 VEC_safe_push (tree
, heap
, ops
, else_clause
);
5391 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
5392 vec_oprnds3
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5393 vec_oprnds2
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5394 vec_oprnds1
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5395 vec_oprnds0
= (VEC (tree
, heap
) *) VEC_pop (slp_void_p
, vec_defs
);
5397 VEC_free (tree
, heap
, ops
);
5398 VEC_free (slp_void_p
, heap
, vec_defs
);
5404 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
5406 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
5407 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
5410 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
5412 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
5413 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
5414 if (reduc_index
== 1)
5415 vec_then_clause
= reduc_def
;
5418 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
5420 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
5421 NULL
, >emp
, &def
, &dts
[2]);
5423 if (reduc_index
== 2)
5424 vec_else_clause
= reduc_def
;
5427 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
5429 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
5430 NULL
, >emp
, &def
, &dts
[3]);
5436 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
5437 VEC_pop (tree
, vec_oprnds0
));
5438 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
5439 VEC_pop (tree
, vec_oprnds1
));
5440 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
5441 VEC_pop (tree
, vec_oprnds2
));
5442 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
5443 VEC_pop (tree
, vec_oprnds3
));
5448 VEC_quick_push (tree
, vec_oprnds0
, vec_cond_lhs
);
5449 VEC_quick_push (tree
, vec_oprnds1
, vec_cond_rhs
);
5450 VEC_quick_push (tree
, vec_oprnds2
, vec_then_clause
);
5451 VEC_quick_push (tree
, vec_oprnds3
, vec_else_clause
);
5454 /* Arguments are ready. Create the new vector stmt. */
5455 FOR_EACH_VEC_ELT (tree
, vec_oprnds0
, i
, vec_cond_lhs
)
5457 vec_cond_rhs
= VEC_index (tree
, vec_oprnds1
, i
);
5458 vec_then_clause
= VEC_index (tree
, vec_oprnds2
, i
);
5459 vec_else_clause
= VEC_index (tree
, vec_oprnds3
, i
);
5461 vec_compare
= build2 (TREE_CODE (cond_expr
), vectype
,
5462 vec_cond_lhs
, vec_cond_rhs
);
5463 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
5464 vec_compare
, vec_then_clause
, vec_else_clause
);
5466 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
5467 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5468 gimple_assign_set_lhs (new_stmt
, new_temp
);
5469 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5471 VEC_quick_push (gimple
, SLP_TREE_VEC_STMTS (slp_node
), new_stmt
);
5478 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5480 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5482 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5485 VEC_free (tree
, heap
, vec_oprnds0
);
5486 VEC_free (tree
, heap
, vec_oprnds1
);
5487 VEC_free (tree
, heap
, vec_oprnds2
);
5488 VEC_free (tree
, heap
, vec_oprnds3
);
5494 /* Make sure the statement is vectorizable. */
5497 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
5499 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5500 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5501 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
5503 tree scalar_type
, vectype
;
5504 gimple pattern_stmt
;
5505 gimple_seq pattern_def_seq
;
5507 if (vect_print_dump_info (REPORT_DETAILS
))
5509 fprintf (vect_dump
, "==> examining statement: ");
5510 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5513 if (gimple_has_volatile_ops (stmt
))
5515 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
5516 fprintf (vect_dump
, "not vectorized: stmt has volatile operands");
  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
5523 - the COND_EXPR which is the loop exit condition
5524 - any LABEL_EXPRs in the loop
5525 - computations that are used only for array indexing or loop control.
5526 In basic blocks we only analyze statements that are a part of some SLP
5527 instance, therefore, all the statements are relevant.
5529 Pattern statement needs to be analyzed instead of the original statement
5530 if the original statement is not relevant. Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal; there we do not analyze pattern stmts separately, since the
     pattern stmts are already part of the SLP instance.  */
5535 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
5536 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5537 && !STMT_VINFO_LIVE_P (stmt_info
))
5539 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5541 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5542 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5544 /* Analyze PATTERN_STMT instead of the original stmt. */
5545 stmt
= pattern_stmt
;
5546 stmt_info
= vinfo_for_stmt (pattern_stmt
);
5547 if (vect_print_dump_info (REPORT_DETAILS
))
5549 fprintf (vect_dump
, "==> examining pattern statement: ");
5550 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5555 if (vect_print_dump_info (REPORT_DETAILS
))
5556 fprintf (vect_dump
, "irrelevant.");
5561 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5564 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5565 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5567 /* Analyze PATTERN_STMT too. */
5568 if (vect_print_dump_info (REPORT_DETAILS
))
5570 fprintf (vect_dump
, "==> examining pattern statement: ");
5571 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5574 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
5578 if (is_pattern_stmt_p (stmt_info
)
5580 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
5582 gimple_stmt_iterator si
;
5584 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
5586 gimple pattern_def_stmt
= gsi_stmt (si
);
5587 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
5588 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
5590 /* Analyze def stmt of STMT if it's a pattern stmt. */
5591 if (vect_print_dump_info (REPORT_DETAILS
))
5593 fprintf (vect_dump
, "==> examining pattern def statement: ");
5594 print_gimple_stmt (vect_dump
, pattern_def_stmt
, 0, TDF_SLIM
);
5597 if (!vect_analyze_stmt (pattern_def_stmt
,
5598 need_to_vectorize
, node
))
5604 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
5606 case vect_internal_def
:
5609 case vect_reduction_def
:
5610 case vect_nested_cycle
:
5611 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
5612 || relevance
== vect_used_in_outer_by_reduction
5613 || relevance
== vect_unused_in_scope
));
5616 case vect_induction_def
:
5617 case vect_constant_def
:
5618 case vect_external_def
:
5619 case vect_unknown_def_type
:
5626 gcc_assert (PURE_SLP_STMT (stmt_info
));
5628 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
5629 if (vect_print_dump_info (REPORT_DETAILS
))
5631 fprintf (vect_dump
, "get vectype for scalar type: ");
5632 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
5635 vectype
= get_vectype_for_scalar_type (scalar_type
);
5638 if (vect_print_dump_info (REPORT_DETAILS
))
5640 fprintf (vect_dump
, "not SLPed: unsupported data-type ");
5641 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
5646 if (vect_print_dump_info (REPORT_DETAILS
))
5648 fprintf (vect_dump
, "vectype: ");
5649 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
5652 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
5655 if (STMT_VINFO_RELEVANT_P (stmt_info
))
5657 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
5658 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
5659 *need_to_vectorize
= true;
5664 && (STMT_VINFO_RELEVANT_P (stmt_info
)
5665 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
5666 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
5667 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
5668 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
5669 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
5670 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
5671 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
5672 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
5673 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
5674 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
5678 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, node
)
5679 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
5680 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
5681 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
5682 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
5683 || vectorizable_call (stmt
, NULL
, NULL
, node
)
5684 || vectorizable_store (stmt
, NULL
, NULL
, node
)
5685 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
5690 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
5692 fprintf (vect_dump
, "not vectorized: relevant stmt not ");
5693 fprintf (vect_dump
, "supported: ");
5694 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5703 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5704 need extra handling, except for vectorizable reductions. */
5705 if (STMT_VINFO_LIVE_P (stmt_info
)
5706 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
5707 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
5711 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
5713 fprintf (vect_dump
, "not vectorized: live stmt not ");
5714 fprintf (vect_dump
, "supported: ");
5715 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
5725 /* Function vect_transform_stmt.
5727 Create a vectorized stmt to replace STMT, and insert it at BSI. */
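/* An illustrative usage sketch (editorial; simplified relative to the way
   the loop and basic-block transform phases actually drive this function):

       bool grouped_store = false;
       bool is_store = vect_transform_stmt (stmt, &gsi, &grouped_store,
					    NULL, NULL);
       if (is_store && grouped_store)
	 // The whole interleaving chain was emitted when its last store
	 // was reached; the scalar stores can now be removed.
	 vect_remove_stores (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));  */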
5730 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
5731 bool *grouped_store
, slp_tree slp_node
,
5732 slp_instance slp_node_instance
)
5734 bool is_store
= false;
5735 gimple vec_stmt
= NULL
;
5736 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5739 switch (STMT_VINFO_TYPE (stmt_info
))
5741 case type_demotion_vec_info_type
:
5742 case type_promotion_vec_info_type
:
5743 case type_conversion_vec_info_type
:
5744 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
5748 case induc_vec_info_type
:
5749 gcc_assert (!slp_node
);
5750 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
5754 case shift_vec_info_type
:
5755 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
5759 case op_vec_info_type
:
5760 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
5764 case assignment_vec_info_type
:
5765 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
5769 case load_vec_info_type
:
5770 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
5775 case store_vec_info_type
:
5776 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
5778 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
	/* In case of interleaving, the whole chain is vectorized when the
	   last store in the chain is reached.  Store stmts before the last
	   one are skipped, and their vec_stmt_info shouldn't be freed
	   meanwhile.  */
5784 *grouped_store
= true;
5785 if (STMT_VINFO_VEC_STMT (stmt_info
))
5792 case condition_vec_info_type
:
5793 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
5797 case call_vec_info_type
:
5798 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
5799 stmt
= gsi_stmt (*gsi
);
5802 case reduc_vec_info_type
:
5803 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
5808 if (!STMT_VINFO_LIVE_P (stmt_info
))
5810 if (vect_print_dump_info (REPORT_DETAILS
))
5811 fprintf (vect_dump
, "stmt not supported.");
5816 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5817 is being vectorized, but outside the immediately enclosing loop. */
5819 && STMT_VINFO_LOOP_VINFO (stmt_info
)
5820 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5821 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
5822 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
5823 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
5824 || STMT_VINFO_RELEVANT (stmt_info
) ==
5825 vect_used_in_outer_by_reduction
))
5827 struct loop
*innerloop
= LOOP_VINFO_LOOP (
5828 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
5829 imm_use_iterator imm_iter
;
5830 use_operand_p use_p
;
5834 if (vect_print_dump_info (REPORT_DETAILS
))
5835 fprintf (vect_dump
, "Record the vdef for outer-loop vectorization.");
      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
	 (to be used when vectorizing outer-loop stmts that use the DEF of
	 STMT).  */
5840 if (gimple_code (stmt
) == GIMPLE_PHI
)
5841 scalar_dest
= PHI_RESULT (stmt
);
5843 scalar_dest
= gimple_assign_lhs (stmt
);
5845 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
5847 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
5849 exit_phi
= USE_STMT (use_p
);
5850 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
5855 /* Handle stmts whose DEF is used outside the loop-nest that is
5856 being vectorized. */
5857 if (STMT_VINFO_LIVE_P (stmt_info
)
5858 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
5860 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
5865 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
5871 /* Remove a group of stores (for SLP or interleaving), free their
5875 vect_remove_stores (gimple first_stmt
)
5877 gimple next
= first_stmt
;
5879 gimple_stmt_iterator next_si
;
5883 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
5885 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
5886 if (is_pattern_stmt_p (stmt_info
))
5887 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
5888 /* Free the attached stmt_vec_info and remove the stmt. */
5889 next_si
= gsi_for_stmt (next
);
5890 unlink_stmt_vdef (next
);
5891 gsi_remove (&next_si
, true);
5892 release_defs (next
);
5893 free_stmt_vec_info (next
);
5899 /* Function new_stmt_vec_info.
5901 Create and initialize a new stmt_vec_info struct for STMT. */
5904 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
5905 bb_vec_info bb_vinfo
)
5908 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
5910 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
5911 STMT_VINFO_STMT (res
) = stmt
;
5912 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
5913 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
5914 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
5915 STMT_VINFO_LIVE_P (res
) = false;
5916 STMT_VINFO_VECTYPE (res
) = NULL
;
5917 STMT_VINFO_VEC_STMT (res
) = NULL
;
5918 STMT_VINFO_VECTORIZABLE (res
) = true;
5919 STMT_VINFO_IN_PATTERN_P (res
) = false;
5920 STMT_VINFO_RELATED_STMT (res
) = NULL
;
5921 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
5922 STMT_VINFO_DATA_REF (res
) = NULL
;
5924 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
5925 STMT_VINFO_DR_OFFSET (res
) = NULL
;
5926 STMT_VINFO_DR_INIT (res
) = NULL
;
5927 STMT_VINFO_DR_STEP (res
) = NULL
;
5928 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
5930 if (gimple_code (stmt
) == GIMPLE_PHI
5931 && is_loop_header_bb_p (gimple_bb (stmt
)))
5932 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
5934 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
5936 STMT_VINFO_SAME_ALIGN_REFS (res
) = VEC_alloc (dr_p
, heap
, 5);
5937 STMT_VINFO_INSIDE_OF_LOOP_COST (res
) = 0;
5938 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res
) = 0;
5939 STMT_SLP_TYPE (res
) = loop_vect
;
5940 GROUP_FIRST_ELEMENT (res
) = NULL
;
5941 GROUP_NEXT_ELEMENT (res
) = NULL
;
5942 GROUP_SIZE (res
) = 0;
5943 GROUP_STORE_COUNT (res
) = 0;
5944 GROUP_GAP (res
) = 0;
5945 GROUP_SAME_DR_STMT (res
) = NULL
;
5946 GROUP_READ_WRITE_DEPENDENCE (res
) = false;
5952 /* Create a hash table for stmt_vec_info. */
5955 init_stmt_vec_info_vec (void)
5957 gcc_assert (!stmt_vec_info_vec
);
5958 stmt_vec_info_vec
= VEC_alloc (vec_void_p
, heap
, 50);
5962 /* Free hash table for stmt_vec_info. */
5965 free_stmt_vec_info_vec (void)
5967 gcc_assert (stmt_vec_info_vec
);
5968 VEC_free (vec_void_p
, heap
, stmt_vec_info_vec
);
5972 /* Free stmt vectorization related info. */
5975 free_stmt_vec_info (gimple stmt
)
5977 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
  /* Check if this statement has a related "pattern stmt"
     (introduced by the vectorizer during the pattern recognition
     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
     too.  */
5986 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
5988 stmt_vec_info patt_info
5989 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
5992 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
5995 gimple_stmt_iterator si
;
5996 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
5997 free_stmt_vec_info (gsi_stmt (si
));
5999 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info
));
6003 VEC_free (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmt_info
));
6004 set_vinfo_for_stmt (stmt
, NULL
);
6009 /* Function get_vectype_for_scalar_type_and_size.
6011 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
6015 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
6017 enum machine_mode inner_mode
= TYPE_MODE (scalar_type
);
6018 enum machine_mode simd_mode
;
6019 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
6026 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
6027 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
6030 /* We can't build a vector type of elements with alignment bigger than
6032 if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
     they support the proper result truncation/extension.
     We also make sure to build vector types with INTEGER_TYPE
     component type only.  */
6041 if (INTEGRAL_TYPE_P (scalar_type
)
6042 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
6043 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
6044 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
6045 TYPE_UNSIGNED (scalar_type
));
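  /* An illustrative example (editorial): a scalar type such as _Bool has
     TYPE_PRECISION 1 but QImode as its mode, so the check above replaces it
     with the 8-bit integer type from build_nonstandard_integer_type; the
     resulting vector type is then e.g. V16QI rather than a vector of _Bool.
     The concrete sizes here are only an example.  */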
6047 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6048 When the component mode passes the above test simply use a type
6049 corresponding to that mode. The theory is that any use that
6050 would cause problems with this will disable vectorization anyway. */
6051 if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
6052 && !INTEGRAL_TYPE_P (scalar_type
)
6053 && !POINTER_TYPE_P (scalar_type
))
6054 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
6056 /* If no size was supplied use the mode the target prefers. Otherwise
6057 lookup a vector mode of the specified size. */
6059 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
6061 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
6062 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
6066 vectype
= build_vector_type (scalar_type
, nunits
);
6067 if (vect_print_dump_info (REPORT_DETAILS
))
6069 fprintf (vect_dump
, "get vectype with %d units of type ", nunits
);
6070 print_generic_expr (vect_dump
, scalar_type
, TDF_SLIM
);
6076 if (vect_print_dump_info (REPORT_DETAILS
))
6078 fprintf (vect_dump
, "vectype: ");
6079 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
6082 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
6083 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
6085 if (vect_print_dump_info (REPORT_DETAILS
))
6086 fprintf (vect_dump
, "mode not supported by target.");
unsigned int current_vector_size;

/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  tree vectype;
  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
                                                  current_vector_size);
  if (vectype
      && current_vector_size == 0)
    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
  return vectype;
}

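/* For illustration: the first successful call latches the vector size used
   for the rest of the current vectorization attempt.  Assuming a target
   with 16-byte vectors:

     current_vector_size = 0;
     v4si  = get_vectype_for_scalar_type (integer_type_node);
     v16qi = get_vectype_for_scalar_type (char_type_node);

   the first call sets current_vector_size to 16, so the second call asks
   for a 16-byte (16-unit) char vector rather than whatever mode the target
   would otherwise prefer for char.  */
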
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE with the same size
   as VECTOR_TYPE, if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  return get_vectype_for_scalar_type_and_size
           (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
}

/* Function vect_is_simple_use.

   Input:
   LOOP_VINFO - the vect info of the loop that is being vectorized.
   BB_VINFO - the vect info of the basic block that is being vectorized.
   OPERAND - operand of STMT in the loop or bb.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
                    bb_vec_info bb_vinfo, gimple *def_stmt,
                    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_is_simple_use: operand ");
      print_generic_expr (vect_dump, operand, TDF_SLIM);
    }

  if (CONSTANT_CLASS_P (operand))
    {
      *dt = vect_constant_def;
      return true;
    }

  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "non-associatable copy.");
      operand = TREE_OPERAND (operand, 0);
    }

  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not ssa-name.");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no def_stmt.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "def_stmt: ");
      print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
    }

  /* Empty stmt is expected only in case of a function argument.
     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_external_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);

  if ((loop && !flow_bb_inside_loop_p (loop, bb))
      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
    *dt = vect_external_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type
      || (stmt
          && *dt == vect_double_reduction_def
          && gimple_code (stmt) != GIMPLE_PHI))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unsupported pattern.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "type of def: %d.", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
        break;
      /* FALLTHRU */

    default:
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unsupported defining stmt: ");
      return false;
    }

  return true;
}

/* Function vect_is_simple_use_1.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible for computing the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
                      bb_vec_info bb_vinfo, gimple *def_stmt,
                      tree *def, enum vect_def_type *dt, tree *vectype)
{
  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
                           def, dt))
    return false;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);

      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
          && !STMT_VINFO_RELEVANT (stmt_info)
          && !STMT_VINFO_LIVE_P (stmt_info))
        stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));

      *vectype = STMT_VINFO_VECTYPE (stmt_info);
      gcc_assert (*vectype != NULL_TREE);
    }
  else if (*dt == vect_uninitialized_def
           || *dt == vect_constant_def
           || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}

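/* For illustration, a typical caller pattern when vectorizing a loop
   statement (hypothetical variable names):

     gimple def_stmt;
     tree def, vectype;
     enum vect_def_type dt;

     if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, NULL, &def_stmt,
                                &def, &dt, &vectype))
       return false;
     if (!vectype)
       vectype = get_vectype_for_scalar_type (TREE_TYPE (op));

   i.e. for constant and external defs the caller chooses a vector type
   itself, as described above.  */
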
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - DECL1 and DECL2 are decls of target builtin functions to be used
   when vectorizing the operation, if available.  In this case,
   CODE1 and CODE2 are CALL_EXPR.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (enum tree_code code, gimple stmt,
                                tree vectype_out, tree vectype_in,
                                tree *decl1, tree *decl2,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                VEC (tree, heap) **interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  bool ordered_p;
  enum machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1, optab2;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  enum tree_code c1, c2;
  int i;
  tree prev_type, intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  /* The result of a vectorized widening operation usually requires two vectors
     (because the widened results do not fit into one vector). The generated
     vector results would normally be expected to be generated in the same
     order as in the original scalar computation, i.e. if 8 results are
     generated in each vector iteration, they are to be organized as follows:
        vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].

     However, in the special case that the result of the widening operation is
     used in a reduction computation only, the order doesn't matter (because
     when vectorizing a reduction we change the order of the computation).
     Some targets can take advantage of this and generate more efficient code.
     For example, targets like Altivec that support widen_mult using a sequence
     of {mult_even,mult_odd} generate the following vectors:
        vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].

     When vectorizing outer-loops, we execute the inner-loop sequentially
     (each vectorized inner-loop iteration contributes to VF outer-loop
     iterations in parallel).  We therefore don't allow changing the order
     of the computation in the inner-loop during outer-loop vectorization.  */

  if (vect_loop
      && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
      && !nested_in_vect_loop_p (vect_loop, stmt))
    ordered_p = false;
  else
    ordered_p = true;

  if (!ordered_p
      && code == WIDEN_MULT_EXPR
      && targetm.vectorize.builtin_mul_widen_even
      && targetm.vectorize.builtin_mul_widen_even (vectype)
      && targetm.vectorize.builtin_mul_widen_odd
      && targetm.vectorize.builtin_mul_widen_odd (vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unordered widening operation detected.");

      *code1 = *code2 = CALL_EXPR;
      *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
      *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
      return true;
    }

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      c1 = VEC_WIDEN_MULT_LO_EXPR;
      c2 = VEC_WIDEN_MULT_HI_EXPR;
      break;

    case WIDEN_LSHIFT_EXPR:
      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
      break;

    CASE_CONVERT:
      c1 = VEC_UNPACK_LO_EXPR;
      c2 = VEC_UNPACK_HI_EXPR;
      break;

    case FLOAT_EXPR:
      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (BYTES_BIG_ENDIAN)
    {
      enum tree_code ctmp = c1;
      c1 = c2;
      c2 = ctmp;
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
     not.  */
  *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode,
                                          TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
          || insn_data[icode2].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (optab3, intermediate_mode))
              == CODE_FOR_nothing)
          || ((icode2 = optab_handler (optab4, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      VEC_quick_push (tree, *interm_types, intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
          && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
        return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  VEC_free (tree, heap, *interm_types);
  return false;
}

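/* For illustration: converting a V16QI char vector to int is a two-step
   promotion on a typical 16-byte-vector target.  A single
   VEC_UNPACK_{LO,HI}_EXPR pair only reaches short (V8HI), so the function
   returns with *MULTI_STEP_CVT == 1 and the V8HI type in INTERM_TYPES, and
   the caller emits something like

     v8hi_lo = VEC_UNPACK_LO_EXPR <v16qi>;
     v8hi_hi = VEC_UNPACK_HI_EXPR <v16qi>;
     v4si_0  = VEC_UNPACK_LO_EXPR <v8hi_lo>;
     v4si_1  = VEC_UNPACK_HI_EXPR <v8hi_lo>;
     v4si_2  = VEC_UNPACK_LO_EXPR <v8hi_hi>;
     v4si_3  = VEC_UNPACK_HI_EXPR <v8hi_hi>;

   (the exact modes, and which half is "lo", depend on the target and on
   BYTES_BIG_ENDIAN).  */
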
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
                                 tree vectype_out, tree vectype_in,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 VEC (tree, heap) **interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  enum machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
        = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
        = optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != NULL
          && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
          && insn_data[icode1].operand[0].mode
             == insn_data[icode2].operand[0].mode)
        {
          uns = false;
          optab1 = interm_optab;
          icode1 = icode2;
        }
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
        = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
        = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
                               optab_default);
      if (!interm_optab
          || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
          || insn_data[icode1].operand[0].mode != intermediate_mode
          || ((icode1 = optab_handler (interm_optab, intermediate_mode))
              == CODE_FOR_nothing))
        break;

      VEC_quick_push (tree, *interm_types, intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
        return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  VEC_free (tree, heap, *interm_types);