1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "double-int.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "hard-reg-set.h"
43 #include "dominance.h"
45 #include "basic-block.h"
46 #include "gimple-pretty-print.h"
47 #include "tree-ssa-alias.h"
48 #include "internal-fn.h"
50 #include "gimple-expr.h"
54 #include "gimple-iterator.h"
55 #include "gimplify-me.h"
56 #include "gimple-ssa.h"
58 #include "tree-phinodes.h"
59 #include "ssa-iterators.h"
60 #include "stringpool.h"
61 #include "tree-ssanames.h"
62 #include "tree-ssa-loop-manip.h"
64 #include "tree-ssa-loop.h"
65 #include "tree-scalar-evolution.h"
69 #include "statistics.h"
71 #include "fixed-value.h"
72 #include "insn-config.h"
81 #include "recog.h" /* FIXME: for insn_data */
82 #include "insn-codes.h"
84 #include "diagnostic-core.h"
85 #include "tree-vectorizer.h"
87 #include "plugin-api.h"
92 /* For lang_hooks.types.type_for_mode. */
93 #include "langhooks.h"
95 /* Return the vectorized type for the given statement. */
98 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
100 return STMT_VINFO_VECTYPE (stmt_info
);
103 /* Return TRUE iff the given statement is in an inner loop relative to
104 the loop being vectorized. */
106 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
108 gimple stmt
= STMT_VINFO_STMT (stmt_info
);
109 basic_block bb
= gimple_bb (stmt
);
110 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
116 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
118 return (bb
->loop_father
== loop
->inner
);
121 /* Record the cost of a statement, either by directly informing the
122 target model or by saving it in a vector for later processing.
123 Return a preliminary estimate of the statement's cost. */
126 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
127 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
128 int misalign
, enum vect_cost_model_location where
)
132 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
133 add_stmt_info_to_vec (body_cost_vec
, count
, kind
,
134 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
137 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
142 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
143 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
144 void *target_cost_data
;
147 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
149 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
151 return add_stmt_cost (target_cost_data
, count
, kind
, stmt_info
,
156 /* Return a variable of type ELEM_TYPE[NELEMS]. */
159 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
161 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
165 /* ARRAY is an array of vectors created by create_vector_array.
166 Return an SSA_NAME for the vector in index N. The reference
167 is part of the vectorization of STMT and the vector is associated
168 with scalar destination SCALAR_DEST. */
171 read_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
172 tree array
, unsigned HOST_WIDE_INT n
)
174 tree vect_type
, vect
, vect_name
, array_ref
;
177 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
178 vect_type
= TREE_TYPE (TREE_TYPE (array
));
179 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
180 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
181 build_int_cst (size_type_node
, n
),
182 NULL_TREE
, NULL_TREE
);
184 new_stmt
= gimple_build_assign (vect
, array_ref
);
185 vect_name
= make_ssa_name (vect
, new_stmt
);
186 gimple_assign_set_lhs (new_stmt
, vect_name
);
187 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
192 /* ARRAY is an array of vectors created by create_vector_array.
193 Emit code to store SSA_NAME VECT in index N of the array.
194 The store is part of the vectorization of STMT. */
197 write_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
198 tree array
, unsigned HOST_WIDE_INT n
)
203 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
204 build_int_cst (size_type_node
, n
),
205 NULL_TREE
, NULL_TREE
);
207 new_stmt
= gimple_build_assign (array_ref
, vect
);
208 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
211 /* PTR is a pointer to an array of type TYPE. Return a representation
212 of *PTR. The memory reference replaces those in FIRST_DR
216 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
218 tree mem_ref
, alias_ptr_type
;
220 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
221 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
222 /* Arrays have the same alignment as their type. */
223 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
227 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
229 /* Function vect_mark_relevant.
231 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
234 vect_mark_relevant (vec
<gimple
> *worklist
, gimple stmt
,
235 enum vect_relevant relevant
, bool live_p
,
236 bool used_in_pattern
)
238 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
239 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
240 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE
, vect_location
,
245 "mark relevant %d, live %d.\n", relevant
, live_p
);
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
249 may have their own uses that are not in any pattern, in such cases the
250 stmt itself should be marked. */
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
254 if (!used_in_pattern
)
256 imm_use_iterator imm_iter
;
260 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
261 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
263 if (is_gimple_assign (stmt
))
264 lhs
= gimple_assign_lhs (stmt
);
266 lhs
= gimple_call_lhs (stmt
);
268 /* This use is out of pattern use, if LHS has other uses that are
269 pattern uses, we should mark the stmt itself, and not the pattern
271 if (lhs
&& TREE_CODE (lhs
) == SSA_NAME
)
272 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
274 if (is_gimple_debug (USE_STMT (use_p
)))
276 use_stmt
= USE_STMT (use_p
);
278 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
281 if (vinfo_for_stmt (use_stmt
)
282 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
292 /* This is the last stmt in a sequence that was detected as a
293 pattern that can potentially be vectorized. Don't mark the stmt
294 as relevant/live because it's not going to be vectorized.
295 Instead mark the pattern-stmt that replaces it. */
297 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
299 if (dump_enabled_p ())
300 dump_printf_loc (MSG_NOTE
, vect_location
,
301 "last stmt in pattern. don't mark"
302 " relevant/live.\n");
303 stmt_info
= vinfo_for_stmt (pattern_stmt
);
304 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
305 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
306 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
311 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
312 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
313 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
315 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
316 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
318 if (dump_enabled_p ())
319 dump_printf_loc (MSG_NOTE
, vect_location
,
320 "already marked relevant/live.\n");
324 worklist
->safe_push (stmt
);
328 /* Function vect_stmt_relevant_p.
330 Return true if STMT in loop that is represented by LOOP_VINFO is
331 "relevant for vectorization".
333 A stmt is considered "relevant for vectorization" if:
334 - it has uses outside the loop.
335 - it has vdefs (it alters memory).
336 - control stmts in the loop (except for the exit condition).
338 CHECKME: what other side effects would the vectorizer allow? */
341 vect_stmt_relevant_p (gimple stmt
, loop_vec_info loop_vinfo
,
342 enum vect_relevant
*relevant
, bool *live_p
)
344 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
346 imm_use_iterator imm_iter
;
350 *relevant
= vect_unused_in_scope
;
353 /* cond stmt other than loop exit cond. */
354 if (is_ctrl_stmt (stmt
)
355 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
356 != loop_exit_ctrl_vec_info_type
)
357 *relevant
= vect_used_in_scope
;
359 /* changing memory. */
360 if (gimple_code (stmt
) != GIMPLE_PHI
)
361 if (gimple_vdef (stmt
)
362 && !gimple_clobber_p (stmt
))
364 if (dump_enabled_p ())
365 dump_printf_loc (MSG_NOTE
, vect_location
,
366 "vec_stmt_relevant_p: stmt has vdefs.\n");
367 *relevant
= vect_used_in_scope
;
370 /* uses outside the loop. */
371 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
373 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
375 basic_block bb
= gimple_bb (USE_STMT (use_p
));
376 if (!flow_bb_inside_loop_p (loop
, bb
))
378 if (dump_enabled_p ())
379 dump_printf_loc (MSG_NOTE
, vect_location
,
380 "vec_stmt_relevant_p: used out of loop.\n");
382 if (is_gimple_debug (USE_STMT (use_p
)))
385 /* We expect all such uses to be in the loop exit phis
386 (because of loop closed form) */
387 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
388 gcc_assert (bb
== single_exit (loop
)->dest
);
395 return (*live_p
|| *relevant
);
399 /* Function exist_non_indexing_operands_for_use_p
401 USE is one of the uses attached to STMT. Check if USE is
402 used in STMT for anything other than indexing an array. */
405 exist_non_indexing_operands_for_use_p (tree use
, gimple stmt
)
408 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
410 /* USE corresponds to some operand in STMT. If there is no data
411 reference in STMT, then any operand that corresponds to USE
412 is not indexing an array. */
413 if (!STMT_VINFO_DATA_REF (stmt_info
))
416 /* STMT has a data_ref. FORNOW this means that its of one of
420 (This should have been verified in analyze_data_refs).
422 'var' in the second case corresponds to a def, not a use,
423 so USE cannot correspond to any operands that are not used
426 Therefore, all we need to check is if STMT falls into the
427 first case, and whether var corresponds to USE. */
429 if (!gimple_assign_copy_p (stmt
))
431 if (is_gimple_call (stmt
)
432 && gimple_call_internal_p (stmt
))
433 switch (gimple_call_internal_fn (stmt
))
436 operand
= gimple_call_arg (stmt
, 3);
441 operand
= gimple_call_arg (stmt
, 2);
451 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
453 operand
= gimple_assign_rhs1 (stmt
);
454 if (TREE_CODE (operand
) != SSA_NAME
)
465 Function process_use.
468 - a USE in STMT in a loop represented by LOOP_VINFO
469 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
470 that defined USE. This is done by calling mark_relevant and passing it
471 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
472 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
476 Generally, LIVE_P and RELEVANT are used to define the liveness and
477 relevance info of the DEF_STMT of this USE:
478 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
479 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
481 - case 1: If USE is used only for address computations (e.g. array indexing),
482 which does not need to be directly vectorized, then the liveness/relevance
483 of the respective DEF_STMT is left unchanged.
484 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
485 skip DEF_STMT cause it had already been processed.
486 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
487 be modified accordingly.
489 Return true if everything is as expected. Return false otherwise. */
492 process_use (gimple stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
493 enum vect_relevant relevant
, vec
<gimple
> *worklist
,
496 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
497 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
498 stmt_vec_info dstmt_vinfo
;
499 basic_block bb
, def_bb
;
502 enum vect_def_type dt
;
504 /* case 1: we are only interested in uses that need to be vectorized. Uses
505 that are used for address computation are not considered relevant. */
506 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
509 if (!vect_is_simple_use (use
, stmt
, loop_vinfo
, NULL
, &def_stmt
, &def
, &dt
))
511 if (dump_enabled_p ())
512 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
513 "not vectorized: unsupported use in stmt.\n");
517 if (!def_stmt
|| gimple_nop_p (def_stmt
))
520 def_bb
= gimple_bb (def_stmt
);
521 if (!flow_bb_inside_loop_p (loop
, def_bb
))
523 if (dump_enabled_p ())
524 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
528 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
529 DEF_STMT must have already been processed, because this should be the
530 only way that STMT, which is a reduction-phi, was put in the worklist,
531 as there should be no other uses for DEF_STMT in the loop. So we just
532 check that everything is as expected, and we are done. */
533 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
534 bb
= gimple_bb (stmt
);
535 if (gimple_code (stmt
) == GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
537 && gimple_code (def_stmt
) != GIMPLE_PHI
538 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
539 && bb
->loop_father
== def_bb
->loop_father
)
541 if (dump_enabled_p ())
542 dump_printf_loc (MSG_NOTE
, vect_location
,
543 "reduc-stmt defining reduc-phi in the same nest.\n");
544 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
545 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
546 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
547 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
548 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
552 /* case 3a: outer-loop stmt defining an inner-loop stmt:
553 outer-loop-header-bb:
559 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
561 if (dump_enabled_p ())
562 dump_printf_loc (MSG_NOTE
, vect_location
,
563 "outer-loop def-stmt defining inner-loop stmt.\n");
567 case vect_unused_in_scope
:
568 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
569 vect_used_in_scope
: vect_unused_in_scope
;
572 case vect_used_in_outer_by_reduction
:
573 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
574 relevant
= vect_used_by_reduction
;
577 case vect_used_in_outer
:
578 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
579 relevant
= vect_used_in_scope
;
582 case vect_used_in_scope
:
590 /* case 3b: inner-loop stmt defining an outer-loop stmt:
591 outer-loop-header-bb:
595 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
597 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
599 if (dump_enabled_p ())
600 dump_printf_loc (MSG_NOTE
, vect_location
,
601 "inner-loop def-stmt defining outer-loop stmt.\n");
605 case vect_unused_in_scope
:
606 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
607 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
608 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
611 case vect_used_by_reduction
:
612 relevant
= vect_used_in_outer_by_reduction
;
615 case vect_used_in_scope
:
616 relevant
= vect_used_in_outer
;
624 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
,
625 is_pattern_stmt_p (stmt_vinfo
));
630 /* Function vect_mark_stmts_to_be_vectorized.
632 Not all stmts in the loop need to be vectorized. For example:
641 Stmt 1 and 3 do not need to be vectorized, because loop control and
642 addressing of vectorized data-refs are handled differently.
644 This pass detects such stmts. */
647 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
649 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
650 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
651 unsigned int nbbs
= loop
->num_nodes
;
652 gimple_stmt_iterator si
;
655 stmt_vec_info stmt_vinfo
;
659 enum vect_relevant relevant
, tmp_relevant
;
660 enum vect_def_type def_type
;
662 if (dump_enabled_p ())
663 dump_printf_loc (MSG_NOTE
, vect_location
,
664 "=== vect_mark_stmts_to_be_vectorized ===\n");
666 auto_vec
<gimple
, 64> worklist
;
668 /* 1. Init worklist. */
669 for (i
= 0; i
< nbbs
; i
++)
672 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
675 if (dump_enabled_p ())
677 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
678 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
681 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
682 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
, false);
684 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
686 stmt
= gsi_stmt (si
);
687 if (dump_enabled_p ())
689 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
690 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
693 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
694 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
, false);
698 /* 2. Process_worklist */
699 while (worklist
.length () > 0)
704 stmt
= worklist
.pop ();
705 if (dump_enabled_p ())
707 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
708 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
711 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
712 (DEF_STMT) as relevant/irrelevant and live/dead according to the
713 liveness and relevance properties of STMT. */
714 stmt_vinfo
= vinfo_for_stmt (stmt
);
715 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
716 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
718 /* Generally, the liveness and relevance properties of STMT are
719 propagated as is to the DEF_STMTs of its USEs:
720 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
721 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
723 One exception is when STMT has been identified as defining a reduction
724 variable; in this case we set the liveness/relevance as follows:
726 relevant = vect_used_by_reduction
727 This is because we distinguish between two kinds of relevant stmts -
728 those that are used by a reduction computation, and those that are
729 (also) used by a regular computation. This allows us later on to
730 identify stmts that are used solely by a reduction, and therefore the
731 order of the results that they produce does not have to be kept. */
733 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
734 tmp_relevant
= relevant
;
737 case vect_reduction_def
:
738 switch (tmp_relevant
)
740 case vect_unused_in_scope
:
741 relevant
= vect_used_by_reduction
;
744 case vect_used_by_reduction
:
745 if (gimple_code (stmt
) == GIMPLE_PHI
)
750 if (dump_enabled_p ())
751 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
752 "unsupported use of reduction.\n");
759 case vect_nested_cycle
:
760 if (tmp_relevant
!= vect_unused_in_scope
761 && tmp_relevant
!= vect_used_in_outer_by_reduction
762 && tmp_relevant
!= vect_used_in_outer
)
764 if (dump_enabled_p ())
765 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
766 "unsupported use of nested cycle.\n");
774 case vect_double_reduction_def
:
775 if (tmp_relevant
!= vect_unused_in_scope
776 && tmp_relevant
!= vect_used_by_reduction
)
778 if (dump_enabled_p ())
779 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
780 "unsupported use of double reduction.\n");
792 if (is_pattern_stmt_p (stmt_vinfo
))
794 /* Pattern statements are not inserted into the code, so
795 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
796 have to scan the RHS or function arguments instead. */
797 if (is_gimple_assign (stmt
))
799 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
800 tree op
= gimple_assign_rhs1 (stmt
);
803 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
805 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
806 live_p
, relevant
, &worklist
, false)
807 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
808 live_p
, relevant
, &worklist
, false))
812 for (; i
< gimple_num_ops (stmt
); i
++)
814 op
= gimple_op (stmt
, i
);
815 if (TREE_CODE (op
) == SSA_NAME
816 && !process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
821 else if (is_gimple_call (stmt
))
823 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
825 tree arg
= gimple_call_arg (stmt
, i
);
826 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
833 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
835 tree op
= USE_FROM_PTR (use_p
);
836 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
841 if (STMT_VINFO_GATHER_P (stmt_vinfo
))
844 tree decl
= vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
846 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
850 } /* while worklist */
856 /* Function vect_model_simple_cost.
858 Models cost for simple operations, i.e. those that only emit ncopies of a
859 single op. Right now, this does not account for multiple insns that could
860 be generated for the single vector op. We will handle that shortly. */
863 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
864 enum vect_def_type
*dt
,
865 stmt_vector_for_cost
*prologue_cost_vec
,
866 stmt_vector_for_cost
*body_cost_vec
)
869 int inside_cost
= 0, prologue_cost
= 0;
871 /* The SLP costs were already calculated during SLP tree build. */
872 if (PURE_SLP_STMT (stmt_info
))
875 /* FORNOW: Assuming maximum 2 args per stmts. */
876 for (i
= 0; i
< 2; i
++)
877 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
878 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
879 stmt_info
, 0, vect_prologue
);
881 /* Pass the inside-of-loop statements to the target-specific cost model. */
882 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
883 stmt_info
, 0, vect_body
);
885 if (dump_enabled_p ())
886 dump_printf_loc (MSG_NOTE
, vect_location
,
887 "vect_model_simple_cost: inside_cost = %d, "
888 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
892 /* Model cost for type demotion and promotion operations. PWR is normally
893 zero for single-step promotions and demotions. It will be one if
894 two-step promotion/demotion is required, and so on. Each additional
895 step doubles the number of instructions required. */
898 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
899 enum vect_def_type
*dt
, int pwr
)
902 int inside_cost
= 0, prologue_cost
= 0;
903 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
904 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
905 void *target_cost_data
;
907 /* The SLP costs were already calculated during SLP tree build. */
908 if (PURE_SLP_STMT (stmt_info
))
912 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
914 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
916 for (i
= 0; i
< pwr
+ 1; i
++)
918 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
920 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
921 vec_promote_demote
, stmt_info
, 0,
925 /* FORNOW: Assuming maximum 2 args per stmts. */
926 for (i
= 0; i
< 2; i
++)
927 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
928 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
929 stmt_info
, 0, vect_prologue
);
931 if (dump_enabled_p ())
932 dump_printf_loc (MSG_NOTE
, vect_location
,
933 "vect_model_promotion_demotion_cost: inside_cost = %d, "
934 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
937 /* Function vect_cost_group_size
939 For grouped load or store, return the group_size only if it is the first
940 load or store of a group, else return 1. This ensures that group size is
941 only returned once per group. */
944 vect_cost_group_size (stmt_vec_info stmt_info
)
946 gimple first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
948 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
949 return GROUP_SIZE (stmt_info
);
955 /* Function vect_model_store_cost
957 Models cost for stores. In the case of grouped accesses, one access
958 has the overhead of the grouped access attributed to it. */
961 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
962 bool store_lanes_p
, enum vect_def_type dt
,
964 stmt_vector_for_cost
*prologue_cost_vec
,
965 stmt_vector_for_cost
*body_cost_vec
)
968 unsigned int inside_cost
= 0, prologue_cost
= 0;
969 struct data_reference
*first_dr
;
972 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
973 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
974 stmt_info
, 0, vect_prologue
);
976 /* Grouped access? */
977 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
981 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
986 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
987 group_size
= vect_cost_group_size (stmt_info
);
990 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
992 /* Not a grouped access. */
996 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
999 /* We assume that the cost of a single store-lanes instruction is
1000 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
1001 access is instead being provided by a permute-and-store operation,
1002 include the cost of the permutes. */
1003 if (!store_lanes_p
&& group_size
> 1)
1005 /* Uses a high and low interleave or shuffle operations for each
1007 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1008 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1009 stmt_info
, 0, vect_body
);
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE
, vect_location
,
1013 "vect_model_store_cost: strided group_size = %d .\n",
1017 /* Costs of the stores. */
1018 if (STMT_VINFO_STRIDED_P (stmt_info
))
1020 /* N scalar stores plus extracting the elements. */
1021 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1022 inside_cost
+= record_stmt_cost (body_cost_vec
,
1023 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1024 scalar_store
, stmt_info
, 0, vect_body
);
1025 inside_cost
+= record_stmt_cost (body_cost_vec
,
1026 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1027 vec_to_scalar
, stmt_info
, 0, vect_body
);
1030 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, body_cost_vec
);
1032 if (dump_enabled_p ())
1033 dump_printf_loc (MSG_NOTE
, vect_location
,
1034 "vect_model_store_cost: inside_cost = %d, "
1035 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1039 /* Calculate cost of DR's memory access. */
1041 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
1042 unsigned int *inside_cost
,
1043 stmt_vector_for_cost
*body_cost_vec
)
1045 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1046 gimple stmt
= DR_STMT (dr
);
1047 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1049 switch (alignment_support_scheme
)
1053 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1054 vector_store
, stmt_info
, 0,
1057 if (dump_enabled_p ())
1058 dump_printf_loc (MSG_NOTE
, vect_location
,
1059 "vect_model_store_cost: aligned.\n");
1063 case dr_unaligned_supported
:
1065 /* Here, we assign an additional cost for the unaligned store. */
1066 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1067 unaligned_store
, stmt_info
,
1068 DR_MISALIGNMENT (dr
), vect_body
);
1069 if (dump_enabled_p ())
1070 dump_printf_loc (MSG_NOTE
, vect_location
,
1071 "vect_model_store_cost: unaligned supported by "
1076 case dr_unaligned_unsupported
:
1078 *inside_cost
= VECT_MAX_COST
;
1080 if (dump_enabled_p ())
1081 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1082 "vect_model_store_cost: unsupported access.\n");
1092 /* Function vect_model_load_cost
1094 Models cost for loads. In the case of grouped accesses, the last access
1095 has the overhead of the grouped access attributed to it. Since unaligned
1096 accesses are supported for loads, we also account for the costs of the
1097 access scheme chosen. */
1100 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1101 bool load_lanes_p
, slp_tree slp_node
,
1102 stmt_vector_for_cost
*prologue_cost_vec
,
1103 stmt_vector_for_cost
*body_cost_vec
)
1107 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1108 unsigned int inside_cost
= 0, prologue_cost
= 0;
1110 /* Grouped accesses? */
1111 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1112 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1114 group_size
= vect_cost_group_size (stmt_info
);
1115 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1117 /* Not a grouped access. */
1124 /* We assume that the cost of a single load-lanes instruction is
1125 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1126 access is instead being provided by a load-and-permute operation,
1127 include the cost of the permutes. */
1128 if (!load_lanes_p
&& group_size
> 1
1129 && !STMT_VINFO_STRIDED_P (stmt_info
))
1131 /* Uses an even and odd extract operations or shuffle operations
1132 for each needed permute. */
1133 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1134 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1135 stmt_info
, 0, vect_body
);
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE
, vect_location
,
1139 "vect_model_load_cost: strided group_size = %d .\n",
1143 /* The loads themselves. */
1144 if (STMT_VINFO_STRIDED_P (stmt_info
)
1145 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1147 /* N scalar loads plus gathering them into a vector. */
1148 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1149 inside_cost
+= record_stmt_cost (body_cost_vec
,
1150 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1151 scalar_load
, stmt_info
, 0, vect_body
);
1154 vect_get_load_cost (first_dr
, ncopies
,
1155 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1156 || group_size
> 1 || slp_node
),
1157 &inside_cost
, &prologue_cost
,
1158 prologue_cost_vec
, body_cost_vec
, true);
1159 if (STMT_VINFO_STRIDED_P (stmt_info
))
1160 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1161 stmt_info
, 0, vect_body
);
1163 if (dump_enabled_p ())
1164 dump_printf_loc (MSG_NOTE
, vect_location
,
1165 "vect_model_load_cost: inside_cost = %d, "
1166 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1170 /* Calculate cost of DR's memory access. */
1172 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1173 bool add_realign_cost
, unsigned int *inside_cost
,
1174 unsigned int *prologue_cost
,
1175 stmt_vector_for_cost
*prologue_cost_vec
,
1176 stmt_vector_for_cost
*body_cost_vec
,
1177 bool record_prologue_costs
)
1179 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1180 gimple stmt
= DR_STMT (dr
);
1181 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1183 switch (alignment_support_scheme
)
1187 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1188 stmt_info
, 0, vect_body
);
1190 if (dump_enabled_p ())
1191 dump_printf_loc (MSG_NOTE
, vect_location
,
1192 "vect_model_load_cost: aligned.\n");
1196 case dr_unaligned_supported
:
1198 /* Here, we assign an additional cost for the unaligned load. */
1199 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1200 unaligned_load
, stmt_info
,
1201 DR_MISALIGNMENT (dr
), vect_body
);
1203 if (dump_enabled_p ())
1204 dump_printf_loc (MSG_NOTE
, vect_location
,
1205 "vect_model_load_cost: unaligned supported by "
1210 case dr_explicit_realign
:
1212 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1213 vector_load
, stmt_info
, 0, vect_body
);
1214 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1215 vec_perm
, stmt_info
, 0, vect_body
);
1217 /* FIXME: If the misalignment remains fixed across the iterations of
1218 the containing loop, the following cost should be added to the
1220 if (targetm
.vectorize
.builtin_mask_for_load
)
1221 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1222 stmt_info
, 0, vect_body
);
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE
, vect_location
,
1226 "vect_model_load_cost: explicit realign\n");
1230 case dr_explicit_realign_optimized
:
1232 if (dump_enabled_p ())
1233 dump_printf_loc (MSG_NOTE
, vect_location
,
1234 "vect_model_load_cost: unaligned software "
1237 /* Unaligned software pipeline has a load of an address, an initial
1238 load, and possibly a mask operation to "prime" the loop. However,
1239 if this is an access in a group of loads, which provide grouped
1240 access, then the above cost should only be considered for one
1241 access in the group. Inside the loop, there is a load op
1242 and a realignment op. */
1244 if (add_realign_cost
&& record_prologue_costs
)
1246 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1247 vector_stmt
, stmt_info
,
1249 if (targetm
.vectorize
.builtin_mask_for_load
)
1250 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1251 vector_stmt
, stmt_info
,
1255 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1256 stmt_info
, 0, vect_body
);
1257 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1258 stmt_info
, 0, vect_body
);
1260 if (dump_enabled_p ())
1261 dump_printf_loc (MSG_NOTE
, vect_location
,
1262 "vect_model_load_cost: explicit realign optimized"
1268 case dr_unaligned_unsupported
:
1270 *inside_cost
= VECT_MAX_COST
;
1272 if (dump_enabled_p ())
1273 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1274 "vect_model_load_cost: unsupported access.\n");
1283 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1284 the loop preheader for the vectorized stmt STMT. */
1287 vect_init_vector_1 (gimple stmt
, gimple new_stmt
, gimple_stmt_iterator
*gsi
)
1290 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1293 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1294 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1298 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1302 if (nested_in_vect_loop_p (loop
, stmt
))
1305 pe
= loop_preheader_edge (loop
);
1306 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1307 gcc_assert (!new_bb
);
1311 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1313 gimple_stmt_iterator gsi_bb_start
;
1315 gcc_assert (bb_vinfo
);
1316 bb
= BB_VINFO_BB (bb_vinfo
);
1317 gsi_bb_start
= gsi_after_labels (bb
);
1318 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1322 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_NOTE
, vect_location
,
1325 "created new init_stmt: ");
1326 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1330 /* Function vect_init_vector.
1332 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1333 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1334 vector type a vector with all elements equal to VAL is created first.
1335 Place the initialization at BSI if it is not NULL. Otherwise, place the
1336 initialization at the loop preheader.
1337 Return the DEF of INIT_STMT.
1338 It will be used in the vectorization of STMT. */
1341 vect_init_vector (gimple stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1348 if (TREE_CODE (type
) == VECTOR_TYPE
1349 && TREE_CODE (TREE_TYPE (val
)) != VECTOR_TYPE
)
1351 if (!types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1353 if (CONSTANT_CLASS_P (val
))
1354 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (type
), val
);
1357 new_temp
= make_ssa_name (TREE_TYPE (type
));
1358 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1359 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1363 val
= build_vector_from_val (type
, val
);
1366 new_var
= vect_get_new_vect_var (type
, vect_simple_var
, "cst_");
1367 init_stmt
= gimple_build_assign (new_var
, val
);
1368 new_temp
= make_ssa_name (new_var
, init_stmt
);
1369 gimple_assign_set_lhs (init_stmt
, new_temp
);
1370 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1371 vec_oprnd
= gimple_assign_lhs (init_stmt
);
1376 /* Function vect_get_vec_def_for_operand.
1378 OP is an operand in STMT. This function returns a (vector) def that will be
1379 used in the vectorized stmt for STMT.
1381 In the case that OP is an SSA_NAME which is defined in the loop, then
1382 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1384 In case OP is an invariant or constant, a new stmt that creates a vector def
1385 needs to be introduced. */
1388 vect_get_vec_def_for_operand (tree op
, gimple stmt
, tree
*scalar_def
)
1393 stmt_vec_info def_stmt_info
= NULL
;
1394 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1395 unsigned int nunits
;
1396 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1398 enum vect_def_type dt
;
1402 if (dump_enabled_p ())
1404 dump_printf_loc (MSG_NOTE
, vect_location
,
1405 "vect_get_vec_def_for_operand: ");
1406 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1407 dump_printf (MSG_NOTE
, "\n");
1410 is_simple_use
= vect_is_simple_use (op
, stmt
, loop_vinfo
, NULL
,
1411 &def_stmt
, &def
, &dt
);
1412 gcc_assert (is_simple_use
);
1413 if (dump_enabled_p ())
1415 int loc_printed
= 0;
1418 dump_printf_loc (MSG_NOTE
, vect_location
, "def = ");
1420 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, def
);
1421 dump_printf (MSG_NOTE
, "\n");
1426 dump_printf (MSG_NOTE
, " def_stmt = ");
1428 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1429 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1435 /* Case 1: operand is a constant. */
1436 case vect_constant_def
:
1438 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1439 gcc_assert (vector_type
);
1440 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1445 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1446 if (dump_enabled_p ())
1447 dump_printf_loc (MSG_NOTE
, vect_location
,
1448 "Create vector_cst. nunits = %d\n", nunits
);
1450 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1453 /* Case 2: operand is defined outside the loop - loop invariant. */
1454 case vect_external_def
:
1456 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1457 gcc_assert (vector_type
);
1462 /* Create 'vec_inv = {inv,inv,..,inv}' */
1463 if (dump_enabled_p ())
1464 dump_printf_loc (MSG_NOTE
, vect_location
, "Create vector_inv.\n");
1466 return vect_init_vector (stmt
, def
, vector_type
, NULL
);
1469 /* Case 3: operand is defined inside the loop. */
1470 case vect_internal_def
:
1473 *scalar_def
= NULL
/* FIXME tuples: def_stmt*/;
1475 /* Get the def from the vectorized stmt. */
1476 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1478 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1479 /* Get vectorized pattern statement. */
1481 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1482 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1483 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1484 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1485 gcc_assert (vec_stmt
);
1486 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1487 vec_oprnd
= PHI_RESULT (vec_stmt
);
1488 else if (is_gimple_call (vec_stmt
))
1489 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1491 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1495 /* Case 4: operand is defined by a loop header phi - reduction */
1496 case vect_reduction_def
:
1497 case vect_double_reduction_def
:
1498 case vect_nested_cycle
:
1502 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1503 loop
= (gimple_bb (def_stmt
))->loop_father
;
1505 /* Get the def before the loop */
1506 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
1507 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
1510 /* Case 5: operand is defined by loop-header phi - induction. */
1511 case vect_induction_def
:
1513 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1515 /* Get the def from the vectorized stmt. */
1516 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1517 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1518 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1519 vec_oprnd
= PHI_RESULT (vec_stmt
);
1521 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1531 /* Function vect_get_vec_def_for_stmt_copy
1533 Return a vector-def for an operand. This function is used when the
1534 vectorized stmt to be created (by the caller to this function) is a "copy"
1535 created in case the vectorized result cannot fit in one vector, and several
1536 copies of the vector-stmt are required. In this case the vector-def is
1537 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1538 of the stmt that defines VEC_OPRND.
1539 DT is the type of the vector def VEC_OPRND.
1542 In case the vectorization factor (VF) is bigger than the number
1543 of elements that can fit in a vectype (nunits), we have to generate
1544 more than one vector stmt to vectorize the scalar stmt. This situation
1545 arises when there are multiple data-types operated upon in the loop; the
1546 smallest data-type determines the VF, and as a result, when vectorizing
1547 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1548 vector stmt (each computing a vector of 'nunits' results, and together
1549 computing 'VF' results in each iteration). This function is called when
1550 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1551 which VF=16 and nunits=4, so the number of copies required is 4):
1553 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1555 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1556 VS1.1: vx.1 = memref1 VS1.2
1557 VS1.2: vx.2 = memref2 VS1.3
1558 VS1.3: vx.3 = memref3
1560 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1561 VSnew.1: vz1 = vx.1 + ... VSnew.2
1562 VSnew.2: vz2 = vx.2 + ... VSnew.3
1563 VSnew.3: vz3 = vx.3 + ...
1565 The vectorization of S1 is explained in vectorizable_load.
1566 The vectorization of S2:
1567 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1568 the function 'vect_get_vec_def_for_operand' is called to
1569 get the relevant vector-def for each operand of S2. For operand x it
1570 returns the vector-def 'vx.0'.
1572 To create the remaining copies of the vector-stmt (VSnew.j), this
1573 function is called to get the relevant vector-def for each operand. It is
1574 obtained from the respective VS1.j stmt, which is recorded in the
1575 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1577 For example, to obtain the vector-def 'vx.1' in order to create the
1578 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1579 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1580 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1581 and return its def ('vx.1').
1582 Overall, to create the above sequence this function will be called 3 times:
1583 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1584 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1585 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1588 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1590 gimple vec_stmt_for_operand
;
1591 stmt_vec_info def_stmt_info
;
1593 /* Do nothing; can reuse same def. */
1594 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1597 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1598 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1599 gcc_assert (def_stmt_info
);
1600 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1601 gcc_assert (vec_stmt_for_operand
);
1602 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1603 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1604 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1606 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1611 /* Get vectorized definitions for the operands to create a copy of an original
1612 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1615 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1616 vec
<tree
> *vec_oprnds0
,
1617 vec
<tree
> *vec_oprnds1
)
1619 tree vec_oprnd
= vec_oprnds0
->pop ();
1621 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1622 vec_oprnds0
->quick_push (vec_oprnd
);
1624 if (vec_oprnds1
&& vec_oprnds1
->length ())
1626 vec_oprnd
= vec_oprnds1
->pop ();
1627 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1628 vec_oprnds1
->quick_push (vec_oprnd
);
1633 /* Get vectorized definitions for OP0 and OP1.
1634 REDUC_INDEX is the index of reduction operand in case of reduction,
1635 and -1 otherwise. */
1638 vect_get_vec_defs (tree op0
, tree op1
, gimple stmt
,
1639 vec
<tree
> *vec_oprnds0
,
1640 vec
<tree
> *vec_oprnds1
,
1641 slp_tree slp_node
, int reduc_index
)
1645 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1646 auto_vec
<tree
> ops (nops
);
1647 auto_vec
<vec
<tree
> > vec_defs (nops
);
1649 ops
.quick_push (op0
);
1651 ops
.quick_push (op1
);
1653 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
1655 *vec_oprnds0
= vec_defs
[0];
1657 *vec_oprnds1
= vec_defs
[1];
1663 vec_oprnds0
->create (1);
1664 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1665 vec_oprnds0
->quick_push (vec_oprnd
);
1669 vec_oprnds1
->create (1);
1670 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
1671 vec_oprnds1
->quick_push (vec_oprnd
);
1677 /* Function vect_finish_stmt_generation.
1679 Insert a new stmt. */
1682 vect_finish_stmt_generation (gimple stmt
, gimple vec_stmt
,
1683 gimple_stmt_iterator
*gsi
)
1685 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1686 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1687 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1689 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1691 if (!gsi_end_p (*gsi
)
1692 && gimple_has_mem_ops (vec_stmt
))
1694 gimple at_stmt
= gsi_stmt (*gsi
);
1695 tree vuse
= gimple_vuse (at_stmt
);
1696 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1698 tree vdef
= gimple_vdef (at_stmt
);
1699 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1700 /* If we have an SSA vuse and insert a store, update virtual
1701 SSA form to avoid triggering the renamer. Do so only
1702 if we can easily see all uses - which is what almost always
1703 happens with the way vectorized stmts are inserted. */
1704 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1705 && ((is_gimple_assign (vec_stmt
)
1706 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1707 || (is_gimple_call (vec_stmt
)
1708 && !(gimple_call_flags (vec_stmt
)
1709 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1711 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1712 gimple_set_vdef (vec_stmt
, new_vdef
);
1713 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1717 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1719 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, loop_vinfo
,
1722 if (dump_enabled_p ())
1724 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1725 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1728 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1730 /* While EH edges will generally prevent vectorization, stmt might
1731 e.g. be in a must-not-throw region. Ensure newly created stmts
1732 that could throw are part of the same region. */
1733 int lp_nr
= lookup_stmt_eh_lp (stmt
);
1734 if (lp_nr
!= 0 && stmt_could_throw_p (vec_stmt
))
1735 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1738 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1739 a function declaration if the target has a vectorized version
1740 of the function, or NULL_TREE if the function cannot be vectorized. */
1743 vectorizable_function (gcall
*call
, tree vectype_out
, tree vectype_in
)
1745 tree fndecl
= gimple_call_fndecl (call
);
1747 /* We only handle functions that do not read or clobber memory -- i.e.
1748 const or novops ones. */
1749 if (!(gimple_call_flags (call
) & (ECF_CONST
| ECF_NOVOPS
)))
1753 || TREE_CODE (fndecl
) != FUNCTION_DECL
1754 || !DECL_BUILT_IN (fndecl
))
1757 return targetm
.vectorize
.builtin_vectorized_function (fndecl
, vectype_out
,
1762 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
,
1763 gimple_stmt_iterator
*);
1766 /* Function vectorizable_mask_load_store.
1768 Check if STMT performs a conditional load or store that can be vectorized.
1769 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1770 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1771 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1774 vectorizable_mask_load_store (gimple stmt
, gimple_stmt_iterator
*gsi
,
1775 gimple
*vec_stmt
, slp_tree slp_node
)
1777 tree vec_dest
= NULL
;
1778 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1779 stmt_vec_info prev_stmt_info
;
1780 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1781 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1782 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
1783 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1784 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1788 tree dataref_ptr
= NULL_TREE
;
1790 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1794 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
1795 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
1796 int gather_scale
= 1;
1797 enum vect_def_type gather_dt
= vect_unknown_def_type
;
1802 enum vect_def_type dt
;
1804 if (slp_node
!= NULL
)
1807 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
1808 gcc_assert (ncopies
>= 1);
1810 is_store
= gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
;
1811 mask
= gimple_call_arg (stmt
, 2);
1812 if (TYPE_PRECISION (TREE_TYPE (mask
))
1813 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
))))
1816 /* FORNOW. This restriction should be relaxed. */
1817 if (nested_in_vect_loop
&& ncopies
> 1)
1819 if (dump_enabled_p ())
1820 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1821 "multiple types in nested loop.");
1825 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1828 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1831 if (!STMT_VINFO_DATA_REF (stmt_info
))
1834 elem_type
= TREE_TYPE (vectype
);
1836 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1839 if (STMT_VINFO_STRIDED_P (stmt_info
))
1842 if (STMT_VINFO_GATHER_P (stmt_info
))
1846 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
1847 &gather_off
, &gather_scale
);
1848 gcc_assert (gather_decl
);
1849 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, NULL
,
1850 &def_stmt
, &def
, &gather_dt
,
1851 &gather_off_vectype
))
1853 if (dump_enabled_p ())
1854 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1855 "gather index use not simple.");
1859 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1861 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
1862 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
1864 if (dump_enabled_p ())
1865 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1866 "masked gather with integer mask not supported.");
1870 else if (tree_int_cst_compare (nested_in_vect_loop
1871 ? STMT_VINFO_DR_STEP (stmt_info
)
1872 : DR_STEP (dr
), size_zero_node
) <= 0)
1874 else if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
1875 || !can_vec_mask_load_store_p (TYPE_MODE (vectype
), !is_store
))
1878 if (TREE_CODE (mask
) != SSA_NAME
)
1881 if (!vect_is_simple_use (mask
, stmt
, loop_vinfo
, NULL
,
1882 &def_stmt
, &def
, &dt
))
1887 tree rhs
= gimple_call_arg (stmt
, 3);
1888 if (!vect_is_simple_use (rhs
, stmt
, loop_vinfo
, NULL
,
1889 &def_stmt
, &def
, &dt
))
1893 if (!vec_stmt
) /* transformation not required. */
1895 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1897 vect_model_store_cost (stmt_info
, ncopies
, false, dt
,
1900 vect_model_load_cost (stmt_info
, ncopies
, false, NULL
, NULL
, NULL
);
1906 if (STMT_VINFO_GATHER_P (stmt_info
))
1908 tree vec_oprnd0
= NULL_TREE
, op
;
1909 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
1910 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
1911 tree ptr
, vec_mask
= NULL_TREE
, mask_op
= NULL_TREE
, var
, scale
;
1912 tree perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
1913 tree mask_perm_mask
= NULL_TREE
;
1914 edge pe
= loop_preheader_edge (loop
);
1917 enum { NARROW
, NONE
, WIDEN
} modifier
;
1918 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
1920 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
1921 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1922 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1923 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1924 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
1925 scaletype
= TREE_VALUE (arglist
);
1926 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
1927 && types_compatible_p (srctype
, masktype
));
1929 if (nunits
== gather_off_nunits
)
1931 else if (nunits
== gather_off_nunits
/ 2)
1933 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
1936 for (i
= 0; i
< gather_off_nunits
; ++i
)
1937 sel
[i
] = i
| nunits
;
1939 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
1941 else if (nunits
== gather_off_nunits
* 2)
1943 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
1946 for (i
= 0; i
< nunits
; ++i
)
1947 sel
[i
] = i
< gather_off_nunits
1948 ? i
: i
+ nunits
- gather_off_nunits
;
1950 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
1952 for (i
= 0; i
< nunits
; ++i
)
1953 sel
[i
] = i
| gather_off_nunits
;
1954 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, sel
);
1959 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
1961 ptr
= fold_convert (ptrtype
, gather_base
);
1962 if (!is_gimple_min_invariant (ptr
))
1964 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
1965 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
1966 gcc_assert (!new_bb
);
1969 scale
= build_int_cst (scaletype
, gather_scale
);
1971 prev_stmt_info
= NULL
;
1972 for (j
= 0; j
< ncopies
; ++j
)
1974 if (modifier
== WIDEN
&& (j
& 1))
1975 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
1976 perm_mask
, stmt
, gsi
);
1979 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
1982 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
1984 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
1986 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
1987 == TYPE_VECTOR_SUBPARTS (idxtype
));
1988 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
1989 var
= make_ssa_name (var
);
1990 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
1992 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
1993 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1997 if (mask_perm_mask
&& (j
& 1))
1998 mask_op
= permute_vec_elements (mask_op
, mask_op
,
1999 mask_perm_mask
, stmt
, gsi
);
2003 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2006 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
,
2007 &def_stmt
, &def
, &dt
);
2008 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2012 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2014 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
))
2015 == TYPE_VECTOR_SUBPARTS (masktype
));
2016 var
= vect_get_new_vect_var (masktype
, vect_simple_var
,
2018 var
= make_ssa_name (var
);
2019 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2021 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2022 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2028 = gimple_build_call (gather_decl
, 5, mask_op
, ptr
, op
, mask_op
,
2031 if (!useless_type_conversion_p (vectype
, rettype
))
2033 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
2034 == TYPE_VECTOR_SUBPARTS (rettype
));
2035 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
2036 op
= make_ssa_name (var
, new_stmt
);
2037 gimple_call_set_lhs (new_stmt
, op
);
2038 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2039 var
= make_ssa_name (vec_dest
);
2040 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2041 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2045 var
= make_ssa_name (vec_dest
, new_stmt
);
2046 gimple_call_set_lhs (new_stmt
, var
);
2049 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2051 if (modifier
== NARROW
)
2058 var
= permute_vec_elements (prev_res
, var
,
2059 perm_mask
, stmt
, gsi
);
2060 new_stmt
= SSA_NAME_DEF_STMT (var
);
2063 if (prev_stmt_info
== NULL
)
2064 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2066 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2067 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2070 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2072 tree lhs
= gimple_call_lhs (stmt
);
2073 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2074 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2075 set_vinfo_for_stmt (stmt
, NULL
);
2076 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2077 gsi_replace (gsi
, new_stmt
, true);
2082 tree vec_rhs
= NULL_TREE
, vec_mask
= NULL_TREE
;
2083 prev_stmt_info
= NULL
;
2084 for (i
= 0; i
< ncopies
; i
++)
2086 unsigned align
, misalign
;
2090 tree rhs
= gimple_call_arg (stmt
, 3);
2091 vec_rhs
= vect_get_vec_def_for_operand (rhs
, stmt
, NULL
);
2092 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2093 /* We should have catched mismatched types earlier. */
2094 gcc_assert (useless_type_conversion_p (vectype
,
2095 TREE_TYPE (vec_rhs
)));
2096 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2097 NULL_TREE
, &dummy
, gsi
,
2098 &ptr_incr
, false, &inv_p
);
2099 gcc_assert (!inv_p
);
2103 vect_is_simple_use (vec_rhs
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2105 vec_rhs
= vect_get_vec_def_for_stmt_copy (dt
, vec_rhs
);
2106 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2108 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2109 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2110 TYPE_SIZE_UNIT (vectype
));
2113 align
= TYPE_ALIGN_UNIT (vectype
);
2114 if (aligned_access_p (dr
))
2116 else if (DR_MISALIGNMENT (dr
) == -1)
2118 align
= TYPE_ALIGN_UNIT (elem_type
);
2122 misalign
= DR_MISALIGNMENT (dr
);
2123 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2126 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
2127 gimple_call_arg (stmt
, 1),
2129 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2131 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2133 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2134 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2139 tree vec_mask
= NULL_TREE
;
2140 prev_stmt_info
= NULL
;
2141 vec_dest
= vect_create_destination_var (gimple_call_lhs (stmt
), vectype
);
2142 for (i
= 0; i
< ncopies
; i
++)
2144 unsigned align
, misalign
;
2148 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt
, NULL
);
2149 dataref_ptr
= vect_create_data_ref_ptr (stmt
, vectype
, NULL
,
2150 NULL_TREE
, &dummy
, gsi
,
2151 &ptr_incr
, false, &inv_p
);
2152 gcc_assert (!inv_p
);
2156 vect_is_simple_use (vec_mask
, NULL
, loop_vinfo
, NULL
, &def_stmt
,
2158 vec_mask
= vect_get_vec_def_for_stmt_copy (dt
, vec_mask
);
2159 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
2160 TYPE_SIZE_UNIT (vectype
));
2163 align
= TYPE_ALIGN_UNIT (vectype
);
2164 if (aligned_access_p (dr
))
2166 else if (DR_MISALIGNMENT (dr
) == -1)
2168 align
= TYPE_ALIGN_UNIT (elem_type
);
2172 misalign
= DR_MISALIGNMENT (dr
);
2173 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
2176 = gimple_build_call_internal (IFN_MASK_LOAD
, 3, dataref_ptr
,
2177 gimple_call_arg (stmt
, 1),
2179 gimple_call_set_lhs (new_stmt
, make_ssa_name (vec_dest
));
2180 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2182 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2184 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2185 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2191 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2193 tree lhs
= gimple_call_lhs (stmt
);
2194 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
2195 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2196 set_vinfo_for_stmt (stmt
, NULL
);
2197 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2198 gsi_replace (gsi
, new_stmt
, true);
2205 /* Function vectorizable_call.
2207 Check if GS performs a function call that can be vectorized.
2208 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2209 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2210 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2213 vectorizable_call (gimple gs
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
2220 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2221 stmt_vec_info stmt_info
= vinfo_for_stmt (gs
), prev_stmt_info
;
2222 tree vectype_out
, vectype_in
;
2225 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2226 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2227 tree fndecl
, new_temp
, def
, rhs_type
;
2229 enum vect_def_type dt
[3]
2230 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
2231 gimple new_stmt
= NULL
;
2233 vec
<tree
> vargs
= vNULL
;
2234 enum { NARROW
, NONE
, WIDEN
} modifier
;
2238 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2241 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2244 /* Is GS a vectorizable call? */
2245 stmt
= dyn_cast
<gcall
*> (gs
);
2249 if (gimple_call_internal_p (stmt
)
2250 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2251 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
2252 return vectorizable_mask_load_store (stmt
, gsi
, vec_stmt
,
2255 if (gimple_call_lhs (stmt
) == NULL_TREE
2256 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2259 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2261 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2263 /* Process function arguments. */
2264 rhs_type
= NULL_TREE
;
2265 vectype_in
= NULL_TREE
;
2266 nargs
= gimple_call_num_args (stmt
);
2268 /* Bail out if the function has more than three arguments, we do not have
2269 interesting builtin functions to vectorize with more than two arguments
2270 except for fma. No arguments is also not good. */
2271 if (nargs
== 0 || nargs
> 3)
2274 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2275 if (gimple_call_internal_p (stmt
)
2276 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2279 rhs_type
= unsigned_type_node
;
2282 for (i
= 0; i
< nargs
; i
++)
2286 op
= gimple_call_arg (stmt
, i
);
2288 /* We can only handle calls with arguments of the same type. */
2290 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2294 "argument types differ.\n");
2298 rhs_type
= TREE_TYPE (op
);
2300 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2301 &def_stmt
, &def
, &dt
[i
], &opvectype
))
2303 if (dump_enabled_p ())
2304 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2305 "use not simple.\n");
2310 vectype_in
= opvectype
;
2312 && opvectype
!= vectype_in
)
2314 if (dump_enabled_p ())
2315 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2316 "argument vector types differ.\n");
2320 /* If all arguments are external or constant defs use a vector type with
2321 the same size as the output vector type. */
2323 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2325 gcc_assert (vectype_in
);
2328 if (dump_enabled_p ())
2330 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2331 "no vectype for scalar type ");
2332 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2333 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2340 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2341 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2342 if (nunits_in
== nunits_out
/ 2)
2344 else if (nunits_out
== nunits_in
)
2346 else if (nunits_out
== nunits_in
/ 2)
2351 /* For now, we only vectorize functions if a target specific builtin
2352 is available. TODO -- in some cases, it might be profitable to
2353 insert the calls for pieces of the vector, in order to be able
2354 to vectorize other operations in the loop. */
2355 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
2356 if (fndecl
== NULL_TREE
)
2358 if (gimple_call_internal_p (stmt
)
2359 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
2362 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2363 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
2364 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
2365 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
2367 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2368 { 0, 1, 2, ... vf - 1 } vector. */
2369 gcc_assert (nargs
== 0);
2373 if (dump_enabled_p ())
2374 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2375 "function is not vectorizable.\n");
2380 gcc_assert (!gimple_vuse (stmt
));
2382 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2384 else if (modifier
== NARROW
)
2385 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2387 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2389 /* Sanity check: make sure that at least one copy of the vectorized stmt
2390 needs to be generated. */
2391 gcc_assert (ncopies
>= 1);
2393 if (!vec_stmt
) /* transformation not required. */
2395 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2396 if (dump_enabled_p ())
2397 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
2399 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2409 scalar_dest
= gimple_call_lhs (stmt
);
2410 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
2412 prev_stmt_info
= NULL
;
2416 for (j
= 0; j
< ncopies
; ++j
)
2418 /* Build argument list for the vectorized call. */
2420 vargs
.create (nargs
);
2426 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2427 vec
<tree
> vec_oprnds0
;
2429 for (i
= 0; i
< nargs
; i
++)
2430 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2431 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2432 vec_oprnds0
= vec_defs
[0];
2434 /* Arguments are ready. Create the new vector stmt. */
2435 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
2438 for (k
= 0; k
< nargs
; k
++)
2440 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2441 vargs
[k
] = vec_oprndsk
[i
];
2443 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2444 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2445 gimple_call_set_lhs (new_stmt
, new_temp
);
2446 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2447 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2450 for (i
= 0; i
< nargs
; i
++)
2452 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2453 vec_oprndsi
.release ();
2458 for (i
= 0; i
< nargs
; i
++)
2460 op
= gimple_call_arg (stmt
, i
);
2463 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2466 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
2468 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2471 vargs
.quick_push (vec_oprnd0
);
2474 if (gimple_call_internal_p (stmt
)
2475 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
2477 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
2479 for (k
= 0; k
< nunits_out
; ++k
)
2480 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
2481 tree cst
= build_vector (vectype_out
, v
);
2483 = vect_get_new_vect_var (vectype_out
, vect_simple_var
, "cst_");
2484 gimple init_stmt
= gimple_build_assign (new_var
, cst
);
2485 new_temp
= make_ssa_name (new_var
, init_stmt
);
2486 gimple_assign_set_lhs (init_stmt
, new_temp
);
2487 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
2488 new_temp
= make_ssa_name (vec_dest
);
2489 new_stmt
= gimple_build_assign (new_temp
,
2490 gimple_assign_lhs (init_stmt
));
2494 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2495 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2496 gimple_call_set_lhs (new_stmt
, new_temp
);
2498 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2501 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2503 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2505 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2511 for (j
= 0; j
< ncopies
; ++j
)
2513 /* Build argument list for the vectorized call. */
2515 vargs
.create (nargs
* 2);
2521 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2522 vec
<tree
> vec_oprnds0
;
2524 for (i
= 0; i
< nargs
; i
++)
2525 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2526 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2527 vec_oprnds0
= vec_defs
[0];
2529 /* Arguments are ready. Create the new vector stmt. */
2530 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2534 for (k
= 0; k
< nargs
; k
++)
2536 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2537 vargs
.quick_push (vec_oprndsk
[i
]);
2538 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2540 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2541 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2542 gimple_call_set_lhs (new_stmt
, new_temp
);
2543 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2544 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2547 for (i
= 0; i
< nargs
; i
++)
2549 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2550 vec_oprndsi
.release ();
2555 for (i
= 0; i
< nargs
; i
++)
2557 op
= gimple_call_arg (stmt
, i
);
2561 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2563 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2567 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2569 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2571 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2574 vargs
.quick_push (vec_oprnd0
);
2575 vargs
.quick_push (vec_oprnd1
);
2578 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2579 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2580 gimple_call_set_lhs (new_stmt
, new_temp
);
2581 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2584 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2586 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2588 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2591 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2596 /* No current target implements this case. */
2602 /* The call in STMT might prevent it from being removed in dce.
2603 We however cannot remove it here, due to the way the ssa name
2604 it defines is mapped to the new definition. So just replace
2605 rhs of the statement with something harmless. */
2610 type
= TREE_TYPE (scalar_dest
);
2611 if (is_pattern_stmt_p (stmt_info
))
2612 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2614 lhs
= gimple_call_lhs (stmt
);
2615 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2616 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2617 set_vinfo_for_stmt (stmt
, NULL
);
2618 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2619 gsi_replace (gsi
, new_stmt
, false);
2625 struct simd_call_arg_info
2629 enum vect_def_type dt
;
2630 HOST_WIDE_INT linear_step
;
2634 /* Function vectorizable_simd_clone_call.
2636 Check if STMT performs a function call that can be vectorized
2637 by calling a simd clone of the function.
2638 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2639 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2640 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2643 vectorizable_simd_clone_call (gimple stmt
, gimple_stmt_iterator
*gsi
,
2644 gimple
*vec_stmt
, slp_tree slp_node
)
2649 tree vec_oprnd0
= NULL_TREE
;
2650 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
2652 unsigned int nunits
;
2653 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2654 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2655 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2656 tree fndecl
, new_temp
, def
;
2658 gimple new_stmt
= NULL
;
2660 vec
<simd_call_arg_info
> arginfo
= vNULL
;
2661 vec
<tree
> vargs
= vNULL
;
2663 tree lhs
, rtype
, ratype
;
2664 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
;
2666 /* Is STMT a vectorizable call? */
2667 if (!is_gimple_call (stmt
))
2670 fndecl
= gimple_call_fndecl (stmt
);
2671 if (fndecl
== NULL_TREE
)
2674 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
2675 if (node
== NULL
|| node
->simd_clones
== NULL
)
2678 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2681 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2684 if (gimple_call_lhs (stmt
)
2685 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
2688 gcc_checking_assert (!stmt_can_throw_internal (stmt
));
2690 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2692 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt
))
2696 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2699 /* Process function arguments. */
2700 nargs
= gimple_call_num_args (stmt
);
2702 /* Bail out if the function has zero arguments. */
2706 arginfo
.create (nargs
);
2708 for (i
= 0; i
< nargs
; i
++)
2710 simd_call_arg_info thisarginfo
;
2713 thisarginfo
.linear_step
= 0;
2714 thisarginfo
.align
= 0;
2715 thisarginfo
.op
= NULL_TREE
;
2717 op
= gimple_call_arg (stmt
, i
);
2718 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2719 &def_stmt
, &def
, &thisarginfo
.dt
,
2720 &thisarginfo
.vectype
)
2721 || thisarginfo
.dt
== vect_uninitialized_def
)
2723 if (dump_enabled_p ())
2724 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2725 "use not simple.\n");
2730 if (thisarginfo
.dt
== vect_constant_def
2731 || thisarginfo
.dt
== vect_external_def
)
2732 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
2734 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
2736 /* For linear arguments, the analyze phase should have saved
2737 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2738 if (i
* 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
2739 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 2])
2741 gcc_assert (vec_stmt
);
2742 thisarginfo
.linear_step
2743 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 2]);
2745 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 1];
2746 /* If loop has been peeled for alignment, we need to adjust it. */
2747 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
2748 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
2751 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
2752 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 2 + 2];
2753 tree opt
= TREE_TYPE (thisarginfo
.op
);
2754 bias
= fold_convert (TREE_TYPE (step
), bias
);
2755 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
2757 = fold_build2 (POINTER_TYPE_P (opt
)
2758 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
2759 thisarginfo
.op
, bias
);
2763 && thisarginfo
.dt
!= vect_constant_def
2764 && thisarginfo
.dt
!= vect_external_def
2766 && TREE_CODE (op
) == SSA_NAME
2767 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
2769 && tree_fits_shwi_p (iv
.step
))
2771 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
2772 thisarginfo
.op
= iv
.base
;
2774 else if ((thisarginfo
.dt
== vect_constant_def
2775 || thisarginfo
.dt
== vect_external_def
)
2776 && POINTER_TYPE_P (TREE_TYPE (op
)))
2777 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
2779 arginfo
.quick_push (thisarginfo
);
2782 unsigned int badness
= 0;
2783 struct cgraph_node
*bestn
= NULL
;
2784 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
2785 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
2787 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
2788 n
= n
->simdclone
->next_clone
)
2790 unsigned int this_badness
= 0;
2791 if (n
->simdclone
->simdlen
2792 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
2793 || n
->simdclone
->nargs
!= nargs
)
2795 if (n
->simdclone
->simdlen
2796 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2797 this_badness
+= (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo
))
2798 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
2799 if (n
->simdclone
->inbranch
)
2800 this_badness
+= 2048;
2801 int target_badness
= targetm
.simd_clone
.usable (n
);
2802 if (target_badness
< 0)
2804 this_badness
+= target_badness
* 512;
2805 /* FORNOW: Have to add code to add the mask argument. */
2806 if (n
->simdclone
->inbranch
)
2808 for (i
= 0; i
< nargs
; i
++)
2810 switch (n
->simdclone
->args
[i
].arg_type
)
2812 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2813 if (!useless_type_conversion_p
2814 (n
->simdclone
->args
[i
].orig_type
,
2815 TREE_TYPE (gimple_call_arg (stmt
, i
))))
2817 else if (arginfo
[i
].dt
== vect_constant_def
2818 || arginfo
[i
].dt
== vect_external_def
2819 || arginfo
[i
].linear_step
)
2822 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
2823 if (arginfo
[i
].dt
!= vect_constant_def
2824 && arginfo
[i
].dt
!= vect_external_def
)
2827 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
2828 if (arginfo
[i
].dt
== vect_constant_def
2829 || arginfo
[i
].dt
== vect_external_def
2830 || (arginfo
[i
].linear_step
2831 != n
->simdclone
->args
[i
].linear_step
))
2834 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
2838 case SIMD_CLONE_ARG_TYPE_MASK
:
2841 if (i
== (size_t) -1)
2843 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
2848 if (arginfo
[i
].align
)
2849 this_badness
+= (exact_log2 (arginfo
[i
].align
)
2850 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
2852 if (i
== (size_t) -1)
2854 if (bestn
== NULL
|| this_badness
< badness
)
2857 badness
= this_badness
;
2867 for (i
= 0; i
< nargs
; i
++)
2868 if ((arginfo
[i
].dt
== vect_constant_def
2869 || arginfo
[i
].dt
== vect_external_def
)
2870 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
2873 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
2875 if (arginfo
[i
].vectype
== NULL
2876 || (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2877 > bestn
->simdclone
->simdlen
))
2884 fndecl
= bestn
->decl
;
2885 nunits
= bestn
->simdclone
->simdlen
;
2886 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2888 /* If the function isn't const, only allow it in simd loops where user
2889 has asserted that at least nunits consecutive iterations can be
2890 performed using SIMD instructions. */
2891 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
2892 && gimple_vuse (stmt
))
2898 /* Sanity check: make sure that at least one copy of the vectorized stmt
2899 needs to be generated. */
2900 gcc_assert (ncopies
>= 1);
2902 if (!vec_stmt
) /* transformation not required. */
2904 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
2905 for (i
= 0; i
< nargs
; i
++)
2906 if (bestn
->simdclone
->args
[i
].arg_type
2907 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
2909 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 2
2911 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
2912 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
2913 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
2914 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
2915 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
2917 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
2918 if (dump_enabled_p ())
2919 dump_printf_loc (MSG_NOTE
, vect_location
,
2920 "=== vectorizable_simd_clone_call ===\n");
2921 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2928 if (dump_enabled_p ())
2929 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
2932 scalar_dest
= gimple_call_lhs (stmt
);
2933 vec_dest
= NULL_TREE
;
2938 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2939 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
2940 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
2943 rtype
= TREE_TYPE (ratype
);
2947 prev_stmt_info
= NULL
;
2948 for (j
= 0; j
< ncopies
; ++j
)
2950 /* Build argument list for the vectorized call. */
2952 vargs
.create (nargs
);
2956 for (i
= 0; i
< nargs
; i
++)
2958 unsigned int k
, l
, m
, o
;
2960 op
= gimple_call_arg (stmt
, i
);
2961 switch (bestn
->simdclone
->args
[i
].arg_type
)
2963 case SIMD_CLONE_ARG_TYPE_VECTOR
:
2964 atype
= bestn
->simdclone
->args
[i
].vector_type
;
2965 o
= nunits
/ TYPE_VECTOR_SUBPARTS (atype
);
2966 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
2968 if (TYPE_VECTOR_SUBPARTS (atype
)
2969 < TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
))
2971 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
2972 k
= (TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)
2973 / TYPE_VECTOR_SUBPARTS (atype
));
2974 gcc_assert ((k
& (k
- 1)) == 0);
2977 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2980 vec_oprnd0
= arginfo
[i
].op
;
2981 if ((m
& (k
- 1)) == 0)
2983 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
2986 arginfo
[i
].op
= vec_oprnd0
;
2988 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
2990 bitsize_int ((m
& (k
- 1)) * prec
));
2992 = gimple_build_assign (make_ssa_name (atype
),
2994 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2995 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
2999 k
= (TYPE_VECTOR_SUBPARTS (atype
)
3000 / TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
));
3001 gcc_assert ((k
& (k
- 1)) == 0);
3002 vec
<constructor_elt
, va_gc
> *ctor_elts
;
3004 vec_alloc (ctor_elts
, k
);
3007 for (l
= 0; l
< k
; l
++)
3009 if (m
== 0 && l
== 0)
3011 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
3014 = vect_get_vec_def_for_stmt_copy (arginfo
[i
].dt
,
3016 arginfo
[i
].op
= vec_oprnd0
;
3019 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
3023 vargs
.safe_push (vec_oprnd0
);
3026 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
3028 = gimple_build_assign (make_ssa_name (atype
),
3030 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3031 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
3036 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3037 vargs
.safe_push (op
);
3039 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3044 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
3049 edge pe
= loop_preheader_edge (loop
);
3050 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3051 gcc_assert (!new_bb
);
3053 tree phi_res
= copy_ssa_name (op
);
3054 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
3055 set_vinfo_for_stmt (new_phi
,
3056 new_stmt_vec_info (new_phi
, loop_vinfo
,
3058 add_phi_arg (new_phi
, arginfo
[i
].op
,
3059 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
3061 = POINTER_TYPE_P (TREE_TYPE (op
))
3062 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3063 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3064 ? sizetype
: TREE_TYPE (op
);
3066 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3068 tree tcst
= wide_int_to_tree (type
, cst
);
3069 tree phi_arg
= copy_ssa_name (op
);
3071 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
3072 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
3073 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
3074 set_vinfo_for_stmt (new_stmt
,
3075 new_stmt_vec_info (new_stmt
, loop_vinfo
,
3077 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
3079 arginfo
[i
].op
= phi_res
;
3080 vargs
.safe_push (phi_res
);
3085 = POINTER_TYPE_P (TREE_TYPE (op
))
3086 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
3087 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
3088 ? sizetype
: TREE_TYPE (op
);
3090 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
3092 tree tcst
= wide_int_to_tree (type
, cst
);
3093 new_temp
= make_ssa_name (TREE_TYPE (op
));
3094 new_stmt
= gimple_build_assign (new_temp
, code
,
3095 arginfo
[i
].op
, tcst
);
3096 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3097 vargs
.safe_push (new_temp
);
3100 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3106 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3109 gcc_assert (ratype
|| TYPE_VECTOR_SUBPARTS (rtype
) == nunits
);
3111 new_temp
= create_tmp_var (ratype
);
3112 else if (TYPE_VECTOR_SUBPARTS (vectype
)
3113 == TYPE_VECTOR_SUBPARTS (rtype
))
3114 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3116 new_temp
= make_ssa_name (rtype
, new_stmt
);
3117 gimple_call_set_lhs (new_stmt
, new_temp
);
3119 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3123 if (TYPE_VECTOR_SUBPARTS (vectype
) < nunits
)
3126 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
3127 k
= nunits
/ TYPE_VECTOR_SUBPARTS (vectype
);
3128 gcc_assert ((k
& (k
- 1)) == 0);
3129 for (l
= 0; l
< k
; l
++)
3134 t
= build_fold_addr_expr (new_temp
);
3135 t
= build2 (MEM_REF
, vectype
, t
,
3136 build_int_cst (TREE_TYPE (t
),
3137 l
* prec
/ BITS_PER_UNIT
));
3140 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
3141 size_int (prec
), bitsize_int (l
* prec
));
3143 = gimple_build_assign (make_ssa_name (vectype
), t
);
3144 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3145 if (j
== 0 && l
== 0)
3146 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3148 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3150 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3155 tree clobber
= build_constructor (ratype
, NULL
);
3156 TREE_THIS_VOLATILE (clobber
) = 1;
3157 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3158 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3162 else if (TYPE_VECTOR_SUBPARTS (vectype
) > nunits
)
3164 unsigned int k
= (TYPE_VECTOR_SUBPARTS (vectype
)
3165 / TYPE_VECTOR_SUBPARTS (rtype
));
3166 gcc_assert ((k
& (k
- 1)) == 0);
3167 if ((j
& (k
- 1)) == 0)
3168 vec_alloc (ret_ctor_elts
, k
);
3171 unsigned int m
, o
= nunits
/ TYPE_VECTOR_SUBPARTS (rtype
);
3172 for (m
= 0; m
< o
; m
++)
3174 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
3175 size_int (m
), NULL_TREE
, NULL_TREE
);
3177 = gimple_build_assign (make_ssa_name (rtype
), tem
);
3178 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3179 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
3180 gimple_assign_lhs (new_stmt
));
3182 tree clobber
= build_constructor (ratype
, NULL
);
3183 TREE_THIS_VOLATILE (clobber
) = 1;
3184 new_stmt
= gimple_build_assign (new_temp
, clobber
);
3185 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3188 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
3189 if ((j
& (k
- 1)) != k
- 1)
3191 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
3193 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
3194 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3196 if ((unsigned) j
== k
- 1)
3197 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3199 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3201 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3206 tree t
= build_fold_addr_expr (new_temp
);
3207 t
= build2 (MEM_REF
, vectype
, t
,
3208 build_int_cst (TREE_TYPE (t
), 0));
3210 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
3211 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3212 tree clobber
= build_constructor (ratype
, NULL
);
3213 TREE_THIS_VOLATILE (clobber
) = 1;
3214 vect_finish_stmt_generation (stmt
,
3215 gimple_build_assign (new_temp
,
3221 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3223 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3225 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3230 /* The call in STMT might prevent it from being removed in dce.
3231 We however cannot remove it here, due to the way the ssa name
3232 it defines is mapped to the new definition. So just replace
3233 rhs of the statement with something harmless. */
3240 type
= TREE_TYPE (scalar_dest
);
3241 if (is_pattern_stmt_p (stmt_info
))
3242 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
3244 lhs
= gimple_call_lhs (stmt
);
3245 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
3248 new_stmt
= gimple_build_nop ();
3249 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3250 set_vinfo_for_stmt (stmt
, NULL
);
3251 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3252 gsi_replace (gsi
, new_stmt
, true);
3253 unlink_stmt_vdef (stmt
);
3259 /* Function vect_gen_widened_results_half
3261 Create a vector stmt whose code, type, number of arguments, and result
3262 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3263 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3264 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3265 needs to be created (DECL is a function-decl of a target-builtin).
3266 STMT is the original scalar stmt that we are vectorizing. */
3269 vect_gen_widened_results_half (enum tree_code code
,
3271 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
3272 tree vec_dest
, gimple_stmt_iterator
*gsi
,
3278 /* Generate half of the widened result: */
3279 if (code
== CALL_EXPR
)
3281 /* Target specific support */
3282 if (op_type
== binary_op
)
3283 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
3285 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
3286 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3287 gimple_call_set_lhs (new_stmt
, new_temp
);
3291 /* Generic support */
3292 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
3293 if (op_type
!= binary_op
)
3295 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
3296 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3297 gimple_assign_set_lhs (new_stmt
, new_temp
);
3299 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3305 /* Get vectorized definitions for loop-based vectorization. For the first
3306 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3307 scalar operand), and for the rest we get a copy with
3308 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3309 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3310 The vectors are collected into VEC_OPRNDS. */
3313 vect_get_loop_based_defs (tree
*oprnd
, gimple stmt
, enum vect_def_type dt
,
3314 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
3318 /* Get first vector operand. */
3319 /* All the vector operands except the very first one (that is scalar oprnd)
3321 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
3322 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
3324 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
3326 vec_oprnds
->quick_push (vec_oprnd
);
3328 /* Get second vector operand. */
3329 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
3330 vec_oprnds
->quick_push (vec_oprnd
);
3334 /* For conversion in multiple steps, continue to get operands
3337 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
3341 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3342 For multi-step conversions store the resulting vectors and call the function
3346 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
3347 int multi_step_cvt
, gimple stmt
,
3349 gimple_stmt_iterator
*gsi
,
3350 slp_tree slp_node
, enum tree_code code
,
3351 stmt_vec_info
*prev_stmt_info
)
3354 tree vop0
, vop1
, new_tmp
, vec_dest
;
3356 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3358 vec_dest
= vec_dsts
.pop ();
3360 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
3362 /* Create demotion operation. */
3363 vop0
= (*vec_oprnds
)[i
];
3364 vop1
= (*vec_oprnds
)[i
+ 1];
3365 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
3366 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
3367 gimple_assign_set_lhs (new_stmt
, new_tmp
);
3368 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3371 /* Store the resulting vector for next recursive call. */
3372 (*vec_oprnds
)[i
/2] = new_tmp
;
3375 /* This is the last step of the conversion sequence. Store the
3376 vectors in SLP_NODE or in vector info of the scalar statement
3377 (or in STMT_VINFO_RELATED_STMT chain). */
3379 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3382 if (!*prev_stmt_info
)
3383 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3385 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
3387 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3392 /* For multi-step demotion operations we first generate demotion operations
3393 from the source type to the intermediate types, and then combine the
3394 results (stored in VEC_OPRNDS) in demotion operation to the destination
3398 /* At each level of recursion we have half of the operands we had at the
3400 vec_oprnds
->truncate ((i
+1)/2);
3401 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
3402 stmt
, vec_dsts
, gsi
, slp_node
,
3403 VEC_PACK_TRUNC_EXPR
,
3407 vec_dsts
.quick_push (vec_dest
);
3411 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3412 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3413 the resulting vectors and call the function recursively. */
3416 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
3417 vec
<tree
> *vec_oprnds1
,
3418 gimple stmt
, tree vec_dest
,
3419 gimple_stmt_iterator
*gsi
,
3420 enum tree_code code1
,
3421 enum tree_code code2
, tree decl1
,
3422 tree decl2
, int op_type
)
3425 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
3426 gimple new_stmt1
, new_stmt2
;
3427 vec
<tree
> vec_tmp
= vNULL
;
3429 vec_tmp
.create (vec_oprnds0
->length () * 2);
3430 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
3432 if (op_type
== binary_op
)
3433 vop1
= (*vec_oprnds1
)[i
];
3437 /* Generate the two halves of promotion operation. */
3438 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
3439 op_type
, vec_dest
, gsi
, stmt
);
3440 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
3441 op_type
, vec_dest
, gsi
, stmt
);
3442 if (is_gimple_call (new_stmt1
))
3444 new_tmp1
= gimple_call_lhs (new_stmt1
);
3445 new_tmp2
= gimple_call_lhs (new_stmt2
);
3449 new_tmp1
= gimple_assign_lhs (new_stmt1
);
3450 new_tmp2
= gimple_assign_lhs (new_stmt2
);
3453 /* Store the results for the next step. */
3454 vec_tmp
.quick_push (new_tmp1
);
3455 vec_tmp
.quick_push (new_tmp2
);
3458 vec_oprnds0
->release ();
3459 *vec_oprnds0
= vec_tmp
;
3463 /* Check if STMT performs a conversion operation, that can be vectorized.
3464 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3465 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3466 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3469 vectorizable_conversion (gimple stmt
, gimple_stmt_iterator
*gsi
,
3470 gimple
*vec_stmt
, slp_tree slp_node
)
3474 tree op0
, op1
= NULL_TREE
;
3475 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3476 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3477 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3478 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
3479 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
3480 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
3484 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3485 gimple new_stmt
= NULL
;
3486 stmt_vec_info prev_stmt_info
;
3489 tree vectype_out
, vectype_in
;
3491 tree lhs_type
, rhs_type
;
3492 enum { NARROW
, NONE
, WIDEN
} modifier
;
3493 vec
<tree
> vec_oprnds0
= vNULL
;
3494 vec
<tree
> vec_oprnds1
= vNULL
;
3496 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3497 int multi_step_cvt
= 0;
3498 vec
<tree
> vec_dsts
= vNULL
;
3499 vec
<tree
> interm_types
= vNULL
;
3500 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
3502 machine_mode rhs_mode
;
3503 unsigned short fltsz
;
3505 /* Is STMT a vectorizable conversion? */
3507 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3510 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3513 if (!is_gimple_assign (stmt
))
3516 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3519 code
= gimple_assign_rhs_code (stmt
);
3520 if (!CONVERT_EXPR_CODE_P (code
)
3521 && code
!= FIX_TRUNC_EXPR
3522 && code
!= FLOAT_EXPR
3523 && code
!= WIDEN_MULT_EXPR
3524 && code
!= WIDEN_LSHIFT_EXPR
)
3527 op_type
= TREE_CODE_LENGTH (code
);
3529 /* Check types of lhs and rhs. */
3530 scalar_dest
= gimple_assign_lhs (stmt
);
3531 lhs_type
= TREE_TYPE (scalar_dest
);
3532 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3534 op0
= gimple_assign_rhs1 (stmt
);
3535 rhs_type
= TREE_TYPE (op0
);
3537 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3538 && !((INTEGRAL_TYPE_P (lhs_type
)
3539 && INTEGRAL_TYPE_P (rhs_type
))
3540 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
3541 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
3544 if ((INTEGRAL_TYPE_P (lhs_type
)
3545 && (TYPE_PRECISION (lhs_type
)
3546 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
3547 || (INTEGRAL_TYPE_P (rhs_type
)
3548 && (TYPE_PRECISION (rhs_type
)
3549 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
)))))
3551 if (dump_enabled_p ())
3552 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3553 "type conversion to/from bit-precision unsupported."
3558 /* Check the operands of the operation. */
3559 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3560 &def_stmt
, &def
, &dt
[0], &vectype_in
))
3562 if (dump_enabled_p ())
3563 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3564 "use not simple.\n");
3567 if (op_type
== binary_op
)
3571 op1
= gimple_assign_rhs2 (stmt
);
3572 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
3573 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3575 if (CONSTANT_CLASS_P (op0
))
3576 ok
= vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
,
3577 &def_stmt
, &def
, &dt
[1], &vectype_in
);
3579 ok
= vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3584 if (dump_enabled_p ())
3585 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3586 "use not simple.\n");
3591 /* If op0 is an external or constant defs use a vector type of
3592 the same size as the output vector type. */
3594 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3596 gcc_assert (vectype_in
);
3599 if (dump_enabled_p ())
3601 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3602 "no vectype for scalar type ");
3603 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
3604 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3610 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3611 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3612 if (nunits_in
< nunits_out
)
3614 else if (nunits_out
== nunits_in
)
3619 /* Multiple types in SLP are handled by creating the appropriate number of
3620 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3622 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3624 else if (modifier
== NARROW
)
3625 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
3627 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3629 /* Sanity check: make sure that at least one copy of the vectorized stmt
3630 needs to be generated. */
3631 gcc_assert (ncopies
>= 1);
3633 /* Supportable by target? */
3637 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
3639 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
3644 if (dump_enabled_p ())
3645 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3646 "conversion not supported by target.\n");
3650 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
3651 &code1
, &code2
, &multi_step_cvt
,
3654 /* Binary widening operation can only be supported directly by the
3656 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
3660 if (code
!= FLOAT_EXPR
3661 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3662 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3665 rhs_mode
= TYPE_MODE (rhs_type
);
3666 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
3667 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
3668 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
3669 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
3672 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3673 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3674 if (cvt_type
== NULL_TREE
)
3677 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3679 if (!supportable_convert_operation (code
, vectype_out
,
3680 cvt_type
, &decl1
, &codecvt1
))
3683 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
3684 cvt_type
, &codecvt1
,
3685 &codecvt2
, &multi_step_cvt
,
3689 gcc_assert (multi_step_cvt
== 0);
3691 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
3692 vectype_in
, &code1
, &code2
,
3693 &multi_step_cvt
, &interm_types
))
3697 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
3700 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
3701 codecvt2
= ERROR_MARK
;
3705 interm_types
.safe_push (cvt_type
);
3706 cvt_type
= NULL_TREE
;
3711 gcc_assert (op_type
== unary_op
);
3712 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
3713 &code1
, &multi_step_cvt
,
3717 if (code
!= FIX_TRUNC_EXPR
3718 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
3719 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
3722 rhs_mode
= TYPE_MODE (rhs_type
);
3724 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
3725 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
3726 if (cvt_type
== NULL_TREE
)
3728 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
3731 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
3732 &code1
, &multi_step_cvt
,
3741 if (!vec_stmt
) /* transformation not required. */
3743 if (dump_enabled_p ())
3744 dump_printf_loc (MSG_NOTE
, vect_location
,
3745 "=== vectorizable_conversion ===\n");
3746 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
3748 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
3749 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3751 else if (modifier
== NARROW
)
3753 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
3754 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3758 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
3759 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
3761 interm_types
.release ();
3766 if (dump_enabled_p ())
3767 dump_printf_loc (MSG_NOTE
, vect_location
,
3768 "transform conversion. ncopies = %d.\n", ncopies
);
3770 if (op_type
== binary_op
)
3772 if (CONSTANT_CLASS_P (op0
))
3773 op0
= fold_convert (TREE_TYPE (op1
), op0
);
3774 else if (CONSTANT_CLASS_P (op1
))
3775 op1
= fold_convert (TREE_TYPE (op0
), op1
);
3778 /* In case of multi-step conversion, we first generate conversion operations
3779 to the intermediate types, and then from that types to the final one.
3780 We create vector destinations for the intermediate type (TYPES) received
3781 from supportable_*_operation, and store them in the correct order
3782 for future use in vect_create_vectorized_*_stmts (). */
3783 vec_dsts
.create (multi_step_cvt
+ 1);
3784 vec_dest
= vect_create_destination_var (scalar_dest
,
3785 (cvt_type
&& modifier
== WIDEN
)
3786 ? cvt_type
: vectype_out
);
3787 vec_dsts
.quick_push (vec_dest
);
3791 for (i
= interm_types
.length () - 1;
3792 interm_types
.iterate (i
, &intermediate_type
); i
--)
3794 vec_dest
= vect_create_destination_var (scalar_dest
,
3796 vec_dsts
.quick_push (vec_dest
);
3801 vec_dest
= vect_create_destination_var (scalar_dest
,
3803 ? vectype_out
: cvt_type
);
3807 if (modifier
== WIDEN
)
3809 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
3810 if (op_type
== binary_op
)
3811 vec_oprnds1
.create (1);
3813 else if (modifier
== NARROW
)
3814 vec_oprnds0
.create (
3815 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
3817 else if (code
== WIDEN_LSHIFT_EXPR
)
3818 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
3821 prev_stmt_info
= NULL
;
3825 for (j
= 0; j
< ncopies
; j
++)
3828 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
3831 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
3833 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3835 /* Arguments are ready, create the new vector stmt. */
3836 if (code1
== CALL_EXPR
)
3838 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3839 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3840 gimple_call_set_lhs (new_stmt
, new_temp
);
3844 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
3845 new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
3846 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3847 gimple_assign_set_lhs (new_stmt
, new_temp
);
3850 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3852 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3856 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3858 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3859 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3864 /* In case the vectorization factor (VF) is bigger than the number
3865 of elements that we can fit in a vectype (nunits), we have to
3866 generate more than one vector stmt - i.e - we need to "unroll"
3867 the vector stmt by a factor VF/nunits. */
3868 for (j
= 0; j
< ncopies
; j
++)
3875 if (code
== WIDEN_LSHIFT_EXPR
)
3880 /* Store vec_oprnd1 for every vector stmt to be created
3881 for SLP_NODE. We check during the analysis that all
3882 the shift arguments are the same. */
3883 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3884 vec_oprnds1
.quick_push (vec_oprnd1
);
3886 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3890 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
3891 &vec_oprnds1
, slp_node
, -1);
3895 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
3896 vec_oprnds0
.quick_push (vec_oprnd0
);
3897 if (op_type
== binary_op
)
3899 if (code
== WIDEN_LSHIFT_EXPR
)
3902 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
,
3904 vec_oprnds1
.quick_push (vec_oprnd1
);
3910 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
3911 vec_oprnds0
.truncate (0);
3912 vec_oprnds0
.quick_push (vec_oprnd0
);
3913 if (op_type
== binary_op
)
3915 if (code
== WIDEN_LSHIFT_EXPR
)
3918 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
3920 vec_oprnds1
.truncate (0);
3921 vec_oprnds1
.quick_push (vec_oprnd1
);
3925 /* Arguments are ready. Create the new vector stmts. */
3926 for (i
= multi_step_cvt
; i
>= 0; i
--)
3928 tree this_dest
= vec_dsts
[i
];
3929 enum tree_code c1
= code1
, c2
= code2
;
3930 if (i
== 0 && codecvt2
!= ERROR_MARK
)
3935 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
3937 stmt
, this_dest
, gsi
,
3938 c1
, c2
, decl1
, decl2
,
3942 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3946 if (codecvt1
== CALL_EXPR
)
3948 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
3949 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3950 gimple_call_set_lhs (new_stmt
, new_temp
);
3954 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
3955 new_temp
= make_ssa_name (vec_dest
);
3956 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
3960 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3963 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
3966 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3969 if (!prev_stmt_info
)
3970 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
3972 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3973 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3978 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3982 /* In case the vectorization factor (VF) is bigger than the number
3983 of elements that we can fit in a vectype (nunits), we have to
3984 generate more than one vector stmt - i.e - we need to "unroll"
3985 the vector stmt by a factor VF/nunits. */
3986 for (j
= 0; j
< ncopies
; j
++)
3990 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3994 vec_oprnds0
.truncate (0);
3995 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
3996 vect_pow2 (multi_step_cvt
) - 1);
3999 /* Arguments are ready. Create the new vector stmts. */
4001 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4003 if (codecvt1
== CALL_EXPR
)
4005 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4006 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4007 gimple_call_set_lhs (new_stmt
, new_temp
);
4011 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4012 new_temp
= make_ssa_name (vec_dest
);
4013 new_stmt
= gimple_build_assign (new_temp
, codecvt1
,
4017 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4018 vec_oprnds0
[i
] = new_temp
;
4021 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
4022 stmt
, vec_dsts
, gsi
,
4027 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4031 vec_oprnds0
.release ();
4032 vec_oprnds1
.release ();
4033 vec_dsts
.release ();
4034 interm_types
.release ();
/* NOTE(review): extraction-damaged region — statements are split across
   physical lines and the original file's line numbers are embedded in the
   text; gaps in that numbering show dropped lines (braces, returns).
   Preserved byte-for-byte; only comments added.  */
4040 /* Function vectorizable_assignment.
4042 Check if STMT performs an assignment (copy) that can be vectorized.
4043 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4044 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4045 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4048 vectorizable_assignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
4049 gimple
*vec_stmt
, slp_tree slp_node
)
4054 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4055 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4056 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4060 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4061 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4064 vec
<tree
> vec_oprnds
= vNULL
;
4066 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4067 gimple new_stmt
= NULL
;
4068 stmt_vec_info prev_stmt_info
= NULL
;
4069 enum tree_code code
;
4072 /* Multiple types in SLP are handled by creating the appropriate number of
4073 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4075 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4078 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4080 gcc_assert (ncopies
>= 1);
/* Guards: only relevant, internally-defined plain GIMPLE assignments to
   an SSA name, whose rhs is a single operand, PAREN_EXPR, or a
   conversion, are accepted.  */
4082 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4085 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4088 /* Is vectorizable assignment? */
4089 if (!is_gimple_assign (stmt
))
4092 scalar_dest
= gimple_assign_lhs (stmt
);
4093 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4096 code
= gimple_assign_rhs_code (stmt
);
4097 if (gimple_assign_single_p (stmt
)
4098 || code
== PAREN_EXPR
4099 || CONVERT_EXPR_CODE_P (code
))
4100 op
= gimple_assign_rhs1 (stmt
);
4104 if (code
== VIEW_CONVERT_EXPR
)
4105 op
= TREE_OPERAND (op
, 0);
4107 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
4108 &def_stmt
, &def
, &dt
[0], &vectype_in
))
4110 if (dump_enabled_p ())
4111 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4112 "use not simple.\n");
4116 /* We can handle NOP_EXPR conversions that do not change the number
4117 of elements or the vector size. */
4118 if ((CONVERT_EXPR_CODE_P (code
)
4119 || code
== VIEW_CONVERT_EXPR
)
4121 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
4122 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
4123 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
4126 /* We do not handle bit-precision changes. */
4127 if ((CONVERT_EXPR_CODE_P (code
)
4128 || code
== VIEW_CONVERT_EXPR
)
4129 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
4130 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4131 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4132 || ((TYPE_PRECISION (TREE_TYPE (op
))
4133 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
4134 /* But a conversion that does not change the bit-pattern is ok. */
4135 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4136 > TYPE_PRECISION (TREE_TYPE (op
)))
4137 && TYPE_UNSIGNED (TREE_TYPE (op
))))
4139 if (dump_enabled_p ())
4140 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4141 "type conversion to/from bit-precision "
/* Analysis-only exit: record vec_info_type and cost, then return.  */
4146 if (!vec_stmt
) /* transformation not required. */
4148 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
4149 if (dump_enabled_p ())
4150 dump_printf_loc (MSG_NOTE
, vect_location
,
4151 "=== vectorizable_assignment ===\n");
4152 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
/* Transformation: emit one vector copy (via VIEW_CONVERT_EXPR for
   conversions) per operand per copy.  */
4157 if (dump_enabled_p ())
4158 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
4161 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4164 for (j
= 0; j
< ncopies
; j
++)
4168 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
4170 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
4172 /* Arguments are ready. create the new vector stmt. */
4173 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
4175 if (CONVERT_EXPR_CODE_P (code
)
4176 || code
== VIEW_CONVERT_EXPR
)
4177 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
4178 new_stmt
= gimple_build_assign (vec_dest
, vop
);
4179 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4180 gimple_assign_set_lhs (new_stmt
, new_temp
);
4181 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4183 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4190 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4192 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4194 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4197 vec_oprnds
.release ();
/* NOTE(review): extraction-damaged region — split lines, embedded original
   line numbers, and some lines (the `if (!optab ...)` heads and returns)
   missing.  Preserved byte-for-byte; only comments added.  */
4202 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4203 either as shift by a scalar or by a vector. */
4206 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
4209 machine_mode vec_mode
;
4214 vectype
= get_vectype_for_scalar_type (scalar_type
);
/* Prefer the vector-by-scalar optab; fall back to vector-by-vector when
   the target has no handler for the scalar form.  */
4218 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4220 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
4222 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4224 || (optab_handler (optab
, TYPE_MODE (vectype
))
4225 == CODE_FOR_nothing
))
/* Final check: the selected optab must have an insn for the vector mode.  */
4229 vec_mode
= TYPE_MODE (vectype
);
4230 icode
= (int) optab_handler (optab
, vec_mode
);
4231 if (icode
== CODE_FOR_nothing
)
/* NOTE(review): extraction-damaged region — statements split across lines,
   original line numbers embedded, and gaps in that numbering show dropped
   lines (braces, `return false;` paths, some conditions).  Preserved
   byte-for-byte; only comments added.  Restore from a pristine copy of
   GCC's tree-vect-stmts.c before compiling.  */
4238 /* Function vectorizable_shift.
4240 Check if STMT performs a shift operation that can be vectorized.
4241 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4242 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4243 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4246 vectorizable_shift (gimple stmt
, gimple_stmt_iterator
*gsi
,
4247 gimple
*vec_stmt
, slp_tree slp_node
)
4251 tree op0
, op1
= NULL
;
4252 tree vec_oprnd1
= NULL_TREE
;
4253 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4255 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4256 enum tree_code code
;
4257 machine_mode vec_mode
;
4261 machine_mode optab_op2_mode
;
4264 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4265 gimple new_stmt
= NULL
;
4266 stmt_vec_info prev_stmt_info
;
4273 vec
<tree
> vec_oprnds0
= vNULL
;
4274 vec
<tree
> vec_oprnds1
= vNULL
;
4277 bool scalar_shift_arg
= true;
4278 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
/* Guards: relevant, internally-defined GIMPLE assignment to an SSA name
   with a shift/rotate rhs code and full-precision destination type.  */
4281 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4284 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4287 /* Is STMT a vectorizable binary/unary operation? */
4288 if (!is_gimple_assign (stmt
))
4291 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4294 code
= gimple_assign_rhs_code (stmt
);
4296 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4297 || code
== RROTATE_EXPR
))
4300 scalar_dest
= gimple_assign_lhs (stmt
);
4301 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4302 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4303 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4305 if (dump_enabled_p ())
4306 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4307 "bit-precision shifts not supported.\n");
/* Validate both operands as simple uses, deriving VECTYPE from op0.  */
4311 op0
= gimple_assign_rhs1 (stmt
);
4312 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4313 &def_stmt
, &def
, &dt
[0], &vectype
))
4315 if (dump_enabled_p ())
4316 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4317 "use not simple.\n");
4320 /* If op0 is an external or constant def use a vector type with
4321 the same size as the output vector type. */
4323 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4325 gcc_assert (vectype
);
4328 if (dump_enabled_p ())
4329 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4330 "no vectype for scalar type\n");
4334 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4335 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4336 if (nunits_out
!= nunits_in
)
4339 op1
= gimple_assign_rhs2 (stmt
);
4340 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4341 &def
, &dt
[1], &op1_vectype
))
4343 if (dump_enabled_p ())
4344 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4345 "use not simple.\n");
4350 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4354 /* Multiple types in SLP are handled by creating the appropriate number of
4355 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4357 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4360 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4362 gcc_assert (ncopies
>= 1);
4364 /* Determine whether the shift amount is a vector, or scalar. If the
4365 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4367 if (dt
[1] == vect_internal_def
&& !slp_node
)
4368 scalar_shift_arg
= false;
4369 else if (dt
[1] == vect_constant_def
4370 || dt
[1] == vect_external_def
4371 || dt
[1] == vect_internal_def
)
4373 /* In SLP, need to check whether the shift count is the same,
4374 in loops if it is a constant or invariant, it is always
4378 vec
<gimple
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
4381 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
4382 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
4383 scalar_shift_arg
= false;
4388 if (dump_enabled_p ())
4389 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4390 "operand mode requires invariant argument.\n");
/* Pick the optab: vector/vector when the count varies, otherwise try
   vector/scalar first and fall back to vector/vector.  */
4394 /* Vector shifted by vector. */
4395 if (!scalar_shift_arg
)
4397 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4398 if (dump_enabled_p ())
4399 dump_printf_loc (MSG_NOTE
, vect_location
,
4400 "vector/vector shift/rotate found.\n");
4403 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
4404 if (op1_vectype
== NULL_TREE
4405 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
4407 if (dump_enabled_p ())
4408 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4409 "unusable type for last operand in"
4410 " vector/vector shift/rotate.\n");
4414 /* See if the machine has a vector shifted by scalar insn and if not
4415 then see if it has a vector shifted by vector insn. */
4418 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
4420 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
4422 if (dump_enabled_p ())
4423 dump_printf_loc (MSG_NOTE
, vect_location
,
4424 "vector/scalar shift/rotate found.\n");
4428 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4430 && (optab_handler (optab
, TYPE_MODE (vectype
))
4431 != CODE_FOR_nothing
))
4433 scalar_shift_arg
= false;
4435 if (dump_enabled_p ())
4436 dump_printf_loc (MSG_NOTE
, vect_location
,
4437 "vector/vector shift/rotate found.\n");
4439 /* Unlike the other binary operators, shifts/rotates have
4440 the rhs being int, instead of the same type as the lhs,
4441 so make sure the scalar is the right type if we are
4442 dealing with vectors of long long/long/short/char. */
4443 if (dt
[1] == vect_constant_def
)
4444 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4445 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
4449 && TYPE_MODE (TREE_TYPE (vectype
))
4450 != TYPE_MODE (TREE_TYPE (op1
)))
4452 if (dump_enabled_p ())
4453 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4454 "unusable type for last operand in"
4455 " vector/vector shift/rotate.\n");
4458 if (vec_stmt
&& !slp_node
)
4460 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
4461 op1
= vect_init_vector (stmt
, op1
,
4462 TREE_TYPE (vectype
), NULL
);
/* Target support: the chosen optab must provide an insn, with word-mode
   and worthwhileness fallbacks checked during analysis only.  */
4469 /* Supportable by target? */
4472 if (dump_enabled_p ())
4473 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4477 vec_mode
= TYPE_MODE (vectype
);
4478 icode
= (int) optab_handler (optab
, vec_mode
);
4479 if (icode
== CODE_FOR_nothing
)
4481 if (dump_enabled_p ())
4482 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4483 "op not supported by target.\n");
4484 /* Check only during analysis. */
4485 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4486 || (vf
< vect_min_worthwhile_factor (code
)
4489 if (dump_enabled_p ())
4490 dump_printf_loc (MSG_NOTE
, vect_location
,
4491 "proceeding using word mode.\n");
4494 /* Worthwhile without SIMD support? Check only during analysis. */
4495 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
4496 && vf
< vect_min_worthwhile_factor (code
)
4499 if (dump_enabled_p ())
4500 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4501 "not worthwhile without SIMD support.\n");
/* Analysis-only exit: record vec_info_type and cost, then return.  */
4505 if (!vec_stmt
) /* transformation not required. */
4507 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
4508 if (dump_enabled_p ())
4509 dump_printf_loc (MSG_NOTE
, vect_location
,
4510 "=== vectorizable_shift ===\n");
4511 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
/* Transformation: emit one vector shift stmt per operand per copy.  */
4517 if (dump_enabled_p ())
4518 dump_printf_loc (MSG_NOTE
, vect_location
,
4519 "transform binary/unary operation.\n");
4522 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4524 prev_stmt_info
= NULL
;
4525 for (j
= 0; j
< ncopies
; j
++)
4530 if (scalar_shift_arg
)
4532 /* Vector shl and shr insn patterns can be defined with scalar
4533 operand 2 (shift operand). In this case, use constant or loop
4534 invariant op1 directly, without extending it to vector mode
4536 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
4537 if (!VECTOR_MODE_P (optab_op2_mode
))
4539 if (dump_enabled_p ())
4540 dump_printf_loc (MSG_NOTE
, vect_location
,
4541 "operand 1 using scalar mode.\n");
4543 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
4544 vec_oprnds1
.quick_push (vec_oprnd1
);
4547 /* Store vec_oprnd1 for every vector stmt to be created
4548 for SLP_NODE. We check during the analysis that all
4549 the shift arguments are the same.
4550 TODO: Allow different constants for different vector
4551 stmts generated for an SLP instance. */
4552 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4553 vec_oprnds1
.quick_push (vec_oprnd1
);
4558 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4559 (a special case for certain kind of vector shifts); otherwise,
4560 operand 1 should be of a vector type (the usual case). */
4562 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4565 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4569 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4571 /* Arguments are ready. Create the new vector stmt. */
4572 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4574 vop1
= vec_oprnds1
[i
];
4575 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4576 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4577 gimple_assign_set_lhs (new_stmt
, new_temp
);
4578 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4580 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4587 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4589 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4590 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4593 vec_oprnds0
.release ();
4594 vec_oprnds1
.release ();
4600 /* Function vectorizable_operation.
4602 Check if STMT performs a binary, unary or ternary operation that can
4604 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4605 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4606 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4609 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
4610 gimple
*vec_stmt
, slp_tree slp_node
)
4614 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
4615 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4617 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4618 enum tree_code code
;
4619 machine_mode vec_mode
;
4626 enum vect_def_type dt
[3]
4627 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
4628 gimple new_stmt
= NULL
;
4629 stmt_vec_info prev_stmt_info
;
4635 vec
<tree
> vec_oprnds0
= vNULL
;
4636 vec
<tree
> vec_oprnds1
= vNULL
;
4637 vec
<tree
> vec_oprnds2
= vNULL
;
4638 tree vop0
, vop1
, vop2
;
4639 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4642 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4645 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4648 /* Is STMT a vectorizable binary/unary operation? */
4649 if (!is_gimple_assign (stmt
))
4652 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4655 code
= gimple_assign_rhs_code (stmt
);
4657 /* For pointer addition, we should use the normal plus for
4658 the vector addition. */
4659 if (code
== POINTER_PLUS_EXPR
)
4662 /* Support only unary or binary operations. */
4663 op_type
= TREE_CODE_LENGTH (code
);
4664 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
4666 if (dump_enabled_p ())
4667 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4668 "num. args = %d (not unary/binary/ternary op).\n",
4673 scalar_dest
= gimple_assign_lhs (stmt
);
4674 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4676 /* Most operations cannot handle bit-precision types without extra
4678 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
4679 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
4680 /* Exception are bitwise binary operations. */
4681 && code
!= BIT_IOR_EXPR
4682 && code
!= BIT_XOR_EXPR
4683 && code
!= BIT_AND_EXPR
)
4685 if (dump_enabled_p ())
4686 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4687 "bit-precision arithmetic not supported.\n");
4691 op0
= gimple_assign_rhs1 (stmt
);
4692 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
4693 &def_stmt
, &def
, &dt
[0], &vectype
))
4695 if (dump_enabled_p ())
4696 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4697 "use not simple.\n");
4700 /* If op0 is an external or constant def use a vector type with
4701 the same size as the output vector type. */
4703 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
4705 gcc_assert (vectype
);
4708 if (dump_enabled_p ())
4710 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4711 "no vectype for scalar type ");
4712 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
4714 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
4720 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4721 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
4722 if (nunits_out
!= nunits_in
)
4725 if (op_type
== binary_op
|| op_type
== ternary_op
)
4727 op1
= gimple_assign_rhs2 (stmt
);
4728 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4731 if (dump_enabled_p ())
4732 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4733 "use not simple.\n");
4737 if (op_type
== ternary_op
)
4739 op2
= gimple_assign_rhs3 (stmt
);
4740 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4743 if (dump_enabled_p ())
4744 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4745 "use not simple.\n");
4751 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4755 /* Multiple types in SLP are handled by creating the appropriate number of
4756 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4758 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
4761 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
4763 gcc_assert (ncopies
>= 1);
4765 /* Shifts are handled in vectorizable_shift (). */
4766 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
4767 || code
== RROTATE_EXPR
)
4770 /* Supportable by target? */
4772 vec_mode
= TYPE_MODE (vectype
);
4773 if (code
== MULT_HIGHPART_EXPR
)
4775 if (can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
)))
4776 icode
= LAST_INSN_CODE
;
4778 icode
= CODE_FOR_nothing
;
4782 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
4785 if (dump_enabled_p ())
4786 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4790 icode
= (int) optab_handler (optab
, vec_mode
);
4793 if (icode
== CODE_FOR_nothing
)
4795 if (dump_enabled_p ())
4796 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4797 "op not supported by target.\n");
4798 /* Check only during analysis. */
4799 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
4800 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
4802 if (dump_enabled_p ())
4803 dump_printf_loc (MSG_NOTE
, vect_location
,
4804 "proceeding using word mode.\n");
4807 /* Worthwhile without SIMD support? Check only during analysis. */
4808 if (!VECTOR_MODE_P (vec_mode
)
4810 && vf
< vect_min_worthwhile_factor (code
))
4812 if (dump_enabled_p ())
4813 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4814 "not worthwhile without SIMD support.\n");
4818 if (!vec_stmt
) /* transformation not required. */
4820 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
4821 if (dump_enabled_p ())
4822 dump_printf_loc (MSG_NOTE
, vect_location
,
4823 "=== vectorizable_operation ===\n");
4824 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
4830 if (dump_enabled_p ())
4831 dump_printf_loc (MSG_NOTE
, vect_location
,
4832 "transform binary/unary operation.\n");
4835 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4837 /* In case the vectorization factor (VF) is bigger than the number
4838 of elements that we can fit in a vectype (nunits), we have to generate
4839 more than one vector stmt - i.e - we need to "unroll" the
4840 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4841 from one copy of the vector stmt to the next, in the field
4842 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4843 stages to find the correct vector defs to be used when vectorizing
4844 stmts that use the defs of the current stmt. The example below
4845 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4846 we need to create 4 vectorized stmts):
4848 before vectorization:
4849 RELATED_STMT VEC_STMT
4853 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4855 RELATED_STMT VEC_STMT
4856 VS1_0: vx0 = memref0 VS1_1 -
4857 VS1_1: vx1 = memref1 VS1_2 -
4858 VS1_2: vx2 = memref2 VS1_3 -
4859 VS1_3: vx3 = memref3 - -
4860 S1: x = load - VS1_0
4863 step2: vectorize stmt S2 (done here):
4864 To vectorize stmt S2 we first need to find the relevant vector
4865 def for the first operand 'x'. This is, as usual, obtained from
4866 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4867 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4868 relevant vector def 'vx0'. Having found 'vx0' we can generate
4869 the vector stmt VS2_0, and as usual, record it in the
4870 STMT_VINFO_VEC_STMT of stmt S2.
4871 When creating the second copy (VS2_1), we obtain the relevant vector
4872 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4873 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4874 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4875 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4876 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4877 chain of stmts and pointers:
4878 RELATED_STMT VEC_STMT
4879 VS1_0: vx0 = memref0 VS1_1 -
4880 VS1_1: vx1 = memref1 VS1_2 -
4881 VS1_2: vx2 = memref2 VS1_3 -
4882 VS1_3: vx3 = memref3 - -
4883 S1: x = load - VS1_0
4884 VS2_0: vz0 = vx0 + v1 VS2_1 -
4885 VS2_1: vz1 = vx1 + v1 VS2_2 -
4886 VS2_2: vz2 = vx2 + v1 VS2_3 -
4887 VS2_3: vz3 = vx3 + v1 - -
4888 S2: z = x + 1 - VS2_0 */
4890 prev_stmt_info
= NULL
;
4891 for (j
= 0; j
< ncopies
; j
++)
4896 if (op_type
== binary_op
|| op_type
== ternary_op
)
4897 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
4900 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
4902 if (op_type
== ternary_op
)
4904 vec_oprnds2
.create (1);
4905 vec_oprnds2
.quick_push (vect_get_vec_def_for_operand (op2
,
4912 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
4913 if (op_type
== ternary_op
)
4915 tree vec_oprnd
= vec_oprnds2
.pop ();
4916 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
4921 /* Arguments are ready. Create the new vector stmt. */
4922 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4924 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
4925 ? vec_oprnds1
[i
] : NULL_TREE
);
4926 vop2
= ((op_type
== ternary_op
)
4927 ? vec_oprnds2
[i
] : NULL_TREE
);
4928 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
4929 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4930 gimple_assign_set_lhs (new_stmt
, new_temp
);
4931 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4933 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4940 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4942 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4943 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4946 vec_oprnds0
.release ();
4947 vec_oprnds1
.release ();
4948 vec_oprnds2
.release ();
/* NOTE(review): extraction-garbled view -- statements are split across
   lines and several original lines (the return type, braces, the "else"
   keyword, and apparently an early guard on dr->aux) are missing.
   Tokens below are byte-identical; only comments are added.
   Purpose: if DR's base was recorded as misaligned during analysis,
   raise the alignment of its base declaration to VECTYPE's alignment --
   through the symbol table for symtab-visible decls (so every TU
   agrees), or directly on the DECL otherwise -- then clear the
   base_misaligned flag so the fixup happens only once.  */
4953 /* A helper function to ensure data reference DR's base alignment
4957 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
4962 if (((dataref_aux
*)dr
->aux
)->base_misaligned
)
4964 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4965 tree base_decl
= ((dataref_aux
*)dr
->aux
)->base_decl
;
4967 if (decl_in_symtab_p (base_decl
))
4968 symtab_node::get (base_decl
)->increase_alignment (TYPE_ALIGN (vectype
));
4971 DECL_ALIGN (base_decl
) = TYPE_ALIGN (vectype
);
4972 DECL_USER_ALIGN (base_decl
) = 1;
4974 ((dataref_aux
*)dr
->aux
)->base_misaligned
= false;
/* NOTE(review): extraction-garbled view; tokens byte-identical,
   comments only added.
   Builds the element-reversal selector {nunits-1, ..., 1, 0} for
   VECTYPE and, when the target supports that constant permutation
   (can_vec_perm_p with a constant mask), returns the checked mask via
   vect_gen_perm_mask_checked.  The "return NULL" path for the
   unsupported case is on a line not visible in this view -- TODO
   confirm against the full file.  */
4979 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4980 reversal of the vector elements. If that is impossible to do,
4984 perm_mask_for_reverse (tree vectype
)
4989 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4990 sel
= XALLOCAVEC (unsigned char, nunits
);
4992 for (i
= 0; i
< nunits
; ++i
)
4993 sel
[i
] = nunits
- 1 - i
;
4995 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
4997 return vect_gen_perm_mask_checked (vectype
, sel
);
/* NOTE(review): extraction-garbled view -- statements are split across
   lines and many original lines (braces, else-arms, some returns) are
   absent; all other tokens are preserved byte-for-byte.
   vectorizable_store: checks whether STMT is a vectorizable store and,
   when VEC_STMT is passed, emits the vectorized store(s) at GSI,
   handling grouped/interleaved stores, store-lanes, strided stores and
   negative-step (reversed) stores.
   FIX: in the strided-store inner loop below (content line "5348") the
   guard recording the first generated store read
   "if (j == 0 && i == i)".  "i == i" is a tautology, so
   STMT_VINFO_VEC_STMT / *vec_stmt was overwritten for every element of
   the first copy and the STMT_VINFO_RELATED_STMT chain never started.
   Changed to "i == 0" so only the very first generated store is
   recorded, matching the upstream GCC correction.  */
5000 /* Function vectorizable_store.
5002 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5004 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5005 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5006 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5009 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
5015 tree vec_oprnd
= NULL_TREE
;
5016 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5017 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5018 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5020 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5021 struct loop
*loop
= NULL
;
5022 machine_mode vec_mode
;
5024 enum dr_alignment_support alignment_support_scheme
;
5027 enum vect_def_type dt
;
5028 stmt_vec_info prev_stmt_info
= NULL
;
5029 tree dataref_ptr
= NULL_TREE
;
5030 tree dataref_offset
= NULL_TREE
;
5031 gimple ptr_incr
= NULL
;
5032 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5035 gimple next_stmt
, first_stmt
= NULL
;
5036 bool grouped_store
= false;
5037 bool store_lanes_p
= false;
5038 unsigned int group_size
, i
;
5039 vec
<tree
> dr_chain
= vNULL
;
5040 vec
<tree
> oprnds
= vNULL
;
5041 vec
<tree
> result_chain
= vNULL
;
5043 bool negative
= false;
5044 tree offset
= NULL_TREE
;
5045 vec
<tree
> vec_oprnds
= vNULL
;
5046 bool slp
= (slp_node
!= NULL
);
5047 unsigned int vec_num
;
5048 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5052 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5054 /* Multiple types in SLP are handled by creating the appropriate number of
5055 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5057 if (slp
|| PURE_SLP_STMT (stmt_info
))
5060 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5062 gcc_assert (ncopies
>= 1);
5064 /* FORNOW. This restriction should be relaxed. */
5065 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5069 "multiple types in nested loop.\n");
5073 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5076 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5079 /* Is vectorizable store? */
5081 if (!is_gimple_assign (stmt
))
5084 scalar_dest
= gimple_assign_lhs (stmt
);
5085 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
5086 && is_pattern_stmt_p (stmt_info
))
5087 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
5088 if (TREE_CODE (scalar_dest
) != ARRAY_REF
5089 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
5090 && TREE_CODE (scalar_dest
) != INDIRECT_REF
5091 && TREE_CODE (scalar_dest
) != COMPONENT_REF
5092 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
5093 && TREE_CODE (scalar_dest
) != REALPART_EXPR
5094 && TREE_CODE (scalar_dest
) != MEM_REF
)
5097 gcc_assert (gimple_assign_single_p (stmt
));
5098 op
= gimple_assign_rhs1 (stmt
);
5099 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5102 if (dump_enabled_p ())
5103 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5104 "use not simple.\n");
5108 elem_type
= TREE_TYPE (vectype
);
5109 vec_mode
= TYPE_MODE (vectype
);
5111 /* FORNOW. In some cases can vectorize even if data-type not supported
5112 (e.g. - array initialization with 0). */
5113 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
5116 if (!STMT_VINFO_DATA_REF (stmt_info
))
5119 if (!STMT_VINFO_STRIDED_P (stmt_info
))
5122 tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
5123 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
5124 size_zero_node
) < 0;
5125 if (negative
&& ncopies
> 1)
5127 if (dump_enabled_p ())
5128 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5129 "multiple types with negative step.\n");
5134 gcc_assert (!grouped_store
);
5135 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
5136 if (alignment_support_scheme
!= dr_aligned
5137 && alignment_support_scheme
!= dr_unaligned_supported
)
5139 if (dump_enabled_p ())
5140 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5141 "negative step but alignment required.\n");
5144 if (dt
!= vect_constant_def
5145 && dt
!= vect_external_def
5146 && !perm_mask_for_reverse (vectype
))
5148 if (dump_enabled_p ())
5149 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5150 "negative step and reversing not supported.\n");
5156 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5158 grouped_store
= true;
5159 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5160 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
5162 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5163 if (vect_store_lanes_supported (vectype
, group_size
))
5164 store_lanes_p
= true;
5165 else if (!vect_grouped_store_supported (vectype
, group_size
))
5169 if (first_stmt
== stmt
)
5171 /* STMT is the leader of the group. Check the operands of all the
5172 stmts of the group. */
5173 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
5176 gcc_assert (gimple_assign_single_p (next_stmt
));
5177 op
= gimple_assign_rhs1 (next_stmt
);
5178 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
5179 &def_stmt
, &def
, &dt
))
5181 if (dump_enabled_p ())
5182 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5183 "use not simple.\n");
5186 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5191 if (!vec_stmt
) /* transformation not required. */
5193 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
5194 /* The SLP costs are calculated during SLP analysis. */
5195 if (!PURE_SLP_STMT (stmt_info
))
5196 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
5203 ensure_base_align (stmt_info
, dr
);
5207 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5208 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5210 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
5213 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
5215 /* We vectorize all the stmts of the interleaving group when we
5216 reach the last stmt in the group. */
5217 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
5218 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
5227 grouped_store
= false;
5228 /* VEC_NUM is the number of vect stmts to be created for this
5230 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5231 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
5232 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
5233 op
= gimple_assign_rhs1 (first_stmt
);
5236 /* VEC_NUM is the number of vect stmts to be created for this
5238 vec_num
= group_size
;
5244 group_size
= vec_num
= 1;
5247 if (dump_enabled_p ())
5248 dump_printf_loc (MSG_NOTE
, vect_location
,
5249 "transform store. ncopies = %d\n", ncopies
);
5251 if (STMT_VINFO_STRIDED_P (stmt_info
))
5253 gimple_stmt_iterator incr_gsi
;
5259 gimple_seq stmts
= NULL
;
5260 tree stride_base
, stride_step
, alias_off
;
5263 gcc_assert (!nested_in_vect_loop_p (loop
, stmt
));
5266 = fold_build_pointer_plus
5267 (unshare_expr (DR_BASE_ADDRESS (dr
)),
5268 size_binop (PLUS_EXPR
,
5269 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr
))),
5270 convert_to_ptrofftype (DR_INIT(dr
))));
5271 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (dr
)));
5273 /* For a store with loop-invariant (but other than power-of-2)
5274 stride (i.e. not a grouped access) like so:
5276 for (i = 0; i < n; i += stride)
5279 we generate a new induction variable and new stores from
5280 the components of the (vectorized) rhs:
5282 for (j = 0; ; j += VF*stride)
5287 array[j + stride] = tmp2;
5291 ivstep
= stride_step
;
5292 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
5293 build_int_cst (TREE_TYPE (ivstep
),
5296 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
5298 create_iv (stride_base
, ivstep
, NULL
,
5299 loop
, &incr_gsi
, insert_after
,
5301 incr
= gsi_stmt (incr_gsi
);
5302 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
5304 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
5306 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
5308 prev_stmt_info
= NULL
;
5309 running_off
= offvar
;
5310 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0);
5311 for (j
= 0; j
< ncopies
; j
++)
5313 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5314 and first_stmt == stmt. */
5316 vec_oprnd
= vect_get_vec_def_for_operand (op
, first_stmt
, NULL
);
5318 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
5320 for (i
= 0; i
< nunits
; i
++)
5322 tree newref
, newoff
;
5323 gimple incr
, assign
;
5324 tree size
= TYPE_SIZE (elem_type
);
5325 /* Extract the i'th component. */
5326 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
, bitsize_int (i
),
5328 tree elem
= fold_build3 (BIT_FIELD_REF
, elem_type
, vec_oprnd
,
5331 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
5335 newref
= build2 (MEM_REF
, TREE_TYPE (vectype
),
5336 running_off
, alias_off
);
5338 /* And store it to *running_off. */
5339 assign
= gimple_build_assign (newref
, elem
);
5340 vect_finish_stmt_generation (stmt
, assign
, gsi
);
5342 newoff
= copy_ssa_name (running_off
, NULL
);
5343 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
5344 running_off
, stride_step
);
5345 vect_finish_stmt_generation (stmt
, incr
, gsi
);
5347 running_off
= newoff
;
// FIX(review): record only the very first generated store (j == 0 AND
// i == 0) in STMT_VINFO_VEC_STMT; the rest are chained through
// STMT_VINFO_RELATED_STMT.  Original text read "i == i" (tautology).
5348 if (j
== 0 && i
== 0
)
5349 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= assign
;
5351 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign
;
5352 prev_stmt_info
= vinfo_for_stmt (assign
);
5358 dr_chain
.create (group_size
);
5359 oprnds
.create (group_size
);
5361 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
5362 gcc_assert (alignment_support_scheme
);
5363 /* Targets with store-lane instructions must not require explicit
5365 gcc_assert (!store_lanes_p
5366 || alignment_support_scheme
== dr_aligned
5367 || alignment_support_scheme
== dr_unaligned_supported
);
5370 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5373 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5375 aggr_type
= vectype
;
5377 /* In case the vectorization factor (VF) is bigger than the number
5378 of elements that we can fit in a vectype (nunits), we have to generate
5379 more than one vector stmt - i.e - we need to "unroll" the
5380 vector stmt by a factor VF/nunits. For more details see documentation in
5381 vect_get_vec_def_for_copy_stmt. */
5383 /* In case of interleaving (non-unit grouped access):
5390 We create vectorized stores starting from base address (the access of the
5391 first stmt in the chain (S2 in the above example), when the last store stmt
5392 of the chain (S4) is reached:
5395 VS2: &base + vec_size*1 = vx0
5396 VS3: &base + vec_size*2 = vx1
5397 VS4: &base + vec_size*3 = vx3
5399 Then permutation statements are generated:
5401 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5402 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5405 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5406 (the order of the data-refs in the output of vect_permute_store_chain
5407 corresponds to the order of scalar stmts in the interleaving chain - see
5408 the documentation of vect_permute_store_chain()).
5410 In case of both multiple types and interleaving, above vector stores and
5411 permutation stmts are created for every copy. The result vector stmts are
5412 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5413 STMT_VINFO_RELATED_STMT for the next copies.
5416 prev_stmt_info
= NULL
;
5417 for (j
= 0; j
< ncopies
; j
++)
5425 /* Get vectorized arguments for SLP_NODE. */
5426 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
5427 NULL
, slp_node
, -1);
5429 vec_oprnd
= vec_oprnds
[0];
5433 /* For interleaved stores we collect vectorized defs for all the
5434 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5435 used as an input to vect_permute_store_chain(), and OPRNDS as
5436 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5438 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5439 OPRNDS are of size 1. */
5440 next_stmt
= first_stmt
;
5441 for (i
= 0; i
< group_size
; i
++)
5443 /* Since gaps are not supported for interleaved stores,
5444 GROUP_SIZE is the exact number of stmts in the chain.
5445 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5446 there is no interleaving, GROUP_SIZE is 1, and only one
5447 iteration of the loop will be executed. */
5448 gcc_assert (next_stmt
5449 && gimple_assign_single_p (next_stmt
));
5450 op
= gimple_assign_rhs1 (next_stmt
);
5452 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
5454 dr_chain
.quick_push (vec_oprnd
);
5455 oprnds
.quick_push (vec_oprnd
);
5456 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5460 /* We should have catched mismatched types earlier. */
5461 gcc_assert (useless_type_conversion_p (vectype
,
5462 TREE_TYPE (vec_oprnd
)));
5463 bool simd_lane_access_p
5464 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5465 if (simd_lane_access_p
5466 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5467 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5468 && integer_zerop (DR_OFFSET (first_dr
))
5469 && integer_zerop (DR_INIT (first_dr
))
5470 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5471 get_alias_set (DR_REF (first_dr
))))
5473 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5474 dataref_offset
= build_int_cst (reference_alias_ptr_type
5475 (DR_REF (first_dr
)), 0);
5480 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
5481 simd_lane_access_p
? loop
: NULL
,
5482 offset
, &dummy
, gsi
, &ptr_incr
,
5483 simd_lane_access_p
, &inv_p
);
5484 gcc_assert (bb_vinfo
|| !inv_p
);
5488 /* For interleaved stores we created vectorized defs for all the
5489 defs stored in OPRNDS in the previous iteration (previous copy).
5490 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5491 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5493 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5494 OPRNDS are of size 1. */
5495 for (i
= 0; i
< group_size
; i
++)
5498 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
5500 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
5501 dr_chain
[i
] = vec_oprnd
;
5502 oprnds
[i
] = vec_oprnd
;
5506 = int_const_binop (PLUS_EXPR
, dataref_offset
,
5507 TYPE_SIZE_UNIT (aggr_type
));
5509 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5510 TYPE_SIZE_UNIT (aggr_type
));
5517 /* Combine all the vectors into an array. */
5518 vec_array
= create_vector_array (vectype
, vec_num
);
5519 for (i
= 0; i
< vec_num
; i
++)
5521 vec_oprnd
= dr_chain
[i
];
5522 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
5526 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5527 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5528 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
5529 gimple_call_set_lhs (new_stmt
, data_ref
);
5530 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5538 result_chain
.create (group_size
);
5540 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
5544 next_stmt
= first_stmt
;
5545 for (i
= 0; i
< vec_num
; i
++)
5547 unsigned align
, misalign
;
5550 /* Bump the vector pointer. */
5551 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5555 vec_oprnd
= vec_oprnds
[i
];
5556 else if (grouped_store
)
5557 /* For grouped stores vectorized defs are interleaved in
5558 vect_permute_store_chain(). */
5559 vec_oprnd
= result_chain
[i
];
5561 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
5564 : build_int_cst (reference_alias_ptr_type
5565 (DR_REF (first_dr
)), 0));
5566 align
= TYPE_ALIGN_UNIT (vectype
);
5567 if (aligned_access_p (first_dr
))
5569 else if (DR_MISALIGNMENT (first_dr
) == -1)
5571 TREE_TYPE (data_ref
)
5572 = build_aligned_type (TREE_TYPE (data_ref
),
5573 TYPE_ALIGN (elem_type
));
5574 align
= TYPE_ALIGN_UNIT (elem_type
);
5579 TREE_TYPE (data_ref
)
5580 = build_aligned_type (TREE_TYPE (data_ref
),
5581 TYPE_ALIGN (elem_type
));
5582 misalign
= DR_MISALIGNMENT (first_dr
);
5584 if (dataref_offset
== NULL_TREE
)
5585 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
5589 && dt
!= vect_constant_def
5590 && dt
!= vect_external_def
)
5592 tree perm_mask
= perm_mask_for_reverse (vectype
);
5594 = vect_create_destination_var (gimple_assign_rhs1 (stmt
),
5596 tree new_temp
= make_ssa_name (perm_dest
);
5598 /* Generate the permute statement. */
5600 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
5601 vec_oprnd
, perm_mask
);
5602 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5604 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5605 vec_oprnd
= new_temp
;
5608 /* Arguments are ready. Create the new vector stmt. */
5609 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
5610 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5615 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5623 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5625 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5626 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5630 dr_chain
.release ();
5632 result_chain
.release ();
5633 vec_oprnds
.release ();
/* NOTE(review): extraction-garbled view; tokens byte-identical,
   comments only added.
   Builds a VECTOR_CST permutation mask for VECTYPE from the selector
   array SEL: the mask element type is the integer type matching
   VECTYPE's element mode, and mask element i is
   build_int_cst (mask_elt_type, sel[i]).  The trailing
   "return mask_vec;" line is not visible in this view -- TODO confirm
   against the full file.  */
5638 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5639 VECTOR_CST mask. No checks are made that the target platform supports the
5640 mask, so callers may wish to test can_vec_perm_p separately, or use
5641 vect_gen_perm_mask_checked. */
5644 vect_gen_perm_mask_any (tree vectype
, const unsigned char *sel
)
5646 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
5649 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5651 mask_elt_type
= lang_hooks
.types
.type_for_mode
5652 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
5653 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
5655 mask_elts
= XALLOCAVEC (tree
, nunits
);
5656 for (i
= nunits
- 1; i
>= 0; i
--)
5657 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
5658 mask_vec
= build_vector (mask_type
, mask_elts
);
/* NOTE(review): extraction-garbled view; tokens byte-identical,
   comments only added.
   Thin wrapper over vect_gen_perm_mask_any that first asserts the
   target supports this constant permutation (can_vec_perm_p with the
   "variable" argument false).  */
5663 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5664 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5667 vect_gen_perm_mask_checked (tree vectype
, const unsigned char *sel
)
5669 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype
), false, sel
));
5670 return vect_gen_perm_mask_any (vectype
, sel
);
/* NOTE(review): extraction-garbled view; tokens byte-identical,
   comments only added.
   Emits "data_ref = VEC_PERM_EXPR <x, y, mask_vec>" through
   vect_finish_stmt_generation at *GSI, into a fresh SSA name whose
   destination variable is derived from STMT's lhs.  The trailing
   "return data_ref;" line is not visible in this view -- TODO confirm
   against the full file.  */
5673 /* Given a vector variable X and Y, that was generated for the scalar
5674 STMT, generate instructions to permute the vector elements of X and Y
5675 using permutation mask MASK_VEC, insert them at *GSI and return the
5676 permuted vector variable. */
5679 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple stmt
,
5680 gimple_stmt_iterator
*gsi
)
5682 tree vectype
= TREE_TYPE (x
);
5683 tree perm_dest
, data_ref
;
5686 perm_dest
= vect_create_destination_var (gimple_get_lhs (stmt
), vectype
);
5687 data_ref
= make_ssa_name (perm_dest
);
5689 /* Generate the permute statement. */
5690 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
5691 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
/* NOTE(review): extraction-garbled view; tokens byte-identical,
   comments only added.
   Two passes over STMT's SSA uses.  Pass 1 (read-only) verifies every
   defining stmt inside LOOP can be hoisted without recursion: PHI defs
   are rejected, and a def is rejected if any of its own operands is in
   turn defined inside LOOP.  Pass 2 physically removes each in-loop
   def from its block and re-inserts it on LOOP's preheader edge.  The
   "return false" rejection paths and the final "return true" are on
   lines not visible in this view -- TODO confirm against the full
   file.  */
5696 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5697 inserting them on the loops preheader edge. Returns true if we
5698 were successful in doing so (and thus STMT can be moved then),
5699 otherwise returns false. */
5702 hoist_defs_of_uses (gimple stmt
, struct loop
*loop
)
5708 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
5710 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
5711 if (!gimple_nop_p (def_stmt
)
5712 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
5714 /* Make sure we don't need to recurse. While we could do
5715 so in simple cases when there are more complex use webs
5716 we don't have an easy way to preserve stmt order to fulfil
5717 dependencies within them. */
5720 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
5722 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
5724 gimple def_stmt2
= SSA_NAME_DEF_STMT (op2
);
5725 if (!gimple_nop_p (def_stmt2
)
5726 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
5736 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_USE
)
5738 gimple def_stmt
= SSA_NAME_DEF_STMT (op
);
5739 if (!gimple_nop_p (def_stmt
)
5740 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
5742 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
5743 gsi_remove (&gsi
, false);
5744 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
5751 /* vectorizable_load.
5753 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
5755 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5756 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5757 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5760 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
5761 slp_tree slp_node
, slp_instance slp_node_instance
)
5764 tree vec_dest
= NULL
;
5765 tree data_ref
= NULL
;
5766 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5767 stmt_vec_info prev_stmt_info
;
5768 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5769 struct loop
*loop
= NULL
;
5770 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
5771 bool nested_in_vect_loop
= false;
5772 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
5773 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5777 gimple new_stmt
= NULL
;
5779 enum dr_alignment_support alignment_support_scheme
;
5780 tree dataref_ptr
= NULL_TREE
;
5781 tree dataref_offset
= NULL_TREE
;
5782 gimple ptr_incr
= NULL
;
5783 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5785 int i
, j
, group_size
= -1, group_gap
;
5786 tree msq
= NULL_TREE
, lsq
;
5787 tree offset
= NULL_TREE
;
5788 tree byte_offset
= NULL_TREE
;
5789 tree realignment_token
= NULL_TREE
;
5791 vec
<tree
> dr_chain
= vNULL
;
5792 bool grouped_load
= false;
5793 bool load_lanes_p
= false;
5796 bool negative
= false;
5797 bool compute_in_loop
= false;
5798 struct loop
*at_loop
;
5800 bool slp
= (slp_node
!= NULL
);
5801 bool slp_perm
= false;
5802 enum tree_code code
;
5803 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5806 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
5807 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
5808 int gather_scale
= 1;
5809 enum vect_def_type gather_dt
= vect_unknown_def_type
;
5813 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5814 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
5815 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
5820 /* Multiple types in SLP are handled by creating the appropriate number of
5821 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5823 if (slp
|| PURE_SLP_STMT (stmt_info
))
5826 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5828 gcc_assert (ncopies
>= 1);
5830 /* FORNOW. This restriction should be relaxed. */
5831 if (nested_in_vect_loop
&& ncopies
> 1)
5833 if (dump_enabled_p ())
5834 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5835 "multiple types in nested loop.\n");
5839 /* Invalidate assumptions made by dependence analysis when vectorization
5840 on the unrolled body effectively re-orders stmts. */
5842 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
5843 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
5844 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
5846 if (dump_enabled_p ())
5847 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5848 "cannot perform implicit CSE when unrolling "
5849 "with negative dependence distance\n");
5853 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5856 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
5859 /* Is vectorizable load? */
5860 if (!is_gimple_assign (stmt
))
5863 scalar_dest
= gimple_assign_lhs (stmt
);
5864 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5867 code
= gimple_assign_rhs_code (stmt
);
5868 if (code
!= ARRAY_REF
5869 && code
!= BIT_FIELD_REF
5870 && code
!= INDIRECT_REF
5871 && code
!= COMPONENT_REF
5872 && code
!= IMAGPART_EXPR
5873 && code
!= REALPART_EXPR
5875 && TREE_CODE_CLASS (code
) != tcc_declaration
)
5878 if (!STMT_VINFO_DATA_REF (stmt_info
))
5881 elem_type
= TREE_TYPE (vectype
);
5882 mode
= TYPE_MODE (vectype
);
5884 /* FORNOW. In some cases can vectorize even if data-type not supported
5885 (e.g. - data copies). */
5886 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
5888 if (dump_enabled_p ())
5889 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5890 "Aligned load, but unsupported type.\n");
5894 /* Check if the load is a part of an interleaving chain. */
5895 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5897 grouped_load
= true;
5899 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
5901 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
5903 /* If this is single-element interleaving with an element distance
5904 that leaves unused vector loads around punt - we at least create
5905 very sub-optimal code in that case (and blow up memory,
5907 if (first_stmt
== stmt
5908 && !GROUP_NEXT_ELEMENT (stmt_info
)
5909 && GROUP_SIZE (stmt_info
) > TYPE_VECTOR_SUBPARTS (vectype
))
5911 if (dump_enabled_p ())
5912 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5913 "single-element interleaving not supported "
5914 "for not adjacent vector loads\n");
5918 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
5920 && !PURE_SLP_STMT (stmt_info
)
5921 && !STMT_VINFO_STRIDED_P (stmt_info
))
5923 if (vect_load_lanes_supported (vectype
, group_size
))
5924 load_lanes_p
= true;
5925 else if (!vect_grouped_load_supported (vectype
, group_size
))
5929 /* Invalidate assumptions made by dependence analysis when vectorization
5930 on the unrolled body effectively re-orders stmts. */
5931 if (!PURE_SLP_STMT (stmt_info
)
5932 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
5933 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo
)
5934 > STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
5936 if (dump_enabled_p ())
5937 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5938 "cannot perform implicit CSE when performing "
5939 "group loads with negative dependence distance\n");
5943 /* Similarly when the stmt is a load that is both part of a SLP
5944 instance and a loop vectorized stmt via the same-dr mechanism
5945 we have to give up. */
5946 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)
5947 && (STMT_SLP_TYPE (stmt_info
)
5948 != STMT_SLP_TYPE (vinfo_for_stmt
5949 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info
)))))
5951 if (dump_enabled_p ())
5952 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5953 "conflicting SLP types for CSEd load\n");
5959 if (STMT_VINFO_GATHER_P (stmt_info
))
5963 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
5964 &gather_off
, &gather_scale
);
5965 gcc_assert (gather_decl
);
5966 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
5967 &def_stmt
, &def
, &gather_dt
,
5968 &gather_off_vectype
))
5970 if (dump_enabled_p ())
5971 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5972 "gather index use not simple.\n");
5976 else if (STMT_VINFO_STRIDED_P (stmt_info
))
5979 && (slp
|| PURE_SLP_STMT (stmt_info
)))
5980 && (group_size
> nunits
5981 || nunits
% group_size
!= 0
5982 /* ??? During analysis phase we are not called with the
5983 slp node/instance we are in so whether we'll end up
5984 with a permutation we don't know. Still we don't
5985 support load permutations. */
5988 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5989 "unhandled strided group load\n");
5995 negative
= tree_int_cst_compare (nested_in_vect_loop
5996 ? STMT_VINFO_DR_STEP (stmt_info
)
5998 size_zero_node
) < 0;
5999 if (negative
&& ncopies
> 1)
6001 if (dump_enabled_p ())
6002 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6003 "multiple types with negative step.\n");
6011 if (dump_enabled_p ())
6012 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6013 "negative step for group load not supported"
6017 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
6018 if (alignment_support_scheme
!= dr_aligned
6019 && alignment_support_scheme
!= dr_unaligned_supported
)
6021 if (dump_enabled_p ())
6022 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6023 "negative step but alignment required.\n");
6026 if (!perm_mask_for_reverse (vectype
))
6028 if (dump_enabled_p ())
6029 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6030 "negative step and reversing not supported."
6037 if (!vec_stmt
) /* transformation not required. */
6039 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
6040 /* The SLP costs are calculated during SLP analysis. */
6041 if (!PURE_SLP_STMT (stmt_info
))
6042 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
,
6047 if (dump_enabled_p ())
6048 dump_printf_loc (MSG_NOTE
, vect_location
,
6049 "transform load. ncopies = %d\n", ncopies
);
6053 ensure_base_align (stmt_info
, dr
);
6055 if (STMT_VINFO_GATHER_P (stmt_info
))
6057 tree vec_oprnd0
= NULL_TREE
, op
;
6058 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
6059 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6060 tree ptr
, mask
, var
, scale
, merge
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
6061 edge pe
= loop_preheader_edge (loop
);
6064 enum { NARROW
, NONE
, WIDEN
} modifier
;
6065 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
6067 if (nunits
== gather_off_nunits
)
6069 else if (nunits
== gather_off_nunits
/ 2)
6071 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
6074 for (i
= 0; i
< gather_off_nunits
; ++i
)
6075 sel
[i
] = i
| nunits
;
6077 perm_mask
= vect_gen_perm_mask_checked (gather_off_vectype
, sel
);
6079 else if (nunits
== gather_off_nunits
* 2)
6081 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
6084 for (i
= 0; i
< nunits
; ++i
)
6085 sel
[i
] = i
< gather_off_nunits
6086 ? i
: i
+ nunits
- gather_off_nunits
;
6088 perm_mask
= vect_gen_perm_mask_checked (vectype
, sel
);
6094 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
6095 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6096 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6097 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6098 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6099 scaletype
= TREE_VALUE (arglist
);
6100 gcc_checking_assert (types_compatible_p (srctype
, rettype
));
6102 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6104 ptr
= fold_convert (ptrtype
, gather_base
);
6105 if (!is_gimple_min_invariant (ptr
))
6107 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6108 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6109 gcc_assert (!new_bb
);
6112 /* Currently we support only unconditional gather loads,
6113 so mask should be all ones. */
6114 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
6115 mask
= build_int_cst (masktype
, -1);
6116 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
6118 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
6119 mask
= build_vector_from_val (masktype
, mask
);
6120 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6122 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
6126 for (j
= 0; j
< 6; ++j
)
6128 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
6129 mask
= build_real (TREE_TYPE (masktype
), r
);
6130 mask
= build_vector_from_val (masktype
, mask
);
6131 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
6136 scale
= build_int_cst (scaletype
, gather_scale
);
6138 if (TREE_CODE (TREE_TYPE (rettype
)) == INTEGER_TYPE
)
6139 merge
= build_int_cst (TREE_TYPE (rettype
), 0);
6140 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype
)))
6144 for (j
= 0; j
< 6; ++j
)
6146 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (rettype
)));
6147 merge
= build_real (TREE_TYPE (rettype
), r
);
6151 merge
= build_vector_from_val (rettype
, merge
);
6152 merge
= vect_init_vector (stmt
, merge
, rettype
, NULL
);
6154 prev_stmt_info
= NULL
;
6155 for (j
= 0; j
< ncopies
; ++j
)
6157 if (modifier
== WIDEN
&& (j
& 1))
6158 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
6159 perm_mask
, stmt
, gsi
);
6162 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
6165 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
6167 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6169 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
6170 == TYPE_VECTOR_SUBPARTS (idxtype
));
6171 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
6172 var
= make_ssa_name (var
);
6173 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6175 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6176 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6181 = gimple_build_call (gather_decl
, 5, merge
, ptr
, op
, mask
, scale
);
6183 if (!useless_type_conversion_p (vectype
, rettype
))
6185 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
6186 == TYPE_VECTOR_SUBPARTS (rettype
));
6187 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
6188 op
= make_ssa_name (var
, new_stmt
);
6189 gimple_call_set_lhs (new_stmt
, op
);
6190 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6191 var
= make_ssa_name (vec_dest
);
6192 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
6194 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6198 var
= make_ssa_name (vec_dest
, new_stmt
);
6199 gimple_call_set_lhs (new_stmt
, var
);
6202 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6204 if (modifier
== NARROW
)
6211 var
= permute_vec_elements (prev_res
, var
,
6212 perm_mask
, stmt
, gsi
);
6213 new_stmt
= SSA_NAME_DEF_STMT (var
);
6216 if (prev_stmt_info
== NULL
)
6217 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6219 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6220 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6224 else if (STMT_VINFO_STRIDED_P (stmt_info
))
6226 gimple_stmt_iterator incr_gsi
;
6232 vec
<constructor_elt
, va_gc
> *v
= NULL
;
6233 gimple_seq stmts
= NULL
;
6234 tree stride_base
, stride_step
, alias_off
;
6236 gcc_assert (!nested_in_vect_loop
);
6239 = fold_build_pointer_plus
6240 (unshare_expr (DR_BASE_ADDRESS (dr
)),
6241 size_binop (PLUS_EXPR
,
6242 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr
))),
6243 convert_to_ptrofftype (DR_INIT (dr
))));
6244 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (dr
)));
6246 /* For a load with loop-invariant (but other than power-of-2)
6247 stride (i.e. not a grouped access) like so:
6249 for (i = 0; i < n; i += stride)
6252 we generate a new induction variable and new accesses to
6253 form a new vector (or vectors, depending on ncopies):
6255 for (j = 0; ; j += VF*stride)
6257 tmp2 = array[j + stride];
6259 vectemp = {tmp1, tmp2, ...}
6262 ivstep
= stride_step
;
6263 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6264 build_int_cst (TREE_TYPE (ivstep
), vf
));
6266 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6268 create_iv (stride_base
, ivstep
, NULL
,
6269 loop
, &incr_gsi
, insert_after
,
6271 incr
= gsi_stmt (incr_gsi
);
6272 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
6274 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
6276 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
6278 prev_stmt_info
= NULL
;
6279 running_off
= offvar
;
6280 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0);
6281 int nloads
= nunits
;
6282 tree ltype
= TREE_TYPE (vectype
);
6285 nloads
= nunits
/ group_size
;
6286 if (group_size
< nunits
)
6287 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
6290 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
6291 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6292 gcc_assert (!slp_perm
);
6294 for (j
= 0; j
< ncopies
; j
++)
6300 vec_alloc (v
, nloads
);
6301 for (i
= 0; i
< nloads
; i
++)
6303 tree newref
, newoff
;
6305 newref
= build2 (MEM_REF
, ltype
, running_off
, alias_off
);
6307 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
6310 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
6311 newoff
= copy_ssa_name (running_off
);
6312 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6313 running_off
, stride_step
);
6314 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6316 running_off
= newoff
;
6319 vec_inv
= build_constructor (vectype
, v
);
6320 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
6321 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6325 new_stmt
= gimple_build_assign (make_ssa_name (ltype
),
6326 build2 (MEM_REF
, ltype
,
6327 running_off
, alias_off
));
6328 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6330 tree newoff
= copy_ssa_name (running_off
);
6331 gimple incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6332 running_off
, stride_step
);
6333 vect_finish_stmt_generation (stmt
, incr
, gsi
);
6335 running_off
= newoff
;
6339 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6341 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6343 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6344 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6351 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
6353 && !SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
6354 && first_stmt
!= SLP_TREE_SCALAR_STMTS (slp_node
)[0])
6355 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6357 /* Check if the chain of loads is already vectorized. */
6358 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
6359 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6360 ??? But we can only do so if there is exactly one
6361 as we have no way to get at the rest. Leave the CSE
6363 ??? With the group load eventually participating
6364 in multiple different permutations (having multiple
6365 slp nodes which refer to the same group) the CSE
6366 is even wrong code. See PR56270. */
6369 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6372 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
6373 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
6375 /* VEC_NUM is the number of vect stmts to be created for this group. */
6378 grouped_load
= false;
6379 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6380 if (SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
6382 group_gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
6386 vec_num
= group_size
;
6394 group_size
= vec_num
= 1;
6398 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
6399 gcc_assert (alignment_support_scheme
);
6400 /* Targets with load-lane instructions must not require explicit
6402 gcc_assert (!load_lanes_p
6403 || alignment_support_scheme
== dr_aligned
6404 || alignment_support_scheme
== dr_unaligned_supported
);
6406 /* In case the vectorization factor (VF) is bigger than the number
6407 of elements that we can fit in a vectype (nunits), we have to generate
6408 more than one vector stmt - i.e - we need to "unroll" the
6409 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6410 from one copy of the vector stmt to the next, in the field
6411 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6412 stages to find the correct vector defs to be used when vectorizing
6413 stmts that use the defs of the current stmt. The example below
6414 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6415 need to create 4 vectorized stmts):
6417 before vectorization:
6418 RELATED_STMT VEC_STMT
6422 step 1: vectorize stmt S1:
6423 We first create the vector stmt VS1_0, and, as usual, record a
6424 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6425 Next, we create the vector stmt VS1_1, and record a pointer to
6426 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6427 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6429 RELATED_STMT VEC_STMT
6430 VS1_0: vx0 = memref0 VS1_1 -
6431 VS1_1: vx1 = memref1 VS1_2 -
6432 VS1_2: vx2 = memref2 VS1_3 -
6433 VS1_3: vx3 = memref3 - -
6434 S1: x = load - VS1_0
6437 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6438 information we recorded in RELATED_STMT field is used to vectorize
6441 /* In case of interleaving (non-unit grouped access):
6448 Vectorized loads are created in the order of memory accesses
6449 starting from the access of the first stmt of the chain:
6452 VS2: vx1 = &base + vec_size*1
6453 VS3: vx3 = &base + vec_size*2
6454 VS4: vx4 = &base + vec_size*3
6456 Then permutation statements are generated:
6458 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6459 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6462 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6463 (the order of the data-refs in the output of vect_permute_load_chain
6464 corresponds to the order of scalar stmts in the interleaving chain - see
6465 the documentation of vect_permute_load_chain()).
6466 The generation of permutation stmts and recording them in
6467 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6469 In case of both multiple types and interleaving, the vector loads and
6470 permutation stmts above are created for every copy. The result vector
6471 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6472 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6474 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6475 on a target that supports unaligned accesses (dr_unaligned_supported)
6476 we generate the following code:
6480 p = p + indx * vectype_size;
6485 Otherwise, the data reference is potentially unaligned on a target that
6486 does not support unaligned accesses (dr_explicit_realign_optimized) -
6487 then generate the following code, in which the data in each iteration is
6488 obtained by two vector loads, one from the previous iteration, and one
6489 from the current iteration:
6491 msq_init = *(floor(p1))
6492 p2 = initial_addr + VS - 1;
6493 realignment_token = call target_builtin;
6496 p2 = p2 + indx * vectype_size
6498 vec_dest = realign_load (msq, lsq, realignment_token)
6503 /* If the misalignment remains the same throughout the execution of the
6504 loop, we can create the init_addr and permutation mask at the loop
6505 preheader. Otherwise, it needs to be created inside the loop.
6506 This can only occur when vectorizing memory accesses in the inner-loop
6507 nested within an outer-loop that is being vectorized. */
6509 if (nested_in_vect_loop
6510 && (TREE_INT_CST_LOW (DR_STEP (dr
))
6511 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
6513 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
6514 compute_in_loop
= true;
6517 if ((alignment_support_scheme
== dr_explicit_realign_optimized
6518 || alignment_support_scheme
== dr_explicit_realign
)
6519 && !compute_in_loop
)
6521 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
6522 alignment_support_scheme
, NULL_TREE
,
6524 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6526 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
6527 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
6535 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6538 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6540 aggr_type
= vectype
;
6542 prev_stmt_info
= NULL
;
6543 for (j
= 0; j
< ncopies
; j
++)
6545 /* 1. Create the vector or array pointer update chain. */
6548 bool simd_lane_access_p
6549 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6550 if (simd_lane_access_p
6551 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
6552 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
6553 && integer_zerop (DR_OFFSET (first_dr
))
6554 && integer_zerop (DR_INIT (first_dr
))
6555 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6556 get_alias_set (DR_REF (first_dr
)))
6557 && (alignment_support_scheme
== dr_aligned
6558 || alignment_support_scheme
== dr_unaligned_supported
))
6560 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
6561 dataref_offset
= build_int_cst (reference_alias_ptr_type
6562 (DR_REF (first_dr
)), 0);
6567 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
6568 offset
, &dummy
, gsi
, &ptr_incr
,
6569 simd_lane_access_p
, &inv_p
,
6572 else if (dataref_offset
)
6573 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
6574 TYPE_SIZE_UNIT (aggr_type
));
6576 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
6577 TYPE_SIZE_UNIT (aggr_type
));
6579 if (grouped_load
|| slp_perm
)
6580 dr_chain
.create (vec_num
);
6586 vec_array
= create_vector_array (vectype
, vec_num
);
6589 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6590 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
6591 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
6592 gimple_call_set_lhs (new_stmt
, vec_array
);
6593 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6595 /* Extract each vector into an SSA_NAME. */
6596 for (i
= 0; i
< vec_num
; i
++)
6598 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
6600 dr_chain
.quick_push (new_temp
);
6603 /* Record the mapping between SSA_NAMEs and statements. */
6604 vect_record_grouped_load_vectors (stmt
, dr_chain
);
6608 for (i
= 0; i
< vec_num
; i
++)
6611 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6614 /* 2. Create the vector-load in the loop. */
6615 switch (alignment_support_scheme
)
6618 case dr_unaligned_supported
:
6620 unsigned int align
, misalign
;
6623 = build2 (MEM_REF
, vectype
, dataref_ptr
,
6626 : build_int_cst (reference_alias_ptr_type
6627 (DR_REF (first_dr
)), 0));
6628 align
= TYPE_ALIGN_UNIT (vectype
);
6629 if (alignment_support_scheme
== dr_aligned
)
6631 gcc_assert (aligned_access_p (first_dr
));
6634 else if (DR_MISALIGNMENT (first_dr
) == -1)
6636 TREE_TYPE (data_ref
)
6637 = build_aligned_type (TREE_TYPE (data_ref
),
6638 TYPE_ALIGN (elem_type
));
6639 align
= TYPE_ALIGN_UNIT (elem_type
);
6644 TREE_TYPE (data_ref
)
6645 = build_aligned_type (TREE_TYPE (data_ref
),
6646 TYPE_ALIGN (elem_type
));
6647 misalign
= DR_MISALIGNMENT (first_dr
);
6649 if (dataref_offset
== NULL_TREE
)
6650 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
6654 case dr_explicit_realign
:
6658 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
6660 if (compute_in_loop
)
6661 msq
= vect_setup_realignment (first_stmt
, gsi
,
6663 dr_explicit_realign
,
6666 ptr
= copy_ssa_name (dataref_ptr
);
6667 new_stmt
= gimple_build_assign
6668 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
6670 (TREE_TYPE (dataref_ptr
),
6671 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6672 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6674 = build2 (MEM_REF
, vectype
, ptr
,
6675 build_int_cst (reference_alias_ptr_type
6676 (DR_REF (first_dr
)), 0));
6677 vec_dest
= vect_create_destination_var (scalar_dest
,
6679 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6680 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6681 gimple_assign_set_lhs (new_stmt
, new_temp
);
6682 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
6683 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
6684 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6687 bump
= size_binop (MULT_EXPR
, vs
,
6688 TYPE_SIZE_UNIT (elem_type
));
6689 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
6690 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
6691 new_stmt
= gimple_build_assign
6692 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
6695 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6696 ptr
= copy_ssa_name (dataref_ptr
, new_stmt
);
6697 gimple_assign_set_lhs (new_stmt
, ptr
);
6698 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6700 = build2 (MEM_REF
, vectype
, ptr
,
6701 build_int_cst (reference_alias_ptr_type
6702 (DR_REF (first_dr
)), 0));
6705 case dr_explicit_realign_optimized
:
6706 new_temp
= copy_ssa_name (dataref_ptr
);
6707 new_stmt
= gimple_build_assign
6708 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
6710 (TREE_TYPE (dataref_ptr
),
6711 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
6712 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6714 = build2 (MEM_REF
, vectype
, new_temp
,
6715 build_int_cst (reference_alias_ptr_type
6716 (DR_REF (first_dr
)), 0));
6721 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6722 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
6723 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6724 gimple_assign_set_lhs (new_stmt
, new_temp
);
6725 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6727 /* 3. Handle explicit realignment if necessary/supported.
6729 vec_dest = realign_load (msq, lsq, realignment_token) */
6730 if (alignment_support_scheme
== dr_explicit_realign_optimized
6731 || alignment_support_scheme
== dr_explicit_realign
)
6733 lsq
= gimple_assign_lhs (new_stmt
);
6734 if (!realignment_token
)
6735 realignment_token
= dataref_ptr
;
6736 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
6737 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
6738 msq
, lsq
, realignment_token
);
6739 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6740 gimple_assign_set_lhs (new_stmt
, new_temp
);
6741 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
6743 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
6746 if (i
== vec_num
- 1 && j
== ncopies
- 1)
6747 add_phi_arg (phi
, lsq
,
6748 loop_latch_edge (containing_loop
),
6754 /* 4. Handle invariant-load. */
6755 if (inv_p
&& !bb_vinfo
)
6757 gcc_assert (!grouped_load
);
6758 /* If we have versioned for aliasing or the loop doesn't
6759 have any data dependencies that would preclude this,
6760 then we are sure this is a loop invariant load and
6761 thus we can insert it on the preheader edge. */
6762 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
6763 && !nested_in_vect_loop
6764 && hoist_defs_of_uses (stmt
, loop
))
6766 if (dump_enabled_p ())
6768 dump_printf_loc (MSG_NOTE
, vect_location
,
6769 "hoisting out of the vectorized "
6771 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
6773 tree tem
= copy_ssa_name (scalar_dest
);
6774 gsi_insert_on_edge_immediate
6775 (loop_preheader_edge (loop
),
6776 gimple_build_assign (tem
,
6778 (gimple_assign_rhs1 (stmt
))));
6779 new_temp
= vect_init_vector (stmt
, tem
, vectype
, NULL
);
6783 gimple_stmt_iterator gsi2
= *gsi
;
6785 new_temp
= vect_init_vector (stmt
, scalar_dest
,
6788 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6789 set_vinfo_for_stmt (new_stmt
,
6790 new_stmt_vec_info (new_stmt
, loop_vinfo
,
6796 tree perm_mask
= perm_mask_for_reverse (vectype
);
6797 new_temp
= permute_vec_elements (new_temp
, new_temp
,
6798 perm_mask
, stmt
, gsi
);
6799 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
6802 /* Collect vector loads and later create their permutation in
6803 vect_transform_grouped_load (). */
6804 if (grouped_load
|| slp_perm
)
6805 dr_chain
.quick_push (new_temp
);
6807 /* Store vector loads in the corresponding SLP_NODE. */
6808 if (slp
&& !slp_perm
)
6809 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6811 /* Bump the vector pointer to account for a gap. */
6812 if (slp
&& group_gap
!= 0)
6814 tree bump
= size_binop (MULT_EXPR
,
6815 TYPE_SIZE_UNIT (elem_type
),
6816 size_int (group_gap
));
6817 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
6822 if (slp
&& !slp_perm
)
6827 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
6828 slp_node_instance
, false))
6830 dr_chain
.release ();
6839 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
6840 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
6845 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
6847 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
6848 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
6851 dr_chain
.release ();
6857 /* Function vect_is_simple_cond.
6860 LOOP - the loop that is being vectorized.
6861 COND - Condition that is checked for simple use.
6864 *COMP_VECTYPE - the vector type for the comparison.
6866 Returns whether a COND can be vectorized. Checks whether
6867 condition operands are supportable using vec_is_simple_use. */
6870 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
6871 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
6875 enum vect_def_type dt
;
6876 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
6878 if (!COMPARISON_CLASS_P (cond
))
6881 lhs
= TREE_OPERAND (cond
, 0);
6882 rhs
= TREE_OPERAND (cond
, 1);
6884 if (TREE_CODE (lhs
) == SSA_NAME
)
6886 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
6887 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
6888 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
6891 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
6892 && TREE_CODE (lhs
) != FIXED_CST
)
6895 if (TREE_CODE (rhs
) == SSA_NAME
)
6897 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6898 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
6899 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
6902 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
6903 && TREE_CODE (rhs
) != FIXED_CST
)
6906 *comp_vectype
= vectype1
? vectype1
: vectype2
;
6910 /* vectorizable_condition.
6912 Check if STMT is conditional modify expression that can be vectorized.
6913 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6914 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6917 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6918 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
6919 else caluse if it is 2).
6921 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6924 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
6925 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
6928 tree scalar_dest
= NULL_TREE
;
6929 tree vec_dest
= NULL_TREE
;
6930 tree cond_expr
, then_clause
, else_clause
;
6931 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6932 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6933 tree comp_vectype
= NULL_TREE
;
6934 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
6935 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
6936 tree vec_compare
, vec_cond_expr
;
6938 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6940 enum vect_def_type dt
, dts
[4];
6941 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6943 enum tree_code code
;
6944 stmt_vec_info prev_stmt_info
= NULL
;
6946 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6947 vec
<tree
> vec_oprnds0
= vNULL
;
6948 vec
<tree
> vec_oprnds1
= vNULL
;
6949 vec
<tree
> vec_oprnds2
= vNULL
;
6950 vec
<tree
> vec_oprnds3
= vNULL
;
6953 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
6956 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
6958 gcc_assert (ncopies
>= 1);
6959 if (reduc_index
&& ncopies
> 1)
6960 return false; /* FORNOW */
6962 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
6965 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6968 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6969 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
6973 /* FORNOW: not yet supported. */
6974 if (STMT_VINFO_LIVE_P (stmt_info
))
6976 if (dump_enabled_p ())
6977 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6978 "value used after loop.\n");
6982 /* Is vectorizable conditional operation? */
6983 if (!is_gimple_assign (stmt
))
6986 code
= gimple_assign_rhs_code (stmt
);
6988 if (code
!= COND_EXPR
)
6991 cond_expr
= gimple_assign_rhs1 (stmt
);
6992 then_clause
= gimple_assign_rhs2 (stmt
);
6993 else_clause
= gimple_assign_rhs3 (stmt
);
6995 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
7000 if (TREE_CODE (then_clause
) == SSA_NAME
)
7002 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
7003 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
7004 &then_def_stmt
, &def
, &dt
))
7007 else if (TREE_CODE (then_clause
) != INTEGER_CST
7008 && TREE_CODE (then_clause
) != REAL_CST
7009 && TREE_CODE (then_clause
) != FIXED_CST
)
7012 if (TREE_CODE (else_clause
) == SSA_NAME
)
7014 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
7015 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
7016 &else_def_stmt
, &def
, &dt
))
7019 else if (TREE_CODE (else_clause
) != INTEGER_CST
7020 && TREE_CODE (else_clause
) != REAL_CST
7021 && TREE_CODE (else_clause
) != FIXED_CST
)
7024 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
)));
7025 /* The result of a vector comparison should be signed type. */
7026 tree cmp_type
= build_nonstandard_integer_type (prec
, 0);
7027 vec_cmp_type
= get_same_sized_vectype (cmp_type
, vectype
);
7028 if (vec_cmp_type
== NULL_TREE
)
7033 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
7034 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
7041 vec_oprnds0
.create (1);
7042 vec_oprnds1
.create (1);
7043 vec_oprnds2
.create (1);
7044 vec_oprnds3
.create (1);
7048 scalar_dest
= gimple_assign_lhs (stmt
);
7049 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7051 /* Handle cond expr. */
7052 for (j
= 0; j
< ncopies
; j
++)
7054 gassign
*new_stmt
= NULL
;
7059 auto_vec
<tree
, 4> ops
;
7060 auto_vec
<vec
<tree
>, 4> vec_defs
;
7062 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
7063 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
7064 ops
.safe_push (then_clause
);
7065 ops
.safe_push (else_clause
);
7066 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
7067 vec_oprnds3
= vec_defs
.pop ();
7068 vec_oprnds2
= vec_defs
.pop ();
7069 vec_oprnds1
= vec_defs
.pop ();
7070 vec_oprnds0
= vec_defs
.pop ();
7073 vec_defs
.release ();
7079 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
7081 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
7082 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
7085 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
7087 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
7088 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
7089 if (reduc_index
== 1)
7090 vec_then_clause
= reduc_def
;
7093 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
7095 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
7096 NULL
, >emp
, &def
, &dts
[2]);
7098 if (reduc_index
== 2)
7099 vec_else_clause
= reduc_def
;
7102 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
7104 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
7105 NULL
, >emp
, &def
, &dts
[3]);
7111 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
7112 vec_oprnds0
.pop ());
7113 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
7114 vec_oprnds1
.pop ());
7115 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
7116 vec_oprnds2
.pop ());
7117 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
7118 vec_oprnds3
.pop ());
7123 vec_oprnds0
.quick_push (vec_cond_lhs
);
7124 vec_oprnds1
.quick_push (vec_cond_rhs
);
7125 vec_oprnds2
.quick_push (vec_then_clause
);
7126 vec_oprnds3
.quick_push (vec_else_clause
);
7129 /* Arguments are ready. Create the new vector stmt. */
7130 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
7132 vec_cond_rhs
= vec_oprnds1
[i
];
7133 vec_then_clause
= vec_oprnds2
[i
];
7134 vec_else_clause
= vec_oprnds3
[i
];
7136 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
7137 vec_cond_lhs
, vec_cond_rhs
);
7138 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
7139 vec_compare
, vec_then_clause
, vec_else_clause
);
7141 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
7142 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
7143 gimple_assign_set_lhs (new_stmt
, new_temp
);
7144 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
7146 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
7153 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
7155 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
7157 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
7160 vec_oprnds0
.release ();
7161 vec_oprnds1
.release ();
7162 vec_oprnds2
.release ();
7163 vec_oprnds3
.release ();
7169 /* Make sure the statement is vectorizable. */
7172 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
7174 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7175 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7176 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
7178 tree scalar_type
, vectype
;
7179 gimple pattern_stmt
;
7180 gimple_seq pattern_def_seq
;
7182 if (dump_enabled_p ())
7184 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
7185 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7188 if (gimple_has_volatile_ops (stmt
))
7190 if (dump_enabled_p ())
7191 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7192 "not vectorized: stmt has volatile operands\n");
7197 /* Skip stmts that do not need to be vectorized. In loops this is expected
7199 - the COND_EXPR which is the loop exit condition
7200 - any LABEL_EXPRs in the loop
7201 - computations that are used only for array indexing or loop control.
7202 In basic blocks we only analyze statements that are a part of some SLP
7203 instance, therefore, all the statements are relevant.
7205 Pattern statement needs to be analyzed instead of the original statement
7206 if the original statement is not relevant. Otherwise, we analyze both
7207 statements. In basic blocks we are called from some SLP instance
7208 traversal, don't analyze pattern stmts instead, the pattern stmts
7209 already will be part of SLP instance. */
7211 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
7212 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
7213 && !STMT_VINFO_LIVE_P (stmt_info
))
7215 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7217 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7218 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7220 /* Analyze PATTERN_STMT instead of the original stmt. */
7221 stmt
= pattern_stmt
;
7222 stmt_info
= vinfo_for_stmt (pattern_stmt
);
7223 if (dump_enabled_p ())
7225 dump_printf_loc (MSG_NOTE
, vect_location
,
7226 "==> examining pattern statement: ");
7227 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7232 if (dump_enabled_p ())
7233 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
7238 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
7241 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
7242 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
7244 /* Analyze PATTERN_STMT too. */
7245 if (dump_enabled_p ())
7247 dump_printf_loc (MSG_NOTE
, vect_location
,
7248 "==> examining pattern statement: ");
7249 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
7252 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
7256 if (is_pattern_stmt_p (stmt_info
)
7258 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
7260 gimple_stmt_iterator si
;
7262 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
7264 gimple pattern_def_stmt
= gsi_stmt (si
);
7265 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
7266 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
7268 /* Analyze def stmt of STMT if it's a pattern stmt. */
7269 if (dump_enabled_p ())
7271 dump_printf_loc (MSG_NOTE
, vect_location
,
7272 "==> examining pattern def statement: ");
7273 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
7276 if (!vect_analyze_stmt (pattern_def_stmt
,
7277 need_to_vectorize
, node
))
7283 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
7285 case vect_internal_def
:
7288 case vect_reduction_def
:
7289 case vect_nested_cycle
:
7290 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
7291 || relevance
== vect_used_in_outer_by_reduction
7292 || relevance
== vect_unused_in_scope
));
7295 case vect_induction_def
:
7296 case vect_constant_def
:
7297 case vect_external_def
:
7298 case vect_unknown_def_type
:
7305 gcc_assert (PURE_SLP_STMT (stmt_info
));
7307 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
7308 if (dump_enabled_p ())
7310 dump_printf_loc (MSG_NOTE
, vect_location
,
7311 "get vectype for scalar type: ");
7312 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
7313 dump_printf (MSG_NOTE
, "\n");
7316 vectype
= get_vectype_for_scalar_type (scalar_type
);
7319 if (dump_enabled_p ())
7321 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7322 "not SLPed: unsupported data-type ");
7323 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
7325 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
7330 if (dump_enabled_p ())
7332 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
7333 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
7334 dump_printf (MSG_NOTE
, "\n");
7337 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
7340 if (STMT_VINFO_RELEVANT_P (stmt_info
))
7342 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
7343 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
7344 || (is_gimple_call (stmt
)
7345 && gimple_call_lhs (stmt
) == NULL_TREE
));
7346 *need_to_vectorize
= true;
7351 && (STMT_VINFO_RELEVANT_P (stmt_info
)
7352 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
7353 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, NULL
)
7354 || vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
7355 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
7356 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
7357 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
7358 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
7359 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
7360 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
7361 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
7362 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
7366 ok
= (vectorizable_simd_clone_call (stmt
, NULL
, NULL
, node
)
7367 || vectorizable_conversion (stmt
, NULL
, NULL
, node
)
7368 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
7369 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
7370 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
7371 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
7372 || vectorizable_call (stmt
, NULL
, NULL
, node
)
7373 || vectorizable_store (stmt
, NULL
, NULL
, node
)
7374 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
7379 if (dump_enabled_p ())
7381 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7382 "not vectorized: relevant stmt not ");
7383 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7384 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7393 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7394 need extra handling, except for vectorizable reductions. */
7395 if (STMT_VINFO_LIVE_P (stmt_info
)
7396 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7397 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
7401 if (dump_enabled_p ())
7403 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7404 "not vectorized: live stmt not ");
7405 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
7406 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
7416 /* Function vect_transform_stmt.
7418 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7421 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
7422 bool *grouped_store
, slp_tree slp_node
,
7423 slp_instance slp_node_instance
)
7425 bool is_store
= false;
7426 gimple vec_stmt
= NULL
;
7427 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7430 switch (STMT_VINFO_TYPE (stmt_info
))
7432 case type_demotion_vec_info_type
:
7433 case type_promotion_vec_info_type
:
7434 case type_conversion_vec_info_type
:
7435 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
7439 case induc_vec_info_type
:
7440 gcc_assert (!slp_node
);
7441 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
7445 case shift_vec_info_type
:
7446 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
7450 case op_vec_info_type
:
7451 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
7455 case assignment_vec_info_type
:
7456 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
7460 case load_vec_info_type
:
7461 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
7466 case store_vec_info_type
:
7467 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
7469 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
7471 /* In case of interleaving, the whole chain is vectorized when the
7472 last store in the chain is reached. Store stmts before the last
7473 one are skipped, and there vec_stmt_info shouldn't be freed
7475 *grouped_store
= true;
7476 if (STMT_VINFO_VEC_STMT (stmt_info
))
7483 case condition_vec_info_type
:
7484 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
7488 case call_vec_info_type
:
7489 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7490 stmt
= gsi_stmt (*gsi
);
7491 if (is_gimple_call (stmt
)
7492 && gimple_call_internal_p (stmt
)
7493 && gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
7497 case call_simd_clone_vec_info_type
:
7498 done
= vectorizable_simd_clone_call (stmt
, gsi
, &vec_stmt
, slp_node
);
7499 stmt
= gsi_stmt (*gsi
);
7502 case reduc_vec_info_type
:
7503 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
7508 if (!STMT_VINFO_LIVE_P (stmt_info
))
7510 if (dump_enabled_p ())
7511 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7512 "stmt not supported.\n");
7517 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7518 is being vectorized, but outside the immediately enclosing loop. */
7520 && STMT_VINFO_LOOP_VINFO (stmt_info
)
7521 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7522 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
7523 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
7524 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
7525 || STMT_VINFO_RELEVANT (stmt_info
) ==
7526 vect_used_in_outer_by_reduction
))
7528 struct loop
*innerloop
= LOOP_VINFO_LOOP (
7529 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
7530 imm_use_iterator imm_iter
;
7531 use_operand_p use_p
;
7535 if (dump_enabled_p ())
7536 dump_printf_loc (MSG_NOTE
, vect_location
,
7537 "Record the vdef for outer-loop vectorization.\n");
7539 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
7540 (to be used when vectorizing outer-loop stmts that use the DEF of
7542 if (gimple_code (stmt
) == GIMPLE_PHI
)
7543 scalar_dest
= PHI_RESULT (stmt
);
7545 scalar_dest
= gimple_assign_lhs (stmt
);
7547 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
7549 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
7551 exit_phi
= USE_STMT (use_p
);
7552 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
7557 /* Handle stmts whose DEF is used outside the loop-nest that is
7558 being vectorized. */
7559 if (STMT_VINFO_LIVE_P (stmt_info
)
7560 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
7562 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
7567 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
7573 /* Remove a group of stores (for SLP or interleaving), free their
7577 vect_remove_stores (gimple first_stmt
)
7579 gimple next
= first_stmt
;
7581 gimple_stmt_iterator next_si
;
7585 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
7587 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
7588 if (is_pattern_stmt_p (stmt_info
))
7589 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
7590 /* Free the attached stmt_vec_info and remove the stmt. */
7591 next_si
= gsi_for_stmt (next
);
7592 unlink_stmt_vdef (next
);
7593 gsi_remove (&next_si
, true);
7594 release_defs (next
);
7595 free_stmt_vec_info (next
);
7601 /* Function new_stmt_vec_info.
7603 Create and initialize a new stmt_vec_info struct for STMT. */
7606 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
7607 bb_vec_info bb_vinfo
)
7610 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
7612 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
7613 STMT_VINFO_STMT (res
) = stmt
;
7614 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
7615 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
7616 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
7617 STMT_VINFO_LIVE_P (res
) = false;
7618 STMT_VINFO_VECTYPE (res
) = NULL
;
7619 STMT_VINFO_VEC_STMT (res
) = NULL
;
7620 STMT_VINFO_VECTORIZABLE (res
) = true;
7621 STMT_VINFO_IN_PATTERN_P (res
) = false;
7622 STMT_VINFO_RELATED_STMT (res
) = NULL
;
7623 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
7624 STMT_VINFO_DATA_REF (res
) = NULL
;
7626 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
7627 STMT_VINFO_DR_OFFSET (res
) = NULL
;
7628 STMT_VINFO_DR_INIT (res
) = NULL
;
7629 STMT_VINFO_DR_STEP (res
) = NULL
;
7630 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
7632 if (gimple_code (stmt
) == GIMPLE_PHI
7633 && is_loop_header_bb_p (gimple_bb (stmt
)))
7634 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
7636 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
7638 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
7639 STMT_SLP_TYPE (res
) = loop_vect
;
7640 GROUP_FIRST_ELEMENT (res
) = NULL
;
7641 GROUP_NEXT_ELEMENT (res
) = NULL
;
7642 GROUP_SIZE (res
) = 0;
7643 GROUP_STORE_COUNT (res
) = 0;
7644 GROUP_GAP (res
) = 0;
7645 GROUP_SAME_DR_STMT (res
) = NULL
;
7651 /* Create a hash table for stmt_vec_info. */
7654 init_stmt_vec_info_vec (void)
7656 gcc_assert (!stmt_vec_info_vec
.exists ());
7657 stmt_vec_info_vec
.create (50);
7661 /* Free hash table for stmt_vec_info. */
7664 free_stmt_vec_info_vec (void)
7668 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
7670 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info
) info
));
7671 gcc_assert (stmt_vec_info_vec
.exists ());
7672 stmt_vec_info_vec
.release ();
7676 /* Free stmt vectorization related info. */
7679 free_stmt_vec_info (gimple stmt
)
7681 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
7686 /* Check if this statement has a related "pattern stmt"
7687 (introduced by the vectorizer during the pattern recognition
7688 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7690 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
7692 stmt_vec_info patt_info
7693 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
7696 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
7697 gimple patt_stmt
= STMT_VINFO_STMT (patt_info
);
7698 gimple_set_bb (patt_stmt
, NULL
);
7699 tree lhs
= gimple_get_lhs (patt_stmt
);
7700 if (TREE_CODE (lhs
) == SSA_NAME
)
7701 release_ssa_name (lhs
);
7704 gimple_stmt_iterator si
;
7705 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
7707 gimple seq_stmt
= gsi_stmt (si
);
7708 gimple_set_bb (seq_stmt
, NULL
);
7709 lhs
= gimple_get_lhs (patt_stmt
);
7710 if (TREE_CODE (lhs
) == SSA_NAME
)
7711 release_ssa_name (lhs
);
7712 free_stmt_vec_info (seq_stmt
);
7715 free_stmt_vec_info (patt_stmt
);
7719 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
7720 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).release ();
7721 set_vinfo_for_stmt (stmt
, NULL
);
7726 /* Function get_vectype_for_scalar_type_and_size.
7728 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7732 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
7734 machine_mode inner_mode
= TYPE_MODE (scalar_type
);
7735 machine_mode simd_mode
;
7736 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
7743 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
7744 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
7747 /* For vector types of elements whose mode precision doesn't
7748 match their types precision we use a element type of mode
7749 precision. The vectorization routines will have to make sure
7750 they support the proper result truncation/extension.
7751 We also make sure to build vector types with INTEGER_TYPE
7752 component type only. */
7753 if (INTEGRAL_TYPE_P (scalar_type
)
7754 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
7755 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
7756 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
7757 TYPE_UNSIGNED (scalar_type
));
7759 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7760 When the component mode passes the above test simply use a type
7761 corresponding to that mode. The theory is that any use that
7762 would cause problems with this will disable vectorization anyway. */
7763 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
7764 && !INTEGRAL_TYPE_P (scalar_type
))
7765 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
7767 /* We can't build a vector type of elements with alignment bigger than
7769 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
7770 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
7771 TYPE_UNSIGNED (scalar_type
));
7773 /* If we felt back to using the mode fail if there was
7774 no scalar type for it. */
7775 if (scalar_type
== NULL_TREE
)
7778 /* If no size was supplied use the mode the target prefers. Otherwise
7779 lookup a vector mode of the specified size. */
7781 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
7783 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
7784 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
7788 vectype
= build_vector_type (scalar_type
, nunits
);
7790 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7791 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
7797 unsigned int current_vector_size
;
7799 /* Function get_vectype_for_scalar_type.
7801 Returns the vector type corresponding to SCALAR_TYPE as supported
7805 get_vectype_for_scalar_type (tree scalar_type
)
7808 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
7809 current_vector_size
);
7811 && current_vector_size
== 0)
7812 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
7816 /* Function get_same_sized_vectype
7818 Returns a vector type corresponding to SCALAR_TYPE of size
7819 VECTOR_TYPE if supported by the target. */
7822 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
7824 return get_vectype_for_scalar_type_and_size
7825 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
7828 /* Function vect_is_simple_use.
7831 LOOP_VINFO - the vect info of the loop that is being vectorized.
7832 BB_VINFO - the vect info of the basic block that is being vectorized.
7833 OPERAND - operand of STMT in the loop or bb.
7834 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7836 Returns whether a stmt with OPERAND can be vectorized.
7837 For loops, supportable operands are constants, loop invariants, and operands
7838 that are defined by the current iteration of the loop. Unsupportable
7839 operands are those that are defined by a previous iteration of the loop (as
7840 is the case in reduction/induction computations).
7841 For basic blocks, supportable operands are constants and bb invariants.
7842 For now, operands defined outside the basic block are not supported. */
7845 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
7846 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
7847 tree
*def
, enum vect_def_type
*dt
)
7850 stmt_vec_info stmt_vinfo
;
7851 struct loop
*loop
= NULL
;
7854 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7859 if (dump_enabled_p ())
7861 dump_printf_loc (MSG_NOTE
, vect_location
,
7862 "vect_is_simple_use: operand ");
7863 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
7864 dump_printf (MSG_NOTE
, "\n");
7867 if (CONSTANT_CLASS_P (operand
))
7869 *dt
= vect_constant_def
;
7873 if (is_gimple_min_invariant (operand
))
7876 *dt
= vect_external_def
;
7880 if (TREE_CODE (operand
) == PAREN_EXPR
)
7882 if (dump_enabled_p ())
7883 dump_printf_loc (MSG_NOTE
, vect_location
, "non-associatable copy.\n");
7884 operand
= TREE_OPERAND (operand
, 0);
7887 if (TREE_CODE (operand
) != SSA_NAME
)
7889 if (dump_enabled_p ())
7890 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7895 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
7896 if (*def_stmt
== NULL
)
7898 if (dump_enabled_p ())
7899 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7904 if (dump_enabled_p ())
7906 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
7907 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
7910 /* Empty stmt is expected only in case of a function argument.
7911 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
7912 if (gimple_nop_p (*def_stmt
))
7915 *dt
= vect_external_def
;
7919 bb
= gimple_bb (*def_stmt
);
7921 if ((loop
&& !flow_bb_inside_loop_p (loop
, bb
))
7922 || (!loop
&& bb
!= BB_VINFO_BB (bb_vinfo
))
7923 || (!loop
&& gimple_code (*def_stmt
) == GIMPLE_PHI
))
7924 *dt
= vect_external_def
;
7927 stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
7928 if (!loop
&& !STMT_VINFO_VECTORIZABLE (stmt_vinfo
))
7929 *dt
= vect_external_def
;
7931 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
7934 if (dump_enabled_p ())
7936 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: ");
7939 case vect_uninitialized_def
:
7940 dump_printf (MSG_NOTE
, "uninitialized\n");
7942 case vect_constant_def
:
7943 dump_printf (MSG_NOTE
, "constant\n");
7945 case vect_external_def
:
7946 dump_printf (MSG_NOTE
, "external\n");
7948 case vect_internal_def
:
7949 dump_printf (MSG_NOTE
, "internal\n");
7951 case vect_induction_def
:
7952 dump_printf (MSG_NOTE
, "induction\n");
7954 case vect_reduction_def
:
7955 dump_printf (MSG_NOTE
, "reduction\n");
7957 case vect_double_reduction_def
:
7958 dump_printf (MSG_NOTE
, "double reduction\n");
7960 case vect_nested_cycle
:
7961 dump_printf (MSG_NOTE
, "nested cycle\n");
7963 case vect_unknown_def_type
:
7964 dump_printf (MSG_NOTE
, "unknown\n");
7969 if (*dt
== vect_unknown_def_type
7971 && *dt
== vect_double_reduction_def
7972 && gimple_code (stmt
) != GIMPLE_PHI
))
7974 if (dump_enabled_p ())
7975 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7976 "Unsupported pattern.\n");
7980 switch (gimple_code (*def_stmt
))
7983 *def
= gimple_phi_result (*def_stmt
);
7987 *def
= gimple_assign_lhs (*def_stmt
);
7991 *def
= gimple_call_lhs (*def_stmt
);
7996 if (dump_enabled_p ())
7997 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7998 "unsupported defining stmt:\n");
8005 /* Function vect_is_simple_use_1.
8007 Same as vect_is_simple_use_1 but also determines the vector operand
8008 type of OPERAND and stores it to *VECTYPE. If the definition of
8009 OPERAND is vect_uninitialized_def, vect_constant_def or
8010 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8011 is responsible to compute the best suited vector type for the
8015 vect_is_simple_use_1 (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
8016 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
8017 tree
*def
, enum vect_def_type
*dt
, tree
*vectype
)
8019 if (!vect_is_simple_use (operand
, stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
8023 /* Now get a vector type if the def is internal, otherwise supply
8024 NULL_TREE and leave it up to the caller to figure out a proper
8025 type for the use stmt. */
8026 if (*dt
== vect_internal_def
8027 || *dt
== vect_induction_def
8028 || *dt
== vect_reduction_def
8029 || *dt
== vect_double_reduction_def
8030 || *dt
== vect_nested_cycle
)
8032 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
8034 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
8035 && !STMT_VINFO_RELEVANT (stmt_info
)
8036 && !STMT_VINFO_LIVE_P (stmt_info
))
8037 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
8039 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8040 gcc_assert (*vectype
!= NULL_TREE
);
8042 else if (*dt
== vect_uninitialized_def
8043 || *dt
== vect_constant_def
8044 || *dt
== vect_external_def
)
8045 *vectype
= NULL_TREE
;
8053 /* Function supportable_widening_operation
8055 Check whether an operation represented by the code CODE is a
8056 widening operation that is supported by the target platform in
8057 vector form (i.e., when operating on arguments of type VECTYPE_IN
8058 producing a result of type VECTYPE_OUT).
8060 Widening operations we currently support are NOP (CONVERT), FLOAT
8061 and WIDEN_MULT. This function checks if these operations are supported
8062 by the target platform either directly (via vector tree-codes), or via
8066 - CODE1 and CODE2 are codes of vector operations to be used when
8067 vectorizing the operation, if available.
8068 - MULTI_STEP_CVT determines the number of required intermediate steps in
8069 case of multi-step conversion (like char->short->int - in that case
8070 MULTI_STEP_CVT will be 1).
8071 - INTERM_TYPES contains the intermediate type required to perform the
8072 widening operation (short in the above example). */
8075 supportable_widening_operation (enum tree_code code
, gimple stmt
,
8076 tree vectype_out
, tree vectype_in
,
8077 enum tree_code
*code1
, enum tree_code
*code2
,
8078 int *multi_step_cvt
,
8079 vec
<tree
> *interm_types
)
8081 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
8082 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
8083 struct loop
*vect_loop
= NULL
;
8084 machine_mode vec_mode
;
8085 enum insn_code icode1
, icode2
;
8086 optab optab1
, optab2
;
8087 tree vectype
= vectype_in
;
8088 tree wide_vectype
= vectype_out
;
8089 enum tree_code c1
, c2
;
8091 tree prev_type
, intermediate_type
;
8092 machine_mode intermediate_mode
, prev_mode
;
8093 optab optab3
, optab4
;
8095 *multi_step_cvt
= 0;
8097 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
8101 case WIDEN_MULT_EXPR
:
8102 /* The result of a vectorized widening operation usually requires
8103 two vectors (because the widened results do not fit into one vector).
8104 The generated vector results would normally be expected to be
8105 generated in the same order as in the original scalar computation,
8106 i.e. if 8 results are generated in each vector iteration, they are
8107 to be organized as follows:
8108 vect1: [res1,res2,res3,res4],
8109 vect2: [res5,res6,res7,res8].
8111 However, in the special case that the result of the widening
8112 operation is used in a reduction computation only, the order doesn't
8113 matter (because when vectorizing a reduction we change the order of
8114 the computation). Some targets can take advantage of this and
8115 generate more efficient code. For example, targets like Altivec,
8116 that support widen_mult using a sequence of {mult_even,mult_odd}
8117 generate the following vectors:
8118 vect1: [res1,res3,res5,res7],
8119 vect2: [res2,res4,res6,res8].
8121 When vectorizing outer-loops, we execute the inner-loop sequentially
8122 (each vectorized inner-loop iteration contributes to VF outer-loop
8123 iterations in parallel). We therefore don't allow to change the
8124 order of the computation in the inner-loop during outer-loop
8126 /* TODO: Another case in which order doesn't *really* matter is when we
8127 widen and then contract again, e.g. (short)((int)x * y >> 8).
8128 Normally, pack_trunc performs an even/odd permute, whereas the
8129 repack from an even/odd expansion would be an interleave, which
8130 would be significantly simpler for e.g. AVX2. */
8131 /* In any case, in order to avoid duplicating the code below, recurse
8132 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8133 are properly set up for the caller. If we fail, we'll continue with
8134 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8136 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
8137 && !nested_in_vect_loop_p (vect_loop
, stmt
)
8138 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
8139 stmt
, vectype_out
, vectype_in
,
8140 code1
, code2
, multi_step_cvt
,
8143 /* Elements in a vector with vect_used_by_reduction property cannot
8144 be reordered if the use chain with this property does not have the
8145 same operation. One such an example is s += a * b, where elements
8146 in a and b cannot be reordered. Here we check if the vector defined
8147 by STMT is only directly used in the reduction statement. */
8148 tree lhs
= gimple_assign_lhs (stmt
);
8149 use_operand_p dummy
;
8151 stmt_vec_info use_stmt_info
= NULL
;
8152 if (single_imm_use (lhs
, &dummy
, &use_stmt
)
8153 && (use_stmt_info
= vinfo_for_stmt (use_stmt
))
8154 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
8157 c1
= VEC_WIDEN_MULT_LO_EXPR
;
8158 c2
= VEC_WIDEN_MULT_HI_EXPR
;
8161 case VEC_WIDEN_MULT_EVEN_EXPR
:
8162 /* Support the recursion induced just above. */
8163 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
8164 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
8167 case WIDEN_LSHIFT_EXPR
:
8168 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
8169 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
8173 c1
= VEC_UNPACK_LO_EXPR
;
8174 c2
= VEC_UNPACK_HI_EXPR
;
8178 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
8179 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
8182 case FIX_TRUNC_EXPR
:
8183 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8184 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8185 computing the operation. */
8192 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
8194 enum tree_code ctmp
= c1
;
8199 if (code
== FIX_TRUNC_EXPR
)
8201 /* The signedness is determined from output operand. */
8202 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
8203 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
8207 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
8208 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
8211 if (!optab1
|| !optab2
)
8214 vec_mode
= TYPE_MODE (vectype
);
8215 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
8216 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
8222 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
8223 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
8226 /* Check if it's a multi-step conversion that can be done using intermediate
8229 prev_type
= vectype
;
8230 prev_mode
= vec_mode
;
8232 if (!CONVERT_EXPR_CODE_P (code
))
8235 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8236 intermediate steps in promotion sequence. We try
8237 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
8239 interm_types
->create (MAX_INTERM_CVT_STEPS
);
8240 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
8242 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
8244 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
8245 TYPE_UNSIGNED (prev_type
));
8246 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
8247 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
8249 if (!optab3
|| !optab4
8250 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
8251 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
8252 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
8253 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
8254 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
8255 == CODE_FOR_nothing
)
8256 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
8257 == CODE_FOR_nothing
))
8260 interm_types
->quick_push (intermediate_type
);
8261 (*multi_step_cvt
)++;
8263 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
8264 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
8267 prev_type
= intermediate_type
;
8268 prev_mode
= intermediate_mode
;
8271 interm_types
->release ();
8276 /* Function supportable_narrowing_operation
8278 Check whether an operation represented by the code CODE is a
8279 narrowing operation that is supported by the target platform in
8280 vector form (i.e., when operating on arguments of type VECTYPE_IN
8281 and producing a result of type VECTYPE_OUT).
8283 Narrowing operations we currently support are NOP (CONVERT) and
8284 FIX_TRUNC. This function checks if these operations are supported by
8285 the target platform directly via vector tree-codes.
8288 - CODE1 is the code of a vector operation to be used when
8289 vectorizing the operation, if available.
8290 - MULTI_STEP_CVT determines the number of required intermediate steps in
8291 case of multi-step conversion (like int->short->char - in that case
8292 MULTI_STEP_CVT will be 1).
8293 - INTERM_TYPES contains the intermediate type required to perform the
8294 narrowing operation (short in the above example). */
8297 supportable_narrowing_operation (enum tree_code code
,
8298 tree vectype_out
, tree vectype_in
,
8299 enum tree_code
*code1
, int *multi_step_cvt
,
8300 vec
<tree
> *interm_types
)
8302 machine_mode vec_mode
;
8303 enum insn_code icode1
;
8304 optab optab1
, interm_optab
;
8305 tree vectype
= vectype_in
;
8306 tree narrow_vectype
= vectype_out
;
8308 tree intermediate_type
;
8309 machine_mode intermediate_mode
, prev_mode
;
8313 *multi_step_cvt
= 0;
8317 c1
= VEC_PACK_TRUNC_EXPR
;
8320 case FIX_TRUNC_EXPR
:
8321 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
8325 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8326 tree code and optabs used for computing the operation. */
8333 if (code
== FIX_TRUNC_EXPR
)
8334 /* The signedness is determined from output operand. */
8335 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
8337 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
8342 vec_mode
= TYPE_MODE (vectype
);
8343 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
8348 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
8351 /* Check if it's a multi-step conversion that can be done using intermediate
8353 prev_mode
= vec_mode
;
8354 if (code
== FIX_TRUNC_EXPR
)
8355 uns
= TYPE_UNSIGNED (vectype_out
);
8357 uns
= TYPE_UNSIGNED (vectype
);
8359 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8360 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8361 costly than signed. */
8362 if (code
== FIX_TRUNC_EXPR
&& uns
)
8364 enum insn_code icode2
;
8367 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
8369 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
8370 if (interm_optab
!= unknown_optab
8371 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
8372 && insn_data
[icode1
].operand
[0].mode
8373 == insn_data
[icode2
].operand
[0].mode
)
8376 optab1
= interm_optab
;
8381 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8382 intermediate steps in promotion sequence. We try
8383 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8384 interm_types
->create (MAX_INTERM_CVT_STEPS
);
8385 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
8387 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
8389 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
8391 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
8394 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
8395 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
8396 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
8397 == CODE_FOR_nothing
))
8400 interm_types
->quick_push (intermediate_type
);
8401 (*multi_step_cvt
)++;
8403 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
8406 prev_mode
= intermediate_mode
;
8407 optab1
= interm_optab
;
8410 interm_types
->release ();