contrib/gcc-4.7/gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "cfglayout.h"
37 #include "expr.h"
38 #include "recog.h"
39 #include "optabs.h"
40 #include "diagnostic-core.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
45 /* Return a variable of type ELEM_TYPE[NELEMS]. */
47 static tree
48 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
50 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
51 "vect_array");
54 /* ARRAY is an array of vectors created by create_vector_array.
55 Return an SSA_NAME for the vector in index N. The reference
56 is part of the vectorization of STMT and the vector is associated
57 with scalar destination SCALAR_DEST. */
59 static tree
60 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
61 tree array, unsigned HOST_WIDE_INT n)
63 tree vect_type, vect, vect_name, array_ref;
64 gimple new_stmt;
66 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
67 vect_type = TREE_TYPE (TREE_TYPE (array));
68 vect = vect_create_destination_var (scalar_dest, vect_type);
69 array_ref = build4 (ARRAY_REF, vect_type, array,
70 build_int_cst (size_type_node, n),
71 NULL_TREE, NULL_TREE);
73 new_stmt = gimple_build_assign (vect, array_ref);
74 vect_name = make_ssa_name (vect, new_stmt);
75 gimple_assign_set_lhs (new_stmt, vect_name);
76 vect_finish_stmt_generation (stmt, new_stmt, gsi);
77 mark_symbols_for_renaming (new_stmt);
79 return vect_name;
82 /* ARRAY is an array of vectors created by create_vector_array.
83 Emit code to store SSA_NAME VECT in index N of the array.
84 The store is part of the vectorization of STMT. */
86 static void
87 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
88 tree array, unsigned HOST_WIDE_INT n)
90 tree array_ref;
91 gimple new_stmt;
93 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
94 build_int_cst (size_type_node, n),
95 NULL_TREE, NULL_TREE);
97 new_stmt = gimple_build_assign (array_ref, vect);
98 vect_finish_stmt_generation (stmt, new_stmt, gsi);
99 mark_symbols_for_renaming (new_stmt);
102 /* PTR is a pointer to an array of type TYPE. Return a representation
103 of *PTR. The memory reference replaces those in FIRST_DR
104 (and its group). */
106 static tree
107 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
109 struct ptr_info_def *pi;
110 tree mem_ref, alias_ptr_type;
112 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
113 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
114 /* Arrays have the same alignment as their type. */
115 pi = get_ptr_info (ptr);
116 pi->align = TYPE_ALIGN_UNIT (type);
117 pi->misalign = 0;
118 return mem_ref;
121 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
123 /* Function vect_mark_relevant.
125 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
127 static void
128 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
129 enum vect_relevant relevant, bool live_p,
130 bool used_in_pattern)
132 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
133 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
134 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
135 gimple pattern_stmt;
137 if (vect_print_dump_info (REPORT_DETAILS))
138 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
140 /* If this stmt is an original stmt in a pattern, we might need to mark its
141 related pattern stmt instead of the original stmt. However, such stmts
142 may have their own uses that are not in any pattern; in such cases the
143 stmt itself should be marked. */
144 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
146 bool found = false;
147 if (!used_in_pattern)
149 imm_use_iterator imm_iter;
150 use_operand_p use_p;
151 gimple use_stmt;
152 tree lhs;
153 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
154 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
156 if (is_gimple_assign (stmt))
157 lhs = gimple_assign_lhs (stmt);
158 else
159 lhs = gimple_call_lhs (stmt);
161 /* This use is outside the pattern. If LHS has other uses that are
162 pattern uses, we should mark the stmt itself, and not the pattern
163 stmt. */
164 if (TREE_CODE (lhs) == SSA_NAME)
165 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
167 if (is_gimple_debug (USE_STMT (use_p)))
168 continue;
169 use_stmt = USE_STMT (use_p);
171 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
172 continue;
174 if (vinfo_for_stmt (use_stmt)
175 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
177 found = true;
178 break;
183 if (!found)
185 /* This is the last stmt in a sequence that was detected as a
186 pattern that can potentially be vectorized. Don't mark the stmt
187 as relevant/live because it's not going to be vectorized.
188 Instead mark the pattern-stmt that replaces it. */
190 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
192 if (vect_print_dump_info (REPORT_DETAILS))
193 fprintf (vect_dump, "last stmt in pattern. don't mark"
194 " relevant/live.");
195 stmt_info = vinfo_for_stmt (pattern_stmt);
196 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
197 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
198 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
199 stmt = pattern_stmt;
203 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
204 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
205 STMT_VINFO_RELEVANT (stmt_info) = relevant;
207 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
208 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
210 if (vect_print_dump_info (REPORT_DETAILS))
211 fprintf (vect_dump, "already marked relevant/live.");
212 return;
215 VEC_safe_push (gimple, heap, *worklist, stmt);
219 /* Function vect_stmt_relevant_p.
221 Return true if STMT in loop that is represented by LOOP_VINFO is
222 "relevant for vectorization".
224 A stmt is considered "relevant for vectorization" if:
225 - it has uses outside the loop.
226 - it has vdefs (it alters memory).
227 - control stmts in the loop (except for the exit condition).
229 CHECKME: what other side effects would the vectorizer allow? */
231 static bool
232 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
233 enum vect_relevant *relevant, bool *live_p)
235 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
236 ssa_op_iter op_iter;
237 imm_use_iterator imm_iter;
238 use_operand_p use_p;
239 def_operand_p def_p;
241 *relevant = vect_unused_in_scope;
242 *live_p = false;
244 /* cond stmt other than loop exit cond. */
245 if (is_ctrl_stmt (stmt)
246 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
247 != loop_exit_ctrl_vec_info_type)
248 *relevant = vect_used_in_scope;
250 /* changing memory. */
251 if (gimple_code (stmt) != GIMPLE_PHI)
252 if (gimple_vdef (stmt))
254 if (vect_print_dump_info (REPORT_DETAILS))
255 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
256 *relevant = vect_used_in_scope;
259 /* uses outside the loop. */
260 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
262 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
264 basic_block bb = gimple_bb (USE_STMT (use_p));
265 if (!flow_bb_inside_loop_p (loop, bb))
267 if (vect_print_dump_info (REPORT_DETAILS))
268 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
270 if (is_gimple_debug (USE_STMT (use_p)))
271 continue;
273 /* We expect all such uses to be in the loop exit phis
274 (because of loop closed form) */
275 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
276 gcc_assert (bb == single_exit (loop)->dest);
278 *live_p = true;
283 return (*live_p || *relevant);
287 /* Function exist_non_indexing_operands_for_use_p
289 USE is one of the uses attached to STMT. Check if USE is
290 used in STMT for anything other than indexing an array. */
292 static bool
293 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
295 tree operand;
296 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
298 /* USE corresponds to some operand in STMT. If there is no data
299 reference in STMT, then any operand that corresponds to USE
300 is not indexing an array. */
301 if (!STMT_VINFO_DATA_REF (stmt_info))
302 return true;
304 /* STMT has a data_ref. FORNOW this means that it is one of
305 the following forms:
306 -1- ARRAY_REF = var
307 -2- var = ARRAY_REF
308 (This should have been verified in analyze_data_refs).
310 'var' in the second case corresponds to a def, not a use,
311 so USE cannot correspond to any operands that are not used
312 for array indexing.
314 Therefore, all we need to check is if STMT falls into the
315 first case, and whether var corresponds to USE. */
317 if (!gimple_assign_copy_p (stmt))
318 return false;
319 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
320 return false;
321 operand = gimple_assign_rhs1 (stmt);
322 if (TREE_CODE (operand) != SSA_NAME)
323 return false;
325 if (operand == use)
326 return true;
328 return false;
333 /* Function process_use.
335 Inputs:
336 - a USE in STMT in a loop represented by LOOP_VINFO
337 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
338 that defined USE. This is done by calling mark_relevant and passing it
339 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
340 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
341 be performed.
343 Outputs:
344 Generally, LIVE_P and RELEVANT are used to define the liveness and
345 relevance info of the DEF_STMT of this USE:
346 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
347 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
348 Exceptions:
349 - case 1: If USE is used only for address computations (e.g. array indexing),
350 which does not need to be directly vectorized, then the liveness/relevance
351 of the respective DEF_STMT is left unchanged.
352 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
353 skip DEF_STMT because it has already been processed.
354 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
355 be modified accordingly.
357 Return true if everything is as expected. Return false otherwise. */
359 static bool
360 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
361 enum vect_relevant relevant, VEC(gimple,heap) **worklist,
362 bool force)
364 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
365 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
366 stmt_vec_info dstmt_vinfo;
367 basic_block bb, def_bb;
368 tree def;
369 gimple def_stmt;
370 enum vect_def_type dt;
372 /* case 1: we are only interested in uses that need to be vectorized. Uses
373 that are used for address computation are not considered relevant. */
374 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
375 return true;
377 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
379 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
380 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
381 return false;
384 if (!def_stmt || gimple_nop_p (def_stmt))
385 return true;
387 def_bb = gimple_bb (def_stmt);
388 if (!flow_bb_inside_loop_p (loop, def_bb))
390 if (vect_print_dump_info (REPORT_DETAILS))
391 fprintf (vect_dump, "def_stmt is out of loop.");
392 return true;
395 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
396 DEF_STMT must have already been processed, because this should be the
397 only way that STMT, which is a reduction-phi, was put in the worklist,
398 as there should be no other uses for DEF_STMT in the loop. So we just
399 check that everything is as expected, and we are done. */
400 dstmt_vinfo = vinfo_for_stmt (def_stmt);
401 bb = gimple_bb (stmt);
402 if (gimple_code (stmt) == GIMPLE_PHI
403 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
404 && gimple_code (def_stmt) != GIMPLE_PHI
405 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
406 && bb->loop_father == def_bb->loop_father)
408 if (vect_print_dump_info (REPORT_DETAILS))
409 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
410 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
411 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
412 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
413 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
414 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
415 return true;
418 /* case 3a: outer-loop stmt defining an inner-loop stmt:
419 outer-loop-header-bb:
420 d = def_stmt
421 inner-loop:
422 stmt # use (d)
423 outer-loop-tail-bb:
424 ... */
425 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
427 if (vect_print_dump_info (REPORT_DETAILS))
428 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
430 switch (relevant)
432 case vect_unused_in_scope:
433 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
434 vect_used_in_scope : vect_unused_in_scope;
435 break;
437 case vect_used_in_outer_by_reduction:
438 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
439 relevant = vect_used_by_reduction;
440 break;
442 case vect_used_in_outer:
443 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
444 relevant = vect_used_in_scope;
445 break;
447 case vect_used_in_scope:
448 break;
450 default:
451 gcc_unreachable ();
455 /* case 3b: inner-loop stmt defining an outer-loop stmt:
456 outer-loop-header-bb:
458 inner-loop:
459 d = def_stmt
460 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
461 stmt # use (d) */
462 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
464 if (vect_print_dump_info (REPORT_DETAILS))
465 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
467 switch (relevant)
469 case vect_unused_in_scope:
470 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
471 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
472 vect_used_in_outer_by_reduction : vect_unused_in_scope;
473 break;
475 case vect_used_by_reduction:
476 relevant = vect_used_in_outer_by_reduction;
477 break;
479 case vect_used_in_scope:
480 relevant = vect_used_in_outer;
481 break;
483 default:
484 gcc_unreachable ();
488 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
489 is_pattern_stmt_p (stmt_vinfo));
490 return true;
494 /* Function vect_mark_stmts_to_be_vectorized.
496 Not all stmts in the loop need to be vectorized. For example:
498 for i...
499 for j...
500 1. T0 = i + j
501 2. T1 = a[T0]
503 3. j = j + 1
505 Stmts 1 and 3 do not need to be vectorized, because loop control and
506 addressing of vectorized data-refs are handled differently.
508 This pass detects such stmts. */
510 bool
511 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
513 VEC(gimple,heap) *worklist;
514 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
515 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
516 unsigned int nbbs = loop->num_nodes;
517 gimple_stmt_iterator si;
518 gimple stmt;
519 unsigned int i;
520 stmt_vec_info stmt_vinfo;
521 basic_block bb;
522 gimple phi;
523 bool live_p;
524 enum vect_relevant relevant, tmp_relevant;
525 enum vect_def_type def_type;
527 if (vect_print_dump_info (REPORT_DETAILS))
528 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
530 worklist = VEC_alloc (gimple, heap, 64);
532 /* 1. Init worklist. */
533 for (i = 0; i < nbbs; i++)
535 bb = bbs[i];
536 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
538 phi = gsi_stmt (si);
539 if (vect_print_dump_info (REPORT_DETAILS))
541 fprintf (vect_dump, "init: phi relevant? ");
542 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
545 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
546 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
548 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
550 stmt = gsi_stmt (si);
551 if (vect_print_dump_info (REPORT_DETAILS))
553 fprintf (vect_dump, "init: stmt relevant? ");
554 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
557 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
558 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
562 /* 2. Process_worklist */
563 while (VEC_length (gimple, worklist) > 0)
565 use_operand_p use_p;
566 ssa_op_iter iter;
568 stmt = VEC_pop (gimple, worklist);
569 if (vect_print_dump_info (REPORT_DETAILS))
571 fprintf (vect_dump, "worklist: examine stmt: ");
572 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
575 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
576 (DEF_STMT) as relevant/irrelevant and live/dead according to the
577 liveness and relevance properties of STMT. */
578 stmt_vinfo = vinfo_for_stmt (stmt);
579 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
580 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
582 /* Generally, the liveness and relevance properties of STMT are
583 propagated as is to the DEF_STMTs of its USEs:
584 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
585 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
587 One exception is when STMT has been identified as defining a reduction
588 variable; in this case we set the liveness/relevance as follows:
589 live_p = false
590 relevant = vect_used_by_reduction
591 This is because we distinguish between two kinds of relevant stmts -
592 those that are used by a reduction computation, and those that are
593 (also) used by a regular computation. This allows us later on to
594 identify stmts that are used solely by a reduction, and therefore the
595 order of the results that they produce does not have to be kept. */
597 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
598 tmp_relevant = relevant;
599 switch (def_type)
601 case vect_reduction_def:
602 switch (tmp_relevant)
604 case vect_unused_in_scope:
605 relevant = vect_used_by_reduction;
606 break;
608 case vect_used_by_reduction:
609 if (gimple_code (stmt) == GIMPLE_PHI)
610 break;
611 /* fall through */
613 default:
614 if (vect_print_dump_info (REPORT_DETAILS))
615 fprintf (vect_dump, "unsupported use of reduction.");
617 VEC_free (gimple, heap, worklist);
618 return false;
621 live_p = false;
622 break;
624 case vect_nested_cycle:
625 if (tmp_relevant != vect_unused_in_scope
626 && tmp_relevant != vect_used_in_outer_by_reduction
627 && tmp_relevant != vect_used_in_outer)
629 if (vect_print_dump_info (REPORT_DETAILS))
630 fprintf (vect_dump, "unsupported use of nested cycle.");
632 VEC_free (gimple, heap, worklist);
633 return false;
636 live_p = false;
637 break;
639 case vect_double_reduction_def:
640 if (tmp_relevant != vect_unused_in_scope
641 && tmp_relevant != vect_used_by_reduction)
643 if (vect_print_dump_info (REPORT_DETAILS))
644 fprintf (vect_dump, "unsupported use of double reduction.");
646 VEC_free (gimple, heap, worklist);
647 return false;
650 live_p = false;
651 break;
653 default:
654 break;
657 if (is_pattern_stmt_p (stmt_vinfo))
659 /* Pattern statements are not inserted into the code, so
660 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
661 have to scan the RHS or function arguments instead. */
662 if (is_gimple_assign (stmt))
664 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
665 tree op = gimple_assign_rhs1 (stmt);
667 i = 1;
668 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
670 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
671 live_p, relevant, &worklist, false)
672 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
673 live_p, relevant, &worklist, false))
675 VEC_free (gimple, heap, worklist);
676 return false;
678 i = 2;
680 for (; i < gimple_num_ops (stmt); i++)
682 op = gimple_op (stmt, i);
683 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
684 &worklist, false))
686 VEC_free (gimple, heap, worklist);
687 return false;
691 else if (is_gimple_call (stmt))
693 for (i = 0; i < gimple_call_num_args (stmt); i++)
695 tree arg = gimple_call_arg (stmt, i);
696 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
697 &worklist, false))
699 VEC_free (gimple, heap, worklist);
700 return false;
705 else
706 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
708 tree op = USE_FROM_PTR (use_p);
709 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
710 &worklist, false))
712 VEC_free (gimple, heap, worklist);
713 return false;
717 if (STMT_VINFO_GATHER_P (stmt_vinfo))
719 tree off;
720 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
721 gcc_assert (decl);
722 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
723 &worklist, true))
725 VEC_free (gimple, heap, worklist);
726 return false;
729 } /* while worklist */
731 VEC_free (gimple, heap, worklist);
732 return true;
736 /* Get cost by calling cost target builtin. */
738 static inline
739 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
741 tree dummy_type = NULL;
742 int dummy = 0;
744 return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
745 dummy_type, dummy);
749 /* Get cost for STMT. */
752 cost_for_stmt (gimple stmt)
754 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
756 switch (STMT_VINFO_TYPE (stmt_info))
758 case load_vec_info_type:
759 return vect_get_stmt_cost (scalar_load);
760 case store_vec_info_type:
761 return vect_get_stmt_cost (scalar_store);
762 case op_vec_info_type:
763 case condition_vec_info_type:
764 case assignment_vec_info_type:
765 case reduc_vec_info_type:
766 case induc_vec_info_type:
767 case type_promotion_vec_info_type:
768 case type_demotion_vec_info_type:
769 case type_conversion_vec_info_type:
770 case call_vec_info_type:
771 return vect_get_stmt_cost (scalar_stmt);
772 case undef_vec_info_type:
773 default:
774 gcc_unreachable ();
778 /* Function vect_model_simple_cost.
780 Models cost for simple operations, i.e. those that only emit ncopies of a
781 single op. Right now, this does not account for multiple insns that could
782 be generated for the single vector op. We will handle that shortly. */
784 void
785 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
786 enum vect_def_type *dt, slp_tree slp_node)
788 int i;
789 int inside_cost = 0, outside_cost = 0;
791 /* The SLP costs were already calculated during SLP tree build. */
792 if (PURE_SLP_STMT (stmt_info))
793 return;
795 inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
797 /* FORNOW: Assuming maximum 2 args per stmt. */
798 for (i = 0; i < 2; i++)
800 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
801 outside_cost += vect_get_stmt_cost (vector_stmt);
804 if (vect_print_dump_info (REPORT_COST))
805 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
806 "outside_cost = %d .", inside_cost, outside_cost);
808 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
809 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
810 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
814 /* Model cost for type demotion and promotion operations. PWR is normally
815 zero for single-step promotions and demotions. It will be one if
816 two-step promotion/demotion is required, and so on. Each additional
817 step doubles the number of instructions required. */
819 static void
820 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
821 enum vect_def_type *dt, int pwr)
823 int i, tmp;
824 int inside_cost = 0, outside_cost = 0, single_stmt_cost;
826 /* The SLP costs were already calculated during SLP tree build. */
827 if (PURE_SLP_STMT (stmt_info))
828 return;
830 single_stmt_cost = vect_get_stmt_cost (vec_promote_demote);
831 for (i = 0; i < pwr + 1; i++)
833 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
834 (i + 1) : i;
835 inside_cost += vect_pow2 (tmp) * single_stmt_cost;
838 /* FORNOW: Assuming maximum 2 args per stmt. */
839 for (i = 0; i < 2; i++)
841 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
842 outside_cost += vect_get_stmt_cost (vector_stmt);
845 if (vect_print_dump_info (REPORT_COST))
846 fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
847 "outside_cost = %d .", inside_cost, outside_cost);
849 /* Set the costs in STMT_INFO. */
850 stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost);
851 stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
854 /* Function vect_cost_strided_group_size
856 For strided load or store, return the group_size only if it is the first
857 load or store of a group, else return 1. This ensures that group size is
858 only returned once per group. */
860 static int
861 vect_cost_strided_group_size (stmt_vec_info stmt_info)
863 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
865 if (first_stmt == STMT_VINFO_STMT (stmt_info))
866 return GROUP_SIZE (stmt_info);
868 return 1;
872 /* Function vect_model_store_cost
874 Models cost for stores. In the case of strided accesses, one access
875 has the overhead of the strided access attributed to it. */
877 void
878 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
879 bool store_lanes_p, enum vect_def_type dt,
880 slp_tree slp_node)
882 int group_size;
883 unsigned int inside_cost = 0, outside_cost = 0;
884 struct data_reference *first_dr;
885 gimple first_stmt;
887 /* The SLP costs were already calculated during SLP tree build. */
888 if (PURE_SLP_STMT (stmt_info))
889 return;
891 if (dt == vect_constant_def || dt == vect_external_def)
892 outside_cost = vect_get_stmt_cost (scalar_to_vec);
894 /* Strided access? */
895 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
897 if (slp_node)
899 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
900 group_size = 1;
902 else
904 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
905 group_size = vect_cost_strided_group_size (stmt_info);
908 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
910 /* Not a strided access. */
911 else
913 group_size = 1;
914 first_dr = STMT_VINFO_DATA_REF (stmt_info);
917 /* We assume that the cost of a single store-lanes instruction is
918 equivalent to the cost of GROUP_SIZE separate stores. If a strided
919 access is instead being provided by a permute-and-store operation,
920 include the cost of the permutes. */
921 if (!store_lanes_p && group_size > 1)
923 /* Uses a high and low interleave operation for each needed permute. */
924 inside_cost = ncopies * exact_log2(group_size) * group_size
925 * vect_get_stmt_cost (vec_perm);
927 if (vect_print_dump_info (REPORT_COST))
928 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
929 group_size);
932 /* Costs of the stores. */
933 vect_get_store_cost (first_dr, ncopies, &inside_cost);
935 if (vect_print_dump_info (REPORT_COST))
936 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
937 "outside_cost = %d .", inside_cost, outside_cost);
939 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
940 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
941 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
945 /* Calculate cost of DR's memory access. */
946 void
947 vect_get_store_cost (struct data_reference *dr, int ncopies,
948 unsigned int *inside_cost)
950 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
952 switch (alignment_support_scheme)
954 case dr_aligned:
956 *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
958 if (vect_print_dump_info (REPORT_COST))
959 fprintf (vect_dump, "vect_model_store_cost: aligned.");
961 break;
964 case dr_unaligned_supported:
966 gimple stmt = DR_STMT (dr);
967 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
968 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
970 /* Here, we assign an additional cost for the unaligned store. */
971 *inside_cost += ncopies
972 * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
973 vectype, DR_MISALIGNMENT (dr));
975 if (vect_print_dump_info (REPORT_COST))
976 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
977 "hardware.");
979 break;
982 default:
983 gcc_unreachable ();
988 /* Function vect_model_load_cost
990 Models cost for loads. In the case of strided accesses, the last access
991 has the overhead of the strided access attributed to it. Since unaligned
992 accesses are supported for loads, we also account for the costs of the
993 access scheme chosen. */
995 void
996 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
997 slp_tree slp_node)
999 int group_size;
1000 gimple first_stmt;
1001 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1002 unsigned int inside_cost = 0, outside_cost = 0;
1004 /* The SLP costs were already calculated during SLP tree build. */
1005 if (PURE_SLP_STMT (stmt_info))
1006 return;
1008 /* Strided accesses? */
1009 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1010 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
1012 group_size = vect_cost_strided_group_size (stmt_info);
1013 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1015 /* Not a strided access. */
1016 else
1018 group_size = 1;
1019 first_dr = dr;
1022 /* We assume that the cost of a single load-lanes instruction is
1023 equivalent to the cost of GROUP_SIZE separate loads. If a strided
1024 access is instead being provided by a load-and-permute operation,
1025 include the cost of the permutes. */
1026 if (!load_lanes_p && group_size > 1)
1028 /* Uses even and odd extract operations for each needed permute. */
1029 inside_cost = ncopies * exact_log2(group_size) * group_size
1030 * vect_get_stmt_cost (vec_perm);
1032 if (vect_print_dump_info (REPORT_COST))
1033 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
1034 group_size);
1037 /* The loads themselves. */
1038 vect_get_load_cost (first_dr, ncopies,
1039 ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
1040 || slp_node),
1041 &inside_cost, &outside_cost);
1043 if (vect_print_dump_info (REPORT_COST))
1044 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
1045 "outside_cost = %d .", inside_cost, outside_cost);
1047 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
1048 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
1049 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
1053 /* Calculate cost of DR's memory access. */
1054 void
1055 vect_get_load_cost (struct data_reference *dr, int ncopies,
1056 bool add_realign_cost, unsigned int *inside_cost,
1057 unsigned int *outside_cost)
1059 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1061 switch (alignment_support_scheme)
1063 case dr_aligned:
1065 *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
1067 if (vect_print_dump_info (REPORT_COST))
1068 fprintf (vect_dump, "vect_model_load_cost: aligned.");
1070 break;
1072 case dr_unaligned_supported:
1074 gimple stmt = DR_STMT (dr);
1075 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1076 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1078 /* Here, we assign an additional cost for the unaligned load. */
1079 *inside_cost += ncopies
1080 * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
1081 vectype, DR_MISALIGNMENT (dr));
1082 if (vect_print_dump_info (REPORT_COST))
1083 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1084 "hardware.");
1086 break;
1088 case dr_explicit_realign:
1090 *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
1091 + vect_get_stmt_cost (vec_perm));
1093 /* FIXME: If the misalignment remains fixed across the iterations of
1094 the containing loop, the following cost should be added to the
1095 outside costs. */
1096 if (targetm.vectorize.builtin_mask_for_load)
1097 *inside_cost += vect_get_stmt_cost (vector_stmt);
1099 if (vect_print_dump_info (REPORT_COST))
1100 fprintf (vect_dump, "vect_model_load_cost: explicit realign");
1102 break;
1104 case dr_explicit_realign_optimized:
1106 if (vect_print_dump_info (REPORT_COST))
1107 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1108 "pipelined.");
1110 /* Unaligned software pipeline has a load of an address, an initial
1111 load, and possibly a mask operation to "prime" the loop. However,
1112 if this is an access in a group of loads, which provide strided
1113 access, then the above cost should only be considered for one
1114 access in the group. Inside the loop, there is a load op
1115 and a realignment op. */
1117 if (add_realign_cost)
1119 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
1120 if (targetm.vectorize.builtin_mask_for_load)
1121 *outside_cost += vect_get_stmt_cost (vector_stmt);
1124 *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
1125 + vect_get_stmt_cost (vec_perm));
1127 if (vect_print_dump_info (REPORT_COST))
1128 fprintf (vect_dump,
1129 "vect_model_load_cost: explicit realign optimized");
1131 break;
1134 default:
1135 gcc_unreachable ();
1140 /* Function vect_init_vector.
1142 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1143 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
1144 is not NULL. Otherwise, place the initialization at the loop preheader.
1145 Return the DEF of INIT_STMT.
1146 It will be used in the vectorization of STMT. */
1148 tree
1149 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
1150 gimple_stmt_iterator *gsi)
1152 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1153 tree new_var;
1154 gimple init_stmt;
1155 tree vec_oprnd;
1156 edge pe;
1157 tree new_temp;
1158 basic_block new_bb;
1160 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1161 add_referenced_var (new_var);
1162 init_stmt = gimple_build_assign (new_var, vector_var);
1163 new_temp = make_ssa_name (new_var, init_stmt);
1164 gimple_assign_set_lhs (init_stmt, new_temp);
1166 if (gsi)
1167 vect_finish_stmt_generation (stmt, init_stmt, gsi);
1168 else
1170 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1172 if (loop_vinfo)
1174 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1176 if (nested_in_vect_loop_p (loop, stmt))
1177 loop = loop->inner;
1179 pe = loop_preheader_edge (loop);
1180 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
1181 gcc_assert (!new_bb);
1183 else
1185 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1186 basic_block bb;
1187 gimple_stmt_iterator gsi_bb_start;
1189 gcc_assert (bb_vinfo);
1190 bb = BB_VINFO_BB (bb_vinfo);
1191 gsi_bb_start = gsi_after_labels (bb);
1192 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
1196 if (vect_print_dump_info (REPORT_DETAILS))
1198 fprintf (vect_dump, "created new init_stmt: ");
1199 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1202 vec_oprnd = gimple_assign_lhs (init_stmt);
1203 return vec_oprnd;
1207 /* Function vect_get_vec_def_for_operand.
1209 OP is an operand in STMT. This function returns a (vector) def that will be
1210 used in the vectorized stmt for STMT.
1212 In the case that OP is an SSA_NAME which is defined in the loop, then
1213 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1215 In case OP is an invariant or constant, a new stmt that creates a vector def
1216 needs to be introduced. */
1218 tree
1219 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1221 tree vec_oprnd;
1222 gimple vec_stmt;
1223 gimple def_stmt;
1224 stmt_vec_info def_stmt_info = NULL;
1225 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1226 unsigned int nunits;
1227 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1228 tree vec_inv;
1229 tree vec_cst;
1230 tree t = NULL_TREE;
1231 tree def;
1232 int i;
1233 enum vect_def_type dt;
1234 bool is_simple_use;
1235 tree vector_type;
1237 if (vect_print_dump_info (REPORT_DETAILS))
1239 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1240 print_generic_expr (vect_dump, op, TDF_SLIM);
1243 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1244 &def_stmt, &def, &dt);
1245 gcc_assert (is_simple_use);
1246 if (vect_print_dump_info (REPORT_DETAILS))
1248 if (def)
1250 fprintf (vect_dump, "def = ");
1251 print_generic_expr (vect_dump, def, TDF_SLIM);
1253 if (def_stmt)
1255 fprintf (vect_dump, " def_stmt = ");
1256 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1260 switch (dt)
1262 /* Case 1: operand is a constant. */
1263 case vect_constant_def:
1265 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1266 gcc_assert (vector_type);
1267 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1269 if (scalar_def)
1270 *scalar_def = op;
1272 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1273 if (vect_print_dump_info (REPORT_DETAILS))
1274 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1276 vec_cst = build_vector_from_val (vector_type,
1277 fold_convert (TREE_TYPE (vector_type),
1278 op));
1279 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1282 /* Case 2: operand is defined outside the loop - loop invariant. */
1283 case vect_external_def:
1285 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1286 gcc_assert (vector_type);
1287 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1289 if (scalar_def)
1290 *scalar_def = def;
1292 /* Create 'vec_inv = {inv,inv,..,inv}' */
1293 if (vect_print_dump_info (REPORT_DETAILS))
1294 fprintf (vect_dump, "Create vector_inv.");
1296 for (i = nunits - 1; i >= 0; --i)
1298 t = tree_cons (NULL_TREE, def, t);
1301 /* FIXME: use build_constructor directly. */
1302 vec_inv = build_constructor_from_list (vector_type, t);
1303 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1306 /* Case 3: operand is defined inside the loop. */
1307 case vect_internal_def:
1309 if (scalar_def)
1310 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1312 /* Get the def from the vectorized stmt. */
1313 def_stmt_info = vinfo_for_stmt (def_stmt);
1315 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1316 /* Get vectorized pattern statement. */
1317 if (!vec_stmt
1318 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1319 && !STMT_VINFO_RELEVANT (def_stmt_info))
1320 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1321 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1322 gcc_assert (vec_stmt);
1323 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1324 vec_oprnd = PHI_RESULT (vec_stmt);
1325 else if (is_gimple_call (vec_stmt))
1326 vec_oprnd = gimple_call_lhs (vec_stmt);
1327 else
1328 vec_oprnd = gimple_assign_lhs (vec_stmt);
1329 return vec_oprnd;
1332 /* Case 4: operand is defined by a loop header phi - reduction */
1333 case vect_reduction_def:
1334 case vect_double_reduction_def:
1335 case vect_nested_cycle:
1337 struct loop *loop;
1339 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1340 loop = (gimple_bb (def_stmt))->loop_father;
1342 /* Get the def before the loop */
1343 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1344 return get_initial_def_for_reduction (stmt, op, scalar_def);
1347 /* Case 5: operand is defined by loop-header phi - induction. */
1348 case vect_induction_def:
1350 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1352 /* Get the def from the vectorized stmt. */
1353 def_stmt_info = vinfo_for_stmt (def_stmt);
1354 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1355 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1356 vec_oprnd = PHI_RESULT (vec_stmt);
1357 else
1358 vec_oprnd = gimple_get_lhs (vec_stmt);
1359 return vec_oprnd;
1362 default:
1363 gcc_unreachable ();
1368 /* Function vect_get_vec_def_for_stmt_copy
1370 Return a vector-def for an operand. This function is used when the
1371 vectorized stmt to be created (by the caller to this function) is a "copy"
1372 created in case the vectorized result cannot fit in one vector, and several
1373 copies of the vector-stmt are required. In this case the vector-def is
1374 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1375 of the stmt that defines VEC_OPRND.
1376 DT is the type of the vector def VEC_OPRND.
1378 Context:
1379 In case the vectorization factor (VF) is bigger than the number
1380 of elements that can fit in a vectype (nunits), we have to generate
1381 more than one vector stmt to vectorize the scalar stmt. This situation
1382 arises when there are multiple data-types operated upon in the loop; the
1383 smallest data-type determines the VF, and as a result, when vectorizing
1384 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1385 vector stmt (each computing a vector of 'nunits' results, and together
1386 computing 'VF' results in each iteration). This function is called when
1387 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1388 which VF=16 and nunits=4, so the number of copies required is 4):
1390 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1392 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1393 VS1.1: vx.1 = memref1 VS1.2
1394 VS1.2: vx.2 = memref2 VS1.3
1395 VS1.3: vx.3 = memref3
1397 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1398 VSnew.1: vz1 = vx.1 + ... VSnew.2
1399 VSnew.2: vz2 = vx.2 + ... VSnew.3
1400 VSnew.3: vz3 = vx.3 + ...
1402 The vectorization of S1 is explained in vectorizable_load.
1403 The vectorization of S2:
1404 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1405 the function 'vect_get_vec_def_for_operand' is called to
1406 get the relevant vector-def for each operand of S2. For operand x it
1407 returns the vector-def 'vx.0'.
1409 To create the remaining copies of the vector-stmt (VSnew.j), this
1410 function is called to get the relevant vector-def for each operand. It is
1411 obtained from the respective VS1.j stmt, which is recorded in the
1412 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1414 For example, to obtain the vector-def 'vx.1' in order to create the
1415 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1416 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1417 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1418 and return its def ('vx.1').
1419 Overall, to create the above sequence this function will be called 3 times:
1420 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1421 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1422 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1424 tree
1425 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1427 gimple vec_stmt_for_operand;
1428 stmt_vec_info def_stmt_info;
1430 /* Do nothing; can reuse same def. */
1431 if (dt == vect_external_def || dt == vect_constant_def )
1432 return vec_oprnd;
1434 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1435 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1436 gcc_assert (def_stmt_info);
1437 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1438 gcc_assert (vec_stmt_for_operand);
1439 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1440 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1441 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1442 else
1443 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1444 return vec_oprnd;
1448 /* Get vectorized definitions for the operands to create a copy of an original
1449 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1451 static void
1452 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1453 VEC(tree,heap) **vec_oprnds0,
1454 VEC(tree,heap) **vec_oprnds1)
1456 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1458 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1459 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1461 if (vec_oprnds1 && *vec_oprnds1)
1463 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1464 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1465 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1470 /* Get vectorized definitions for OP0 and OP1.
1471 REDUC_INDEX is the index of reduction operand in case of reduction,
1472 and -1 otherwise. */
1474 void
1475 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1476 VEC (tree, heap) **vec_oprnds0,
1477 VEC (tree, heap) **vec_oprnds1,
1478 slp_tree slp_node, int reduc_index)
1480 if (slp_node)
1482 int nops = (op1 == NULL_TREE) ? 1 : 2;
1483 VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
1484 VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
1486 VEC_quick_push (tree, ops, op0);
1487 if (op1)
1488 VEC_quick_push (tree, ops, op1);
1490 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1492 *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1493 if (op1)
1494 *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
1496 VEC_free (tree, heap, ops);
1497 VEC_free (slp_void_p, heap, vec_defs);
1499 else
1501 tree vec_oprnd;
1503 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1504 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1505 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1507 if (op1)
1509 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1510 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1511 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1517 /* Function vect_finish_stmt_generation.
1519 Insert a new stmt. */
1521 void
1522 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1523 gimple_stmt_iterator *gsi)
1525 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1526 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1527 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1529 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1531 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1533 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1534 bb_vinfo));
1536 if (vect_print_dump_info (REPORT_DETAILS))
1538 fprintf (vect_dump, "add new stmt: ");
1539 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1542 gimple_set_location (vec_stmt, gimple_location (stmt));
1545 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1546 a function declaration if the target has a vectorized version
1547 of the function, or NULL_TREE if the function cannot be vectorized. */
1549 tree
1550 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1552 tree fndecl = gimple_call_fndecl (call);
1554 /* We only handle functions that do not read or clobber memory -- i.e.
1555 const or novops ones. */
1556 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1557 return NULL_TREE;
1559 if (!fndecl
1560 || TREE_CODE (fndecl) != FUNCTION_DECL
1561 || !DECL_BUILT_IN (fndecl))
1562 return NULL_TREE;
1564 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1565 vectype_in);
1568 /* Function vectorizable_call.
1570 Check if STMT performs a function call that can be vectorized.
1571 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1572 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1573 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1575 static bool
1576 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1577 slp_tree slp_node)
1579 tree vec_dest;
1580 tree scalar_dest;
1581 tree op, type;
1582 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1583 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1584 tree vectype_out, vectype_in;
1585 int nunits_in;
1586 int nunits_out;
1587 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1588 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1589 tree fndecl, new_temp, def, rhs_type;
1590 gimple def_stmt;
1591 enum vect_def_type dt[3]
1592 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1593 gimple new_stmt = NULL;
1594 int ncopies, j;
1595 VEC(tree, heap) *vargs = NULL;
1596 enum { NARROW, NONE, WIDEN } modifier;
1597 size_t i, nargs;
1598 tree lhs;
1600 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1601 return false;
1603 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1604 return false;
1606 /* Is STMT a vectorizable call? */
1607 if (!is_gimple_call (stmt))
1608 return false;
1610 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1611 return false;
1613 if (stmt_can_throw_internal (stmt))
1614 return false;
1616 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1618 /* Process function arguments. */
1619 rhs_type = NULL_TREE;
1620 vectype_in = NULL_TREE;
1621 nargs = gimple_call_num_args (stmt);
1623 /* Bail out if the function has more than three arguments; we do not have
1624 interesting builtin functions to vectorize with more than two arguments
1625 except for fma. No arguments is also not good. */
1626 if (nargs == 0 || nargs > 3)
1627 return false;
1629 for (i = 0; i < nargs; i++)
1631 tree opvectype;
1633 op = gimple_call_arg (stmt, i);
1635 /* We can only handle calls with arguments of the same type. */
1636 if (rhs_type
1637 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1639 if (vect_print_dump_info (REPORT_DETAILS))
1640 fprintf (vect_dump, "argument types differ.");
1641 return false;
1643 if (!rhs_type)
1644 rhs_type = TREE_TYPE (op);
1646 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1647 &def_stmt, &def, &dt[i], &opvectype))
1649 if (vect_print_dump_info (REPORT_DETAILS))
1650 fprintf (vect_dump, "use not simple.");
1651 return false;
1654 if (!vectype_in)
1655 vectype_in = opvectype;
1656 else if (opvectype
1657 && opvectype != vectype_in)
1659 if (vect_print_dump_info (REPORT_DETAILS))
1660 fprintf (vect_dump, "argument vector types differ.");
1661 return false;
1664 /* If all arguments are external or constant defs use a vector type with
1665 the same size as the output vector type. */
1666 if (!vectype_in)
1667 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1668 if (vec_stmt)
1669 gcc_assert (vectype_in);
1670 if (!vectype_in)
1672 if (vect_print_dump_info (REPORT_DETAILS))
1674 fprintf (vect_dump, "no vectype for scalar type ");
1675 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1678 return false;
1681 /* FORNOW */
1682 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1683 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1684 if (nunits_in == nunits_out / 2)
1685 modifier = NARROW;
1686 else if (nunits_out == nunits_in)
1687 modifier = NONE;
1688 else if (nunits_out == nunits_in / 2)
1689 modifier = WIDEN;
1690 else
1691 return false;
1693 /* For now, we only vectorize functions if a target specific builtin
1694 is available. TODO -- in some cases, it might be profitable to
1695 insert the calls for pieces of the vector, in order to be able
1696 to vectorize other operations in the loop. */
1697 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1698 if (fndecl == NULL_TREE)
1700 if (vect_print_dump_info (REPORT_DETAILS))
1701 fprintf (vect_dump, "function is not vectorizable.");
1703 return false;
1706 gcc_assert (!gimple_vuse (stmt));
1708 if (slp_node || PURE_SLP_STMT (stmt_info))
1709 ncopies = 1;
1710 else if (modifier == NARROW)
1711 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1712 else
1713 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1715 /* Sanity check: make sure that at least one copy of the vectorized stmt
1716 needs to be generated. */
1717 gcc_assert (ncopies >= 1);
1719 if (!vec_stmt) /* transformation not required. */
1721 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1722 if (vect_print_dump_info (REPORT_DETAILS))
1723 fprintf (vect_dump, "=== vectorizable_call ===");
1724 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1725 return true;
1728 /** Transform. **/
1730 if (vect_print_dump_info (REPORT_DETAILS))
1731 fprintf (vect_dump, "transform call.");
1733 /* Handle def. */
1734 scalar_dest = gimple_call_lhs (stmt);
1735 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1737 prev_stmt_info = NULL;
1738 switch (modifier)
1740 case NONE:
1741 for (j = 0; j < ncopies; ++j)
1743 /* Build argument list for the vectorized call. */
1744 if (j == 0)
1745 vargs = VEC_alloc (tree, heap, nargs);
1746 else
1747 VEC_truncate (tree, vargs, 0);
1749 if (slp_node)
1751 VEC (slp_void_p, heap) *vec_defs
1752 = VEC_alloc (slp_void_p, heap, nargs);
1753 VEC (tree, heap) *vec_oprnds0;
1755 for (i = 0; i < nargs; i++)
1756 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1757 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1758 vec_oprnds0
1759 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1761 /* Arguments are ready. Create the new vector stmt. */
1762 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
1764 size_t k;
1765 for (k = 0; k < nargs; k++)
1767 VEC (tree, heap) *vec_oprndsk
1768 = (VEC (tree, heap) *)
1769 VEC_index (slp_void_p, vec_defs, k);
1770 VEC_replace (tree, vargs, k,
1771 VEC_index (tree, vec_oprndsk, i));
1773 new_stmt = gimple_build_call_vec (fndecl, vargs);
1774 new_temp = make_ssa_name (vec_dest, new_stmt);
1775 gimple_call_set_lhs (new_stmt, new_temp);
1776 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1777 mark_symbols_for_renaming (new_stmt);
1778 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1779 new_stmt);
1782 for (i = 0; i < nargs; i++)
1784 VEC (tree, heap) *vec_oprndsi
1785 = (VEC (tree, heap) *)
1786 VEC_index (slp_void_p, vec_defs, i);
1787 VEC_free (tree, heap, vec_oprndsi);
1789 VEC_free (slp_void_p, heap, vec_defs);
1790 continue;
1793 for (i = 0; i < nargs; i++)
1795 op = gimple_call_arg (stmt, i);
1796 if (j == 0)
1797 vec_oprnd0
1798 = vect_get_vec_def_for_operand (op, stmt, NULL);
1799 else
1801 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1802 vec_oprnd0
1803 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1806 VEC_quick_push (tree, vargs, vec_oprnd0);
1809 new_stmt = gimple_build_call_vec (fndecl, vargs);
1810 new_temp = make_ssa_name (vec_dest, new_stmt);
1811 gimple_call_set_lhs (new_stmt, new_temp);
1813 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1814 mark_symbols_for_renaming (new_stmt);
1816 if (j == 0)
1817 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1818 else
1819 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1821 prev_stmt_info = vinfo_for_stmt (new_stmt);
1824 break;
1826 case NARROW:
1827 for (j = 0; j < ncopies; ++j)
1829 /* Build argument list for the vectorized call. */
1830 if (j == 0)
1831 vargs = VEC_alloc (tree, heap, nargs * 2);
1832 else
1833 VEC_truncate (tree, vargs, 0);
1835 if (slp_node)
1837 VEC (slp_void_p, heap) *vec_defs
1838 = VEC_alloc (slp_void_p, heap, nargs);
1839 VEC (tree, heap) *vec_oprnds0;
1841 for (i = 0; i < nargs; i++)
1842 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1843 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1844 vec_oprnds0
1845 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1847 /* Arguments are ready. Create the new vector stmt. */
1848 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
1849 i += 2)
1851 size_t k;
1852 VEC_truncate (tree, vargs, 0);
1853 for (k = 0; k < nargs; k++)
1855 VEC (tree, heap) *vec_oprndsk
1856 = (VEC (tree, heap) *)
1857 VEC_index (slp_void_p, vec_defs, k);
1858 VEC_quick_push (tree, vargs,
1859 VEC_index (tree, vec_oprndsk, i));
1860 VEC_quick_push (tree, vargs,
1861 VEC_index (tree, vec_oprndsk, i + 1));
1863 new_stmt = gimple_build_call_vec (fndecl, vargs);
1864 new_temp = make_ssa_name (vec_dest, new_stmt);
1865 gimple_call_set_lhs (new_stmt, new_temp);
1866 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1867 mark_symbols_for_renaming (new_stmt);
1868 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1869 new_stmt);
1872 for (i = 0; i < nargs; i++)
1874 VEC (tree, heap) *vec_oprndsi
1875 = (VEC (tree, heap) *)
1876 VEC_index (slp_void_p, vec_defs, i);
1877 VEC_free (tree, heap, vec_oprndsi);
1879 VEC_free (slp_void_p, heap, vec_defs);
1880 continue;
1883 for (i = 0; i < nargs; i++)
1885 op = gimple_call_arg (stmt, i);
1886 if (j == 0)
1888 vec_oprnd0
1889 = vect_get_vec_def_for_operand (op, stmt, NULL);
1890 vec_oprnd1
1891 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1893 else
1895 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1896 vec_oprnd0
1897 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1898 vec_oprnd1
1899 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1902 VEC_quick_push (tree, vargs, vec_oprnd0);
1903 VEC_quick_push (tree, vargs, vec_oprnd1);
1906 new_stmt = gimple_build_call_vec (fndecl, vargs);
1907 new_temp = make_ssa_name (vec_dest, new_stmt);
1908 gimple_call_set_lhs (new_stmt, new_temp);
1910 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1911 mark_symbols_for_renaming (new_stmt);
1913 if (j == 0)
1914 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1915 else
1916 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1918 prev_stmt_info = vinfo_for_stmt (new_stmt);
1921 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1923 break;
1925 case WIDEN:
1926 /* No current target implements this case. */
1927 return false;
1930 VEC_free (tree, heap, vargs);
1932 /* Update the exception handling table with the vector stmt if necessary. */
1933 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1934 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1936 /* The call in STMT might prevent it from being removed in DCE.
1937 However, we cannot remove it here, due to the way the SSA name
1938 it defines is mapped to the new definition. So just replace
1939 the rhs of the statement with something harmless. */
1941 if (slp_node)
1942 return true;
1944 type = TREE_TYPE (scalar_dest);
1945 if (is_pattern_stmt_p (stmt_info))
1946 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
1947 else
1948 lhs = gimple_call_lhs (stmt);
1949 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
1950 set_vinfo_for_stmt (new_stmt, stmt_info);
1951 set_vinfo_for_stmt (stmt, NULL);
1952 STMT_VINFO_STMT (stmt_info) = new_stmt;
1953 gsi_replace (gsi, new_stmt, false);
1954 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1956 return true;
1960 /* Function vect_gen_widened_results_half
1962 Create a vector stmt whose code, number of operands, and result
1963 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
1964 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
1966 needs to be created (DECL is a function-decl of a target-builtin).
1967 STMT is the original scalar stmt that we are vectorizing. */
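/* As a sketch of the generic path: widening one V8HI operand yields two
V4SI halves, the low half with a code such as VEC_UNPACK_LO_EXPR and the
high half with the matching _HI code; this function emits exactly one such
half. When CODE is CALL_EXPR, DECL instead names the target builtin that
computes the half. */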
1969 static gimple
1970 vect_gen_widened_results_half (enum tree_code code,
1971 tree decl,
1972 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1973 tree vec_dest, gimple_stmt_iterator *gsi,
1974 gimple stmt)
1976 gimple new_stmt;
1977 tree new_temp;
1979 /* Generate half of the widened result: */
1980 if (code == CALL_EXPR)
1982 /* Target specific support */
1983 if (op_type == binary_op)
1984 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1985 else
1986 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1987 new_temp = make_ssa_name (vec_dest, new_stmt);
1988 gimple_call_set_lhs (new_stmt, new_temp);
1990 else
1992 /* Generic support */
1993 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1994 if (op_type != binary_op)
1995 vec_oprnd1 = NULL;
1996 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1997 vec_oprnd1);
1998 new_temp = make_ssa_name (vec_dest, new_stmt);
1999 gimple_assign_set_lhs (new_stmt, new_temp);
2001 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2003 return new_stmt;
2007 /* Get vectorized definitions for loop-based vectorization. For the first
2008 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2009 the scalar operand), and for the rest we get a copy with
2010 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2011 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2012 The vectors are collected into VEC_OPRNDS. */
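/* For example, for a two-step narrowing (one intermediate type) the caller
passes vect_pow2 (multi_step_cvt) - 1 == 1, so one recursive call is made
and four vector defs end up in VEC_OPRNDS - enough to be packed pairwise
into two intermediate vectors and then into the single final vector. */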
2014 static void
2015 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2016 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2018 tree vec_oprnd;
2020 /* Get first vector operand. */
2021 /* All the vector operands except the very first one (that is, the scalar
2022 oprnd) are stmt copies. */
2023 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2024 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2025 else
2026 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2028 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2030 /* Get second vector operand. */
2031 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2032 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2034 *oprnd = vec_oprnd;
2036 /* For conversion in multiple steps, continue to get operands
2037 recursively. */
2038 if (multi_step_cvt)
2039 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2043 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2044 For multi-step conversions store the resulting vectors and call the function
2045 recursively. */
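/* For example, demoting int to char with 128-bit vectors is typically a
two-step sequence: four V4SI operands are packed pairwise into two V8HI
vectors, and the recursive call packs those with VEC_PACK_TRUNC_EXPR into
one V16QI result. */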
2047 static void
2048 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2049 int multi_step_cvt, gimple stmt,
2050 VEC (tree, heap) *vec_dsts,
2051 gimple_stmt_iterator *gsi,
2052 slp_tree slp_node, enum tree_code code,
2053 stmt_vec_info *prev_stmt_info)
2055 unsigned int i;
2056 tree vop0, vop1, new_tmp, vec_dest;
2057 gimple new_stmt;
2058 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2060 vec_dest = VEC_pop (tree, vec_dsts);
2062 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2064 /* Create demotion operation. */
2065 vop0 = VEC_index (tree, *vec_oprnds, i);
2066 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2067 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2068 new_tmp = make_ssa_name (vec_dest, new_stmt);
2069 gimple_assign_set_lhs (new_stmt, new_tmp);
2070 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2072 if (multi_step_cvt)
2073 /* Store the resulting vector for next recursive call. */
2074 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2075 else
2077 /* This is the last step of the conversion sequence. Store the
2078 vectors in SLP_NODE or in the vector info of the scalar statement
2079 (or in the STMT_VINFO_RELATED_STMT chain). */
2080 if (slp_node)
2081 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2082 else
2084 if (!*prev_stmt_info)
2085 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2086 else
2087 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2089 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2094 /* For multi-step demotion operations we first generate demotion operations
2095 from the source type to the intermediate types, and then combine the
2096 results (stored in VEC_OPRNDS) in a demotion operation to the destination
2097 type. */
2098 if (multi_step_cvt)
2100 /* At each level of recursion we have half of the operands we had at the
2101 previous level. */
2102 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2103 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2104 stmt, vec_dsts, gsi, slp_node,
2105 VEC_PACK_TRUNC_EXPR,
2106 prev_stmt_info);
2109 VEC_quick_push (tree, vec_dsts, vec_dest);
2113 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2114 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2115 the resulting vectors and call the function recursively. */
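/* Each operand yields two results here: for example, one V8HI vector from
VEC_OPRNDS0 becomes two V4SI vectors produced by the CODE1/CODE2 (or
DECL1/DECL2) pair, and *VEC_OPRNDS0 is replaced by the doubled set so that
a further step, if any, sees twice as many operands. */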
2117 static void
2118 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2119 VEC (tree, heap) **vec_oprnds1,
2120 gimple stmt, tree vec_dest,
2121 gimple_stmt_iterator *gsi,
2122 enum tree_code code1,
2123 enum tree_code code2, tree decl1,
2124 tree decl2, int op_type)
2126 int i;
2127 tree vop0, vop1, new_tmp1, new_tmp2;
2128 gimple new_stmt1, new_stmt2;
2129 VEC (tree, heap) *vec_tmp = NULL;
2131 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2132 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2134 if (op_type == binary_op)
2135 vop1 = VEC_index (tree, *vec_oprnds1, i);
2136 else
2137 vop1 = NULL_TREE;
2139 /* Generate the two halves of the promotion operation. */
2140 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2141 op_type, vec_dest, gsi, stmt);
2142 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2143 op_type, vec_dest, gsi, stmt);
2144 if (is_gimple_call (new_stmt1))
2146 new_tmp1 = gimple_call_lhs (new_stmt1);
2147 new_tmp2 = gimple_call_lhs (new_stmt2);
2149 else
2151 new_tmp1 = gimple_assign_lhs (new_stmt1);
2152 new_tmp2 = gimple_assign_lhs (new_stmt2);
2155 /* Store the results for the next step. */
2156 VEC_quick_push (tree, vec_tmp, new_tmp1);
2157 VEC_quick_push (tree, vec_tmp, new_tmp2);
2160 VEC_free (tree, heap, *vec_oprnds0);
2161 *vec_oprnds0 = vec_tmp;
2165 /* Check if STMT performs a conversion operation that can be vectorized.
2166 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2167 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2168 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
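/* For example, a float <-> int conversion between equally wide types is a
NONE case handled by a single vector conversion per copy; short -> int is a
WIDEN case, where each input vector produces two wider output vectors; and
int -> short is a NARROW case, where pairs of input vectors are packed into
one output vector. */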
2170 static bool
2171 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2172 gimple *vec_stmt, slp_tree slp_node)
2174 tree vec_dest;
2175 tree scalar_dest;
2176 tree op0, op1 = NULL_TREE;
2177 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2178 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2179 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2180 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2181 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2182 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2183 tree new_temp;
2184 tree def;
2185 gimple def_stmt;
2186 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2187 gimple new_stmt = NULL;
2188 stmt_vec_info prev_stmt_info;
2189 int nunits_in;
2190 int nunits_out;
2191 tree vectype_out, vectype_in;
2192 int ncopies, i, j;
2193 tree lhs_type, rhs_type;
2194 enum { NARROW, NONE, WIDEN } modifier;
2195 VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2196 tree vop0;
2197 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2198 int multi_step_cvt = 0;
2199 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2200 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2201 int op_type;
2202 enum machine_mode rhs_mode;
2203 unsigned short fltsz;
2205 /* Is STMT a vectorizable conversion? */
2207 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2208 return false;
2210 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2211 return false;
2213 if (!is_gimple_assign (stmt))
2214 return false;
2216 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2217 return false;
2219 code = gimple_assign_rhs_code (stmt);
2220 if (!CONVERT_EXPR_CODE_P (code)
2221 && code != FIX_TRUNC_EXPR
2222 && code != FLOAT_EXPR
2223 && code != WIDEN_MULT_EXPR
2224 && code != WIDEN_LSHIFT_EXPR)
2225 return false;
2227 op_type = TREE_CODE_LENGTH (code);
2229 /* Check types of lhs and rhs. */
2230 scalar_dest = gimple_assign_lhs (stmt);
2231 lhs_type = TREE_TYPE (scalar_dest);
2232 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2234 op0 = gimple_assign_rhs1 (stmt);
2235 rhs_type = TREE_TYPE (op0);
2237 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2238 && !((INTEGRAL_TYPE_P (lhs_type)
2239 && INTEGRAL_TYPE_P (rhs_type))
2240 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2241 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2242 return false;
2244 if ((INTEGRAL_TYPE_P (lhs_type)
2245 && (TYPE_PRECISION (lhs_type)
2246 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2247 || (INTEGRAL_TYPE_P (rhs_type)
2248 && (TYPE_PRECISION (rhs_type)
2249 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2251 if (vect_print_dump_info (REPORT_DETAILS))
2252 fprintf (vect_dump,
2253 "type conversion to/from bit-precision unsupported.");
2254 return false;
2257 /* Check the operands of the operation. */
2258 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2259 &def_stmt, &def, &dt[0], &vectype_in))
2261 if (vect_print_dump_info (REPORT_DETAILS))
2262 fprintf (vect_dump, "use not simple.");
2263 return false;
2265 if (op_type == binary_op)
2267 bool ok;
2269 op1 = gimple_assign_rhs2 (stmt);
2270 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2271 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2272 OP1. */
2273 if (CONSTANT_CLASS_P (op0))
2274 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, NULL,
2275 &def_stmt, &def, &dt[1], &vectype_in);
2276 else
2277 ok = vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &def_stmt,
2278 &def, &dt[1]);
2280 if (!ok)
2282 if (vect_print_dump_info (REPORT_DETAILS))
2283 fprintf (vect_dump, "use not simple.");
2284 return false;
2288 /* If op0 is an external or constant def, use a vector type of
2289 the same size as the output vector type. */
2290 if (!vectype_in)
2291 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2292 if (vec_stmt)
2293 gcc_assert (vectype_in);
2294 if (!vectype_in)
2296 if (vect_print_dump_info (REPORT_DETAILS))
2298 fprintf (vect_dump, "no vectype for scalar type ");
2299 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
2302 return false;
2305 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2306 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2307 if (nunits_in < nunits_out)
2308 modifier = NARROW;
2309 else if (nunits_out == nunits_in)
2310 modifier = NONE;
2311 else
2312 modifier = WIDEN;
2314 /* Multiple types in SLP are handled by creating the appropriate number of
2315 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2316 case of SLP. */
2317 if (slp_node || PURE_SLP_STMT (stmt_info))
2318 ncopies = 1;
2319 else if (modifier == NARROW)
2320 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2321 else
2322 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2324 /* Sanity check: make sure that at least one copy of the vectorized stmt
2325 needs to be generated. */
2326 gcc_assert (ncopies >= 1);
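/* For example, for a NONE conversion with a vectorization factor of 8 and
four elements per vector, ncopies == 2 and the vector conversion stmt is
generated twice below. */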
2328 /* Supportable by target? */
2329 switch (modifier)
2331 case NONE:
2332 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2333 return false;
2334 if (supportable_convert_operation (code, vectype_out, vectype_in,
2335 &decl1, &code1))
2336 break;
2337 /* FALLTHRU */
2338 unsupported:
2339 if (vect_print_dump_info (REPORT_DETAILS))
2340 fprintf (vect_dump, "conversion not supported by target.");
2341 return false;
2343 case WIDEN:
2344 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2345 &decl1, &decl2, &code1, &code2,
2346 &multi_step_cvt, &interm_types))
2348 /* Binary widening operations can only be supported directly by the
2349 architecture. */
2350 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2351 break;
2354 if (code != FLOAT_EXPR
2355 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2356 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2357 goto unsupported;
2359 rhs_mode = TYPE_MODE (rhs_type);
2360 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2361 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2362 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2363 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2365 cvt_type
2366 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2367 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2368 if (cvt_type == NULL_TREE)
2369 goto unsupported;
2371 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2373 if (!supportable_convert_operation (code, vectype_out,
2374 cvt_type, &decl1, &codecvt1))
2375 goto unsupported;
2377 else if (!supportable_widening_operation (code, stmt, vectype_out,
2378 cvt_type, &decl1, &decl2,
2379 &codecvt1, &codecvt2,
2380 &multi_step_cvt,
2381 &interm_types))
2382 continue;
2383 else
2384 gcc_assert (multi_step_cvt == 0);
2386 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2387 vectype_in, NULL, NULL, &code1,
2388 &code2, &multi_step_cvt,
2389 &interm_types))
2390 break;
2393 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2394 goto unsupported;
2396 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2397 codecvt2 = ERROR_MARK;
2398 else
2400 multi_step_cvt++;
2401 VEC_safe_push (tree, heap, interm_types, cvt_type);
2402 cvt_type = NULL_TREE;
2404 break;
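/* For example, a FLOAT_EXPR from unsigned short to double is treated above
as a multi-step widening: the operand is first widened to an intermediate
integer type (CVT_TYPE), chosen from the modes the target supports, and the
intermediate vector is then converted to the double vector. */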
2406 case NARROW:
2407 gcc_assert (op_type == unary_op);
2408 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2409 &code1, &multi_step_cvt,
2410 &interm_types))
2411 break;
2413 if (code != FIX_TRUNC_EXPR
2414 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2415 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2416 goto unsupported;
2418 rhs_mode = TYPE_MODE (rhs_type);
2419 cvt_type
2420 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2421 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2422 if (cvt_type == NULL_TREE)
2423 goto unsupported;
2424 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2425 &decl1, &codecvt1))
2426 goto unsupported;
2427 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2428 &code1, &multi_step_cvt,
2429 &interm_types))
2430 break;
2431 goto unsupported;
2433 default:
2434 gcc_unreachable ();
2437 if (!vec_stmt) /* transformation not required. */
2439 if (vect_print_dump_info (REPORT_DETAILS))
2440 fprintf (vect_dump, "=== vectorizable_conversion ===");
2441 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2443 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2444 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2446 else if (modifier == NARROW)
2448 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2449 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2451 else
2453 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2454 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2456 VEC_free (tree, heap, interm_types);
2457 return true;
2460 /** Transform. **/
2461 if (vect_print_dump_info (REPORT_DETAILS))
2462 fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);
2464 if (op_type == binary_op)
2466 if (CONSTANT_CLASS_P (op0))
2467 op0 = fold_convert (TREE_TYPE (op1), op0);
2468 else if (CONSTANT_CLASS_P (op1))
2469 op1 = fold_convert (TREE_TYPE (op0), op1);
2472 /* In case of multi-step conversion, we first generate conversion operations
2473 to the intermediate types, and then from those types to the final one.
2474 We create vector destinations for the intermediate types (TYPES) received
2475 from supportable_*_operation, and store them in the correct order
2476 for future use in vect_create_vectorized_*_stmts (). */
2477 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2478 vec_dest = vect_create_destination_var (scalar_dest,
2479 (cvt_type && modifier == WIDEN)
2480 ? cvt_type : vectype_out);
2481 VEC_quick_push (tree, vec_dsts, vec_dest);
2483 if (multi_step_cvt)
2485 for (i = VEC_length (tree, interm_types) - 1;
2486 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2488 vec_dest = vect_create_destination_var (scalar_dest,
2489 intermediate_type);
2490 VEC_quick_push (tree, vec_dsts, vec_dest);
2494 if (cvt_type)
2495 vec_dest = vect_create_destination_var (scalar_dest,
2496 modifier == WIDEN
2497 ? vectype_out : cvt_type);
2499 if (!slp_node)
2501 if (modifier == NONE)
2502 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2503 else if (modifier == WIDEN)
2505 vec_oprnds0 = VEC_alloc (tree, heap,
2506 (multi_step_cvt
2507 ? vect_pow2 (multi_step_cvt) : 1));
2508 if (op_type == binary_op)
2509 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2511 else
2512 vec_oprnds0 = VEC_alloc (tree, heap,
2513 2 * (multi_step_cvt
2514 ? vect_pow2 (multi_step_cvt) : 1));
2516 else if (code == WIDEN_LSHIFT_EXPR)
2517 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2519 last_oprnd = op0;
2520 prev_stmt_info = NULL;
2521 switch (modifier)
2523 case NONE:
2524 for (j = 0; j < ncopies; j++)
2526 if (j == 0)
2527 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2528 -1);
2529 else
2530 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2532 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2534 /* Arguments are ready. Create the new vector stmt. */
2535 if (code1 == CALL_EXPR)
2537 new_stmt = gimple_build_call (decl1, 1, vop0);
2538 new_temp = make_ssa_name (vec_dest, new_stmt);
2539 gimple_call_set_lhs (new_stmt, new_temp);
2541 else
2543 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2544 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2545 vop0, NULL);
2546 new_temp = make_ssa_name (vec_dest, new_stmt);
2547 gimple_assign_set_lhs (new_stmt, new_temp);
2550 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2551 if (slp_node)
2552 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2553 new_stmt);
2556 if (j == 0)
2557 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2558 else
2559 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2560 prev_stmt_info = vinfo_for_stmt (new_stmt);
2562 break;
2564 case WIDEN:
2565 /* In case the vectorization factor (VF) is bigger than the number
2566 of elements that we can fit in a vectype (nunits), we have to
2567 generate more than one vector stmt - i.e., we need to "unroll"
2568 the vector stmt by a factor VF/nunits. */
2569 for (j = 0; j < ncopies; j++)
2571 /* Handle uses. */
2572 if (j == 0)
2574 if (slp_node)
2576 if (code == WIDEN_LSHIFT_EXPR)
2578 unsigned int k;
2580 vec_oprnd1 = op1;
2581 /* Store vec_oprnd1 for every vector stmt to be created
2582 for SLP_NODE. We check during the analysis that all
2583 the shift arguments are the same. */
2584 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2585 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2587 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2588 slp_node, -1);
2590 else
2591 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2592 &vec_oprnds1, slp_node, -1);
2594 else
2596 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2597 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2598 if (op_type == binary_op)
2600 if (code == WIDEN_LSHIFT_EXPR)
2601 vec_oprnd1 = op1;
2602 else
2603 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2604 NULL);
2605 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2609 else
2611 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2612 VEC_truncate (tree, vec_oprnds0, 0);
2613 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2614 if (op_type == binary_op)
2616 if (code == WIDEN_LSHIFT_EXPR)
2617 vec_oprnd1 = op1;
2618 else
2619 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2620 vec_oprnd1);
2621 VEC_truncate (tree, vec_oprnds1, 0);
2622 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2626 /* Arguments are ready. Create the new vector stmts. */
2627 for (i = multi_step_cvt; i >= 0; i--)
2629 tree this_dest = VEC_index (tree, vec_dsts, i);
2630 enum tree_code c1 = code1, c2 = code2;
2631 if (i == 0 && codecvt2 != ERROR_MARK)
2633 c1 = codecvt1;
2634 c2 = codecvt2;
2636 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2637 &vec_oprnds1,
2638 stmt, this_dest, gsi,
2639 c1, c2, decl1, decl2,
2640 op_type);
2643 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2645 if (cvt_type)
2647 if (codecvt1 == CALL_EXPR)
2649 new_stmt = gimple_build_call (decl1, 1, vop0);
2650 new_temp = make_ssa_name (vec_dest, new_stmt);
2651 gimple_call_set_lhs (new_stmt, new_temp);
2653 else
2655 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2656 new_temp = make_ssa_name (vec_dest, NULL);
2657 new_stmt = gimple_build_assign_with_ops (codecvt1,
2658 new_temp,
2659 vop0, NULL);
2662 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2664 else
2665 new_stmt = SSA_NAME_DEF_STMT (vop0);
2667 if (slp_node)
2668 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2669 new_stmt);
2670 else
2672 if (!prev_stmt_info)
2673 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2674 else
2675 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2676 prev_stmt_info = vinfo_for_stmt (new_stmt);
2681 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2682 break;
2684 case NARROW:
2685 /* In case the vectorization factor (VF) is bigger than the number
2686 of elements that we can fit in a vectype (nunits), we have to
2687 generate more than one vector stmt - i.e., we need to "unroll"
2688 the vector stmt by a factor VF/nunits. */
2689 for (j = 0; j < ncopies; j++)
2691 /* Handle uses. */
2692 if (slp_node)
2693 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2694 slp_node, -1);
2695 else
2697 VEC_truncate (tree, vec_oprnds0, 0);
2698 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2699 vect_pow2 (multi_step_cvt) - 1);
2702 /* Arguments are ready. Create the new vector stmts. */
2703 if (cvt_type)
2704 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2706 if (codecvt1 == CALL_EXPR)
2708 new_stmt = gimple_build_call (decl1, 1, vop0);
2709 new_temp = make_ssa_name (vec_dest, new_stmt);
2710 gimple_call_set_lhs (new_stmt, new_temp);
2712 else
2714 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2715 new_temp = make_ssa_name (vec_dest, NULL);
2716 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2717 vop0, NULL);
2720 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2721 VEC_replace (tree, vec_oprnds0, i, new_temp);
2724 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2725 stmt, vec_dsts, gsi,
2726 slp_node, code1,
2727 &prev_stmt_info);
2730 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2731 break;
2734 VEC_free (tree, heap, vec_oprnds0);
2735 VEC_free (tree, heap, vec_oprnds1);
2736 VEC_free (tree, heap, vec_dsts);
2737 VEC_free (tree, heap, interm_types);
2739 return true;
2743 /* Function vectorizable_assignment.
2745 Check if STMT performs an assignment (copy) that can be vectorized.
2746 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2747 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2748 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
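/* For example, a plain SSA copy, a PAREN_EXPR, or a NOP/VIEW_CONVERT
conversion that changes neither the number of vector elements nor the
vector size qualifies; each such stmt becomes a simple vector copy,
possibly through a VIEW_CONVERT_EXPR of the operand. */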
2750 static bool
2751 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2752 gimple *vec_stmt, slp_tree slp_node)
2754 tree vec_dest;
2755 tree scalar_dest;
2756 tree op;
2757 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2758 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2759 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2760 tree new_temp;
2761 tree def;
2762 gimple def_stmt;
2763 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2764 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2765 int ncopies;
2766 int i, j;
2767 VEC(tree,heap) *vec_oprnds = NULL;
2768 tree vop;
2769 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2770 gimple new_stmt = NULL;
2771 stmt_vec_info prev_stmt_info = NULL;
2772 enum tree_code code;
2773 tree vectype_in;
2775 /* Multiple types in SLP are handled by creating the appropriate number of
2776 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2777 case of SLP. */
2778 if (slp_node || PURE_SLP_STMT (stmt_info))
2779 ncopies = 1;
2780 else
2781 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2783 gcc_assert (ncopies >= 1);
2785 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2786 return false;
2788 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2789 return false;
2791 /* Is vectorizable assignment? */
2792 if (!is_gimple_assign (stmt))
2793 return false;
2795 scalar_dest = gimple_assign_lhs (stmt);
2796 if (TREE_CODE (scalar_dest) != SSA_NAME)
2797 return false;
2799 code = gimple_assign_rhs_code (stmt);
2800 if (gimple_assign_single_p (stmt)
2801 || code == PAREN_EXPR
2802 || CONVERT_EXPR_CODE_P (code))
2803 op = gimple_assign_rhs1 (stmt);
2804 else
2805 return false;
2807 if (code == VIEW_CONVERT_EXPR)
2808 op = TREE_OPERAND (op, 0);
2810 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2811 &def_stmt, &def, &dt[0], &vectype_in))
2813 if (vect_print_dump_info (REPORT_DETAILS))
2814 fprintf (vect_dump, "use not simple.");
2815 return false;
2818 /* We can handle NOP_EXPR conversions that do not change the number
2819 of elements or the vector size. */
2820 if ((CONVERT_EXPR_CODE_P (code)
2821 || code == VIEW_CONVERT_EXPR)
2822 && (!vectype_in
2823 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2824 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2825 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2826 return false;
2828 /* We do not handle bit-precision changes. */
2829 if ((CONVERT_EXPR_CODE_P (code)
2830 || code == VIEW_CONVERT_EXPR)
2831 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2832 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2833 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2834 || ((TYPE_PRECISION (TREE_TYPE (op))
2835 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2836 /* But a conversion that does not change the bit-pattern is ok. */
2837 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2838 > TYPE_PRECISION (TREE_TYPE (op)))
2839 && TYPE_UNSIGNED (TREE_TYPE (op))))
2841 if (vect_print_dump_info (REPORT_DETAILS))
2842 fprintf (vect_dump, "type conversion to/from bit-precision "
2843 "unsupported.");
2844 return false;
2847 if (!vec_stmt) /* transformation not required. */
2849 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2850 if (vect_print_dump_info (REPORT_DETAILS))
2851 fprintf (vect_dump, "=== vectorizable_assignment ===");
2852 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2853 return true;
2856 /** Transform. **/
2857 if (vect_print_dump_info (REPORT_DETAILS))
2858 fprintf (vect_dump, "transform assignment.");
2860 /* Handle def. */
2861 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2863 /* Handle use. */
2864 for (j = 0; j < ncopies; j++)
2866 /* Handle uses. */
2867 if (j == 0)
2868 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2869 else
2870 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2872 /* Arguments are ready. Create the new vector stmt. */
2873 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2875 if (CONVERT_EXPR_CODE_P (code)
2876 || code == VIEW_CONVERT_EXPR)
2877 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2878 new_stmt = gimple_build_assign (vec_dest, vop);
2879 new_temp = make_ssa_name (vec_dest, new_stmt);
2880 gimple_assign_set_lhs (new_stmt, new_temp);
2881 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2882 if (slp_node)
2883 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2886 if (slp_node)
2887 continue;
2889 if (j == 0)
2890 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2891 else
2892 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2894 prev_stmt_info = vinfo_for_stmt (new_stmt);
2897 VEC_free (tree, heap, vec_oprnds);
2898 return true;
2902 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2903 either as a shift by a scalar or by a vector. */
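/* For example, for LSHIFT_EXPR on an int scalar type this first queries
the vector-shift-by-scalar optab for the corresponding vector mode and, if
that is not implemented, falls back to the vector-shift-by-vector optab. */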
2905 bool
2906 vect_supportable_shift (enum tree_code code, tree scalar_type)
2909 enum machine_mode vec_mode;
2910 optab optab;
2911 int icode;
2912 tree vectype;
2914 vectype = get_vectype_for_scalar_type (scalar_type);
2915 if (!vectype)
2916 return false;
2918 optab = optab_for_tree_code (code, vectype, optab_scalar);
2919 if (!optab
2920 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2922 optab = optab_for_tree_code (code, vectype, optab_vector);
2923 if (!optab
2924 || (optab_handler (optab, TYPE_MODE (vectype))
2925 == CODE_FOR_nothing))
2926 return false;
2929 vec_mode = TYPE_MODE (vectype);
2930 icode = (int) optab_handler (optab, vec_mode);
2931 if (icode == CODE_FOR_nothing)
2932 return false;
2934 return true;
2938 /* Function vectorizable_shift.
2940 Check if STMT performs a shift operation that can be vectorized.
2941 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2942 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2943 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
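/* For example, a[i] << 3, or a[i] << n with loop-invariant n, can use the
vector-shift-by-scalar form when the target provides it, whereas
a[i] << b[i] needs the vector-shift-by-vector form. */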
2945 static bool
2946 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2947 gimple *vec_stmt, slp_tree slp_node)
2949 tree vec_dest;
2950 tree scalar_dest;
2951 tree op0, op1 = NULL;
2952 tree vec_oprnd1 = NULL_TREE;
2953 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2954 tree vectype;
2955 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2956 enum tree_code code;
2957 enum machine_mode vec_mode;
2958 tree new_temp;
2959 optab optab;
2960 int icode;
2961 enum machine_mode optab_op2_mode;
2962 tree def;
2963 gimple def_stmt;
2964 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2965 gimple new_stmt = NULL;
2966 stmt_vec_info prev_stmt_info;
2967 int nunits_in;
2968 int nunits_out;
2969 tree vectype_out;
2970 tree op1_vectype;
2971 int ncopies;
2972 int j, i;
2973 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2974 tree vop0, vop1;
2975 unsigned int k;
2976 bool scalar_shift_arg = true;
2977 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2978 int vf;
2980 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2981 return false;
2983 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2984 return false;
2986 /* Is STMT a vectorizable binary/unary operation? */
2987 if (!is_gimple_assign (stmt))
2988 return false;
2990 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2991 return false;
2993 code = gimple_assign_rhs_code (stmt);
2995 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2996 || code == RROTATE_EXPR))
2997 return false;
2999 scalar_dest = gimple_assign_lhs (stmt);
3000 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3001 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3002 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3004 if (vect_print_dump_info (REPORT_DETAILS))
3005 fprintf (vect_dump, "bit-precision shifts not supported.");
3006 return false;
3009 op0 = gimple_assign_rhs1 (stmt);
3010 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3011 &def_stmt, &def, &dt[0], &vectype))
3013 if (vect_print_dump_info (REPORT_DETAILS))
3014 fprintf (vect_dump, "use not simple.");
3015 return false;
3017 /* If op0 is an external or constant def, use a vector type with
3018 the same size as the output vector type. */
3019 if (!vectype)
3020 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3021 if (vec_stmt)
3022 gcc_assert (vectype);
3023 if (!vectype)
3025 if (vect_print_dump_info (REPORT_DETAILS))
3027 fprintf (vect_dump, "no vectype for scalar type ");
3028 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3031 return false;
3034 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3035 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3036 if (nunits_out != nunits_in)
3037 return false;
3039 op1 = gimple_assign_rhs2 (stmt);
3040 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3041 &def, &dt[1], &op1_vectype))
3043 if (vect_print_dump_info (REPORT_DETAILS))
3044 fprintf (vect_dump, "use not simple.");
3045 return false;
3048 if (loop_vinfo)
3049 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3050 else
3051 vf = 1;
3053 /* Multiple types in SLP are handled by creating the appropriate number of
3054 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3055 case of SLP. */
3056 if (slp_node || PURE_SLP_STMT (stmt_info))
3057 ncopies = 1;
3058 else
3059 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3061 gcc_assert (ncopies >= 1);
3063 /* Determine whether the shift amount is a vector or a scalar. If the
3064 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3066 if (dt[1] == vect_internal_def && !slp_node)
3067 scalar_shift_arg = false;
3068 else if (dt[1] == vect_constant_def
3069 || dt[1] == vect_external_def
3070 || dt[1] == vect_internal_def)
3072 /* In SLP, we need to check whether the shift count is the same;
3073 in loops, if it is a constant or invariant, it is always
3074 a scalar shift. */
3075 if (slp_node)
3077 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3078 gimple slpstmt;
3080 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
3081 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3082 scalar_shift_arg = false;
3085 else
3087 if (vect_print_dump_info (REPORT_DETAILS))
3088 fprintf (vect_dump, "operand mode requires invariant argument.");
3089 return false;
3092 /* Vector shifted by vector. */
3093 if (!scalar_shift_arg)
3095 optab = optab_for_tree_code (code, vectype, optab_vector);
3096 if (vect_print_dump_info (REPORT_DETAILS))
3097 fprintf (vect_dump, "vector/vector shift/rotate found.");
3098 if (!op1_vectype)
3099 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3100 if (op1_vectype == NULL_TREE
3101 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3103 if (vect_print_dump_info (REPORT_DETAILS))
3104 fprintf (vect_dump, "unusable type for last operand in"
3105 " vector/vector shift/rotate.");
3106 return false;
3109 /* See if the machine has a vector-shift-by-scalar insn, and if not,
3110 see if it has a vector-shift-by-vector insn. */
3111 else
3113 optab = optab_for_tree_code (code, vectype, optab_scalar);
3114 if (optab
3115 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3117 if (vect_print_dump_info (REPORT_DETAILS))
3118 fprintf (vect_dump, "vector/scalar shift/rotate found.");
3120 else
3122 optab = optab_for_tree_code (code, vectype, optab_vector);
3123 if (optab
3124 && (optab_handler (optab, TYPE_MODE (vectype))
3125 != CODE_FOR_nothing))
3127 scalar_shift_arg = false;
3129 if (vect_print_dump_info (REPORT_DETAILS))
3130 fprintf (vect_dump, "vector/vector shift/rotate found.");
3132 /* Unlike the other binary operators, shifts/rotates have
3133 the rhs being int rather than the same type as the lhs,
3134 so make sure the scalar is of the right type if we are
3135 dealing with vectors of long long/long/short/char. */
3136 if (dt[1] == vect_constant_def)
3137 op1 = fold_convert (TREE_TYPE (vectype), op1);
3138 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3139 TREE_TYPE (op1)))
3141 if (slp_node
3142 && TYPE_MODE (TREE_TYPE (vectype))
3143 != TYPE_MODE (TREE_TYPE (op1)))
3145 if (vect_print_dump_info (REPORT_DETAILS))
3146 fprintf (vect_dump, "unusable type for last operand in"
3147 " vector/vector shift/rotate.");
3148 return false;
3150 if (vec_stmt && !slp_node)
3152 op1 = fold_convert (TREE_TYPE (vectype), op1);
3153 op1 = vect_init_vector (stmt, op1,
3154 TREE_TYPE (vectype), NULL);
3161 /* Supportable by target? */
3162 if (!optab)
3164 if (vect_print_dump_info (REPORT_DETAILS))
3165 fprintf (vect_dump, "no optab.");
3166 return false;
3168 vec_mode = TYPE_MODE (vectype);
3169 icode = (int) optab_handler (optab, vec_mode);
3170 if (icode == CODE_FOR_nothing)
3172 if (vect_print_dump_info (REPORT_DETAILS))
3173 fprintf (vect_dump, "op not supported by target.");
3174 /* Check only during analysis. */
3175 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3176 || (vf < vect_min_worthwhile_factor (code)
3177 && !vec_stmt))
3178 return false;
3179 if (vect_print_dump_info (REPORT_DETAILS))
3180 fprintf (vect_dump, "proceeding using word mode.");
3183 /* Worthwhile without SIMD support? Check only during analysis. */
3184 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3185 && vf < vect_min_worthwhile_factor (code)
3186 && !vec_stmt)
3188 if (vect_print_dump_info (REPORT_DETAILS))
3189 fprintf (vect_dump, "not worthwhile without SIMD support.");
3190 return false;
3193 if (!vec_stmt) /* transformation not required. */
3195 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3196 if (vect_print_dump_info (REPORT_DETAILS))
3197 fprintf (vect_dump, "=== vectorizable_shift ===");
3198 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3199 return true;
3202 /** Transform. **/
3204 if (vect_print_dump_info (REPORT_DETAILS))
3205 fprintf (vect_dump, "transform binary/unary operation.");
3207 /* Handle def. */
3208 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3210 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3211 created in the previous stages of the recursion, so no allocation is
3212 needed, except for the case of shift with scalar shift argument. In that
3213 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3214 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3215 In case of loop-based vectorization we allocate VECs of size 1. We
3216 allocate VEC_OPRNDS1 only in case of binary operation. */
3217 if (!slp_node)
3219 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3220 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3222 else if (scalar_shift_arg)
3223 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3225 prev_stmt_info = NULL;
3226 for (j = 0; j < ncopies; j++)
3228 /* Handle uses. */
3229 if (j == 0)
3231 if (scalar_shift_arg)
3233 /* Vector shl and shr insn patterns can be defined with scalar
3234 operand 2 (shift operand). In this case, use constant or loop
3235 invariant op1 directly, without extending it to vector mode
3236 first. */
3237 optab_op2_mode = insn_data[icode].operand[2].mode;
3238 if (!VECTOR_MODE_P (optab_op2_mode))
3240 if (vect_print_dump_info (REPORT_DETAILS))
3241 fprintf (vect_dump, "operand 1 using scalar mode.");
3242 vec_oprnd1 = op1;
3243 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3244 if (slp_node)
3246 /* Store vec_oprnd1 for every vector stmt to be created
3247 for SLP_NODE. We check during the analysis that all
3248 the shift arguments are the same.
3249 TODO: Allow different constants for different vector
3250 stmts generated for an SLP instance. */
3251 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3252 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3257 /* vec_oprnd1 is available if operand 1 should be of a scalar type
3258 (a special case for certain kinds of vector shifts); otherwise,
3259 operand 1 should be of a vector type (the usual case). */
3260 if (vec_oprnd1)
3261 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3262 slp_node, -1);
3263 else
3264 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3265 slp_node, -1);
3267 else
3268 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3270 /* Arguments are ready. Create the new vector stmt. */
3271 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3273 vop1 = VEC_index (tree, vec_oprnds1, i);
3274 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3275 new_temp = make_ssa_name (vec_dest, new_stmt);
3276 gimple_assign_set_lhs (new_stmt, new_temp);
3277 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3278 if (slp_node)
3279 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3282 if (slp_node)
3283 continue;
3285 if (j == 0)
3286 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3287 else
3288 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3289 prev_stmt_info = vinfo_for_stmt (new_stmt);
3292 VEC_free (tree, heap, vec_oprnds0);
3293 VEC_free (tree, heap, vec_oprnds1);
3295 return true;
3299 /* Function vectorizable_operation.
3301 Check if STMT performs a binary, unary or ternary operation that can
3302 be vectorized.
3303 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3304 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3305 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3307 static bool
3308 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3309 gimple *vec_stmt, slp_tree slp_node)
3311 tree vec_dest;
3312 tree scalar_dest;
3313 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3314 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3315 tree vectype;
3316 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3317 enum tree_code code;
3318 enum machine_mode vec_mode;
3319 tree new_temp;
3320 int op_type;
3321 optab optab;
3322 int icode;
3323 tree def;
3324 gimple def_stmt;
3325 enum vect_def_type dt[3]
3326 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3327 gimple new_stmt = NULL;
3328 stmt_vec_info prev_stmt_info;
3329 int nunits_in;
3330 int nunits_out;
3331 tree vectype_out;
3332 int ncopies;
3333 int j, i;
3334 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
3335 tree vop0, vop1, vop2;
3336 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3337 int vf;
3339 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3340 return false;
3342 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3343 return false;
3345 /* Is STMT a vectorizable binary/unary operation? */
3346 if (!is_gimple_assign (stmt))
3347 return false;
3349 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3350 return false;
3352 code = gimple_assign_rhs_code (stmt);
3354 /* For pointer addition, we should use the normal plus for
3355 the vector addition. */
3356 if (code == POINTER_PLUS_EXPR)
3357 code = PLUS_EXPR;
3359 /* Support only unary, binary, or ternary operations. */
3360 op_type = TREE_CODE_LENGTH (code);
3361 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3363 if (vect_print_dump_info (REPORT_DETAILS))
3364 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
3365 op_type);
3366 return false;
3369 scalar_dest = gimple_assign_lhs (stmt);
3370 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3372 /* Most operations cannot handle bit-precision types without extra
3373 truncations. */
3374 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3375 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3376 /* Exceptions are bitwise binary operations. */
3377 && code != BIT_IOR_EXPR
3378 && code != BIT_XOR_EXPR
3379 && code != BIT_AND_EXPR)
3381 if (vect_print_dump_info (REPORT_DETAILS))
3382 fprintf (vect_dump, "bit-precision arithmetic not supported.");
3383 return false;
3386 op0 = gimple_assign_rhs1 (stmt);
3387 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3388 &def_stmt, &def, &dt[0], &vectype))
3390 if (vect_print_dump_info (REPORT_DETAILS))
3391 fprintf (vect_dump, "use not simple.");
3392 return false;
3394 /* If op0 is an external or constant def, use a vector type with
3395 the same size as the output vector type. */
3396 if (!vectype)
3397 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3398 if (vec_stmt)
3399 gcc_assert (vectype);
3400 if (!vectype)
3402 if (vect_print_dump_info (REPORT_DETAILS))
3404 fprintf (vect_dump, "no vectype for scalar type ");
3405 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3408 return false;
3411 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3412 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3413 if (nunits_out != nunits_in)
3414 return false;
3416 if (op_type == binary_op || op_type == ternary_op)
3418 op1 = gimple_assign_rhs2 (stmt);
3419 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3420 &def, &dt[1]))
3422 if (vect_print_dump_info (REPORT_DETAILS))
3423 fprintf (vect_dump, "use not simple.");
3424 return false;
3427 if (op_type == ternary_op)
3429 op2 = gimple_assign_rhs3 (stmt);
3430 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3431 &def, &dt[2]))
3433 if (vect_print_dump_info (REPORT_DETAILS))
3434 fprintf (vect_dump, "use not simple.");
3435 return false;
3439 if (loop_vinfo)
3440 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3441 else
3442 vf = 1;
3444 /* Multiple types in SLP are handled by creating the appropriate number of
3445 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3446 case of SLP. */
3447 if (slp_node || PURE_SLP_STMT (stmt_info))
3448 ncopies = 1;
3449 else
3450 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3452 gcc_assert (ncopies >= 1);
3454 /* Shifts are handled in vectorizable_shift (). */
3455 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3456 || code == RROTATE_EXPR)
3457 return false;
3459 optab = optab_for_tree_code (code, vectype, optab_default);
3461 /* Supportable by target? */
3462 if (!optab)
3464 if (vect_print_dump_info (REPORT_DETAILS))
3465 fprintf (vect_dump, "no optab.");
3466 return false;
3468 vec_mode = TYPE_MODE (vectype);
3469 icode = (int) optab_handler (optab, vec_mode);
3470 if (icode == CODE_FOR_nothing)
3472 if (vect_print_dump_info (REPORT_DETAILS))
3473 fprintf (vect_dump, "op not supported by target.");
3474 /* Check only during analysis. */
3475 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3476 || (vf < vect_min_worthwhile_factor (code)
3477 && !vec_stmt))
3478 return false;
3479 if (vect_print_dump_info (REPORT_DETAILS))
3480 fprintf (vect_dump, "proceeding using word mode.");
3483 /* Worthwhile without SIMD support? Check only during analysis. */
3484 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3485 && vf < vect_min_worthwhile_factor (code)
3486 && !vec_stmt)
3488 if (vect_print_dump_info (REPORT_DETAILS))
3489 fprintf (vect_dump, "not worthwhile without SIMD support.");
3490 return false;
3493 if (!vec_stmt) /* transformation not required. */
3495 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3496 if (vect_print_dump_info (REPORT_DETAILS))
3497 fprintf (vect_dump, "=== vectorizable_operation ===");
3498 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3499 return true;
3502 /** Transform. **/
3504 if (vect_print_dump_info (REPORT_DETAILS))
3505 fprintf (vect_dump, "transform binary/unary operation.");
3507 /* Handle def. */
3508 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3510 /* In case the vectorization factor (VF) is bigger than the number
3511 of elements that we can fit in a vectype (nunits), we have to generate
3512 more than one vector stmt - i.e., we need to "unroll" the
3513 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3514 from one copy of the vector stmt to the next, in the field
3515 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3516 stages to find the correct vector defs to be used when vectorizing
3517 stmts that use the defs of the current stmt. The example below
3518 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3519 we need to create 4 vectorized stmts):
3521 before vectorization:
3522 RELATED_STMT VEC_STMT
3523 S1: x = memref - -
3524 S2: z = x + 1 - -
3526 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3527 there):
3528 RELATED_STMT VEC_STMT
3529 VS1_0: vx0 = memref0 VS1_1 -
3530 VS1_1: vx1 = memref1 VS1_2 -
3531 VS1_2: vx2 = memref2 VS1_3 -
3532 VS1_3: vx3 = memref3 - -
3533 S1: x = load - VS1_0
3534 S2: z = x + 1 - -
3536 step 2: vectorize stmt S2 (done here):
3537 To vectorize stmt S2 we first need to find the relevant vector
3538 def for the first operand 'x'. This is, as usual, obtained from
3539 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3540 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3541 relevant vector def 'vx0'. Having found 'vx0' we can generate
3542 the vector stmt VS2_0, and as usual, record it in the
3543 STMT_VINFO_VEC_STMT of stmt S2.
3544 When creating the second copy (VS2_1), we obtain the relevant vector
3545 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3546 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3547 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3548 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3549 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3550 chain of stmts and pointers:
3551 RELATED_STMT VEC_STMT
3552 VS1_0: vx0 = memref0 VS1_1 -
3553 VS1_1: vx1 = memref1 VS1_2 -
3554 VS1_2: vx2 = memref2 VS1_3 -
3555 VS1_3: vx3 = memref3 - -
3556 S1: x = load - VS1_0
3557 VS2_0: vz0 = vx0 + v1 VS2_1 -
3558 VS2_1: vz1 = vx1 + v1 VS2_2 -
3559 VS2_2: vz2 = vx2 + v1 VS2_3 -
3560 VS2_3: vz3 = vx3 + v1 - -
3561 S2: z = x + 1 - VS2_0 */
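/* The same scheme applies to ternary operations (e.g. FMA_EXPR), whose
third vector operand is carried in VEC_OPRNDS2 alongside VEC_OPRNDS0/1. */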
3563 prev_stmt_info = NULL;
3564 for (j = 0; j < ncopies; j++)
3566 /* Handle uses. */
3567 if (j == 0)
3569 if (op_type == binary_op || op_type == ternary_op)
3570 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3571 slp_node, -1);
3572 else
3573 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3574 slp_node, -1);
3575 if (op_type == ternary_op)
3577 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3578 VEC_quick_push (tree, vec_oprnds2,
3579 vect_get_vec_def_for_operand (op2, stmt, NULL));
3582 else
3584 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3585 if (op_type == ternary_op)
3587 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
3588 VEC_quick_push (tree, vec_oprnds2,
3589 vect_get_vec_def_for_stmt_copy (dt[2],
3590 vec_oprnd));
3594 /* Arguments are ready. Create the new vector stmt. */
3595 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3597 vop1 = ((op_type == binary_op || op_type == ternary_op)
3598 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
3599 vop2 = ((op_type == ternary_op)
3600 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
3601 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
3602 vop0, vop1, vop2);
3603 new_temp = make_ssa_name (vec_dest, new_stmt);
3604 gimple_assign_set_lhs (new_stmt, new_temp);
3605 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3606 if (slp_node)
3607 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3610 if (slp_node)
3611 continue;
3613 if (j == 0)
3614 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3615 else
3616 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3617 prev_stmt_info = vinfo_for_stmt (new_stmt);
3620 VEC_free (tree, heap, vec_oprnds0);
3621 if (vec_oprnds1)
3622 VEC_free (tree, heap, vec_oprnds1);
3623 if (vec_oprnds2)
3624 VEC_free (tree, heap, vec_oprnds2);
3626 return true;
3630 /* Function vectorizable_store.
3632 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3633 can be vectorized.
3634 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3635 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3636 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
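/* For example, the pair of stores a[2*i] = x; a[2*i+1] = y; forms an
interleaving group of size two. The whole group is vectorized once, when
its last member is reached, either with a target store-lanes instruction
(see vect_store_lanes_supported) or by permuting the value vectors before
contiguous vector stores. */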
3638 static bool
3639 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3640 slp_tree slp_node)
3642 tree scalar_dest;
3643 tree data_ref;
3644 tree op;
3645 tree vec_oprnd = NULL_TREE;
3646 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3647 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3648 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3649 tree elem_type;
3650 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3651 struct loop *loop = NULL;
3652 enum machine_mode vec_mode;
3653 tree dummy;
3654 enum dr_alignment_support alignment_support_scheme;
3655 tree def;
3656 gimple def_stmt;
3657 enum vect_def_type dt;
3658 stmt_vec_info prev_stmt_info = NULL;
3659 tree dataref_ptr = NULL_TREE;
3660 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3661 int ncopies;
3662 int j;
3663 gimple next_stmt, first_stmt = NULL;
3664 bool strided_store = false;
3665 bool store_lanes_p = false;
3666 unsigned int group_size, i;
3667 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3668 bool inv_p;
3669 VEC(tree,heap) *vec_oprnds = NULL;
3670 bool slp = (slp_node != NULL);
3671 unsigned int vec_num;
3672 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3673 tree aggr_type;
3675 if (loop_vinfo)
3676 loop = LOOP_VINFO_LOOP (loop_vinfo);
3678 /* Multiple types in SLP are handled by creating the appropriate number of
3679 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3680 case of SLP. */
3681 if (slp || PURE_SLP_STMT (stmt_info))
3682 ncopies = 1;
3683 else
3684 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3686 gcc_assert (ncopies >= 1);
3688 /* FORNOW. This restriction should be relaxed. */
3689 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3691 if (vect_print_dump_info (REPORT_DETAILS))
3692 fprintf (vect_dump, "multiple types in nested loop.");
3693 return false;
3696 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3697 return false;
3699 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3700 return false;
3702 /* Is vectorizable store? */
3704 if (!is_gimple_assign (stmt))
3705 return false;
3707 scalar_dest = gimple_assign_lhs (stmt);
3708 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3709 && is_pattern_stmt_p (stmt_info))
3710 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3711 if (TREE_CODE (scalar_dest) != ARRAY_REF
3712 && TREE_CODE (scalar_dest) != INDIRECT_REF
3713 && TREE_CODE (scalar_dest) != COMPONENT_REF
3714 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3715 && TREE_CODE (scalar_dest) != REALPART_EXPR
3716 && TREE_CODE (scalar_dest) != MEM_REF)
3717 return false;
3719 gcc_assert (gimple_assign_single_p (stmt));
3720 op = gimple_assign_rhs1 (stmt);
3721 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3722 &def, &dt))
3724 if (vect_print_dump_info (REPORT_DETAILS))
3725 fprintf (vect_dump, "use not simple.");
3726 return false;
3729 elem_type = TREE_TYPE (vectype);
3730 vec_mode = TYPE_MODE (vectype);
3732 /* FORNOW. In some cases can vectorize even if data-type not supported
3733 (e.g. - array initialization with 0). */
3734 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3735 return false;
3737 if (!STMT_VINFO_DATA_REF (stmt_info))
3738 return false;
3740 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3741 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3742 size_zero_node) < 0)
3744 if (vect_print_dump_info (REPORT_DETAILS))
3745 fprintf (vect_dump, "negative step for store.");
3746 return false;
3749 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3751 strided_store = true;
3752 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3753 if (!slp && !PURE_SLP_STMT (stmt_info))
3755 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3756 if (vect_store_lanes_supported (vectype, group_size))
3757 store_lanes_p = true;
3758 else if (!vect_strided_store_supported (vectype, group_size))
3759 return false;
3762 if (first_stmt == stmt)
3764 /* STMT is the leader of the group. Check the operands of all the
3765 stmts of the group. */
3766 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3767 while (next_stmt)
3769 gcc_assert (gimple_assign_single_p (next_stmt));
3770 op = gimple_assign_rhs1 (next_stmt);
3771 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3772 &def_stmt, &def, &dt))
3774 if (vect_print_dump_info (REPORT_DETAILS))
3775 fprintf (vect_dump, "use not simple.");
3776 return false;
3778 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3783 if (!vec_stmt) /* transformation not required. */
3785 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3786 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
3787 return true;
3790 /** Transform. **/
3792 if (strided_store)
3794 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3795 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3797 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3799 /* FORNOW */
3800 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3802 /* We vectorize all the stmts of the interleaving group when we
3803 reach the last stmt in the group. */
3804 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3805 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3806 && !slp)
3808 *vec_stmt = NULL;
3809 return true;
3812 if (slp)
3814 strided_store = false;
3815 /* VEC_NUM is the number of vect stmts to be created for this
3816 group. */
3817 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3818 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3819 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3820 op = gimple_assign_rhs1 (first_stmt);
3822 else
3823 /* VEC_NUM is the number of vect stmts to be created for this
3824 group. */
3825 vec_num = group_size;
3827 else
3829 first_stmt = stmt;
3830 first_dr = dr;
3831 group_size = vec_num = 1;
3834 if (vect_print_dump_info (REPORT_DETAILS))
3835 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3837 dr_chain = VEC_alloc (tree, heap, group_size);
3838 oprnds = VEC_alloc (tree, heap, group_size);
3840 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3841 gcc_assert (alignment_support_scheme);
3842 /* Targets with store-lane instructions must not require explicit
3843 realignment. */
3844 gcc_assert (!store_lanes_p
3845 || alignment_support_scheme == dr_aligned
3846 || alignment_support_scheme == dr_unaligned_supported);
3848 if (store_lanes_p)
3849 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3850 else
3851 aggr_type = vectype;
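/* Illustrative values (not from the code above): for a store-lanes group of
   two V4SI vectors, AGGR_TYPE is the 8-element array type int[8], written in
   one go by the STORE_LANES call emitted below; otherwise AGGR_TYPE is simply
   the vector type of each individual store.  */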
3853 /* In case the vectorization factor (VF) is bigger than the number
3854 of elements that we can fit in a vectype (nunits), we have to generate
3855 more than one vector stmt - i.e., we need to "unroll" the
3856 vector stmt by a factor VF/nunits. For more details see documentation in
3857 vect_get_vec_def_for_copy_stmt. */
3859 /* In case of interleaving (non-unit strided access):
3861 S1: &base + 2 = x2
3862 S2: &base = x0
3863 S3: &base + 1 = x1
3864 S4: &base + 3 = x3
3866 We create vectorized stores starting from base address (the access of the
3867 first stmt in the chain (S2 in the above example), when the last store stmt
3868 of the chain (S4) is reached:
3870 VS1: &base = vx2
3871 VS2: &base + vec_size*1 = vx0
3872 VS3: &base + vec_size*2 = vx1
3873 VS4: &base + vec_size*3 = vx3
3875 Then permutation statements are generated:
3877 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
3878 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
3881 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3882 (the order of the data-refs in the output of vect_permute_store_chain
3883 corresponds to the order of scalar stmts in the interleaving chain - see
3884 the documentation of vect_permute_store_chain()).
3886 In case of both multiple types and interleaving, above vector stores and
3887 permutation stmts are created for every copy. The result vector stmts are
3888 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3889 STMT_VINFO_RELATED_STMT for the next copies.
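/* A hypothetical scalar source producing such a group (group_size == 2),
   for illustration only:

     for (i = 0; i < n; i++)
       {
         out[2*i]     = a[i];
         out[2*i + 1] = b[i];
       }

   Both stores belong to one interleaving chain; they are vectorized together
   when the last one is reached, and the vectorized operands are interleaved
   by VEC_PERM_EXPRs like VS5/VS6 above before being written out.  */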
3892 prev_stmt_info = NULL;
3893 for (j = 0; j < ncopies; j++)
3895 gimple new_stmt;
3896 gimple ptr_incr;
3898 if (j == 0)
3900 if (slp)
3902 /* Get vectorized arguments for SLP_NODE. */
3903 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
3904 NULL, slp_node, -1);
3906 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3908 else
3910 /* For interleaved stores we collect vectorized defs for all the
3911 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3912 used as an input to vect_permute_store_chain(), and OPRNDS as
3913 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3915 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3916 OPRNDS are of size 1. */
3917 next_stmt = first_stmt;
3918 for (i = 0; i < group_size; i++)
3920 /* Since gaps are not supported for interleaved stores,
3921 GROUP_SIZE is the exact number of stmts in the chain.
3922 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3923 there is no interleaving, GROUP_SIZE is 1, and only one
3924 iteration of the loop will be executed. */
3925 gcc_assert (next_stmt
3926 && gimple_assign_single_p (next_stmt));
3927 op = gimple_assign_rhs1 (next_stmt);
3929 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3930 NULL);
3931 VEC_quick_push(tree, dr_chain, vec_oprnd);
3932 VEC_quick_push(tree, oprnds, vec_oprnd);
3933 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3937 /* We should have caught mismatched types earlier. */
3938 gcc_assert (useless_type_conversion_p (vectype,
3939 TREE_TYPE (vec_oprnd)));
3940 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3941 NULL_TREE, &dummy, gsi,
3942 &ptr_incr, false, &inv_p);
3943 gcc_assert (bb_vinfo || !inv_p);
3945 else
3947 /* For interleaved stores we created vectorized defs for all the
3948 defs stored in OPRNDS in the previous iteration (previous copy).
3949 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3950 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3951 next copy.
3952 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3953 OPRNDS are of size 1. */
3954 for (i = 0; i < group_size; i++)
3956 op = VEC_index (tree, oprnds, i);
3957 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
3958 &def, &dt);
3959 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3960 VEC_replace(tree, dr_chain, i, vec_oprnd);
3961 VEC_replace(tree, oprnds, i, vec_oprnd);
3963 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3964 TYPE_SIZE_UNIT (aggr_type));
3967 if (store_lanes_p)
3969 tree vec_array;
3971 /* Combine all the vectors into an array. */
3972 vec_array = create_vector_array (vectype, vec_num);
3973 for (i = 0; i < vec_num; i++)
3975 vec_oprnd = VEC_index (tree, dr_chain, i);
3976 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
3979 /* Emit:
3980 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
3981 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
3982 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
3983 gimple_call_set_lhs (new_stmt, data_ref);
3984 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3985 mark_symbols_for_renaming (new_stmt);
3987 else
3989 new_stmt = NULL;
3990 if (strided_store)
3992 result_chain = VEC_alloc (tree, heap, group_size);
3993 /* Permute. */
3994 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3995 &result_chain);
3998 next_stmt = first_stmt;
3999 for (i = 0; i < vec_num; i++)
4001 struct ptr_info_def *pi;
4003 if (i > 0)
4004 /* Bump the vector pointer. */
4005 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4006 stmt, NULL_TREE);
4008 if (slp)
4009 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4010 else if (strided_store)
4011 /* For strided stores vectorized defs are interleaved in
4012 vect_permute_store_chain(). */
4013 vec_oprnd = VEC_index (tree, result_chain, i);
4015 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4016 build_int_cst (reference_alias_ptr_type
4017 (DR_REF (first_dr)), 0));
4018 pi = get_ptr_info (dataref_ptr);
4019 pi->align = TYPE_ALIGN_UNIT (vectype);
4020 if (aligned_access_p (first_dr))
4021 pi->misalign = 0;
4022 else if (DR_MISALIGNMENT (first_dr) == -1)
4024 TREE_TYPE (data_ref)
4025 = build_aligned_type (TREE_TYPE (data_ref),
4026 TYPE_ALIGN (elem_type));
4027 pi->align = TYPE_ALIGN_UNIT (elem_type);
4028 pi->misalign = 0;
4030 else
4032 TREE_TYPE (data_ref)
4033 = build_aligned_type (TREE_TYPE (data_ref),
4034 TYPE_ALIGN (elem_type));
4035 pi->misalign = DR_MISALIGNMENT (first_dr);
4038 /* Arguments are ready. Create the new vector stmt. */
4039 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4040 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4041 mark_symbols_for_renaming (new_stmt);
4043 if (slp)
4044 continue;
4046 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4047 if (!next_stmt)
4048 break;
4051 if (!slp)
4053 if (j == 0)
4054 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4055 else
4056 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4057 prev_stmt_info = vinfo_for_stmt (new_stmt);
4061 VEC_free (tree, heap, dr_chain);
4062 VEC_free (tree, heap, oprnds);
4063 if (result_chain)
4064 VEC_free (tree, heap, result_chain);
4065 if (vec_oprnds)
4066 VEC_free (tree, heap, vec_oprnds);
4068 return true;
4071 /* Given a vector type VECTYPE and permutation SEL returns
4072 the VECTOR_CST mask that implements the permutation of the
4073 vector elements. If that is impossible to do, returns NULL. */
4075 tree
4076 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4078 tree mask_elt_type, mask_type, mask_vec;
4079 int i, nunits;
4081 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4083 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4084 return NULL;
4086 mask_elt_type
4087 = lang_hooks.types.type_for_size
4088 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
4089 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4091 mask_vec = NULL;
4092 for (i = nunits - 1; i >= 0; i--)
4093 mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, sel[i]),
4094 mask_vec);
4095 mask_vec = build_vector (mask_type, mask_vec);
4097 return mask_vec;
4100 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4101 reversal of the vector elements. If that is impossible to do,
4102 returns NULL. */
4104 static tree
4105 perm_mask_for_reverse (tree vectype)
4107 int i, nunits;
4108 unsigned char *sel;
4110 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4111 sel = XALLOCAVEC (unsigned char, nunits);
4113 for (i = 0; i < nunits; ++i)
4114 sel[i] = nunits - 1 - i;
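  /* E.g. for a four-element vector SEL is {3, 2, 1, 0}; vect_gen_perm_mask
     turns this into the VECTOR_CST mask (or NULL if the target cannot
     permute with it).  */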
4116 return vect_gen_perm_mask (vectype, sel);
4119 /* Given a vector variable X and Y, that was generated for the scalar
4120 STMT, generate instructions to permute the vector elements of X and Y
4121 using permutation mask MASK_VEC, insert them at *GSI and return the
4122 permuted vector variable. */
4124 static tree
4125 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4126 gimple_stmt_iterator *gsi)
4128 tree vectype = TREE_TYPE (x);
4129 tree perm_dest, data_ref;
4130 gimple perm_stmt;
4132 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4133 data_ref = make_ssa_name (perm_dest, NULL);
4135 /* Generate the permute statement. */
4136 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4137 x, y, mask_vec);
4138 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4140 return data_ref;
4143 /* vectorizable_load.
4145 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4146 can be vectorized.
4147 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4148 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4149 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
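/* An illustrative candidate (hypothetical source, for orientation only):

     for (i = 0; i < n; i++)
       sum += a[i];

   The read of a[i] is the data-ref checked below; interleaved, negative-step
   and gather accesses are handled by the more specialised paths further
   down.  */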
4151 static bool
4152 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4153 slp_tree slp_node, slp_instance slp_node_instance)
4155 tree scalar_dest;
4156 tree vec_dest = NULL;
4157 tree data_ref = NULL;
4158 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4159 stmt_vec_info prev_stmt_info;
4160 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4161 struct loop *loop = NULL;
4162 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4163 bool nested_in_vect_loop = false;
4164 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4165 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4166 tree elem_type;
4167 tree new_temp;
4168 enum machine_mode mode;
4169 gimple new_stmt = NULL;
4170 tree dummy;
4171 enum dr_alignment_support alignment_support_scheme;
4172 tree dataref_ptr = NULL_TREE;
4173 gimple ptr_incr;
4174 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4175 int ncopies;
4176 int i, j, group_size;
4177 tree msq = NULL_TREE, lsq;
4178 tree offset = NULL_TREE;
4179 tree realignment_token = NULL_TREE;
4180 gimple phi = NULL;
4181 VEC(tree,heap) *dr_chain = NULL;
4182 bool strided_load = false;
4183 bool load_lanes_p = false;
4184 gimple first_stmt;
4185 bool inv_p;
4186 bool negative;
4187 bool compute_in_loop = false;
4188 struct loop *at_loop;
4189 int vec_num;
4190 bool slp = (slp_node != NULL);
4191 bool slp_perm = false;
4192 enum tree_code code;
4193 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4194 int vf;
4195 tree aggr_type;
4196 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4197 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4198 int gather_scale = 1;
4199 enum vect_def_type gather_dt = vect_unknown_def_type;
4201 if (loop_vinfo)
4203 loop = LOOP_VINFO_LOOP (loop_vinfo);
4204 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4205 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4207 else
4208 vf = 1;
4210 /* Multiple types in SLP are handled by creating the appropriate number of
4211 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4212 case of SLP. */
4213 if (slp || PURE_SLP_STMT (stmt_info))
4214 ncopies = 1;
4215 else
4216 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4218 gcc_assert (ncopies >= 1);
4220 /* FORNOW. This restriction should be relaxed. */
4221 if (nested_in_vect_loop && ncopies > 1)
4223 if (vect_print_dump_info (REPORT_DETAILS))
4224 fprintf (vect_dump, "multiple types in nested loop.");
4225 return false;
4228 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4229 return false;
4231 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4232 return false;
4234 /* Is vectorizable load? */
4235 if (!is_gimple_assign (stmt))
4236 return false;
4238 scalar_dest = gimple_assign_lhs (stmt);
4239 if (TREE_CODE (scalar_dest) != SSA_NAME)
4240 return false;
4242 code = gimple_assign_rhs_code (stmt);
4243 if (code != ARRAY_REF
4244 && code != INDIRECT_REF
4245 && code != COMPONENT_REF
4246 && code != IMAGPART_EXPR
4247 && code != REALPART_EXPR
4248 && code != MEM_REF
4249 && TREE_CODE_CLASS (code) != tcc_declaration)
4250 return false;
4252 if (!STMT_VINFO_DATA_REF (stmt_info))
4253 return false;
4255 negative = tree_int_cst_compare (nested_in_vect_loop
4256 ? STMT_VINFO_DR_STEP (stmt_info)
4257 : DR_STEP (dr),
4258 size_zero_node) < 0;
4259 if (negative && ncopies > 1)
4261 if (vect_print_dump_info (REPORT_DETAILS))
4262 fprintf (vect_dump, "multiple types with negative step.");
4263 return false;
4266 elem_type = TREE_TYPE (vectype);
4267 mode = TYPE_MODE (vectype);
4269 /* FORNOW. In some cases can vectorize even if data-type not supported
4270 (e.g. - data copies). */
4271 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4273 if (vect_print_dump_info (REPORT_DETAILS))
4274 fprintf (vect_dump, "Aligned load, but unsupported type.");
4275 return false;
4278 /* Check if the load is a part of an interleaving chain. */
4279 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4281 strided_load = true;
4282 /* FORNOW */
4283 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4285 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4286 if (!slp && !PURE_SLP_STMT (stmt_info))
4288 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4289 if (vect_load_lanes_supported (vectype, group_size))
4290 load_lanes_p = true;
4291 else if (!vect_strided_load_supported (vectype, group_size))
4292 return false;
4296 if (negative)
4298 gcc_assert (!strided_load && !STMT_VINFO_GATHER_P (stmt_info));
4299 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4300 if (alignment_support_scheme != dr_aligned
4301 && alignment_support_scheme != dr_unaligned_supported)
4303 if (vect_print_dump_info (REPORT_DETAILS))
4304 fprintf (vect_dump, "negative step but alignment required.");
4305 return false;
4307 if (!perm_mask_for_reverse (vectype))
4309 if (vect_print_dump_info (REPORT_DETAILS))
4310 fprintf (vect_dump, "negative step and reversing not supported.");
4311 return false;
4315 if (STMT_VINFO_GATHER_P (stmt_info))
4317 gimple def_stmt;
4318 tree def;
4319 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4320 &gather_off, &gather_scale);
4321 gcc_assert (gather_decl);
4322 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4323 &def_stmt, &def, &gather_dt,
4324 &gather_off_vectype))
4326 if (vect_print_dump_info (REPORT_DETAILS))
4327 fprintf (vect_dump, "gather index use not simple.");
4328 return false;
4332 if (!vec_stmt) /* transformation not required. */
4334 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4335 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4336 return true;
4339 if (vect_print_dump_info (REPORT_DETAILS))
4340 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4342 /** Transform. **/
4344 if (STMT_VINFO_GATHER_P (stmt_info))
4346 tree vec_oprnd0 = NULL_TREE, op;
4347 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4348 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4349 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4350 edge pe = loop_preheader_edge (loop);
4351 gimple_seq seq;
4352 basic_block new_bb;
4353 enum { NARROW, NONE, WIDEN } modifier;
4354 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4356 if (nunits == gather_off_nunits)
4357 modifier = NONE;
4358 else if (nunits == gather_off_nunits / 2)
4360 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4361 modifier = WIDEN;
4363 for (i = 0; i < gather_off_nunits; ++i)
4364 sel[i] = i | nunits;
4366 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4367 gcc_assert (perm_mask != NULL_TREE);
4369 else if (nunits == gather_off_nunits * 2)
4371 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4372 modifier = NARROW;
4374 for (i = 0; i < nunits; ++i)
4375 sel[i] = i < gather_off_nunits
4376 ? i : i + nunits - gather_off_nunits;
4378 perm_mask = vect_gen_perm_mask (vectype, sel);
4379 gcc_assert (perm_mask != NULL_TREE);
4380 ncopies *= 2;
4382 else
4383 gcc_unreachable ();
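/* A worked instance of the selectors computed above (illustrative sizes
   only): with nunits == 4 and gather_off_nunits == 8 (WIDEN), SEL becomes
   {4, 5, 6, 7, 4, 5, 6, 7}, so the odd copies index with the upper half of
   the offset vector; with nunits == 8 and gather_off_nunits == 4 (NARROW),
   NCOPIES is doubled and PERM_MASK is used further down to combine each pair
   of gather results into a single vector of VECTYPE.  */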
4385 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4386 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4387 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4388 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4389 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4390 scaletype = TREE_VALUE (arglist);
4391 gcc_checking_assert (types_compatible_p (srctype, rettype)
4392 && types_compatible_p (srctype, masktype));
4394 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4396 ptr = fold_convert (ptrtype, gather_base);
4397 if (!is_gimple_min_invariant (ptr))
4399 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4400 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4401 gcc_assert (!new_bb);
4404 /* Currently we support only unconditional gather loads,
4405 so mask should be all ones. */
4406 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4407 mask = build_int_cst (TREE_TYPE (masktype), -1);
4408 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4410 REAL_VALUE_TYPE r;
4411 long tmp[6];
4412 for (j = 0; j < 6; ++j)
4413 tmp[j] = -1;
4414 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4415 mask = build_real (TREE_TYPE (masktype), r);
4417 else
4418 gcc_unreachable ();
4419 mask = build_vector_from_val (masktype, mask);
4420 mask = vect_init_vector (stmt, mask, masktype, NULL);
4422 scale = build_int_cst (scaletype, gather_scale);
4424 prev_stmt_info = NULL;
4425 for (j = 0; j < ncopies; ++j)
4427 if (modifier == WIDEN && (j & 1))
4428 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4429 perm_mask, stmt, gsi);
4430 else if (j == 0)
4431 op = vec_oprnd0
4432 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4433 else
4434 op = vec_oprnd0
4435 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4437 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4439 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4440 == TYPE_VECTOR_SUBPARTS (idxtype));
4441 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4442 add_referenced_var (var);
4443 var = make_ssa_name (var, NULL);
4444 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4445 new_stmt
4446 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4447 op, NULL_TREE);
4448 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4449 op = var;
4452 new_stmt
4453 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4455 if (!useless_type_conversion_p (vectype, rettype))
4457 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4458 == TYPE_VECTOR_SUBPARTS (rettype));
4459 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4460 add_referenced_var (var);
4461 op = make_ssa_name (var, new_stmt);
4462 gimple_call_set_lhs (new_stmt, op);
4463 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4464 var = make_ssa_name (vec_dest, NULL);
4465 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4466 new_stmt
4467 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4468 NULL_TREE);
4470 else
4472 var = make_ssa_name (vec_dest, new_stmt);
4473 gimple_call_set_lhs (new_stmt, var);
4476 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4478 if (modifier == NARROW)
4480 if ((j & 1) == 0)
4482 prev_res = var;
4483 continue;
4485 var = permute_vec_elements (prev_res, var,
4486 perm_mask, stmt, gsi);
4487 new_stmt = SSA_NAME_DEF_STMT (var);
4490 if (prev_stmt_info == NULL)
4491 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4492 else
4493 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4494 prev_stmt_info = vinfo_for_stmt (new_stmt);
4496 return true;
4499 if (strided_load)
4501 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4502 if (slp
4503 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4504 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4505 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4507 /* Check if the chain of loads is already vectorized. */
4508 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4510 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4511 return true;
4513 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4514 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4516 /* VEC_NUM is the number of vect stmts to be created for this group. */
4517 if (slp)
4519 strided_load = false;
4520 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4521 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4522 slp_perm = true;
4524 else
4525 vec_num = group_size;
4527 else
4529 first_stmt = stmt;
4530 first_dr = dr;
4531 group_size = vec_num = 1;
4534 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4535 gcc_assert (alignment_support_scheme);
4536 /* Targets with load-lane instructions must not require explicit
4537 realignment. */
4538 gcc_assert (!load_lanes_p
4539 || alignment_support_scheme == dr_aligned
4540 || alignment_support_scheme == dr_unaligned_supported);
4542 /* In case the vectorization factor (VF) is bigger than the number
4543 of elements that we can fit in a vectype (nunits), we have to generate
4544 more than one vector stmt - i.e., we need to "unroll" the
4545 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4546 from one copy of the vector stmt to the next, in the field
4547 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4548 stages to find the correct vector defs to be used when vectorizing
4549 stmts that use the defs of the current stmt. The example below
4550 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4551 need to create 4 vectorized stmts):
4553 before vectorization:
4554 RELATED_STMT VEC_STMT
4555 S1: x = memref - -
4556 S2: z = x + 1 - -
4558 step 1: vectorize stmt S1:
4559 We first create the vector stmt VS1_0, and, as usual, record a
4560 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4561 Next, we create the vector stmt VS1_1, and record a pointer to
4562 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4563 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4564 stmts and pointers:
4565 RELATED_STMT VEC_STMT
4566 VS1_0: vx0 = memref0 VS1_1 -
4567 VS1_1: vx1 = memref1 VS1_2 -
4568 VS1_2: vx2 = memref2 VS1_3 -
4569 VS1_3: vx3 = memref3 - -
4570 S1: x = load - VS1_0
4571 S2: z = x + 1 - -
4573 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4574 information we recorded in RELATED_STMT field is used to vectorize
4575 stmt S2. */
4577 /* In case of interleaving (non-unit strided access):
4579 S1: x2 = &base + 2
4580 S2: x0 = &base
4581 S3: x1 = &base + 1
4582 S4: x3 = &base + 3
4584 Vectorized loads are created in the order of memory accesses
4585 starting from the access of the first stmt of the chain:
4587 VS1: vx0 = &base
4588 VS2: vx1 = &base + vec_size*1
4589 VS3: vx3 = &base + vec_size*2
4590 VS4: vx4 = &base + vec_size*3
4592 Then permutation statements are generated:
4594 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4595 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4598 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4599 (the order of the data-refs in the output of vect_permute_load_chain
4600 corresponds to the order of scalar stmts in the interleaving chain - see
4601 the documentation of vect_permute_load_chain()).
4602 The generation of permutation stmts and recording them in
4603 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4605 In case of both multiple types and interleaving, the vector loads and
4606 permutation stmts above are created for every copy. The result vector
4607 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4608 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4610 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4611 on a target that supports unaligned accesses (dr_unaligned_supported)
4612 we generate the following code:
4613 p = initial_addr;
4614 indx = 0;
4615 loop {
4616 p = p + indx * vectype_size;
4617 vec_dest = *(p);
4618 indx = indx + 1;
4621 Otherwise, the data reference is potentially unaligned on a target that
4622 does not support unaligned accesses (dr_explicit_realign_optimized) -
4623 then generate the following code, in which the data in each iteration is
4624 obtained by two vector loads, one from the previous iteration, and one
4625 from the current iteration:
4626 p1 = initial_addr;
4627 msq_init = *(floor(p1))
4628 p2 = initial_addr + VS - 1;
4629 realignment_token = call target_builtin;
4630 indx = 0;
4631 loop {
4632 p2 = p2 + indx * vectype_size
4633 lsq = *(floor(p2))
4634 vec_dest = realign_load (msq, lsq, realignment_token)
4635 indx = indx + 1;
4636 msq = lsq;
4637 } */
4639 /* If the misalignment remains the same throughout the execution of the
4640 loop, we can create the init_addr and permutation mask at the loop
4641 preheader. Otherwise, it needs to be created inside the loop.
4642 This can only occur when vectorizing memory accesses in the inner-loop
4643 nested within an outer-loop that is being vectorized. */
4645 if (nested_in_vect_loop
4646 && (TREE_INT_CST_LOW (DR_STEP (dr))
4647 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4649 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4650 compute_in_loop = true;
4653 if ((alignment_support_scheme == dr_explicit_realign_optimized
4654 || alignment_support_scheme == dr_explicit_realign)
4655 && !compute_in_loop)
4657 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4658 alignment_support_scheme, NULL_TREE,
4659 &at_loop);
4660 if (alignment_support_scheme == dr_explicit_realign_optimized)
4662 phi = SSA_NAME_DEF_STMT (msq);
4663 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4666 else
4667 at_loop = loop;
4669 if (negative)
4670 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4672 if (load_lanes_p)
4673 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4674 else
4675 aggr_type = vectype;
4677 prev_stmt_info = NULL;
4678 for (j = 0; j < ncopies; j++)
4680 /* 1. Create the vector or array pointer update chain. */
4681 if (j == 0)
4682 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4683 offset, &dummy, gsi,
4684 &ptr_incr, false, &inv_p);
4685 else
4686 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4687 TYPE_SIZE_UNIT (aggr_type));
4689 if (strided_load || slp_perm)
4690 dr_chain = VEC_alloc (tree, heap, vec_num);
4692 if (load_lanes_p)
4694 tree vec_array;
4696 vec_array = create_vector_array (vectype, vec_num);
4698 /* Emit:
4699 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4700 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4701 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4702 gimple_call_set_lhs (new_stmt, vec_array);
4703 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4704 mark_symbols_for_renaming (new_stmt);
4706 /* Extract each vector into an SSA_NAME. */
4707 for (i = 0; i < vec_num; i++)
4709 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4710 vec_array, i);
4711 VEC_quick_push (tree, dr_chain, new_temp);
4714 /* Record the mapping between SSA_NAMEs and statements. */
4715 vect_record_strided_load_vectors (stmt, dr_chain);
4717 else
4719 for (i = 0; i < vec_num; i++)
4721 if (i > 0)
4722 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4723 stmt, NULL_TREE);
4725 /* 2. Create the vector-load in the loop. */
4726 switch (alignment_support_scheme)
4728 case dr_aligned:
4729 case dr_unaligned_supported:
4731 struct ptr_info_def *pi;
4732 data_ref
4733 = build2 (MEM_REF, vectype, dataref_ptr,
4734 build_int_cst (reference_alias_ptr_type
4735 (DR_REF (first_dr)), 0));
4736 pi = get_ptr_info (dataref_ptr);
4737 pi->align = TYPE_ALIGN_UNIT (vectype);
4738 if (alignment_support_scheme == dr_aligned)
4740 gcc_assert (aligned_access_p (first_dr));
4741 pi->misalign = 0;
4743 else if (DR_MISALIGNMENT (first_dr) == -1)
4745 TREE_TYPE (data_ref)
4746 = build_aligned_type (TREE_TYPE (data_ref),
4747 TYPE_ALIGN (elem_type));
4748 pi->align = TYPE_ALIGN_UNIT (elem_type);
4749 pi->misalign = 0;
4751 else
4753 TREE_TYPE (data_ref)
4754 = build_aligned_type (TREE_TYPE (data_ref),
4755 TYPE_ALIGN (elem_type));
4756 pi->misalign = DR_MISALIGNMENT (first_dr);
4758 break;
4760 case dr_explicit_realign:
4762 tree ptr, bump;
4763 tree vs_minus_1;
4765 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4767 if (compute_in_loop)
4768 msq = vect_setup_realignment (first_stmt, gsi,
4769 &realignment_token,
4770 dr_explicit_realign,
4771 dataref_ptr, NULL);
4773 new_stmt = gimple_build_assign_with_ops
4774 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4775 build_int_cst
4776 (TREE_TYPE (dataref_ptr),
4777 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4778 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4779 gimple_assign_set_lhs (new_stmt, ptr);
4780 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4781 data_ref
4782 = build2 (MEM_REF, vectype, ptr,
4783 build_int_cst (reference_alias_ptr_type
4784 (DR_REF (first_dr)), 0));
4785 vec_dest = vect_create_destination_var (scalar_dest,
4786 vectype);
4787 new_stmt = gimple_build_assign (vec_dest, data_ref);
4788 new_temp = make_ssa_name (vec_dest, new_stmt);
4789 gimple_assign_set_lhs (new_stmt, new_temp);
4790 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4791 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4792 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4793 msq = new_temp;
4795 bump = size_binop (MULT_EXPR, vs_minus_1,
4796 TYPE_SIZE_UNIT (elem_type));
4797 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4798 new_stmt = gimple_build_assign_with_ops
4799 (BIT_AND_EXPR, NULL_TREE, ptr,
4800 build_int_cst
4801 (TREE_TYPE (ptr),
4802 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4803 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4804 gimple_assign_set_lhs (new_stmt, ptr);
4805 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4806 data_ref
4807 = build2 (MEM_REF, vectype, ptr,
4808 build_int_cst (reference_alias_ptr_type
4809 (DR_REF (first_dr)), 0));
4810 break;
4812 case dr_explicit_realign_optimized:
4813 new_stmt = gimple_build_assign_with_ops
4814 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4815 build_int_cst
4816 (TREE_TYPE (dataref_ptr),
4817 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4818 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4819 new_stmt);
4820 gimple_assign_set_lhs (new_stmt, new_temp);
4821 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4822 data_ref
4823 = build2 (MEM_REF, vectype, new_temp,
4824 build_int_cst (reference_alias_ptr_type
4825 (DR_REF (first_dr)), 0));
4826 break;
4827 default:
4828 gcc_unreachable ();
4830 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4831 new_stmt = gimple_build_assign (vec_dest, data_ref);
4832 new_temp = make_ssa_name (vec_dest, new_stmt);
4833 gimple_assign_set_lhs (new_stmt, new_temp);
4834 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4835 mark_symbols_for_renaming (new_stmt);
4837 /* 3. Handle explicit realignment if necessary/supported.
4838 Create in loop:
4839 vec_dest = realign_load (msq, lsq, realignment_token) */
4840 if (alignment_support_scheme == dr_explicit_realign_optimized
4841 || alignment_support_scheme == dr_explicit_realign)
4843 lsq = gimple_assign_lhs (new_stmt);
4844 if (!realignment_token)
4845 realignment_token = dataref_ptr;
4846 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4847 new_stmt
4848 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4849 vec_dest, msq, lsq,
4850 realignment_token);
4851 new_temp = make_ssa_name (vec_dest, new_stmt);
4852 gimple_assign_set_lhs (new_stmt, new_temp);
4853 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4855 if (alignment_support_scheme == dr_explicit_realign_optimized)
4857 gcc_assert (phi);
4858 if (i == vec_num - 1 && j == ncopies - 1)
4859 add_phi_arg (phi, lsq,
4860 loop_latch_edge (containing_loop),
4861 UNKNOWN_LOCATION);
4862 msq = lsq;
4866 /* 4. Handle invariant-load. */
4867 if (inv_p && !bb_vinfo)
4869 tree tem, vec_inv;
4870 gimple_stmt_iterator gsi2 = *gsi;
4871 gcc_assert (!strided_load);
4872 gsi_next (&gsi2);
4873 tem = scalar_dest;
4874 if (!useless_type_conversion_p (TREE_TYPE (vectype),
4875 TREE_TYPE (tem)))
4877 tem = fold_convert (TREE_TYPE (vectype), tem);
4878 tem = force_gimple_operand_gsi (&gsi2, tem, true,
4879 NULL_TREE, true,
4880 GSI_SAME_STMT);
4882 vec_inv = build_vector_from_val (vectype, tem);
4883 new_temp = vect_init_vector (stmt, vec_inv,
4884 vectype, &gsi2);
4885 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4888 if (negative)
4890 tree perm_mask = perm_mask_for_reverse (vectype);
4891 new_temp = permute_vec_elements (new_temp, new_temp,
4892 perm_mask, stmt, gsi);
4893 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4896 /* Collect vector loads and later create their permutation in
4897 vect_transform_strided_load (). */
4898 if (strided_load || slp_perm)
4899 VEC_quick_push (tree, dr_chain, new_temp);
4901 /* Store vector loads in the corresponding SLP_NODE. */
4902 if (slp && !slp_perm)
4903 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4904 new_stmt);
4908 if (slp && !slp_perm)
4909 continue;
4911 if (slp_perm)
4913 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4914 slp_node_instance, false))
4916 VEC_free (tree, heap, dr_chain);
4917 return false;
4920 else
4922 if (strided_load)
4924 if (!load_lanes_p)
4925 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4926 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4928 else
4930 if (j == 0)
4931 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4932 else
4933 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4934 prev_stmt_info = vinfo_for_stmt (new_stmt);
4937 if (dr_chain)
4938 VEC_free (tree, heap, dr_chain);
4941 return true;
4944 /* Function vect_is_simple_cond.
4946 Input:
4947 LOOP - the loop that is being vectorized.
4948 COND - Condition that is checked for simple use.
4950 Output:
4951 *COMP_VECTYPE - the vector type for the comparison.
4953 Returns whether a COND can be vectorized. Checks whether
4954 condition operands are supportable using vect_is_simple_use. */
4956 static bool
4957 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
4958 bb_vec_info bb_vinfo, tree *comp_vectype)
4960 tree lhs, rhs;
4961 tree def;
4962 enum vect_def_type dt;
4963 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
4965 if (!COMPARISON_CLASS_P (cond))
4966 return false;
4968 lhs = TREE_OPERAND (cond, 0);
4969 rhs = TREE_OPERAND (cond, 1);
4971 if (TREE_CODE (lhs) == SSA_NAME)
4973 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4974 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
4975 &lhs_def_stmt, &def, &dt, &vectype1))
4976 return false;
4978 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4979 && TREE_CODE (lhs) != FIXED_CST)
4980 return false;
4982 if (TREE_CODE (rhs) == SSA_NAME)
4984 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4985 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
4986 &rhs_def_stmt, &def, &dt, &vectype2))
4987 return false;
4989 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4990 && TREE_CODE (rhs) != FIXED_CST)
4991 return false;
4993 *comp_vectype = vectype1 ? vectype1 : vectype2;
4994 return true;
4997 /* vectorizable_condition.
4999 Check if STMT is conditional modify expression that can be vectorized.
5000 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5001 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5002 at GSI.
5004 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
5005 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
5006 else clause if it is 2).
5008 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
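/* A hypothetical example of the transformation performed here: for a scalar
   statement

     x = a < b ? c : d;

   the vectorized form built below is roughly

     vx = VEC_COND_EXPR <va < vb, vc, vd>;

   where va/vb come from the comparison operands and vc/vd from the then and
   else clauses (or from REDUC_DEF when used inside a reduction).  */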
5010 bool
5011 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5012 gimple *vec_stmt, tree reduc_def, int reduc_index,
5013 slp_tree slp_node)
5015 tree scalar_dest = NULL_TREE;
5016 tree vec_dest = NULL_TREE;
5017 tree cond_expr, then_clause, else_clause;
5018 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5019 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5020 tree comp_vectype = NULL_TREE;
5021 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5022 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5023 tree vec_compare, vec_cond_expr;
5024 tree new_temp;
5025 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5026 tree def;
5027 enum vect_def_type dt, dts[4];
5028 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5029 int ncopies;
5030 enum tree_code code;
5031 stmt_vec_info prev_stmt_info = NULL;
5032 int i, j;
5033 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5034 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
5035 VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
5037 if (slp_node || PURE_SLP_STMT (stmt_info))
5038 ncopies = 1;
5039 else
5040 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5042 gcc_assert (ncopies >= 1);
5043 if (reduc_index && ncopies > 1)
5044 return false; /* FORNOW */
5046 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5047 return false;
5049 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5050 return false;
5052 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5053 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5054 && reduc_def))
5055 return false;
5057 /* FORNOW: not yet supported. */
5058 if (STMT_VINFO_LIVE_P (stmt_info))
5060 if (vect_print_dump_info (REPORT_DETAILS))
5061 fprintf (vect_dump, "value used after loop.");
5062 return false;
5065 /* Is vectorizable conditional operation? */
5066 if (!is_gimple_assign (stmt))
5067 return false;
5069 code = gimple_assign_rhs_code (stmt);
5071 if (code != COND_EXPR)
5072 return false;
5074 cond_expr = gimple_assign_rhs1 (stmt);
5075 then_clause = gimple_assign_rhs2 (stmt);
5076 else_clause = gimple_assign_rhs3 (stmt);
5078 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5079 &comp_vectype)
5080 || !comp_vectype)
5081 return false;
5083 if (TREE_CODE (then_clause) == SSA_NAME)
5085 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5086 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5087 &then_def_stmt, &def, &dt))
5088 return false;
5090 else if (TREE_CODE (then_clause) != INTEGER_CST
5091 && TREE_CODE (then_clause) != REAL_CST
5092 && TREE_CODE (then_clause) != FIXED_CST)
5093 return false;
5095 if (TREE_CODE (else_clause) == SSA_NAME)
5097 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5098 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5099 &else_def_stmt, &def, &dt))
5100 return false;
5102 else if (TREE_CODE (else_clause) != INTEGER_CST
5103 && TREE_CODE (else_clause) != REAL_CST
5104 && TREE_CODE (else_clause) != FIXED_CST)
5105 return false;
5107 if (!vec_stmt)
5109 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5110 return expand_vec_cond_expr_p (vectype, comp_vectype);
5113 /* Transform. */
5115 if (!slp_node)
5117 vec_oprnds0 = VEC_alloc (tree, heap, 1);
5118 vec_oprnds1 = VEC_alloc (tree, heap, 1);
5119 vec_oprnds2 = VEC_alloc (tree, heap, 1);
5120 vec_oprnds3 = VEC_alloc (tree, heap, 1);
5123 /* Handle def. */
5124 scalar_dest = gimple_assign_lhs (stmt);
5125 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5127 /* Handle cond expr. */
5128 for (j = 0; j < ncopies; j++)
5130 gimple new_stmt = NULL;
5131 if (j == 0)
5133 if (slp_node)
5135 VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
5136 VEC (slp_void_p, heap) *vec_defs;
5138 vec_defs = VEC_alloc (slp_void_p, heap, 4);
5139 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
5140 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
5141 VEC_safe_push (tree, heap, ops, then_clause);
5142 VEC_safe_push (tree, heap, ops, else_clause);
5143 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5144 vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5145 vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5146 vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5147 vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5149 VEC_free (tree, heap, ops);
5150 VEC_free (slp_void_p, heap, vec_defs);
5152 else
5154 gimple gtemp;
5155 vec_cond_lhs =
5156 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5157 stmt, NULL);
5158 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5159 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5161 vec_cond_rhs =
5162 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5163 stmt, NULL);
5164 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5165 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5166 if (reduc_index == 1)
5167 vec_then_clause = reduc_def;
5168 else
5170 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5171 stmt, NULL);
5172 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5173 NULL, &gtemp, &def, &dts[2]);
5175 if (reduc_index == 2)
5176 vec_else_clause = reduc_def;
5177 else
5179 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5180 stmt, NULL);
5181 vect_is_simple_use (else_clause, stmt, loop_vinfo,
5182 NULL, &gtemp, &def, &dts[3]);
5186 else
5188 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5189 VEC_pop (tree, vec_oprnds0));
5190 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5191 VEC_pop (tree, vec_oprnds1));
5192 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5193 VEC_pop (tree, vec_oprnds2));
5194 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5195 VEC_pop (tree, vec_oprnds3));
5198 if (!slp_node)
5200 VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5201 VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5202 VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5203 VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
5206 /* Arguments are ready. Create the new vector stmt. */
5207 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5209 vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5210 vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5211 vec_else_clause = VEC_index (tree, vec_oprnds3, i);
5213 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5214 vec_cond_lhs, vec_cond_rhs);
5215 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5216 vec_compare, vec_then_clause, vec_else_clause);
5218 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5219 new_temp = make_ssa_name (vec_dest, new_stmt);
5220 gimple_assign_set_lhs (new_stmt, new_temp);
5221 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5222 if (slp_node)
5223 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5226 if (slp_node)
5227 continue;
5229 if (j == 0)
5230 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5231 else
5232 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5234 prev_stmt_info = vinfo_for_stmt (new_stmt);
5237 VEC_free (tree, heap, vec_oprnds0);
5238 VEC_free (tree, heap, vec_oprnds1);
5239 VEC_free (tree, heap, vec_oprnds2);
5240 VEC_free (tree, heap, vec_oprnds3);
5242 return true;
5246 /* Make sure the statement is vectorizable. */
5248 bool
5249 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5251 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5252 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5253 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5254 bool ok;
5255 tree scalar_type, vectype;
5256 gimple pattern_stmt;
5257 gimple_seq pattern_def_seq;
5259 if (vect_print_dump_info (REPORT_DETAILS))
5261 fprintf (vect_dump, "==> examining statement: ");
5262 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5265 if (gimple_has_volatile_ops (stmt))
5267 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5268 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5270 return false;
5273 /* Skip stmts that do not need to be vectorized. In loops this is expected
5274 to include:
5275 - the COND_EXPR which is the loop exit condition
5276 - any LABEL_EXPRs in the loop
5277 - computations that are used only for array indexing or loop control.
5278 In basic blocks we only analyze statements that are a part of some SLP
5279 instance, therefore, all the statements are relevant.
5281 A pattern statement needs to be analyzed instead of the original statement
5282 if the original statement is not relevant. Otherwise, we analyze both
5283 statements. */
5285 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5286 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5287 && !STMT_VINFO_LIVE_P (stmt_info))
5289 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5290 && pattern_stmt
5291 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5292 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5294 /* Analyze PATTERN_STMT instead of the original stmt. */
5295 stmt = pattern_stmt;
5296 stmt_info = vinfo_for_stmt (pattern_stmt);
5297 if (vect_print_dump_info (REPORT_DETAILS))
5299 fprintf (vect_dump, "==> examining pattern statement: ");
5300 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5303 else
5305 if (vect_print_dump_info (REPORT_DETAILS))
5306 fprintf (vect_dump, "irrelevant.");
5308 return true;
5311 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5312 && pattern_stmt
5313 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5314 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5316 /* Analyze PATTERN_STMT too. */
5317 if (vect_print_dump_info (REPORT_DETAILS))
5319 fprintf (vect_dump, "==> examining pattern statement: ");
5320 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5323 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5324 return false;
5327 if (is_pattern_stmt_p (stmt_info)
5328 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5330 gimple_stmt_iterator si;
5332 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5334 gimple pattern_def_stmt = gsi_stmt (si);
5335 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5336 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5338 /* Analyze def stmt of STMT if it's a pattern stmt. */
5339 if (vect_print_dump_info (REPORT_DETAILS))
5341 fprintf (vect_dump, "==> examining pattern def statement: ");
5342 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5345 if (!vect_analyze_stmt (pattern_def_stmt,
5346 need_to_vectorize, node))
5347 return false;
5352 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5354 case vect_internal_def:
5355 break;
5357 case vect_reduction_def:
5358 case vect_nested_cycle:
5359 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5360 || relevance == vect_used_in_outer_by_reduction
5361 || relevance == vect_unused_in_scope));
5362 break;
5364 case vect_induction_def:
5365 case vect_constant_def:
5366 case vect_external_def:
5367 case vect_unknown_def_type:
5368 default:
5369 gcc_unreachable ();
5372 if (bb_vinfo)
5374 gcc_assert (PURE_SLP_STMT (stmt_info));
5376 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5377 if (vect_print_dump_info (REPORT_DETAILS))
5379 fprintf (vect_dump, "get vectype for scalar type: ");
5380 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5383 vectype = get_vectype_for_scalar_type (scalar_type);
5384 if (!vectype)
5386 if (vect_print_dump_info (REPORT_DETAILS))
5388 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5389 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5391 return false;
5394 if (vect_print_dump_info (REPORT_DETAILS))
5396 fprintf (vect_dump, "vectype: ");
5397 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5400 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5403 if (STMT_VINFO_RELEVANT_P (stmt_info))
5405 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5406 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5407 *need_to_vectorize = true;
5410 ok = true;
5411 if (!bb_vinfo
5412 && (STMT_VINFO_RELEVANT_P (stmt_info)
5413 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5414 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5415 || vectorizable_shift (stmt, NULL, NULL, NULL)
5416 || vectorizable_operation (stmt, NULL, NULL, NULL)
5417 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5418 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5419 || vectorizable_call (stmt, NULL, NULL, NULL)
5420 || vectorizable_store (stmt, NULL, NULL, NULL)
5421 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5422 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5423 else
5425 if (bb_vinfo)
5426 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5427 || vectorizable_shift (stmt, NULL, NULL, node)
5428 || vectorizable_operation (stmt, NULL, NULL, node)
5429 || vectorizable_assignment (stmt, NULL, NULL, node)
5430 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5431 || vectorizable_call (stmt, NULL, NULL, node)
5432 || vectorizable_store (stmt, NULL, NULL, node)
5433 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5436 if (!ok)
5438 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5440 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5441 fprintf (vect_dump, "supported: ");
5442 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5445 return false;
5448 if (bb_vinfo)
5449 return true;
5451 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5452 need extra handling, except for vectorizable reductions. */
5453 if (STMT_VINFO_LIVE_P (stmt_info)
5454 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5455 ok = vectorizable_live_operation (stmt, NULL, NULL);
5457 if (!ok)
5459 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5461 fprintf (vect_dump, "not vectorized: live stmt not ");
5462 fprintf (vect_dump, "supported: ");
5463 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5466 return false;
5469 return true;
5473 /* Function vect_transform_stmt.
5475 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5477 bool
5478 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5479 bool *strided_store, slp_tree slp_node,
5480 slp_instance slp_node_instance)
5482 bool is_store = false;
5483 gimple vec_stmt = NULL;
5484 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5485 bool done;
5487 switch (STMT_VINFO_TYPE (stmt_info))
5489 case type_demotion_vec_info_type:
5490 case type_promotion_vec_info_type:
5491 case type_conversion_vec_info_type:
5492 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5493 gcc_assert (done);
5494 break;
5496 case induc_vec_info_type:
5497 gcc_assert (!slp_node);
5498 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5499 gcc_assert (done);
5500 break;
5502 case shift_vec_info_type:
5503 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5504 gcc_assert (done);
5505 break;
5507 case op_vec_info_type:
5508 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5509 gcc_assert (done);
5510 break;
5512 case assignment_vec_info_type:
5513 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5514 gcc_assert (done);
5515 break;
5517 case load_vec_info_type:
5518 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5519 slp_node_instance);
5520 gcc_assert (done);
5521 break;
5523 case store_vec_info_type:
5524 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5525 gcc_assert (done);
5526 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5528 /* In case of interleaving, the whole chain is vectorized when the
5529 last store in the chain is reached. Store stmts before the last
5530 one are skipped, and their stmt_vec_info shouldn't be freed
5531 meanwhile. */
5532 *strided_store = true;
5533 if (STMT_VINFO_VEC_STMT (stmt_info))
5534 is_store = true;
5536 else
5537 is_store = true;
5538 break;
5540 case condition_vec_info_type:
5541 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5542 gcc_assert (done);
5543 break;
5545 case call_vec_info_type:
5546 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5547 stmt = gsi_stmt (*gsi);
5548 break;
5550 case reduc_vec_info_type:
5551 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5552 gcc_assert (done);
5553 break;
5555 default:
5556 if (!STMT_VINFO_LIVE_P (stmt_info))
5558 if (vect_print_dump_info (REPORT_DETAILS))
5559 fprintf (vect_dump, "stmt not supported.");
5560 gcc_unreachable ();
5564 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5565 is being vectorized, but outside the immediately enclosing loop. */
5566 if (vec_stmt
5567 && STMT_VINFO_LOOP_VINFO (stmt_info)
5568 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5569 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5570 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5571 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5572 || STMT_VINFO_RELEVANT (stmt_info) ==
5573 vect_used_in_outer_by_reduction))
5575 struct loop *innerloop = LOOP_VINFO_LOOP (
5576 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5577 imm_use_iterator imm_iter;
5578 use_operand_p use_p;
5579 tree scalar_dest;
5580 gimple exit_phi;
5582 if (vect_print_dump_info (REPORT_DETAILS))
5583 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5585       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5586 (to be used when vectorizing outer-loop stmts that use the DEF of
5587 STMT). */
5588 if (gimple_code (stmt) == GIMPLE_PHI)
5589 scalar_dest = PHI_RESULT (stmt);
5590 else
5591 scalar_dest = gimple_assign_lhs (stmt);
5593 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5595 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5597 exit_phi = USE_STMT (use_p);
5598 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5603 /* Handle stmts whose DEF is used outside the loop-nest that is
5604 being vectorized. */
5605 if (STMT_VINFO_LIVE_P (stmt_info)
5606 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5608 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5609 gcc_assert (done);
5612 if (vec_stmt)
5613 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5615 return is_store;
5619 /* Remove a group of stores (for SLP or interleaving), free their
5620 stmt_vec_info. */
5622 void
5623 vect_remove_stores (gimple first_stmt)
5625 gimple next = first_stmt;
5626 gimple tmp;
5627 gimple_stmt_iterator next_si;
5629 while (next)
5631 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5633 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5634 if (is_pattern_stmt_p (stmt_info))
5635 next = STMT_VINFO_RELATED_STMT (stmt_info);
5636 /* Free the attached stmt_vec_info and remove the stmt. */
5637 next_si = gsi_for_stmt (next);
5638 gsi_remove (&next_si, true);
5639 free_stmt_vec_info (next);
5640 next = tmp;
5645 /* Function new_stmt_vec_info.
5647 Create and initialize a new stmt_vec_info struct for STMT. */
5649 stmt_vec_info
5650 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5651 bb_vec_info bb_vinfo)
5653 stmt_vec_info res;
5654 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5656 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5657 STMT_VINFO_STMT (res) = stmt;
5658 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5659 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5660 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5661 STMT_VINFO_LIVE_P (res) = false;
5662 STMT_VINFO_VECTYPE (res) = NULL;
5663 STMT_VINFO_VEC_STMT (res) = NULL;
5664 STMT_VINFO_VECTORIZABLE (res) = true;
5665 STMT_VINFO_IN_PATTERN_P (res) = false;
5666 STMT_VINFO_RELATED_STMT (res) = NULL;
5667 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5668 STMT_VINFO_DATA_REF (res) = NULL;
5670 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5671 STMT_VINFO_DR_OFFSET (res) = NULL;
5672 STMT_VINFO_DR_INIT (res) = NULL;
5673 STMT_VINFO_DR_STEP (res) = NULL;
5674 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5676 if (gimple_code (stmt) == GIMPLE_PHI
5677 && is_loop_header_bb_p (gimple_bb (stmt)))
5678 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5679 else
5680 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5682 STMT_VINFO_SAME_ALIGN_REFS (res) = NULL;
5683 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5684 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5685 STMT_SLP_TYPE (res) = loop_vect;
5686 GROUP_FIRST_ELEMENT (res) = NULL;
5687 GROUP_NEXT_ELEMENT (res) = NULL;
5688 GROUP_SIZE (res) = 0;
5689 GROUP_STORE_COUNT (res) = 0;
5690 GROUP_GAP (res) = 0;
5691 GROUP_SAME_DR_STMT (res) = NULL;
5692 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5694 return res;
5698 /* Create the vector that holds the stmt_vec_info structs.  */
5700 void
5701 init_stmt_vec_info_vec (void)
5703 gcc_assert (!stmt_vec_info_vec);
5704 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5708 /* Free the vector that holds the stmt_vec_info structs.  */
5710 void
5711 free_stmt_vec_info_vec (void)
5713 gcc_assert (stmt_vec_info_vec);
5714 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5718 /* Free stmt vectorization related info. */
5720 void
5721 free_stmt_vec_info (gimple stmt)
5723 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5725 if (!stmt_info)
5726 return;
5728 /* Check if this statement has a related "pattern stmt"
5729 (introduced by the vectorizer during the pattern recognition
5730      pass).  Free the pattern stmt's stmt_vec_info and the def stmts'
5731      stmt_vec_infos too.  */
5732 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5734 stmt_vec_info patt_info
5735 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5736 if (patt_info)
5738 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
5739 if (seq)
5741 gimple_stmt_iterator si;
5742 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
5743 free_stmt_vec_info (gsi_stmt (si));
5745 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
5749 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5750 set_vinfo_for_stmt (stmt, NULL);
5751 free (stmt_info);
5755 /* Function get_vectype_for_scalar_type_and_size.
5757 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5758 by the target. */
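/* Illustrative example (assumption about a typical target): with 16-byte
   vectors, SCALAR_TYPE == int (4 bytes) and SIZE == 16 give nunits == 4,
   i.e. a "vector of 4 int" type, provided the target really supports such
   a vector mode; otherwise NULL_TREE is returned.  */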
5760 static tree
5761 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5763 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5764 enum machine_mode simd_mode;
5765 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5766 int nunits;
5767 tree vectype;
5769 if (nbytes == 0)
5770 return NULL_TREE;
5772 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5773 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5774 return NULL_TREE;
5776 /* For vector types of elements whose mode precision doesn't
5777      match their type's precision we use an element type of mode
5778 precision. The vectorization routines will have to make sure
5779 they support the proper result truncation/extension.
5780 We also make sure to build vector types with INTEGER_TYPE
5781 component type only. */
5782 if (INTEGRAL_TYPE_P (scalar_type)
5783 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
5784 || TREE_CODE (scalar_type) != INTEGER_TYPE))
5785 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5786 TYPE_UNSIGNED (scalar_type));
5788 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5789 When the component mode passes the above test simply use a type
5790 corresponding to that mode. The theory is that any use that
5791 would cause problems with this will disable vectorization anyway. */
5792 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5793 && !INTEGRAL_TYPE_P (scalar_type)
5794 && !POINTER_TYPE_P (scalar_type))
5795 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5797 /* We can't build a vector type of elements with alignment bigger than
5798 their size. */
5799 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5800 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
5801 TYPE_UNSIGNED (scalar_type));
5803   /* If we fell back to using the mode, fail if there was
5804 no scalar type for it. */
5805 if (scalar_type == NULL_TREE)
5806 return NULL_TREE;
5808 /* If no size was supplied use the mode the target prefers. Otherwise
5809      look up a vector mode of the specified size.  */
5810 if (size == 0)
5811 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5812 else
5813 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5814 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5815 if (nunits <= 1)
5816 return NULL_TREE;
5818 vectype = build_vector_type (scalar_type, nunits);
5819 if (vect_print_dump_info (REPORT_DETAILS))
5821 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5822 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5825 if (!vectype)
5826 return NULL_TREE;
5828 if (vect_print_dump_info (REPORT_DETAILS))
5830 fprintf (vect_dump, "vectype: ");
5831 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5834 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5835 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5837 if (vect_print_dump_info (REPORT_DETAILS))
5838 fprintf (vect_dump, "mode not supported by target.");
5839 return NULL_TREE;
5842 return vectype;
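
/* Vector size, in bytes, used by the current vectorization attempt.  While
   it is zero, get_vectype_for_scalar_type below picks the target's
   preferred SIMD mode and latches that size here.  */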
5845 unsigned int current_vector_size;
5847 /* Function get_vectype_for_scalar_type.
5849 Returns the vector type corresponding to SCALAR_TYPE as supported
5850 by the target. */
5852 tree
5853 get_vectype_for_scalar_type (tree scalar_type)
5855 tree vectype;
5856 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5857 current_vector_size);
5858 if (vectype
5859 && current_vector_size == 0)
5860 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5861 return vectype;
5864 /* Function get_same_sized_vectype
5866 Returns a vector type corresponding to SCALAR_TYPE of size
5867 VECTOR_TYPE if supported by the target. */
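/* Illustrative example (assumption): pairing SCALAR_TYPE == float with a
   16-byte VECTOR_TYPE such as "vector of 4 int" asks for a 16-byte float
   vector, i.e. "vector of 4 float", if the target supports it.  */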
5869 tree
5870 get_same_sized_vectype (tree scalar_type, tree vector_type)
5872 return get_vectype_for_scalar_type_and_size
5873 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
5876 /* Function vect_is_simple_use.
5878 Input:
5879 LOOP_VINFO - the vect info of the loop that is being vectorized.
5880 BB_VINFO - the vect info of the basic block that is being vectorized.
5881 OPERAND - operand of STMT in the loop or bb.
5882 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5884 Returns whether a stmt with OPERAND can be vectorized.
5885 For loops, supportable operands are constants, loop invariants, and operands
5886 that are defined by the current iteration of the loop. Unsupportable
5887 operands are those that are defined by a previous iteration of the loop (as
5888 is the case in reduction/induction computations).
5889 For basic blocks, supportable operands are constants and bb invariants.
5890 For now, operands defined outside the basic block are not supported. */
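/* Illustrative example (not part of the original comment): in

     for (i = 0; i < n; i++)
       a[i] = b[i] + c;        /* 'c' defined before the loop */

   the value loaded from b[i] is a vect_internal_def (defined by the
   current iteration), 'c' is defined outside the loop and is classified
   as vect_external_def, and a literal constant operand would be
   vect_constant_def.  */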
5892 bool
5893 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
5894 bb_vec_info bb_vinfo, gimple *def_stmt,
5895 tree *def, enum vect_def_type *dt)
5897 basic_block bb;
5898 stmt_vec_info stmt_vinfo;
5899 struct loop *loop = NULL;
5901 if (loop_vinfo)
5902 loop = LOOP_VINFO_LOOP (loop_vinfo);
5904 *def_stmt = NULL;
5905 *def = NULL_TREE;
5907 if (vect_print_dump_info (REPORT_DETAILS))
5909 fprintf (vect_dump, "vect_is_simple_use: operand ");
5910 print_generic_expr (vect_dump, operand, TDF_SLIM);
5913 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5915 *dt = vect_constant_def;
5916 return true;
5919 if (is_gimple_min_invariant (operand))
5921 *def = operand;
5922 *dt = vect_external_def;
5923 return true;
5926 if (TREE_CODE (operand) == PAREN_EXPR)
5928 if (vect_print_dump_info (REPORT_DETAILS))
5929 fprintf (vect_dump, "non-associatable copy.");
5930 operand = TREE_OPERAND (operand, 0);
5933 if (TREE_CODE (operand) != SSA_NAME)
5935 if (vect_print_dump_info (REPORT_DETAILS))
5936 fprintf (vect_dump, "not ssa-name.");
5937 return false;
5940 *def_stmt = SSA_NAME_DEF_STMT (operand);
5941 if (*def_stmt == NULL)
5943 if (vect_print_dump_info (REPORT_DETAILS))
5944 fprintf (vect_dump, "no def_stmt.");
5945 return false;
5948 if (vect_print_dump_info (REPORT_DETAILS))
5950 fprintf (vect_dump, "def_stmt: ");
5951 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5954   /* An empty stmt is expected only in the case of a function argument
5955      (otherwise we expect a PHI node or a GIMPLE_ASSIGN).  */
5956 if (gimple_nop_p (*def_stmt))
5958 *def = operand;
5959 *dt = vect_external_def;
5960 return true;
5963 bb = gimple_bb (*def_stmt);
5965 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5966 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5967 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5968 *dt = vect_external_def;
5969 else
5971 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5972 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5975 if (*dt == vect_unknown_def_type
5976 || (stmt
5977 && *dt == vect_double_reduction_def
5978 && gimple_code (stmt) != GIMPLE_PHI))
5980 if (vect_print_dump_info (REPORT_DETAILS))
5981 fprintf (vect_dump, "Unsupported pattern.");
5982 return false;
5985 if (vect_print_dump_info (REPORT_DETAILS))
5986     fprintf (vect_dump, "type of def: %d.", *dt);
5988 switch (gimple_code (*def_stmt))
5990 case GIMPLE_PHI:
5991 *def = gimple_phi_result (*def_stmt);
5992 break;
5994 case GIMPLE_ASSIGN:
5995 *def = gimple_assign_lhs (*def_stmt);
5996 break;
5998 case GIMPLE_CALL:
5999 *def = gimple_call_lhs (*def_stmt);
6000 if (*def != NULL)
6001 break;
6002 /* FALLTHRU */
6003 default:
6004 if (vect_print_dump_info (REPORT_DETAILS))
6005 fprintf (vect_dump, "unsupported defining stmt: ");
6006 return false;
6009 return true;
6012 /* Function vect_is_simple_use_1.
6014    Same as vect_is_simple_use but also determines the vector operand
6015 type of OPERAND and stores it to *VECTYPE. If the definition of
6016 OPERAND is vect_uninitialized_def, vect_constant_def or
6017 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6018    is responsible for computing the best suited vector type for the
6019 scalar operand. */
6021 bool
6022 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6023 bb_vec_info bb_vinfo, gimple *def_stmt,
6024 tree *def, enum vect_def_type *dt, tree *vectype)
6026 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6027 def, dt))
6028 return false;
6030 /* Now get a vector type if the def is internal, otherwise supply
6031 NULL_TREE and leave it up to the caller to figure out a proper
6032 type for the use stmt. */
6033 if (*dt == vect_internal_def
6034 || *dt == vect_induction_def
6035 || *dt == vect_reduction_def
6036 || *dt == vect_double_reduction_def
6037 || *dt == vect_nested_cycle)
6039 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6041 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6042 && !STMT_VINFO_RELEVANT (stmt_info)
6043 && !STMT_VINFO_LIVE_P (stmt_info))
6044 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6046 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6047 gcc_assert (*vectype != NULL_TREE);
6049 else if (*dt == vect_uninitialized_def
6050 || *dt == vect_constant_def
6051 || *dt == vect_external_def)
6052 *vectype = NULL_TREE;
6053 else
6054 gcc_unreachable ();
6056 return true;
6060 /* Function supportable_widening_operation
6062 Check whether an operation represented by the code CODE is a
6063 widening operation that is supported by the target platform in
6064 vector form (i.e., when operating on arguments of type VECTYPE_IN
6065 producing a result of type VECTYPE_OUT).
6067 Widening operations we currently support are NOP (CONVERT), FLOAT
6068 and WIDEN_MULT. This function checks if these operations are supported
6069 by the target platform either directly (via vector tree-codes), or via
6070 target builtins.
6072 Output:
6073 - CODE1 and CODE2 are codes of vector operations to be used when
6074 vectorizing the operation, if available.
6075 - DECL1 and DECL2 are decls of target builtin functions to be used
6076 when vectorizing the operation, if available. In this case,
6077 CODE1 and CODE2 are CALL_EXPR.
6078 - MULTI_STEP_CVT determines the number of required intermediate steps in
6079 case of multi-step conversion (like char->short->int - in that case
6080 MULTI_STEP_CVT will be 1).
6081 - INTERM_TYPES contains the intermediate type required to perform the
6082 widening operation (short in the above example). */
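/* Illustrative example (assumption about a typical little-endian target):
   for the widening multiplication in

     short *a, *b; int *c;
     c[i] = (int) a[i] * (int) b[i];

   a target with the corresponding hi/lo widening-multiply support would
   yield CODE1 == VEC_WIDEN_MULT_LO_EXPR, CODE2 == VEC_WIDEN_MULT_HI_EXPR
   and MULTI_STEP_CVT == 0, since short -> int is a single widening step.  */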
6084 bool
6085 supportable_widening_operation (enum tree_code code, gimple stmt,
6086 tree vectype_out, tree vectype_in,
6087 tree *decl1, tree *decl2,
6088 enum tree_code *code1, enum tree_code *code2,
6089 int *multi_step_cvt,
6090 VEC (tree, heap) **interm_types)
6092 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6093 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6094 struct loop *vect_loop = NULL;
6095 bool ordered_p;
6096 enum machine_mode vec_mode;
6097 enum insn_code icode1, icode2;
6098 optab optab1, optab2;
6099 tree vectype = vectype_in;
6100 tree wide_vectype = vectype_out;
6101 enum tree_code c1, c2;
6102 int i;
6103 tree prev_type, intermediate_type;
6104 enum machine_mode intermediate_mode, prev_mode;
6105 optab optab3, optab4;
6107 *multi_step_cvt = 0;
6108 if (loop_info)
6109 vect_loop = LOOP_VINFO_LOOP (loop_info);
6111 /* The result of a vectorized widening operation usually requires two vectors
6112 (because the widened results do not fit into one vector). The generated
6113 vector results would normally be expected to be generated in the same
6114 order as in the original scalar computation, i.e. if 8 results are
6115 generated in each vector iteration, they are to be organized as follows:
6116 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
6118 However, in the special case that the result of the widening operation is
6119 used in a reduction computation only, the order doesn't matter (because
6120 when vectorizing a reduction we change the order of the computation).
6121 Some targets can take advantage of this and generate more efficient code.
6122 For example, targets like Altivec, that support widen_mult using a sequence
6123 of {mult_even,mult_odd} generate the following vectors:
6124 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
6126 When vectorizing outer-loops, we execute the inner-loop sequentially
6127 (each vectorized inner-loop iteration contributes to VF outer-loop
6128      iterations in parallel).  We therefore don't allow changing the order
6129 of the computation in the inner-loop during outer-loop vectorization. */
6131 if (vect_loop
6132 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6133 && !nested_in_vect_loop_p (vect_loop, stmt))
6134 ordered_p = false;
6135 else
6136 ordered_p = true;
6138 if (!ordered_p
6139 && code == WIDEN_MULT_EXPR
6140 && targetm.vectorize.builtin_mul_widen_even
6141 && targetm.vectorize.builtin_mul_widen_even (vectype)
6142 && targetm.vectorize.builtin_mul_widen_odd
6143 && targetm.vectorize.builtin_mul_widen_odd (vectype))
6145 if (vect_print_dump_info (REPORT_DETAILS))
6146 fprintf (vect_dump, "Unordered widening operation detected.");
6148 *code1 = *code2 = CALL_EXPR;
6149 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
6150 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
6151 return true;
6154 switch (code)
6156 case WIDEN_MULT_EXPR:
6157 c1 = VEC_WIDEN_MULT_LO_EXPR;
6158 c2 = VEC_WIDEN_MULT_HI_EXPR;
6159 break;
6161 case WIDEN_LSHIFT_EXPR:
6162 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6163 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6164 break;
6166 CASE_CONVERT:
6167 c1 = VEC_UNPACK_LO_EXPR;
6168 c2 = VEC_UNPACK_HI_EXPR;
6169 break;
6171 case FLOAT_EXPR:
6172 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6173 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6174 break;
6176 case FIX_TRUNC_EXPR:
6177 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6178 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6179 computing the operation. */
6180 return false;
6182 default:
6183 gcc_unreachable ();
6186 if (BYTES_BIG_ENDIAN)
6188 enum tree_code ctmp = c1;
6189 c1 = c2;
6190 c2 = ctmp;
6193 if (code == FIX_TRUNC_EXPR)
6195       /* The signedness is determined from the output operand.  */
6196 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6197 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6199 else
6201 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6202 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6205 if (!optab1 || !optab2)
6206 return false;
6208 vec_mode = TYPE_MODE (vectype);
6209 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6210 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6211 return false;
6213 *code1 = c1;
6214 *code2 = c2;
6216 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6217 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6218 return true;
6220 /* Check if it's a multi-step conversion that can be done using intermediate
6221 types. */
6223 prev_type = vectype;
6224 prev_mode = vec_mode;
6226 if (!CONVERT_EXPR_CODE_P (code))
6227 return false;
6229 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6230      intermediate steps in the promotion sequence.  We try
6231      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6232 not. */
6233 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6234 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6236 intermediate_mode = insn_data[icode1].operand[0].mode;
6237 intermediate_type
6238 = lang_hooks.types.type_for_mode (intermediate_mode,
6239 TYPE_UNSIGNED (prev_type));
6240 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6241 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6243 if (!optab3 || !optab4
6244 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6245 || insn_data[icode1].operand[0].mode != intermediate_mode
6246 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6247 || insn_data[icode2].operand[0].mode != intermediate_mode
6248 || ((icode1 = optab_handler (optab3, intermediate_mode))
6249 == CODE_FOR_nothing)
6250 || ((icode2 = optab_handler (optab4, intermediate_mode))
6251 == CODE_FOR_nothing))
6252 break;
6254 VEC_quick_push (tree, *interm_types, intermediate_type);
6255 (*multi_step_cvt)++;
6257 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6258 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6259 return true;
6261 prev_type = intermediate_type;
6262 prev_mode = intermediate_mode;
6265 VEC_free (tree, heap, *interm_types);
6266 return false;
6270 /* Function supportable_narrowing_operation
6272 Check whether an operation represented by the code CODE is a
6273 narrowing operation that is supported by the target platform in
6274 vector form (i.e., when operating on arguments of type VECTYPE_IN
6275 and producing a result of type VECTYPE_OUT).
6277 Narrowing operations we currently support are NOP (CONVERT) and
6278 FIX_TRUNC. This function checks if these operations are supported by
6279 the target platform directly via vector tree-codes.
6281 Output:
6282 - CODE1 is the code of a vector operation to be used when
6283 vectorizing the operation, if available.
6284 - MULTI_STEP_CVT determines the number of required intermediate steps in
6285 case of multi-step conversion (like int->short->char - in that case
6286 MULTI_STEP_CVT will be 1).
6287 - INTERM_TYPES contains the intermediate type required to perform the
6288 narrowing operation (short in the above example). */
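/* Illustrative example (assumption about a typical target): narrowing

     int *a; char *b;
     b[i] = (char) a[i];

   usually takes two VEC_PACK_TRUNC_EXPR steps, int -> short -> char, so
   CODE1 is VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT == 1 and INTERM_TYPES holds
   the "vector of short" intermediate type.  */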
6290 bool
6291 supportable_narrowing_operation (enum tree_code code,
6292 tree vectype_out, tree vectype_in,
6293 enum tree_code *code1, int *multi_step_cvt,
6294 VEC (tree, heap) **interm_types)
6296 enum machine_mode vec_mode;
6297 enum insn_code icode1;
6298 optab optab1, interm_optab;
6299 tree vectype = vectype_in;
6300 tree narrow_vectype = vectype_out;
6301 enum tree_code c1;
6302 tree intermediate_type;
6303 enum machine_mode intermediate_mode, prev_mode;
6304 int i;
6305 bool uns;
6307 *multi_step_cvt = 0;
6308 switch (code)
6310 CASE_CONVERT:
6311 c1 = VEC_PACK_TRUNC_EXPR;
6312 break;
6314 case FIX_TRUNC_EXPR:
6315 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6316 break;
6318 case FLOAT_EXPR:
6319 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6320 tree code and optabs used for computing the operation. */
6321 return false;
6323 default:
6324 gcc_unreachable ();
6327 if (code == FIX_TRUNC_EXPR)
6328     /* The signedness is determined from the output operand.  */
6329 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6330 else
6331 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6333 if (!optab1)
6334 return false;
6336 vec_mode = TYPE_MODE (vectype);
6337 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6338 return false;
6340 *code1 = c1;
6342 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6343 return true;
6345 /* Check if it's a multi-step conversion that can be done using intermediate
6346 types. */
6347 prev_mode = vec_mode;
6348 if (code == FIX_TRUNC_EXPR)
6349 uns = TYPE_UNSIGNED (vectype_out);
6350 else
6351 uns = TYPE_UNSIGNED (vectype);
6353 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6354 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6355 costly than signed. */
6356 if (code == FIX_TRUNC_EXPR && uns)
6358 enum insn_code icode2;
6360 intermediate_type
6361 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6362 interm_optab
6363 = optab_for_tree_code (c1, intermediate_type, optab_default);
6364 if (interm_optab != NULL
6365 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6366 && insn_data[icode1].operand[0].mode
6367 == insn_data[icode2].operand[0].mode)
6369 uns = false;
6370 optab1 = interm_optab;
6371 icode1 = icode2;
6375 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6376      intermediate steps in the demotion sequence.  We try
6377 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6378 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6379 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6381 intermediate_mode = insn_data[icode1].operand[0].mode;
6382 intermediate_type
6383 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6384 interm_optab
6385 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6386 optab_default);
6387 if (!interm_optab
6388 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6389 || insn_data[icode1].operand[0].mode != intermediate_mode
6390 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6391 == CODE_FOR_nothing))
6392 break;
6394 VEC_quick_push (tree, *interm_types, intermediate_type);
6395 (*multi_step_cvt)++;
6397 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6398 return true;
6400 prev_mode = intermediate_mode;
6401 optab1 = interm_optab;
6404 VEC_free (tree, heap, *interm_types);
6405 return false;