[official-gcc.git] / gcc / tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "cfglayout.h"
37 #include "expr.h"
38 #include "recog.h"
39 #include "optabs.h"
40 #include "diagnostic-core.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
45 /* Return a variable of type ELEM_TYPE[NELEMS]. */
47 static tree
48 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
50 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
51 "vect_array");
54 /* ARRAY is an array of vectors created by create_vector_array.
55 Return an SSA_NAME for the vector at index N. The reference
56 is part of the vectorization of STMT and the vector is associated
57 with scalar destination SCALAR_DEST. */
59 static tree
60 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
61 tree array, unsigned HOST_WIDE_INT n)
63 tree vect_type, vect, vect_name, array_ref;
64 gimple new_stmt;
66 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
67 vect_type = TREE_TYPE (TREE_TYPE (array));
68 vect = vect_create_destination_var (scalar_dest, vect_type);
69 array_ref = build4 (ARRAY_REF, vect_type, array,
70 build_int_cst (size_type_node, n),
71 NULL_TREE, NULL_TREE);
73 new_stmt = gimple_build_assign (vect, array_ref);
74 vect_name = make_ssa_name (vect, new_stmt);
75 gimple_assign_set_lhs (new_stmt, vect_name);
76 vect_finish_stmt_generation (stmt, new_stmt, gsi);
77 mark_symbols_for_renaming (new_stmt);
79 return vect_name;
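/* For illustration (hypothetical SSA names): for N == 2, the statement
   emitted by read_vector_array above is roughly

     vect_x_3 = vect_array[2];

   where vect_x_3 is a fresh SSA name for the vector destination created
   from SCALAR_DEST.  */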
82 /* ARRAY is an array of vectors created by create_vector_array.
83 Emit code to store SSA_NAME VECT in index N of the array.
84 The store is part of the vectorization of STMT. */
86 static void
87 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
88 tree array, unsigned HOST_WIDE_INT n)
90 tree array_ref;
91 gimple new_stmt;
93 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
94 build_int_cst (size_type_node, n),
95 NULL_TREE, NULL_TREE);
97 new_stmt = gimple_build_assign (array_ref, vect);
98 vect_finish_stmt_generation (stmt, new_stmt, gsi);
99 mark_symbols_for_renaming (new_stmt);
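/* For illustration (hypothetical names): for N == 1, write_vector_array
   emits roughly

     vect_array[1] = vx_5;

   i.e. a plain assignment of the SSA_NAME VECT into element N of ARRAY.  */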
102 /* PTR is a pointer to an array of type TYPE. Return a representation
103 of *PTR. The memory reference replaces those in FIRST_DR
104 (and its group). */
106 static tree
107 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
109 struct ptr_info_def *pi;
110 tree mem_ref, alias_ptr_type;
112 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
113 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
114 /* Arrays have the same alignment as their type. */
115 pi = get_ptr_info (ptr);
116 pi->align = TYPE_ALIGN_UNIT (type);
117 pi->misalign = 0;
118 return mem_ref;
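/* For illustration (hypothetical example): if TYPE is the array-of-vectors
   type built by create_vector_array and PTR points to it, the reference
   built above is essentially

     MEM[(TYPE *) ptr]

   with offset 0, the alias pointer type taken from FIRST_DR, and the
   pointer's alignment info set to the alignment of TYPE.  */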
121 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
123 /* Function vect_mark_relevant.
125 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
127 static void
128 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
129 enum vect_relevant relevant, bool live_p,
130 bool used_in_pattern)
132 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
133 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
134 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
135 gimple pattern_stmt;
137 if (vect_print_dump_info (REPORT_DETAILS))
138 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
140 /* If this stmt is an original stmt in a pattern, we might need to mark its
141 related pattern stmt instead of the original stmt. However, such stmts
142 may have their own uses that are not in any pattern; in such cases the
143 stmt itself should be marked. */
144 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
146 bool found = false;
147 if (!used_in_pattern)
149 imm_use_iterator imm_iter;
150 use_operand_p use_p;
151 gimple use_stmt;
152 tree lhs;
154 if (is_gimple_assign (stmt))
155 lhs = gimple_assign_lhs (stmt);
156 else
157 lhs = gimple_call_lhs (stmt);
159 /* This is a use outside the pattern; if LHS has other uses that are
160 pattern uses, we should mark the stmt itself, and not the pattern
161 stmt. */
162 if (TREE_CODE (lhs) == SSA_NAME)
163 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
165 if (is_gimple_debug (USE_STMT (use_p)))
166 continue;
167 use_stmt = USE_STMT (use_p);
169 if (vinfo_for_stmt (use_stmt)
170 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
172 found = true;
173 break;
178 if (!found)
180 /* This is the last stmt in a sequence that was detected as a
181 pattern that can potentially be vectorized. Don't mark the stmt
182 as relevant/live because it's not going to be vectorized.
183 Instead mark the pattern-stmt that replaces it. */
185 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
187 if (vect_print_dump_info (REPORT_DETAILS))
188 fprintf (vect_dump, "last stmt in pattern. don't mark"
189 " relevant/live.");
190 stmt_info = vinfo_for_stmt (pattern_stmt);
191 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
192 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
193 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
194 stmt = pattern_stmt;
198 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
199 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
200 STMT_VINFO_RELEVANT (stmt_info) = relevant;
202 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
203 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
205 if (vect_print_dump_info (REPORT_DETAILS))
206 fprintf (vect_dump, "already marked relevant/live.");
207 return;
210 VEC_safe_push (gimple, heap, *worklist, stmt);
214 /* Function vect_stmt_relevant_p.
216 Return true if STMT, in the loop represented by LOOP_VINFO, is
217 "relevant for vectorization".
219 A stmt is considered "relevant for vectorization" if:
220 - it has uses outside the loop.
221 - it has vdefs (it alters memory).
222 - it is a control stmt in the loop (other than the loop exit condition).
224 CHECKME: what other side effects would the vectorizer allow? */
226 static bool
227 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
228 enum vect_relevant *relevant, bool *live_p)
230 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
231 ssa_op_iter op_iter;
232 imm_use_iterator imm_iter;
233 use_operand_p use_p;
234 def_operand_p def_p;
236 *relevant = vect_unused_in_scope;
237 *live_p = false;
239 /* cond stmt other than loop exit cond. */
240 if (is_ctrl_stmt (stmt)
241 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
242 != loop_exit_ctrl_vec_info_type)
243 *relevant = vect_used_in_scope;
245 /* changing memory. */
246 if (gimple_code (stmt) != GIMPLE_PHI)
247 if (gimple_vdef (stmt))
249 if (vect_print_dump_info (REPORT_DETAILS))
250 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
251 *relevant = vect_used_in_scope;
254 /* uses outside the loop. */
255 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
257 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
259 basic_block bb = gimple_bb (USE_STMT (use_p));
260 if (!flow_bb_inside_loop_p (loop, bb))
262 if (vect_print_dump_info (REPORT_DETAILS))
263 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
265 if (is_gimple_debug (USE_STMT (use_p)))
266 continue;
268 /* We expect all such uses to be in the loop exit phis
269 (because of loop closed form) */
270 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
271 gcc_assert (bb == single_exit (loop)->dest);
273 *live_p = true;
278 return (*live_p || *relevant);
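/* For illustration (hypothetical loop): in

     for (i = 0; i < n; i++)
       {
         s_1 = a[i] + b[i];
         c[i] = s_1;          <-- has a vdef (alters memory): relevant
       }
     ... = s_1;               <-- use outside the loop: s_1's def is live

   the store is marked relevant because it changes memory, and the stmt
   defining s_1 gets *live_p set because s_1 is used after the loop, via
   the loop-closed-SSA exit phi.  */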
282 /* Function exist_non_indexing_operands_for_use_p
284 USE is one of the uses attached to STMT. Check if USE is
285 used in STMT for anything other than indexing an array. */
287 static bool
288 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
290 tree operand;
291 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
293 /* USE corresponds to some operand in STMT. If there is no data
294 reference in STMT, then any operand that corresponds to USE
295 is not indexing an array. */
296 if (!STMT_VINFO_DATA_REF (stmt_info))
297 return true;
299 /* STMT has a data_ref. FORNOW this means that it is of one of
300 the following forms:
301 -1- ARRAY_REF = var
302 -2- var = ARRAY_REF
303 (This should have been verified in analyze_data_refs).
305 'var' in the second case corresponds to a def, not a use,
306 so USE cannot correspond to any operands that are not used
307 for array indexing.
309 Therefore, all we need to check is if STMT falls into the
310 first case, and whether var corresponds to USE. */
312 if (!gimple_assign_copy_p (stmt))
313 return false;
314 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
315 return false;
316 operand = gimple_assign_rhs1 (stmt);
317 if (TREE_CODE (operand) != SSA_NAME)
318 return false;
320 if (operand == use)
321 return true;
323 return false;
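/* For illustration (hypothetical stmts): for a store "a[i_2] = x_1" the
   function returns true for USE == x_1 (the stored value is a real operand)
   but false for USE == i_2, which is only used to index the array.  For a
   load "x_1 = a[i_2]" the lhs is an SSA_NAME, so it returns false for all
   uses in the stmt.  */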
328 Function process_use.
330 Inputs:
331 - a USE in STMT in a loop represented by LOOP_VINFO
332 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
333 that defined USE. This is done by calling mark_relevant and passing it
334 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
335 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
336 be performed.
338 Outputs:
339 Generally, LIVE_P and RELEVANT are used to define the liveness and
340 relevance info of the DEF_STMT of this USE:
341 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
342 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
343 Exceptions:
344 - case 1: If USE is used only for address computations (e.g. array indexing),
345 which does not need to be directly vectorized, then the liveness/relevance
346 of the respective DEF_STMT is left unchanged.
347 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
348 skip DEF_STMT because it has already been processed.
349 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
350 be modified accordingly.
352 Return true if everything is as expected. Return false otherwise. */
354 static bool
355 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
356 enum vect_relevant relevant, VEC(gimple,heap) **worklist,
357 bool force)
359 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
360 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
361 stmt_vec_info dstmt_vinfo;
362 basic_block bb, def_bb;
363 tree def;
364 gimple def_stmt;
365 enum vect_def_type dt;
367 /* case 1: we are only interested in uses that need to be vectorized. Uses
368 that are used for address computation are not considered relevant. */
369 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
370 return true;
372 if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
374 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
375 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
376 return false;
379 if (!def_stmt || gimple_nop_p (def_stmt))
380 return true;
382 def_bb = gimple_bb (def_stmt);
383 if (!flow_bb_inside_loop_p (loop, def_bb))
385 if (vect_print_dump_info (REPORT_DETAILS))
386 fprintf (vect_dump, "def_stmt is out of loop.");
387 return true;
390 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
391 DEF_STMT must have already been processed, because this should be the
392 only way that STMT, which is a reduction-phi, was put in the worklist,
393 as there should be no other uses for DEF_STMT in the loop. So we just
394 check that everything is as expected, and we are done. */
395 dstmt_vinfo = vinfo_for_stmt (def_stmt);
396 bb = gimple_bb (stmt);
397 if (gimple_code (stmt) == GIMPLE_PHI
398 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
399 && gimple_code (def_stmt) != GIMPLE_PHI
400 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
401 && bb->loop_father == def_bb->loop_father)
403 if (vect_print_dump_info (REPORT_DETAILS))
404 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
405 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
406 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
407 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
408 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
409 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
410 return true;
413 /* case 3a: outer-loop stmt defining an inner-loop stmt:
414 outer-loop-header-bb:
415 d = def_stmt
416 inner-loop:
417 stmt # use (d)
418 outer-loop-tail-bb:
419 ... */
420 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
422 if (vect_print_dump_info (REPORT_DETAILS))
423 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
425 switch (relevant)
427 case vect_unused_in_scope:
428 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
429 vect_used_in_scope : vect_unused_in_scope;
430 break;
432 case vect_used_in_outer_by_reduction:
433 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
434 relevant = vect_used_by_reduction;
435 break;
437 case vect_used_in_outer:
438 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
439 relevant = vect_used_in_scope;
440 break;
442 case vect_used_in_scope:
443 break;
445 default:
446 gcc_unreachable ();
450 /* case 3b: inner-loop stmt defining an outer-loop stmt:
451 outer-loop-header-bb:
453 inner-loop:
454 d = def_stmt
455 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
456 stmt # use (d) */
457 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
459 if (vect_print_dump_info (REPORT_DETAILS))
460 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
462 switch (relevant)
464 case vect_unused_in_scope:
465 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
466 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
467 vect_used_in_outer_by_reduction : vect_unused_in_scope;
468 break;
470 case vect_used_by_reduction:
471 relevant = vect_used_in_outer_by_reduction;
472 break;
474 case vect_used_in_scope:
475 relevant = vect_used_in_outer;
476 break;
478 default:
479 gcc_unreachable ();
483 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
484 is_pattern_stmt_p (stmt_vinfo));
485 return true;
489 /* Function vect_mark_stmts_to_be_vectorized.
491 Not all stmts in the loop need to be vectorized. For example:
493 for i...
494 for j...
495 1. T0 = i + j
496 2. T1 = a[T0]
498 3. j = j + 1
500 Stmts 1 and 3 do not need to be vectorized, because loop control and
501 addressing of vectorized data-refs are handled differently.
503 This pass detects such stmts. */
505 bool
506 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
508 VEC(gimple,heap) *worklist;
509 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
510 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
511 unsigned int nbbs = loop->num_nodes;
512 gimple_stmt_iterator si;
513 gimple stmt;
514 unsigned int i;
515 stmt_vec_info stmt_vinfo;
516 basic_block bb;
517 gimple phi;
518 bool live_p;
519 enum vect_relevant relevant, tmp_relevant;
520 enum vect_def_type def_type;
522 if (vect_print_dump_info (REPORT_DETAILS))
523 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
525 worklist = VEC_alloc (gimple, heap, 64);
527 /* 1. Init worklist. */
528 for (i = 0; i < nbbs; i++)
530 bb = bbs[i];
531 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
533 phi = gsi_stmt (si);
534 if (vect_print_dump_info (REPORT_DETAILS))
536 fprintf (vect_dump, "init: phi relevant? ");
537 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
540 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
541 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
543 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
545 stmt = gsi_stmt (si);
546 if (vect_print_dump_info (REPORT_DETAILS))
548 fprintf (vect_dump, "init: stmt relevant? ");
549 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
552 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
553 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
557 /* 2. Process_worklist */
558 while (VEC_length (gimple, worklist) > 0)
560 use_operand_p use_p;
561 ssa_op_iter iter;
563 stmt = VEC_pop (gimple, worklist);
564 if (vect_print_dump_info (REPORT_DETAILS))
566 fprintf (vect_dump, "worklist: examine stmt: ");
567 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
570 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
571 (DEF_STMT) as relevant/irrelevant and live/dead according to the
572 liveness and relevance properties of STMT. */
573 stmt_vinfo = vinfo_for_stmt (stmt);
574 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
575 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
577 /* Generally, the liveness and relevance properties of STMT are
578 propagated as is to the DEF_STMTs of its USEs:
579 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
580 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
582 One exception is when STMT has been identified as defining a reduction
583 variable; in this case we set the liveness/relevance as follows:
584 live_p = false
585 relevant = vect_used_by_reduction
586 This is because we distinguish between two kinds of relevant stmts -
587 those that are used by a reduction computation, and those that are
588 (also) used by a regular computation. This allows us later on to
589 identify stmts that are used solely by a reduction, and therefore the
590 order of the results that they produce does not have to be kept. */
592 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
593 tmp_relevant = relevant;
594 switch (def_type)
596 case vect_reduction_def:
597 switch (tmp_relevant)
599 case vect_unused_in_scope:
600 relevant = vect_used_by_reduction;
601 break;
603 case vect_used_by_reduction:
604 if (gimple_code (stmt) == GIMPLE_PHI)
605 break;
606 /* fall through */
608 default:
609 if (vect_print_dump_info (REPORT_DETAILS))
610 fprintf (vect_dump, "unsupported use of reduction.");
612 VEC_free (gimple, heap, worklist);
613 return false;
616 live_p = false;
617 break;
619 case vect_nested_cycle:
620 if (tmp_relevant != vect_unused_in_scope
621 && tmp_relevant != vect_used_in_outer_by_reduction
622 && tmp_relevant != vect_used_in_outer)
624 if (vect_print_dump_info (REPORT_DETAILS))
625 fprintf (vect_dump, "unsupported use of nested cycle.");
627 VEC_free (gimple, heap, worklist);
628 return false;
631 live_p = false;
632 break;
634 case vect_double_reduction_def:
635 if (tmp_relevant != vect_unused_in_scope
636 && tmp_relevant != vect_used_by_reduction)
638 if (vect_print_dump_info (REPORT_DETAILS))
639 fprintf (vect_dump, "unsupported use of double reduction.");
641 VEC_free (gimple, heap, worklist);
642 return false;
645 live_p = false;
646 break;
648 default:
649 break;
652 if (is_pattern_stmt_p (stmt_vinfo))
654 /* Pattern statements are not inserted into the code, so
655 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
656 have to scan the RHS or function arguments instead. */
657 if (is_gimple_assign (stmt))
659 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
660 tree op = gimple_assign_rhs1 (stmt);
662 i = 1;
663 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
665 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
666 live_p, relevant, &worklist, false)
667 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
668 live_p, relevant, &worklist, false))
670 VEC_free (gimple, heap, worklist);
671 return false;
673 i = 2;
675 for (; i < gimple_num_ops (stmt); i++)
677 op = gimple_op (stmt, i);
678 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
679 &worklist, false))
681 VEC_free (gimple, heap, worklist);
682 return false;
686 else if (is_gimple_call (stmt))
688 for (i = 0; i < gimple_call_num_args (stmt); i++)
690 tree arg = gimple_call_arg (stmt, i);
691 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
692 &worklist, false))
694 VEC_free (gimple, heap, worklist);
695 return false;
700 else
701 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
703 tree op = USE_FROM_PTR (use_p);
704 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
705 &worklist, false))
707 VEC_free (gimple, heap, worklist);
708 return false;
712 if (STMT_VINFO_GATHER_P (stmt_vinfo))
714 tree off;
715 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
716 gcc_assert (decl);
717 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
718 &worklist, true))
720 VEC_free (gimple, heap, worklist);
721 return false;
724 } /* while worklist */
726 VEC_free (gimple, heap, worklist);
727 return true;
731 /* Get the cost for TYPE_OF_COST by calling the target's vectorization cost hook. */
733 static inline
734 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
736 tree dummy_type = NULL;
737 int dummy = 0;
739 return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
740 dummy_type, dummy);
744 /* Get cost for STMT. */
747 cost_for_stmt (gimple stmt)
749 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
751 switch (STMT_VINFO_TYPE (stmt_info))
753 case load_vec_info_type:
754 return vect_get_stmt_cost (scalar_load);
755 case store_vec_info_type:
756 return vect_get_stmt_cost (scalar_store);
757 case op_vec_info_type:
758 case condition_vec_info_type:
759 case assignment_vec_info_type:
760 case reduc_vec_info_type:
761 case induc_vec_info_type:
762 case type_promotion_vec_info_type:
763 case type_demotion_vec_info_type:
764 case type_conversion_vec_info_type:
765 case call_vec_info_type:
766 return vect_get_stmt_cost (scalar_stmt);
767 case undef_vec_info_type:
768 default:
769 gcc_unreachable ();
773 /* Function vect_model_simple_cost.
775 Models cost for simple operations, i.e. those that only emit ncopies of a
776 single op. Right now, this does not account for multiple insns that could
777 be generated for the single vector op. We will handle that shortly. */
779 void
780 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
781 enum vect_def_type *dt, slp_tree slp_node)
783 int i;
784 int inside_cost = 0, outside_cost = 0;
786 /* The SLP costs were already calculated during SLP tree build. */
787 if (PURE_SLP_STMT (stmt_info))
788 return;
790 inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
792 /* FORNOW: Assuming a maximum of 2 args per stmt. */
793 for (i = 0; i < 2; i++)
795 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
796 outside_cost += vect_get_stmt_cost (vector_stmt);
799 if (vect_print_dump_info (REPORT_COST))
800 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
801 "outside_cost = %d .", inside_cost, outside_cost);
803 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
804 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
805 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
809 /* Function vect_cost_strided_group_size
811 For strided load or store, return the group_size only if it is the first
812 load or store of a group, else return 1. This ensures that group size is
813 only returned once per group. */
815 static int
816 vect_cost_strided_group_size (stmt_vec_info stmt_info)
818 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
820 if (first_stmt == STMT_VINFO_STMT (stmt_info))
821 return GROUP_SIZE (stmt_info);
823 return 1;
827 /* Function vect_model_store_cost
829 Models cost for stores. In the case of strided accesses, one access
830 has the overhead of the strided access attributed to it. */
832 void
833 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
834 bool store_lanes_p, enum vect_def_type dt,
835 slp_tree slp_node)
837 int group_size;
838 unsigned int inside_cost = 0, outside_cost = 0;
839 struct data_reference *first_dr;
840 gimple first_stmt;
842 /* The SLP costs were already calculated during SLP tree build. */
843 if (PURE_SLP_STMT (stmt_info))
844 return;
846 if (dt == vect_constant_def || dt == vect_external_def)
847 outside_cost = vect_get_stmt_cost (scalar_to_vec);
849 /* Strided access? */
850 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
852 if (slp_node)
854 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
855 group_size = 1;
857 else
859 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
860 group_size = vect_cost_strided_group_size (stmt_info);
863 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
865 /* Not a strided access. */
866 else
868 group_size = 1;
869 first_dr = STMT_VINFO_DATA_REF (stmt_info);
872 /* We assume that the cost of a single store-lanes instruction is
873 equivalent to the cost of GROUP_SIZE separate stores. If a strided
874 access is instead being provided by a permute-and-store operation,
875 include the cost of the permutes. */
876 if (!store_lanes_p && group_size > 1)
878 /* Uses a high and low interleave operation for each needed permute. */
879 inside_cost = ncopies * exact_log2(group_size) * group_size
880 * vect_get_stmt_cost (vector_stmt);
882 if (vect_print_dump_info (REPORT_COST))
883 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
884 group_size);
888 /* Costs of the stores. */
889 vect_get_store_cost (first_dr, ncopies, &inside_cost);
891 if (vect_print_dump_info (REPORT_COST))
892 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
893 "outside_cost = %d .", inside_cost, outside_cost);
895 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
896 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
897 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
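/* Worked example (per-stmt costs depend on the target): for an interleaved
   store group with GROUP_SIZE == 4, NCOPIES == 1, and no store-lanes
   support, the permute cost above is 1 * exact_log2 (4) * 4 = 8
   vector_stmt operations; the cost of the stores themselves is then added
   by vect_get_store_cost.  */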
901 /* Calculate cost of DR's memory access. */
902 void
903 vect_get_store_cost (struct data_reference *dr, int ncopies,
904 unsigned int *inside_cost)
906 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
908 switch (alignment_support_scheme)
910 case dr_aligned:
912 *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
914 if (vect_print_dump_info (REPORT_COST))
915 fprintf (vect_dump, "vect_model_store_cost: aligned.");
917 break;
920 case dr_unaligned_supported:
922 gimple stmt = DR_STMT (dr);
923 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
924 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
926 /* Here, we assign an additional cost for the unaligned store. */
927 *inside_cost += ncopies
928 * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
929 vectype, DR_MISALIGNMENT (dr));
931 if (vect_print_dump_info (REPORT_COST))
932 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
933 "hardware.");
935 break;
938 default:
939 gcc_unreachable ();
944 /* Function vect_model_load_cost
946 Models cost for loads. In the case of strided accesses, the last access
947 has the overhead of the strided access attributed to it. Since unaligned
948 accesses are supported for loads, we also account for the costs of the
949 access scheme chosen. */
951 void
952 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
953 slp_tree slp_node)
955 int group_size;
956 gimple first_stmt;
957 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
958 unsigned int inside_cost = 0, outside_cost = 0;
960 /* The SLP costs were already calculated during SLP tree build. */
961 if (PURE_SLP_STMT (stmt_info))
962 return;
964 /* Strided accesses? */
965 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
966 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && first_stmt && !slp_node)
968 group_size = vect_cost_strided_group_size (stmt_info);
969 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
971 /* Not a strided access. */
972 else
974 group_size = 1;
975 first_dr = dr;
978 /* We assume that the cost of a single load-lanes instruction is
979 equivalent to the cost of GROUP_SIZE separate loads. If a strided
980 access is instead being provided by a load-and-permute operation,
981 include the cost of the permutes. */
982 if (!load_lanes_p && group_size > 1)
984 /* Uses even and odd extract operations for each needed permute. */
985 inside_cost = ncopies * exact_log2(group_size) * group_size
986 * vect_get_stmt_cost (vector_stmt);
988 if (vect_print_dump_info (REPORT_COST))
989 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
990 group_size);
993 /* The loads themselves. */
994 vect_get_load_cost (first_dr, ncopies,
995 ((!STMT_VINFO_STRIDED_ACCESS (stmt_info)) || group_size > 1
996 || slp_node),
997 &inside_cost, &outside_cost);
999 if (vect_print_dump_info (REPORT_COST))
1000 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
1001 "outside_cost = %d .", inside_cost, outside_cost);
1003 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
1004 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
1005 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
1009 /* Calculate cost of DR's memory access. */
1010 void
1011 vect_get_load_cost (struct data_reference *dr, int ncopies,
1012 bool add_realign_cost, unsigned int *inside_cost,
1013 unsigned int *outside_cost)
1015 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1017 switch (alignment_support_scheme)
1019 case dr_aligned:
1021 *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
1023 if (vect_print_dump_info (REPORT_COST))
1024 fprintf (vect_dump, "vect_model_load_cost: aligned.");
1026 break;
1028 case dr_unaligned_supported:
1030 gimple stmt = DR_STMT (dr);
1031 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1032 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1034 /* Here, we assign an additional cost for the unaligned load. */
1035 *inside_cost += ncopies
1036 * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
1037 vectype, DR_MISALIGNMENT (dr));
1038 if (vect_print_dump_info (REPORT_COST))
1039 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1040 "hardware.");
1042 break;
1044 case dr_explicit_realign:
1046 *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
1047 + vect_get_stmt_cost (vector_stmt));
1049 /* FIXME: If the misalignment remains fixed across the iterations of
1050 the containing loop, the following cost should be added to the
1051 outside costs. */
1052 if (targetm.vectorize.builtin_mask_for_load)
1053 *inside_cost += vect_get_stmt_cost (vector_stmt);
1055 break;
1057 case dr_explicit_realign_optimized:
1059 if (vect_print_dump_info (REPORT_COST))
1060 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1061 "pipelined.");
1063 /* Unaligned software pipeline has a load of an address, an initial
1064 load, and possibly a mask operation to "prime" the loop. However,
1065 if this is an access in a group of loads, which provide strided
1066 access, then the above cost should only be considered for one
1067 access in the group. Inside the loop, there is a load op
1068 and a realignment op. */
1070 if (add_realign_cost)
1072 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
1073 if (targetm.vectorize.builtin_mask_for_load)
1074 *outside_cost += vect_get_stmt_cost (vector_stmt);
1077 *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
1078 + vect_get_stmt_cost (vector_stmt));
1079 break;
1082 default:
1083 gcc_unreachable ();
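/* Worked example (assuming, for illustration, a target where vector_load
   and vector_stmt each cost 1): for the dr_explicit_realign scheme with
   NCOPIES == 2, the inside cost added is 2 * (2 * 1 + 1) = 6, plus one
   more vector_stmt if the target provides builtin_mask_for_load.  */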
1088 /* Function vect_init_vector.
1090 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1091 the vector elements of VECTOR_VAR. Place the initialization at GSI if it
1092 is not NULL. Otherwise, place the initialization at the loop preheader.
1093 Return the DEF of INIT_STMT.
1094 It will be used in the vectorization of STMT. */
1096 tree
1097 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
1098 gimple_stmt_iterator *gsi)
1100 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1101 tree new_var;
1102 gimple init_stmt;
1103 tree vec_oprnd;
1104 edge pe;
1105 tree new_temp;
1106 basic_block new_bb;
1108 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1109 add_referenced_var (new_var);
1110 init_stmt = gimple_build_assign (new_var, vector_var);
1111 new_temp = make_ssa_name (new_var, init_stmt);
1112 gimple_assign_set_lhs (init_stmt, new_temp);
1114 if (gsi)
1115 vect_finish_stmt_generation (stmt, init_stmt, gsi);
1116 else
1118 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1120 if (loop_vinfo)
1122 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1124 if (nested_in_vect_loop_p (loop, stmt))
1125 loop = loop->inner;
1127 pe = loop_preheader_edge (loop);
1128 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
1129 gcc_assert (!new_bb);
1131 else
1133 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1134 basic_block bb;
1135 gimple_stmt_iterator gsi_bb_start;
1137 gcc_assert (bb_vinfo);
1138 bb = BB_VINFO_BB (bb_vinfo);
1139 gsi_bb_start = gsi_after_labels (bb);
1140 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
1144 if (vect_print_dump_info (REPORT_DETAILS))
1146 fprintf (vect_dump, "created new init_stmt: ");
1147 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1150 vec_oprnd = gimple_assign_lhs (init_stmt);
1151 return vec_oprnd;
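/* For illustration (hypothetical example): for a constant operand 3 and a
   4-element integer vector type, the init stmt created above is roughly

     cst_1 = { 3, 3, 3, 3 };

   inserted at GSI if one was given, and otherwise on the loop preheader
   edge (or at the start of the basic block for basic-block SLP).  */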
1155 /* Function vect_get_vec_def_for_operand.
1157 OP is an operand in STMT. This function returns a (vector) def that will be
1158 used in the vectorized stmt for STMT.
1160 In the case that OP is an SSA_NAME which is defined in the loop, then
1161 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1163 In case OP is an invariant or constant, a new stmt that creates a vector def
1164 needs to be introduced. */
1166 tree
1167 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1169 tree vec_oprnd;
1170 gimple vec_stmt;
1171 gimple def_stmt;
1172 stmt_vec_info def_stmt_info = NULL;
1173 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1174 unsigned int nunits;
1175 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1176 tree vec_inv;
1177 tree vec_cst;
1178 tree t = NULL_TREE;
1179 tree def;
1180 int i;
1181 enum vect_def_type dt;
1182 bool is_simple_use;
1183 tree vector_type;
1185 if (vect_print_dump_info (REPORT_DETAILS))
1187 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1188 print_generic_expr (vect_dump, op, TDF_SLIM);
1191 is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
1192 &dt);
1193 gcc_assert (is_simple_use);
1194 if (vect_print_dump_info (REPORT_DETAILS))
1196 if (def)
1198 fprintf (vect_dump, "def = ");
1199 print_generic_expr (vect_dump, def, TDF_SLIM);
1201 if (def_stmt)
1203 fprintf (vect_dump, " def_stmt = ");
1204 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1208 switch (dt)
1210 /* Case 1: operand is a constant. */
1211 case vect_constant_def:
1213 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1214 gcc_assert (vector_type);
1215 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1217 if (scalar_def)
1218 *scalar_def = op;
1220 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1221 if (vect_print_dump_info (REPORT_DETAILS))
1222 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1224 vec_cst = build_vector_from_val (vector_type,
1225 fold_convert (TREE_TYPE (vector_type),
1226 op));
1227 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1230 /* Case 2: operand is defined outside the loop - loop invariant. */
1231 case vect_external_def:
1233 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1234 gcc_assert (vector_type);
1235 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1237 if (scalar_def)
1238 *scalar_def = def;
1240 /* Create 'vec_inv = {inv,inv,..,inv}' */
1241 if (vect_print_dump_info (REPORT_DETAILS))
1242 fprintf (vect_dump, "Create vector_inv.");
1244 for (i = nunits - 1; i >= 0; --i)
1246 t = tree_cons (NULL_TREE, def, t);
1249 /* FIXME: use build_constructor directly. */
1250 vec_inv = build_constructor_from_list (vector_type, t);
1251 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1254 /* Case 3: operand is defined inside the loop. */
1255 case vect_internal_def:
1257 if (scalar_def)
1258 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1260 /* Get the def from the vectorized stmt. */
1261 def_stmt_info = vinfo_for_stmt (def_stmt);
1263 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1264 /* Get vectorized pattern statement. */
1265 if (!vec_stmt
1266 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1267 && !STMT_VINFO_RELEVANT (def_stmt_info))
1268 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1269 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1270 gcc_assert (vec_stmt);
1271 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1272 vec_oprnd = PHI_RESULT (vec_stmt);
1273 else if (is_gimple_call (vec_stmt))
1274 vec_oprnd = gimple_call_lhs (vec_stmt);
1275 else
1276 vec_oprnd = gimple_assign_lhs (vec_stmt);
1277 return vec_oprnd;
1280 /* Case 4: operand is defined by a loop header phi - reduction */
1281 case vect_reduction_def:
1282 case vect_double_reduction_def:
1283 case vect_nested_cycle:
1285 struct loop *loop;
1287 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1288 loop = (gimple_bb (def_stmt))->loop_father;
1290 /* Get the def before the loop */
1291 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1292 return get_initial_def_for_reduction (stmt, op, scalar_def);
1295 /* Case 5: operand is defined by loop-header phi - induction. */
1296 case vect_induction_def:
1298 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1300 /* Get the def from the vectorized stmt. */
1301 def_stmt_info = vinfo_for_stmt (def_stmt);
1302 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1303 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1304 vec_oprnd = PHI_RESULT (vec_stmt);
1305 else
1306 vec_oprnd = gimple_get_lhs (vec_stmt);
1307 return vec_oprnd;
1310 default:
1311 gcc_unreachable ();
1316 /* Function vect_get_vec_def_for_stmt_copy
1318 Return a vector-def for an operand. This function is used when the
1319 vectorized stmt to be created (by the caller to this function) is a "copy"
1320 created in case the vectorized result cannot fit in one vector, and several
1321 copies of the vector-stmt are required. In this case the vector-def is
1322 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1323 of the stmt that defines VEC_OPRND.
1324 DT is the type of the vector def VEC_OPRND.
1326 Context:
1327 In case the vectorization factor (VF) is bigger than the number
1328 of elements that can fit in a vectype (nunits), we have to generate
1329 more than one vector stmt to vectorize the scalar stmt. This situation
1330 arises when there are multiple data-types operated upon in the loop; the
1331 smallest data-type determines the VF, and as a result, when vectorizing
1332 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1333 vector stmt (each computing a vector of 'nunits' results, and together
1334 computing 'VF' results in each iteration). This function is called when
1335 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1336 which VF=16 and nunits=4, so the number of copies required is 4):
1338 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1340 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1341 VS1.1: vx.1 = memref1 VS1.2
1342 VS1.2: vx.2 = memref2 VS1.3
1343 VS1.3: vx.3 = memref3
1345 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1346 VSnew.1: vz1 = vx.1 + ... VSnew.2
1347 VSnew.2: vz2 = vx.2 + ... VSnew.3
1348 VSnew.3: vz3 = vx.3 + ...
1350 The vectorization of S1 is explained in vectorizable_load.
1351 The vectorization of S2:
1352 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1353 the function 'vect_get_vec_def_for_operand' is called to
1354 get the relevant vector-def for each operand of S2. For operand x it
1355 returns the vector-def 'vx.0'.
1357 To create the remaining copies of the vector-stmt (VSnew.j), this
1358 function is called to get the relevant vector-def for each operand. It is
1359 obtained from the respective VS1.j stmt, which is recorded in the
1360 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1362 For example, to obtain the vector-def 'vx.1' in order to create the
1363 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1364 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1365 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1366 and return its def ('vx.1').
1367 Overall, to create the above sequence this function will be called 3 times:
1368 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1369 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1370 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1372 tree
1373 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1375 gimple vec_stmt_for_operand;
1376 stmt_vec_info def_stmt_info;
1378 /* Do nothing; can reuse same def. */
1379 if (dt == vect_external_def || dt == vect_constant_def )
1380 return vec_oprnd;
1382 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1383 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1384 gcc_assert (def_stmt_info);
1385 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1386 gcc_assert (vec_stmt_for_operand);
1387 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1388 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1389 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1390 else
1391 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1392 return vec_oprnd;
1396 /* Get vectorized definitions for the operands to create a copy of an original
1397 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1399 static void
1400 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1401 VEC(tree,heap) **vec_oprnds0,
1402 VEC(tree,heap) **vec_oprnds1)
1404 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1406 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1407 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1409 if (vec_oprnds1 && *vec_oprnds1)
1411 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1412 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1413 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1418 /* Get vectorized definitions for OP0 and OP1.
1419 REDUC_INDEX is the index of reduction operand in case of reduction,
1420 and -1 otherwise. */
1422 void
1423 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1424 VEC (tree, heap) **vec_oprnds0,
1425 VEC (tree, heap) **vec_oprnds1,
1426 slp_tree slp_node, int reduc_index)
1428 if (slp_node)
1430 int nops = (op1 == NULL_TREE) ? 1 : 2;
1431 VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
1432 VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
1434 VEC_quick_push (tree, ops, op0);
1435 if (op1)
1436 VEC_quick_push (tree, ops, op1);
1438 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1440 *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1441 if (op1)
1442 *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
1444 VEC_free (tree, heap, ops);
1445 VEC_free (slp_void_p, heap, vec_defs);
1447 else
1449 tree vec_oprnd;
1451 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1452 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1453 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1455 if (op1)
1457 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1458 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1459 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1465 /* Function vect_finish_stmt_generation.
1467 Insert a new stmt. */
1469 void
1470 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1471 gimple_stmt_iterator *gsi)
1473 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1474 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1475 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1477 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1479 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1481 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1482 bb_vinfo));
1484 if (vect_print_dump_info (REPORT_DETAILS))
1486 fprintf (vect_dump, "add new stmt: ");
1487 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1490 gimple_set_location (vec_stmt, gimple_location (stmt));
1493 /* Checks if CALL can be vectorized with types VECTYPE_OUT and VECTYPE_IN. Returns
1494 a function declaration if the target has a vectorized version
1495 of the function, or NULL_TREE if the function cannot be vectorized. */
1497 tree
1498 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1500 tree fndecl = gimple_call_fndecl (call);
1502 /* We only handle functions that do not read or clobber memory -- i.e.
1503 const or novops ones. */
1504 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1505 return NULL_TREE;
1507 if (!fndecl
1508 || TREE_CODE (fndecl) != FUNCTION_DECL
1509 || !DECL_BUILT_IN (fndecl))
1510 return NULL_TREE;
1512 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1513 vectype_in);
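/* For illustration (hypothetical mapping): for a call such as
   "y_1 = __builtin_sqrtf (x_2)" with a 4-element float vector type, the
   target hook may return the declaration of a builtin that computes sqrt
   on a whole vector, and NULL_TREE if no such instruction exists.  */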
1516 /* Function vectorizable_call.
1518 Check if STMT performs a function call that can be vectorized.
1519 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1520 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1521 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1523 static bool
1524 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1525 slp_tree slp_node)
1527 tree vec_dest;
1528 tree scalar_dest;
1529 tree op, type;
1530 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1531 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1532 tree vectype_out, vectype_in;
1533 int nunits_in;
1534 int nunits_out;
1535 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1536 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1537 tree fndecl, new_temp, def, rhs_type;
1538 gimple def_stmt;
1539 enum vect_def_type dt[3]
1540 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1541 gimple new_stmt = NULL;
1542 int ncopies, j;
1543 VEC(tree, heap) *vargs = NULL;
1544 enum { NARROW, NONE, WIDEN } modifier;
1545 size_t i, nargs;
1546 tree lhs;
1548 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1549 return false;
1551 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1552 return false;
1554 /* Is STMT a vectorizable call? */
1555 if (!is_gimple_call (stmt))
1556 return false;
1558 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1559 return false;
1561 if (stmt_can_throw_internal (stmt))
1562 return false;
1564 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1566 /* Process function arguments. */
1567 rhs_type = NULL_TREE;
1568 vectype_in = NULL_TREE;
1569 nargs = gimple_call_num_args (stmt);
1571 /* Bail out if the function has more than three arguments; we do not have
1572 interesting builtin functions to vectorize with more than two arguments
1573 except for fma. Calls with no arguments are not handled either. */
1574 if (nargs == 0 || nargs > 3)
1575 return false;
1577 for (i = 0; i < nargs; i++)
1579 tree opvectype;
1581 op = gimple_call_arg (stmt, i);
1583 /* We can only handle calls with arguments of the same type. */
1584 if (rhs_type
1585 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1587 if (vect_print_dump_info (REPORT_DETAILS))
1588 fprintf (vect_dump, "argument types differ.");
1589 return false;
1591 if (!rhs_type)
1592 rhs_type = TREE_TYPE (op);
1594 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
1595 &def_stmt, &def, &dt[i], &opvectype))
1597 if (vect_print_dump_info (REPORT_DETAILS))
1598 fprintf (vect_dump, "use not simple.");
1599 return false;
1602 if (!vectype_in)
1603 vectype_in = opvectype;
1604 else if (opvectype
1605 && opvectype != vectype_in)
1607 if (vect_print_dump_info (REPORT_DETAILS))
1608 fprintf (vect_dump, "argument vector types differ.");
1609 return false;
1612 /* If all arguments are external or constant defs use a vector type with
1613 the same size as the output vector type. */
1614 if (!vectype_in)
1615 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1616 if (vec_stmt)
1617 gcc_assert (vectype_in);
1618 if (!vectype_in)
1620 if (vect_print_dump_info (REPORT_DETAILS))
1622 fprintf (vect_dump, "no vectype for scalar type ");
1623 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1626 return false;
1629 /* FORNOW */
1630 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1631 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1632 if (nunits_in == nunits_out / 2)
1633 modifier = NARROW;
1634 else if (nunits_out == nunits_in)
1635 modifier = NONE;
1636 else if (nunits_out == nunits_in / 2)
1637 modifier = WIDEN;
1638 else
1639 return false;
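/* For illustration (hypothetical types): with a 4-element input vector type
   and an 8-element output vector type (nunits_in == nunits_out / 2) the
   call is a NARROW operation; equal element counts give NONE; an output
   with half as many elements as the input gives WIDEN.  */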
1641 /* For now, we only vectorize functions if a target specific builtin
1642 is available. TODO -- in some cases, it might be profitable to
1643 insert the calls for pieces of the vector, in order to be able
1644 to vectorize other operations in the loop. */
1645 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1646 if (fndecl == NULL_TREE)
1648 if (vect_print_dump_info (REPORT_DETAILS))
1649 fprintf (vect_dump, "function is not vectorizable.");
1651 return false;
1654 gcc_assert (!gimple_vuse (stmt));
1656 if (slp_node || PURE_SLP_STMT (stmt_info))
1657 ncopies = 1;
1658 else if (modifier == NARROW)
1659 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1660 else
1661 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1663 /* Sanity check: make sure that at least one copy of the vectorized stmt
1664 needs to be generated. */
1665 gcc_assert (ncopies >= 1);
1667 if (!vec_stmt) /* transformation not required. */
1669 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1670 if (vect_print_dump_info (REPORT_DETAILS))
1671 fprintf (vect_dump, "=== vectorizable_call ===");
1672 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1673 return true;
1676 /** Transform. **/
1678 if (vect_print_dump_info (REPORT_DETAILS))
1679 fprintf (vect_dump, "transform call.");
1681 /* Handle def. */
1682 scalar_dest = gimple_call_lhs (stmt);
1683 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1685 prev_stmt_info = NULL;
1686 switch (modifier)
1688 case NONE:
1689 for (j = 0; j < ncopies; ++j)
1691 /* Build argument list for the vectorized call. */
1692 if (j == 0)
1693 vargs = VEC_alloc (tree, heap, nargs);
1694 else
1695 VEC_truncate (tree, vargs, 0);
1697 if (slp_node)
1699 VEC (slp_void_p, heap) *vec_defs
1700 = VEC_alloc (slp_void_p, heap, nargs);
1701 VEC (tree, heap) *vec_oprnds0;
1703 for (i = 0; i < nargs; i++)
1704 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1705 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1706 vec_oprnds0
1707 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1709 /* Arguments are ready. Create the new vector stmt. */
1710 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
1712 size_t k;
1713 for (k = 0; k < nargs; k++)
1715 VEC (tree, heap) *vec_oprndsk
1716 = (VEC (tree, heap) *)
1717 VEC_index (slp_void_p, vec_defs, k);
1718 VEC_replace (tree, vargs, k,
1719 VEC_index (tree, vec_oprndsk, i));
1721 new_stmt = gimple_build_call_vec (fndecl, vargs);
1722 new_temp = make_ssa_name (vec_dest, new_stmt);
1723 gimple_call_set_lhs (new_stmt, new_temp);
1724 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1725 mark_symbols_for_renaming (new_stmt);
1726 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1727 new_stmt);
1730 for (i = 0; i < nargs; i++)
1732 VEC (tree, heap) *vec_oprndsi
1733 = (VEC (tree, heap) *)
1734 VEC_index (slp_void_p, vec_defs, i);
1735 VEC_free (tree, heap, vec_oprndsi);
1737 VEC_free (slp_void_p, heap, vec_defs);
1738 continue;
1741 for (i = 0; i < nargs; i++)
1743 op = gimple_call_arg (stmt, i);
1744 if (j == 0)
1745 vec_oprnd0
1746 = vect_get_vec_def_for_operand (op, stmt, NULL);
1747 else
1749 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1750 vec_oprnd0
1751 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1754 VEC_quick_push (tree, vargs, vec_oprnd0);
1757 new_stmt = gimple_build_call_vec (fndecl, vargs);
1758 new_temp = make_ssa_name (vec_dest, new_stmt);
1759 gimple_call_set_lhs (new_stmt, new_temp);
1761 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1762 mark_symbols_for_renaming (new_stmt);
1764 if (j == 0)
1765 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1766 else
1767 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1769 prev_stmt_info = vinfo_for_stmt (new_stmt);
1772 break;
1774 case NARROW:
1775 for (j = 0; j < ncopies; ++j)
1777 /* Build argument list for the vectorized call. */
1778 if (j == 0)
1779 vargs = VEC_alloc (tree, heap, nargs * 2);
1780 else
1781 VEC_truncate (tree, vargs, 0);
1783 if (slp_node)
1785 VEC (slp_void_p, heap) *vec_defs
1786 = VEC_alloc (slp_void_p, heap, nargs);
1787 VEC (tree, heap) *vec_oprnds0;
1789 for (i = 0; i < nargs; i++)
1790 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1791 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1792 vec_oprnds0
1793 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1795 /* Arguments are ready. Create the new vector stmt. */
1796 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
1797 i += 2)
1799 size_t k;
1800 VEC_truncate (tree, vargs, 0);
1801 for (k = 0; k < nargs; k++)
1803 VEC (tree, heap) *vec_oprndsk
1804 = (VEC (tree, heap) *)
1805 VEC_index (slp_void_p, vec_defs, k);
1806 VEC_quick_push (tree, vargs,
1807 VEC_index (tree, vec_oprndsk, i));
1808 VEC_quick_push (tree, vargs,
1809 VEC_index (tree, vec_oprndsk, i + 1));
1811 new_stmt = gimple_build_call_vec (fndecl, vargs);
1812 new_temp = make_ssa_name (vec_dest, new_stmt);
1813 gimple_call_set_lhs (new_stmt, new_temp);
1814 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1815 mark_symbols_for_renaming (new_stmt);
1816 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1817 new_stmt);
1820 for (i = 0; i < nargs; i++)
1822 VEC (tree, heap) *vec_oprndsi
1823 = (VEC (tree, heap) *)
1824 VEC_index (slp_void_p, vec_defs, i);
1825 VEC_free (tree, heap, vec_oprndsi);
1827 VEC_free (slp_void_p, heap, vec_defs);
1828 continue;
1831 for (i = 0; i < nargs; i++)
1833 op = gimple_call_arg (stmt, i);
1834 if (j == 0)
1836 vec_oprnd0
1837 = vect_get_vec_def_for_operand (op, stmt, NULL);
1838 vec_oprnd1
1839 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1841 else
1843 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1844 vec_oprnd0
1845 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1846 vec_oprnd1
1847 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1850 VEC_quick_push (tree, vargs, vec_oprnd0);
1851 VEC_quick_push (tree, vargs, vec_oprnd1);
1854 new_stmt = gimple_build_call_vec (fndecl, vargs);
1855 new_temp = make_ssa_name (vec_dest, new_stmt);
1856 gimple_call_set_lhs (new_stmt, new_temp);
1858 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1859 mark_symbols_for_renaming (new_stmt);
1861 if (j == 0)
1862 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1863 else
1864 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1866 prev_stmt_info = vinfo_for_stmt (new_stmt);
1869 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1871 break;
1873 case WIDEN:
1874 /* No current target implements this case. */
1875 return false;
1878 VEC_free (tree, heap, vargs);
1880 /* Update the exception handling table with the vector stmt if necessary. */
1881 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1882 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1884 /* The call in STMT might prevent it from being removed in dce.
1885 We however cannot remove it here, due to the way the ssa name
1886 it defines is mapped to the new definition. So just replace
1887 the rhs of the statement with something harmless. */
1889 if (slp_node)
1890 return true;
1892 type = TREE_TYPE (scalar_dest);
1893 if (is_pattern_stmt_p (stmt_info))
1894 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
1895 else
1896 lhs = gimple_call_lhs (stmt);
1897 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
1898 set_vinfo_for_stmt (new_stmt, stmt_info);
1899 set_vinfo_for_stmt (stmt, NULL);
1900 STMT_VINFO_STMT (stmt_info) = new_stmt;
1901 gsi_replace (gsi, new_stmt, false);
1902 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1904 return true;
1908 /* Function vect_gen_widened_results_half
1910 Create a vector stmt whose code and result variable are CODE and
1911 VEC_DEST, whose number of arguments is given by OP_TYPE, and whose
1912 arguments are VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be
1913 inserted at GSI. In the case that CODE is a CALL_EXPR, this means that
1914 a call to DECL needs to be created (DECL is a function-decl of a
1915 target builtin). STMT is the original scalar stmt that we are vectorizing. */
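/* Illustrative sketch (assumed example): to widen one V8HI operand into
   two V4SI vectors, the caller invokes this helper twice, once per half,
   with CODE set to the pair of codes returned by
   supportable_widening_operation, e.g. VEC_UNPACK_LO_EXPR and
   VEC_UNPACK_HI_EXPR:

     vect_lo_7 = [vec_unpack_lo_expr] vop0_5;
     vect_hi_8 = [vec_unpack_hi_expr] vop0_5;

   Which codes (or target builtins, in the CALL_EXPR case) are actually
   used is target dependent.  */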
1917 static gimple
1918 vect_gen_widened_results_half (enum tree_code code,
1919 tree decl,
1920 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1921 tree vec_dest, gimple_stmt_iterator *gsi,
1922 gimple stmt)
1924 gimple new_stmt;
1925 tree new_temp;
1927 /* Generate half of the widened result: */
1928 if (code == CALL_EXPR)
1930 /* Target specific support */
1931 if (op_type == binary_op)
1932 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1933 else
1934 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1935 new_temp = make_ssa_name (vec_dest, new_stmt);
1936 gimple_call_set_lhs (new_stmt, new_temp);
1938 else
1940 /* Generic support */
1941 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1942 if (op_type != binary_op)
1943 vec_oprnd1 = NULL;
1944 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1945 vec_oprnd1);
1946 new_temp = make_ssa_name (vec_dest, new_stmt);
1947 gimple_assign_set_lhs (new_stmt, new_temp);
1949 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1951 return new_stmt;
1955 /* Get vectorized definitions for loop-based vectorization. For the first
1956 operand we call vect_get_vec_def_for_operand() (with OPRND containing
1957 scalar operand), and for the rest we get a copy with
1958 vect_get_vec_def_for_stmt_copy() using the previous vector definition
1959 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
1960 The vectors are collected into VEC_OPRNDS. */
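/* Worked example (illustrative): with MULTI_STEP_CVT == 1 this routine
   pushes two vector defs and then recurses once, so four defs end up in
   VEC_OPRNDS; the first comes from vect_get_vec_def_for_operand and every
   following one is chained from its predecessor via
   vect_get_vec_def_for_stmt_copy.  This matches a two-step narrowing
   conversion that consumes four input vectors per output vector.  */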
1962 static void
1963 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
1964 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
1966 tree vec_oprnd;
1968 /* Get first vector operand. */
1969 /* All the vector operands except the very first one (which is the scalar
1970 operand) are stmt copies. */
1971 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
1972 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
1973 else
1974 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
1976 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1978 /* Get second vector operand. */
1979 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
1980 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1982 *oprnd = vec_oprnd;
1984 /* For conversion in multiple steps, continue to get operands
1985 recursively. */
1986 if (multi_step_cvt)
1987 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
1991 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
1992 For multi-step conversions store the resulting vectors and call the function
1993 recursively. */
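/* Worked example (illustrative, assuming 128-bit vectors): demoting int
   to char in two steps starts from four V4SI operands in VEC_OPRNDS; the
   first invocation packs them pairwise into two V8HI vectors (stored back
   at indexes 0 and 1), and the recursive invocation, using
   VEC_PACK_TRUNC_EXPR, packs those into the single final V16QI vector.  */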
1995 static void
1996 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
1997 int multi_step_cvt, gimple stmt,
1998 VEC (tree, heap) *vec_dsts,
1999 gimple_stmt_iterator *gsi,
2000 slp_tree slp_node, enum tree_code code,
2001 stmt_vec_info *prev_stmt_info)
2003 unsigned int i;
2004 tree vop0, vop1, new_tmp, vec_dest;
2005 gimple new_stmt;
2006 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2008 vec_dest = VEC_pop (tree, vec_dsts);
2010 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2012 /* Create demotion operation. */
2013 vop0 = VEC_index (tree, *vec_oprnds, i);
2014 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2015 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2016 new_tmp = make_ssa_name (vec_dest, new_stmt);
2017 gimple_assign_set_lhs (new_stmt, new_tmp);
2018 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2020 if (multi_step_cvt)
2021 /* Store the resulting vector for next recursive call. */
2022 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2023 else
2025 /* This is the last step of the conversion sequence. Store the
2026 vectors in SLP_NODE or in the vector info of the scalar statement
2027 (or in the STMT_VINFO_RELATED_STMT chain). */
2028 if (slp_node)
2029 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2030 else
2032 if (!*prev_stmt_info)
2033 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2034 else
2035 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2037 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2042 /* For multi-step demotion operations we first generate demotion operations
2043 from the source type to the intermediate types, and then combine the
2044 results (stored in VEC_OPRNDS) in demotion operation to the destination
2045 type. */
2046 if (multi_step_cvt)
2048 /* At each level of recursion we have half of the operands we had at the
2049 previous level. */
2050 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2051 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2052 stmt, vec_dsts, gsi, slp_node,
2053 VEC_PACK_TRUNC_EXPR,
2054 prev_stmt_info);
2057 VEC_quick_push (tree, vec_dsts, vec_dest);
2061 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2062 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2063 the resulting vectors and call the function recursively. */
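/* Worked example (illustrative): each input vector in VEC_OPRNDS0 yields
   a low half and a high half, so the operand vector doubles in length on
   every invocation; promoting two V16QI operands, for instance, produces
   four V8HI results, which replace *VEC_OPRNDS0 for the next, wider,
   step.  */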
2065 static void
2066 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2067 VEC (tree, heap) **vec_oprnds1,
2068 gimple stmt, tree vec_dest,
2069 gimple_stmt_iterator *gsi,
2070 enum tree_code code1,
2071 enum tree_code code2, tree decl1,
2072 tree decl2, int op_type)
2074 int i;
2075 tree vop0, vop1, new_tmp1, new_tmp2;
2076 gimple new_stmt1, new_stmt2;
2077 VEC (tree, heap) *vec_tmp = NULL;
2079 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2080 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2082 if (op_type == binary_op)
2083 vop1 = VEC_index (tree, *vec_oprnds1, i);
2084 else
2085 vop1 = NULL_TREE;
2087 /* Generate the two halves of promotion operation. */
2088 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2089 op_type, vec_dest, gsi, stmt);
2090 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2091 op_type, vec_dest, gsi, stmt);
2092 if (is_gimple_call (new_stmt1))
2094 new_tmp1 = gimple_call_lhs (new_stmt1);
2095 new_tmp2 = gimple_call_lhs (new_stmt2);
2097 else
2099 new_tmp1 = gimple_assign_lhs (new_stmt1);
2100 new_tmp2 = gimple_assign_lhs (new_stmt2);
2103 /* Store the results for the next step. */
2104 VEC_quick_push (tree, vec_tmp, new_tmp1);
2105 VEC_quick_push (tree, vec_tmp, new_tmp2);
2108 VEC_free (tree, heap, *vec_oprnds0);
2109 *vec_oprnds0 = vec_tmp;
2113 /* Check if STMT performs a conversion operation that can be vectorized.
2114 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2115 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2116 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
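/* An assumed example of a loop statement that reaches this routine
   (illustrative only; the vector types presume 128-bit vectors):

     void
     widen_short_to_int (int *restrict out, const short *restrict in, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         out[i] = in[i];
     }

   The NOP conversion short -> int has nunits_in == 8 (V8HI) and
   nunits_out == 4 (V4SI), so the analysis below classifies it as WIDEN;
   the reverse int -> short conversion would be NARROW, and a same-size
   conversion such as int -> float would be NONE.  */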
2118 static bool
2119 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2120 gimple *vec_stmt, slp_tree slp_node)
2122 tree vec_dest;
2123 tree scalar_dest;
2124 tree op0, op1 = NULL_TREE;
2125 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2126 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2127 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2128 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2129 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2130 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2131 tree new_temp;
2132 tree def;
2133 gimple def_stmt;
2134 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2135 gimple new_stmt = NULL;
2136 stmt_vec_info prev_stmt_info;
2137 int nunits_in;
2138 int nunits_out;
2139 tree vectype_out, vectype_in;
2140 int ncopies, i, j;
2141 tree lhs_type, rhs_type;
2142 enum { NARROW, NONE, WIDEN } modifier;
2143 VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2144 tree vop0;
2145 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2146 int multi_step_cvt = 0;
2147 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2148 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2149 int op_type;
2150 enum machine_mode rhs_mode;
2151 unsigned short fltsz;
2153 /* Is STMT a vectorizable conversion? */
2155 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2156 return false;
2158 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2159 return false;
2161 if (!is_gimple_assign (stmt))
2162 return false;
2164 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2165 return false;
2167 code = gimple_assign_rhs_code (stmt);
2168 if (!CONVERT_EXPR_CODE_P (code)
2169 && code != FIX_TRUNC_EXPR
2170 && code != FLOAT_EXPR
2171 && code != WIDEN_MULT_EXPR
2172 && code != WIDEN_LSHIFT_EXPR)
2173 return false;
2175 op_type = TREE_CODE_LENGTH (code);
2177 /* Check types of lhs and rhs. */
2178 scalar_dest = gimple_assign_lhs (stmt);
2179 lhs_type = TREE_TYPE (scalar_dest);
2180 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2182 op0 = gimple_assign_rhs1 (stmt);
2183 rhs_type = TREE_TYPE (op0);
2185 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2186 && !((INTEGRAL_TYPE_P (lhs_type)
2187 && INTEGRAL_TYPE_P (rhs_type))
2188 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2189 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2190 return false;
2192 if ((INTEGRAL_TYPE_P (lhs_type)
2193 && (TYPE_PRECISION (lhs_type)
2194 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2195 || (INTEGRAL_TYPE_P (rhs_type)
2196 && (TYPE_PRECISION (rhs_type)
2197 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2199 if (vect_print_dump_info (REPORT_DETAILS))
2200 fprintf (vect_dump,
2201 "type conversion to/from bit-precision unsupported.");
2202 return false;
2205 /* Check the operands of the operation. */
2206 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2207 &def_stmt, &def, &dt[0], &vectype_in))
2209 if (vect_print_dump_info (REPORT_DETAILS))
2210 fprintf (vect_dump, "use not simple.");
2211 return false;
2213 if (op_type == binary_op)
2215 bool ok;
2217 op1 = gimple_assign_rhs2 (stmt);
2218 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2219 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2220 OP1. */
2221 if (CONSTANT_CLASS_P (op0))
2222 ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
2223 &def_stmt, &def, &dt[1], &vectype_in);
2224 else
2225 ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
2226 &dt[1]);
2228 if (!ok)
2230 if (vect_print_dump_info (REPORT_DETAILS))
2231 fprintf (vect_dump, "use not simple.");
2232 return false;
2236 /* If op0 is an external or constant defs use a vector type of
2237 the same size as the output vector type. */
2238 if (!vectype_in)
2239 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2240 if (vec_stmt)
2241 gcc_assert (vectype_in);
2242 if (!vectype_in)
2244 if (vect_print_dump_info (REPORT_DETAILS))
2246 fprintf (vect_dump, "no vectype for scalar type ");
2247 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
2250 return false;
2253 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2254 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2255 if (nunits_in < nunits_out)
2256 modifier = NARROW;
2257 else if (nunits_out == nunits_in)
2258 modifier = NONE;
2259 else
2260 modifier = WIDEN;
2262 /* Multiple types in SLP are handled by creating the appropriate number of
2263 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2264 case of SLP. */
2265 if (slp_node || PURE_SLP_STMT (stmt_info))
2266 ncopies = 1;
2267 else if (modifier == NARROW)
2268 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2269 else
2270 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2272 /* Sanity check: make sure that at least one copy of the vectorized stmt
2273 needs to be generated. */
2274 gcc_assert (ncopies >= 1);
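/* Illustrative arithmetic (assuming 128-bit vectors): if the enclosing
   loop has VF == 16 because it also accesses char data, then a
   short -> int conversion has nunits_in == 8 and nunits_out == 4, so
   modifier == WIDEN and ncopies == 16 / 8 == 2, i.e. the widening vector
   stmts below are generated twice.  */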
2276 /* Supportable by target? */
2277 switch (modifier)
2279 case NONE:
2280 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2281 return false;
2282 if (supportable_convert_operation (code, vectype_out, vectype_in,
2283 &decl1, &code1))
2284 break;
2285 /* FALLTHRU */
2286 unsupported:
2287 if (vect_print_dump_info (REPORT_DETAILS))
2288 fprintf (vect_dump, "conversion not supported by target.");
2289 return false;
2291 case WIDEN:
2292 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2293 &decl1, &decl2, &code1, &code2,
2294 &multi_step_cvt, &interm_types))
2296 /* Binary widening operation can only be supported directly by the
2297 architecture. */
2298 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2299 break;
2302 if (code != FLOAT_EXPR
2303 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2304 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2305 goto unsupported;
2307 rhs_mode = TYPE_MODE (rhs_type);
2308 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2309 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2310 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2311 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2313 cvt_type
2314 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2315 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2316 if (cvt_type == NULL_TREE)
2317 goto unsupported;
2319 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2321 if (!supportable_convert_operation (code, vectype_out,
2322 cvt_type, &decl1, &codecvt1))
2323 goto unsupported;
2325 else if (!supportable_widening_operation (code, stmt, vectype_out,
2326 cvt_type, &decl1, &decl2,
2327 &codecvt1, &codecvt2,
2328 &multi_step_cvt,
2329 &interm_types))
2330 continue;
2331 else
2332 gcc_assert (multi_step_cvt == 0);
2334 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2335 vectype_in, NULL, NULL, &code1,
2336 &code2, &multi_step_cvt,
2337 &interm_types))
2338 break;
2341 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2342 goto unsupported;
2344 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2345 codecvt2 = ERROR_MARK;
2346 else
2348 multi_step_cvt++;
2349 VEC_safe_push (tree, heap, interm_types, cvt_type);
2350 cvt_type = NULL_TREE;
2352 break;
2354 case NARROW:
2355 gcc_assert (op_type == unary_op);
2356 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2357 &code1, &multi_step_cvt,
2358 &interm_types))
2359 break;
2361 if (code != FIX_TRUNC_EXPR
2362 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2363 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2364 goto unsupported;
2366 rhs_mode = TYPE_MODE (rhs_type);
2367 cvt_type
2368 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2369 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2370 if (cvt_type == NULL_TREE)
2371 goto unsupported;
2372 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2373 &decl1, &codecvt1))
2374 goto unsupported;
2375 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2376 &code1, &multi_step_cvt,
2377 &interm_types))
2378 break;
2379 goto unsupported;
2381 default:
2382 gcc_unreachable ();
2385 if (!vec_stmt) /* transformation not required. */
2387 if (vect_print_dump_info (REPORT_DETAILS))
2388 fprintf (vect_dump, "=== vectorizable_conversion ===");
2389 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2390 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2391 else if (modifier == NARROW)
2393 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2394 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2396 else
2398 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2399 vect_model_simple_cost (stmt_info, 2 * ncopies, dt, NULL);
2401 VEC_free (tree, heap, interm_types);
2402 return true;
2405 /** Transform. **/
2406 if (vect_print_dump_info (REPORT_DETAILS))
2407 fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);
2409 if (op_type == binary_op)
2411 if (CONSTANT_CLASS_P (op0))
2412 op0 = fold_convert (TREE_TYPE (op1), op0);
2413 else if (CONSTANT_CLASS_P (op1))
2414 op1 = fold_convert (TREE_TYPE (op0), op1);
2417 /* In case of multi-step conversion, we first generate conversion operations
2418 to the intermediate types, and then from those types to the final one.
2419 We create vector destinations for the intermediate type (TYPES) received
2420 from supportable_*_operation, and store them in the correct order
2421 for future use in vect_create_vectorized_*_stmts (). */
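/* For instance (illustrative, assuming 128-bit vectors), an int -> char
   demotion with one intermediate type pushes the final V16QI destination
   first and the intermediate V8HI destination after it, so that
   vect_create_vectorized_demotion_stmts pops the V8HI destination for
   the first packing step and the V16QI destination for the last one.  */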
2422 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2423 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2424 VEC_quick_push (tree, vec_dsts, vec_dest);
2426 if (multi_step_cvt)
2428 for (i = VEC_length (tree, interm_types) - 1;
2429 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2431 vec_dest = vect_create_destination_var (scalar_dest,
2432 intermediate_type);
2433 VEC_quick_push (tree, vec_dsts, vec_dest);
2437 if (cvt_type)
2438 vec_dest = vect_create_destination_var (scalar_dest, cvt_type);
2440 if (!slp_node)
2442 if (modifier == NONE)
2443 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2444 else if (modifier == WIDEN)
2446 vec_oprnds0 = VEC_alloc (tree, heap,
2447 (multi_step_cvt
2448 ? vect_pow2 (multi_step_cvt) : 1));
2449 if (op_type == binary_op)
2450 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2452 else
2453 vec_oprnds0 = VEC_alloc (tree, heap,
2454 2 * (multi_step_cvt
2455 ? vect_pow2 (multi_step_cvt) : 1));
2457 else if (code == WIDEN_LSHIFT_EXPR)
2458 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2460 last_oprnd = op0;
2461 prev_stmt_info = NULL;
2462 switch (modifier)
2464 case NONE:
2465 for (j = 0; j < ncopies; j++)
2467 if (j == 0)
2468 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2469 -1);
2470 else
2471 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2473 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2475 /* Arguments are ready. Create the new vector stmt. */
2476 if (code1 == CALL_EXPR)
2478 new_stmt = gimple_build_call (decl1, 1, vop0);
2479 new_temp = make_ssa_name (vec_dest, new_stmt);
2480 gimple_call_set_lhs (new_stmt, new_temp);
2482 else
2484 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2485 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2486 vop0, NULL);
2487 new_temp = make_ssa_name (vec_dest, new_stmt);
2488 gimple_assign_set_lhs (new_stmt, new_temp);
2491 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2492 if (slp_node)
2493 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2494 new_stmt);
2497 if (j == 0)
2498 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2499 else
2500 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2501 prev_stmt_info = vinfo_for_stmt (new_stmt);
2503 break;
2505 case WIDEN:
2506 /* In case the vectorization factor (VF) is bigger than the number
2507 of elements that we can fit in a vectype (nunits), we have to
2508 generate more than one vector stmt - i.e - we need to "unroll"
2509 the vector stmt by a factor VF/nunits. */
2510 for (j = 0; j < ncopies; j++)
2512 /* Handle uses. */
2513 if (j == 0)
2515 if (slp_node)
2517 if (code == WIDEN_LSHIFT_EXPR)
2519 unsigned int k;
2521 vec_oprnd1 = op1;
2522 /* Store vec_oprnd1 for every vector stmt to be created
2523 for SLP_NODE. We check during the analysis that all
2524 the shift arguments are the same. */
2525 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2526 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2528 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2529 slp_node, -1);
2531 else
2532 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2533 &vec_oprnds1, slp_node, -1);
2535 else
2537 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2538 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2539 if (op_type == binary_op)
2541 if (code == WIDEN_LSHIFT_EXPR)
2542 vec_oprnd1 = op1;
2543 else
2544 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2545 NULL);
2546 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2550 else
2552 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2553 VEC_truncate (tree, vec_oprnds0, 0);
2554 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2555 if (op_type == binary_op)
2557 if (code == WIDEN_LSHIFT_EXPR)
2558 vec_oprnd1 = op1;
2559 else
2560 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2561 vec_oprnd1);
2562 VEC_truncate (tree, vec_oprnds1, 0);
2563 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2567 /* Arguments are ready. Create the new vector stmts. */
2568 for (i = multi_step_cvt; i >= 0; i--)
2570 tree this_dest = VEC_index (tree, vec_dsts, i);
2571 enum tree_code c1 = code1, c2 = code2;
2572 if (i == 0 && codecvt2 != ERROR_MARK)
2574 c1 = codecvt1;
2575 c2 = codecvt2;
2577 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2578 &vec_oprnds1,
2579 stmt, this_dest, gsi,
2580 c1, c2, decl1, decl2,
2581 op_type);
2584 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2586 if (cvt_type)
2588 if (codecvt1 == CALL_EXPR)
2590 new_stmt = gimple_build_call (decl1, 1, vop0);
2591 new_temp = make_ssa_name (vec_dest, new_stmt);
2592 gimple_call_set_lhs (new_stmt, new_temp);
2594 else
2596 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2597 new_temp = make_ssa_name (vec_dest, NULL);
2598 new_stmt = gimple_build_assign_with_ops (codecvt1,
2599 new_temp,
2600 vop0, NULL);
2603 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2605 else
2606 new_stmt = SSA_NAME_DEF_STMT (vop0);
2608 if (slp_node)
2609 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2610 new_stmt);
2611 else
2613 if (!prev_stmt_info)
2614 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2615 else
2616 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2617 prev_stmt_info = vinfo_for_stmt (new_stmt);
2622 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2623 break;
2625 case NARROW:
2626 /* In case the vectorization factor (VF) is bigger than the number
2627 of elements that we can fit in a vectype (nunits), we have to
2628 generate more than one vector stmt - i.e - we need to "unroll"
2629 the vector stmt by a factor VF/nunits. */
2630 for (j = 0; j < ncopies; j++)
2632 /* Handle uses. */
2633 if (slp_node)
2634 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2635 slp_node, -1);
2636 else
2638 VEC_truncate (tree, vec_oprnds0, 0);
2639 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2640 vect_pow2 (multi_step_cvt) - 1);
2643 /* Arguments are ready. Create the new vector stmts. */
2644 if (cvt_type)
2645 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2647 if (codecvt1 == CALL_EXPR)
2649 new_stmt = gimple_build_call (decl1, 1, vop0);
2650 new_temp = make_ssa_name (vec_dest, new_stmt);
2651 gimple_call_set_lhs (new_stmt, new_temp);
2653 else
2655 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2656 new_temp = make_ssa_name (vec_dest, NULL);
2657 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2658 vop0, NULL);
2661 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2662 VEC_replace (tree, vec_oprnds0, i, new_temp);
2665 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2666 stmt, vec_dsts, gsi,
2667 slp_node, code1,
2668 &prev_stmt_info);
2671 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2672 break;
2675 VEC_free (tree, heap, vec_oprnds0);
2676 VEC_free (tree, heap, vec_oprnds1);
2677 VEC_free (tree, heap, vec_dsts);
2678 VEC_free (tree, heap, interm_types);
2680 return true;
2684 /* Function vectorizable_assignment.
2686 Check if STMT performs an assignment (copy) that can be vectorized.
2687 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2688 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2689 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
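/* Assumed examples of statements handled here (illustrative only): plain
   SSA copies, PAREN_EXPRs, and conversions that change neither the number
   of vector elements nor the vector size, e.g. the int -> unsigned int
   conversion

     u_5 = (unsigned int) s_4;

   which is vectorized below as a VIEW_CONVERT_EXPR applied to the
   already-computed vector operand.  */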
2691 static bool
2692 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2693 gimple *vec_stmt, slp_tree slp_node)
2695 tree vec_dest;
2696 tree scalar_dest;
2697 tree op;
2698 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2699 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2700 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2701 tree new_temp;
2702 tree def;
2703 gimple def_stmt;
2704 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2705 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2706 int ncopies;
2707 int i, j;
2708 VEC(tree,heap) *vec_oprnds = NULL;
2709 tree vop;
2710 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2711 gimple new_stmt = NULL;
2712 stmt_vec_info prev_stmt_info = NULL;
2713 enum tree_code code;
2714 tree vectype_in;
2716 /* Multiple types in SLP are handled by creating the appropriate number of
2717 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2718 case of SLP. */
2719 if (slp_node || PURE_SLP_STMT (stmt_info))
2720 ncopies = 1;
2721 else
2722 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2724 gcc_assert (ncopies >= 1);
2726 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2727 return false;
2729 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2730 return false;
2732 /* Is vectorizable assignment? */
2733 if (!is_gimple_assign (stmt))
2734 return false;
2736 scalar_dest = gimple_assign_lhs (stmt);
2737 if (TREE_CODE (scalar_dest) != SSA_NAME)
2738 return false;
2740 code = gimple_assign_rhs_code (stmt);
2741 if (gimple_assign_single_p (stmt)
2742 || code == PAREN_EXPR
2743 || CONVERT_EXPR_CODE_P (code))
2744 op = gimple_assign_rhs1 (stmt);
2745 else
2746 return false;
2748 if (code == VIEW_CONVERT_EXPR)
2749 op = TREE_OPERAND (op, 0);
2751 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
2752 &def_stmt, &def, &dt[0], &vectype_in))
2754 if (vect_print_dump_info (REPORT_DETAILS))
2755 fprintf (vect_dump, "use not simple.");
2756 return false;
2759 /* We can handle NOP_EXPR conversions that do not change the number
2760 of elements or the vector size. */
2761 if ((CONVERT_EXPR_CODE_P (code)
2762 || code == VIEW_CONVERT_EXPR)
2763 && (!vectype_in
2764 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2765 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2766 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2767 return false;
2769 /* We do not handle bit-precision changes. */
2770 if ((CONVERT_EXPR_CODE_P (code)
2771 || code == VIEW_CONVERT_EXPR)
2772 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2773 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2774 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2775 || ((TYPE_PRECISION (TREE_TYPE (op))
2776 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2777 /* But a conversion that does not change the bit-pattern is ok. */
2778 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2779 > TYPE_PRECISION (TREE_TYPE (op)))
2780 && TYPE_UNSIGNED (TREE_TYPE (op))))
2782 if (vect_print_dump_info (REPORT_DETAILS))
2783 fprintf (vect_dump, "type conversion to/from bit-precision "
2784 "unsupported.");
2785 return false;
2788 if (!vec_stmt) /* transformation not required. */
2790 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2791 if (vect_print_dump_info (REPORT_DETAILS))
2792 fprintf (vect_dump, "=== vectorizable_assignment ===");
2793 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2794 return true;
2797 /** Transform. **/
2798 if (vect_print_dump_info (REPORT_DETAILS))
2799 fprintf (vect_dump, "transform assignment.");
2801 /* Handle def. */
2802 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2804 /* Handle use. */
2805 for (j = 0; j < ncopies; j++)
2807 /* Handle uses. */
2808 if (j == 0)
2809 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2810 else
2811 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2813 /* Arguments are ready. Create the new vector stmt. */
2814 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2816 if (CONVERT_EXPR_CODE_P (code)
2817 || code == VIEW_CONVERT_EXPR)
2818 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2819 new_stmt = gimple_build_assign (vec_dest, vop);
2820 new_temp = make_ssa_name (vec_dest, new_stmt);
2821 gimple_assign_set_lhs (new_stmt, new_temp);
2822 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2823 if (slp_node)
2824 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2827 if (slp_node)
2828 continue;
2830 if (j == 0)
2831 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2832 else
2833 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2835 prev_stmt_info = vinfo_for_stmt (new_stmt);
2838 VEC_free (tree, heap, vec_oprnds);
2839 return true;
2843 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2844 either as a shift by a scalar or by a vector. */
2846 bool
2847 vect_supportable_shift (enum tree_code code, tree scalar_type)
2850 enum machine_mode vec_mode;
2851 optab optab;
2852 int icode;
2853 tree vectype;
2855 vectype = get_vectype_for_scalar_type (scalar_type);
2856 if (!vectype)
2857 return false;
2859 optab = optab_for_tree_code (code, vectype, optab_scalar);
2860 if (!optab
2861 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2863 optab = optab_for_tree_code (code, vectype, optab_vector);
2864 if (!optab
2865 || (optab_handler (optab, TYPE_MODE (vectype))
2866 == CODE_FOR_nothing))
2867 return false;
2870 vec_mode = TYPE_MODE (vectype);
2871 icode = (int) optab_handler (optab, vec_mode);
2872 if (icode == CODE_FOR_nothing)
2873 return false;
2875 return true;
2879 /* Function vectorizable_shift.
2881 Check if STMT performs a shift operation that can be vectorized.
2882 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2883 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2884 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
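/* Assumed examples (illustrative only) of the two flavours analysed below:

     a[i] = b[i] << 3;       scalar (loop-invariant) shift amount
     a[i] = b[i] << c[i];    vector (loop-varying) shift amount

   The first form can use the target's vector-by-scalar shift patterns;
   the second needs a vector-by-vector shift.  Which optab is used is
   decided further down.  */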
2886 static bool
2887 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2888 gimple *vec_stmt, slp_tree slp_node)
2890 tree vec_dest;
2891 tree scalar_dest;
2892 tree op0, op1 = NULL;
2893 tree vec_oprnd1 = NULL_TREE;
2894 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2895 tree vectype;
2896 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2897 enum tree_code code;
2898 enum machine_mode vec_mode;
2899 tree new_temp;
2900 optab optab;
2901 int icode;
2902 enum machine_mode optab_op2_mode;
2903 tree def;
2904 gimple def_stmt;
2905 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2906 gimple new_stmt = NULL;
2907 stmt_vec_info prev_stmt_info;
2908 int nunits_in;
2909 int nunits_out;
2910 tree vectype_out;
2911 tree op1_vectype;
2912 int ncopies;
2913 int j, i;
2914 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2915 tree vop0, vop1;
2916 unsigned int k;
2917 bool scalar_shift_arg = true;
2918 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2919 int vf;
2921 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2922 return false;
2924 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2925 return false;
2927 /* Is STMT a vectorizable binary/unary operation? */
2928 if (!is_gimple_assign (stmt))
2929 return false;
2931 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2932 return false;
2934 code = gimple_assign_rhs_code (stmt);
2936 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2937 || code == RROTATE_EXPR))
2938 return false;
2940 scalar_dest = gimple_assign_lhs (stmt);
2941 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2942 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
2943 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2945 if (vect_print_dump_info (REPORT_DETAILS))
2946 fprintf (vect_dump, "bit-precision shifts not supported.");
2947 return false;
2950 op0 = gimple_assign_rhs1 (stmt);
2951 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2952 &def_stmt, &def, &dt[0], &vectype))
2954 if (vect_print_dump_info (REPORT_DETAILS))
2955 fprintf (vect_dump, "use not simple.");
2956 return false;
2958 /* If op0 is an external or constant def use a vector type with
2959 the same size as the output vector type. */
2960 if (!vectype)
2961 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2962 if (vec_stmt)
2963 gcc_assert (vectype);
2964 if (!vectype)
2966 if (vect_print_dump_info (REPORT_DETAILS))
2968 fprintf (vect_dump, "no vectype for scalar type ");
2969 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2972 return false;
2975 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2976 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2977 if (nunits_out != nunits_in)
2978 return false;
2980 op1 = gimple_assign_rhs2 (stmt);
2981 if (!vect_is_simple_use_1 (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2982 &dt[1], &op1_vectype))
2984 if (vect_print_dump_info (REPORT_DETAILS))
2985 fprintf (vect_dump, "use not simple.");
2986 return false;
2989 if (loop_vinfo)
2990 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2991 else
2992 vf = 1;
2994 /* Multiple types in SLP are handled by creating the appropriate number of
2995 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2996 case of SLP. */
2997 if (slp_node || PURE_SLP_STMT (stmt_info))
2998 ncopies = 1;
2999 else
3000 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3002 gcc_assert (ncopies >= 1);
3004 /* Determine whether the shift amount is a vector or a scalar. If the
3005 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3007 if (dt[1] == vect_internal_def && !slp_node)
3008 scalar_shift_arg = false;
3009 else if (dt[1] == vect_constant_def
3010 || dt[1] == vect_external_def
3011 || dt[1] == vect_internal_def)
3013 /* In SLP, we need to check whether the shift count is the same;
3014 in loops, if it is a constant or invariant, it is always
3015 a scalar shift. */
3016 if (slp_node)
3018 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3019 gimple slpstmt;
3021 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
3022 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3023 scalar_shift_arg = false;
3026 else
3028 if (vect_print_dump_info (REPORT_DETAILS))
3029 fprintf (vect_dump, "operand mode requires invariant argument.");
3030 return false;
3033 /* Vector shifted by vector. */
3034 if (!scalar_shift_arg)
3036 optab = optab_for_tree_code (code, vectype, optab_vector);
3037 if (vect_print_dump_info (REPORT_DETAILS))
3038 fprintf (vect_dump, "vector/vector shift/rotate found.");
3039 if (!op1_vectype)
3040 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3041 if (op1_vectype == NULL_TREE
3042 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3044 if (vect_print_dump_info (REPORT_DETAILS))
3045 fprintf (vect_dump, "unusable type for last operand in"
3046 " vector/vector shift/rotate.");
3047 return false;
3050 /* See if the machine has a vector shifted by scalar insn and if not
3051 then see if it has a vector shifted by vector insn. */
3052 else
3054 optab = optab_for_tree_code (code, vectype, optab_scalar);
3055 if (optab
3056 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3058 if (vect_print_dump_info (REPORT_DETAILS))
3059 fprintf (vect_dump, "vector/scalar shift/rotate found.");
3061 else
3063 optab = optab_for_tree_code (code, vectype, optab_vector);
3064 if (optab
3065 && (optab_handler (optab, TYPE_MODE (vectype))
3066 != CODE_FOR_nothing))
3068 scalar_shift_arg = false;
3070 if (vect_print_dump_info (REPORT_DETAILS))
3071 fprintf (vect_dump, "vector/vector shift/rotate found.");
3073 /* Unlike the other binary operators, the rhs of shifts/rotates is
3074 an int rather than the same type as the lhs, so make sure the
3075 scalar is of the right type if we are dealing with vectors of
3076 long long/long/short/char. */
3077 if (dt[1] == vect_constant_def)
3078 op1 = fold_convert (TREE_TYPE (vectype), op1);
3079 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3080 TREE_TYPE (op1)))
3082 if (slp_node
3083 && TYPE_MODE (TREE_TYPE (vectype))
3084 != TYPE_MODE (TREE_TYPE (op1)))
3086 if (vect_print_dump_info (REPORT_DETAILS))
3087 fprintf (vect_dump, "unusable type for last operand in"
3088 " vector/vector shift/rotate.");
3089 return false;
3091 if (vec_stmt && !slp_node)
3093 op1 = fold_convert (TREE_TYPE (vectype), op1);
3094 op1 = vect_init_vector (stmt, op1,
3095 TREE_TYPE (vectype), NULL);
3102 /* Supportable by target? */
3103 if (!optab)
3105 if (vect_print_dump_info (REPORT_DETAILS))
3106 fprintf (vect_dump, "no optab.");
3107 return false;
3109 vec_mode = TYPE_MODE (vectype);
3110 icode = (int) optab_handler (optab, vec_mode);
3111 if (icode == CODE_FOR_nothing)
3113 if (vect_print_dump_info (REPORT_DETAILS))
3114 fprintf (vect_dump, "op not supported by target.");
3115 /* Check only during analysis. */
3116 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3117 || (vf < vect_min_worthwhile_factor (code)
3118 && !vec_stmt))
3119 return false;
3120 if (vect_print_dump_info (REPORT_DETAILS))
3121 fprintf (vect_dump, "proceeding using word mode.");
3124 /* Worthwhile without SIMD support? Check only during analysis. */
3125 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3126 && vf < vect_min_worthwhile_factor (code)
3127 && !vec_stmt)
3129 if (vect_print_dump_info (REPORT_DETAILS))
3130 fprintf (vect_dump, "not worthwhile without SIMD support.");
3131 return false;
3134 if (!vec_stmt) /* transformation not required. */
3136 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3137 if (vect_print_dump_info (REPORT_DETAILS))
3138 fprintf (vect_dump, "=== vectorizable_shift ===");
3139 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3140 return true;
3143 /** Transform. **/
3145 if (vect_print_dump_info (REPORT_DETAILS))
3146 fprintf (vect_dump, "transform binary/unary operation.");
3148 /* Handle def. */
3149 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3151 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3152 created in the previous stages of the recursion, so no allocation is
3153 needed, except for the case of shift with scalar shift argument. In that
3154 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3155 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3156 In case of loop-based vectorization we allocate VECs of size 1. We
3157 allocate VEC_OPRNDS1 only in case of binary operation. */
3158 if (!slp_node)
3160 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3161 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3163 else if (scalar_shift_arg)
3164 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3166 prev_stmt_info = NULL;
3167 for (j = 0; j < ncopies; j++)
3169 /* Handle uses. */
3170 if (j == 0)
3172 if (scalar_shift_arg)
3174 /* Vector shl and shr insn patterns can be defined with scalar
3175 operand 2 (shift operand). In this case, use constant or loop
3176 invariant op1 directly, without extending it to vector mode
3177 first. */
3178 optab_op2_mode = insn_data[icode].operand[2].mode;
3179 if (!VECTOR_MODE_P (optab_op2_mode))
3181 if (vect_print_dump_info (REPORT_DETAILS))
3182 fprintf (vect_dump, "operand 1 using scalar mode.");
3183 vec_oprnd1 = op1;
3184 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3185 if (slp_node)
3187 /* Store vec_oprnd1 for every vector stmt to be created
3188 for SLP_NODE. We check during the analysis that all
3189 the shift arguments are the same.
3190 TODO: Allow different constants for different vector
3191 stmts generated for an SLP instance. */
3192 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3193 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3198 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3199 (a special case for certain kinds of vector shifts); otherwise,
3200 operand 1 should be of a vector type (the usual case). */
3201 if (vec_oprnd1)
3202 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3203 slp_node, -1);
3204 else
3205 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3206 slp_node, -1);
3208 else
3209 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3211 /* Arguments are ready. Create the new vector stmt. */
3212 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3214 vop1 = VEC_index (tree, vec_oprnds1, i);
3215 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3216 new_temp = make_ssa_name (vec_dest, new_stmt);
3217 gimple_assign_set_lhs (new_stmt, new_temp);
3218 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3219 if (slp_node)
3220 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3223 if (slp_node)
3224 continue;
3226 if (j == 0)
3227 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3228 else
3229 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3230 prev_stmt_info = vinfo_for_stmt (new_stmt);
3233 VEC_free (tree, heap, vec_oprnds0);
3234 VEC_free (tree, heap, vec_oprnds1);
3236 return true;
3240 /* Function vectorizable_operation.
3242 Check if STMT performs a binary, unary or ternary operation that can
3243 be vectorized.
3244 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3245 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3246 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
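/* Assumed examples (illustrative only): binary statements such as
   a[i] = b[i] + c[i] or p[i] = q[i] & mask, and unary ones such as
   a[i] = -b[i].  POINTER_PLUS_EXPR is mapped to a plain PLUS_EXPR below,
   and shifts/rotates are rejected here because vectorizable_shift
   handles them.  */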
3248 static bool
3249 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3250 gimple *vec_stmt, slp_tree slp_node)
3252 tree vec_dest;
3253 tree scalar_dest;
3254 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3255 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3256 tree vectype;
3257 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3258 enum tree_code code;
3259 enum machine_mode vec_mode;
3260 tree new_temp;
3261 int op_type;
3262 optab optab;
3263 int icode;
3264 tree def;
3265 gimple def_stmt;
3266 enum vect_def_type dt[3]
3267 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3268 gimple new_stmt = NULL;
3269 stmt_vec_info prev_stmt_info;
3270 int nunits_in;
3271 int nunits_out;
3272 tree vectype_out;
3273 int ncopies;
3274 int j, i;
3275 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
3276 tree vop0, vop1, vop2;
3277 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3278 int vf;
3280 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3281 return false;
3283 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3284 return false;
3286 /* Is STMT a vectorizable binary/unary operation? */
3287 if (!is_gimple_assign (stmt))
3288 return false;
3290 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3291 return false;
3293 code = gimple_assign_rhs_code (stmt);
3295 /* For pointer addition, we should use the normal plus for
3296 the vector addition. */
3297 if (code == POINTER_PLUS_EXPR)
3298 code = PLUS_EXPR;
3300 /* Support only unary, binary, or ternary operations. */
3301 op_type = TREE_CODE_LENGTH (code);
3302 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3304 if (vect_print_dump_info (REPORT_DETAILS))
3305 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
3306 op_type);
3307 return false;
3310 scalar_dest = gimple_assign_lhs (stmt);
3311 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3313 /* Most operations cannot handle bit-precision types without extra
3314 truncations. */
3315 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3316 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3317 /* Exceptions are bitwise binary operations. */
3318 && code != BIT_IOR_EXPR
3319 && code != BIT_XOR_EXPR
3320 && code != BIT_AND_EXPR)
3322 if (vect_print_dump_info (REPORT_DETAILS))
3323 fprintf (vect_dump, "bit-precision arithmetic not supported.");
3324 return false;
3327 op0 = gimple_assign_rhs1 (stmt);
3328 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
3329 &def_stmt, &def, &dt[0], &vectype))
3331 if (vect_print_dump_info (REPORT_DETAILS))
3332 fprintf (vect_dump, "use not simple.");
3333 return false;
3335 /* If op0 is an external or constant def use a vector type with
3336 the same size as the output vector type. */
3337 if (!vectype)
3338 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3339 if (vec_stmt)
3340 gcc_assert (vectype);
3341 if (!vectype)
3343 if (vect_print_dump_info (REPORT_DETAILS))
3345 fprintf (vect_dump, "no vectype for scalar type ");
3346 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3349 return false;
3352 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3353 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3354 if (nunits_out != nunits_in)
3355 return false;
3357 if (op_type == binary_op || op_type == ternary_op)
3359 op1 = gimple_assign_rhs2 (stmt);
3360 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
3361 &dt[1]))
3363 if (vect_print_dump_info (REPORT_DETAILS))
3364 fprintf (vect_dump, "use not simple.");
3365 return false;
3368 if (op_type == ternary_op)
3370 op2 = gimple_assign_rhs3 (stmt);
3371 if (!vect_is_simple_use (op2, loop_vinfo, bb_vinfo, &def_stmt, &def,
3372 &dt[2]))
3374 if (vect_print_dump_info (REPORT_DETAILS))
3375 fprintf (vect_dump, "use not simple.");
3376 return false;
3380 if (loop_vinfo)
3381 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3382 else
3383 vf = 1;
3385 /* Multiple types in SLP are handled by creating the appropriate number of
3386 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3387 case of SLP. */
3388 if (slp_node || PURE_SLP_STMT (stmt_info))
3389 ncopies = 1;
3390 else
3391 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3393 gcc_assert (ncopies >= 1);
3395 /* Shifts are handled in vectorizable_shift (). */
3396 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3397 || code == RROTATE_EXPR)
3398 return false;
3400 optab = optab_for_tree_code (code, vectype, optab_default);
3402 /* Supportable by target? */
3403 if (!optab)
3405 if (vect_print_dump_info (REPORT_DETAILS))
3406 fprintf (vect_dump, "no optab.");
3407 return false;
3409 vec_mode = TYPE_MODE (vectype);
3410 icode = (int) optab_handler (optab, vec_mode);
3411 if (icode == CODE_FOR_nothing)
3413 if (vect_print_dump_info (REPORT_DETAILS))
3414 fprintf (vect_dump, "op not supported by target.");
3415 /* Check only during analysis. */
3416 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3417 || (vf < vect_min_worthwhile_factor (code)
3418 && !vec_stmt))
3419 return false;
3420 if (vect_print_dump_info (REPORT_DETAILS))
3421 fprintf (vect_dump, "proceeding using word mode.");
3424 /* Worthwhile without SIMD support? Check only during analysis. */
3425 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3426 && vf < vect_min_worthwhile_factor (code)
3427 && !vec_stmt)
3429 if (vect_print_dump_info (REPORT_DETAILS))
3430 fprintf (vect_dump, "not worthwhile without SIMD support.");
3431 return false;
3434 if (!vec_stmt) /* transformation not required. */
3436 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3437 if (vect_print_dump_info (REPORT_DETAILS))
3438 fprintf (vect_dump, "=== vectorizable_operation ===");
3439 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3440 return true;
3443 /** Transform. **/
3445 if (vect_print_dump_info (REPORT_DETAILS))
3446 fprintf (vect_dump, "transform binary/unary operation.");
3448 /* Handle def. */
3449 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3451 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3452 created in the previous stages of the recursion, so no allocation is
3453 needed. (Shifts, which need the special scalar-shift-argument handling,
3454 are rejected above and handled in vectorizable_shift instead.) In case
3455 of loop-based vectorization we allocate VECs of size 1. We allocate
3456 VEC_OPRNDS1 only for binary and ternary operations, and VEC_OPRNDS2
3457 only for ternary operations. */
3458 if (!slp_node)
3460 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3461 if (op_type == binary_op || op_type == ternary_op)
3462 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3463 if (op_type == ternary_op)
3464 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3467 /* In case the vectorization factor (VF) is bigger than the number
3468 of elements that we can fit in a vectype (nunits), we have to generate
3469 more than one vector stmt - i.e - we need to "unroll" the
3470 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3471 from one copy of the vector stmt to the next, in the field
3472 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3473 stages to find the correct vector defs to be used when vectorizing
3474 stmts that use the defs of the current stmt. The example below
3475 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3476 we need to create 4 vectorized stmts):
3478 before vectorization:
3479 RELATED_STMT VEC_STMT
3480 S1: x = memref - -
3481 S2: z = x + 1 - -
3483 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3484 there):
3485 RELATED_STMT VEC_STMT
3486 VS1_0: vx0 = memref0 VS1_1 -
3487 VS1_1: vx1 = memref1 VS1_2 -
3488 VS1_2: vx2 = memref2 VS1_3 -
3489 VS1_3: vx3 = memref3 - -
3490 S1: x = load - VS1_0
3491 S2: z = x + 1 - -
3493 step2: vectorize stmt S2 (done here):
3494 To vectorize stmt S2 we first need to find the relevant vector
3495 def for the first operand 'x'. This is, as usual, obtained from
3496 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3497 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3498 relevant vector def 'vx0'. Having found 'vx0' we can generate
3499 the vector stmt VS2_0, and as usual, record it in the
3500 STMT_VINFO_VEC_STMT of stmt S2.
3501 When creating the second copy (VS2_1), we obtain the relevant vector
3502 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3503 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3504 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3505 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3506 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3507 chain of stmts and pointers:
3508 RELATED_STMT VEC_STMT
3509 VS1_0: vx0 = memref0 VS1_1 -
3510 VS1_1: vx1 = memref1 VS1_2 -
3511 VS1_2: vx2 = memref2 VS1_3 -
3512 VS1_3: vx3 = memref3 - -
3513 S1: x = load - VS1_0
3514 VS2_0: vz0 = vx0 + v1 VS2_1 -
3515 VS2_1: vz1 = vx1 + v1 VS2_2 -
3516 VS2_2: vz2 = vx2 + v1 VS2_3 -
3517 VS2_3: vz3 = vx3 + v1 - -
3518 S2: z = x + 1 - VS2_0 */
3520 prev_stmt_info = NULL;
3521 for (j = 0; j < ncopies; j++)
3523 /* Handle uses. */
3524 if (j == 0)
3526 if (op_type == binary_op || op_type == ternary_op)
3527 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3528 slp_node, -1);
3529 else
3530 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3531 slp_node, -1);
3532 if (op_type == ternary_op)
3534 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3535 VEC_quick_push (tree, vec_oprnds2,
3536 vect_get_vec_def_for_operand (op2, stmt, NULL));
3539 else
3541 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3542 if (op_type == ternary_op)
3544 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
3545 VEC_quick_push (tree, vec_oprnds2,
3546 vect_get_vec_def_for_stmt_copy (dt[2],
3547 vec_oprnd));
3551 /* Arguments are ready. Create the new vector stmt. */
3552 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3554 vop1 = ((op_type == binary_op || op_type == ternary_op)
3555 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
3556 vop2 = ((op_type == ternary_op)
3557 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
3558 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
3559 vop0, vop1, vop2);
3560 new_temp = make_ssa_name (vec_dest, new_stmt);
3561 gimple_assign_set_lhs (new_stmt, new_temp);
3562 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3563 if (slp_node)
3564 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3567 if (slp_node)
3568 continue;
3570 if (j == 0)
3571 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3572 else
3573 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3574 prev_stmt_info = vinfo_for_stmt (new_stmt);
3577 VEC_free (tree, heap, vec_oprnds0);
3578 if (vec_oprnds1)
3579 VEC_free (tree, heap, vec_oprnds1);
3580 if (vec_oprnds2)
3581 VEC_free (tree, heap, vec_oprnds2);
3583 return true;
3587 /* Function vectorizable_store.
3589 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3590 can be vectorized.
3591 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3592 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3593 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
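/* Assumed example (illustrative only) of an interleaved store group
   handled below:

     for (i = 0; i < n; i++)
       {
         a[2*i] = x[i];
         a[2*i + 1] = y[i];
       }

   The two stores form a group of size 2; depending on the target it is
   emitted either through store-lanes instructions (when
   vect_store_lanes_supported returns true) or by permuting the vectors
   as described in the interleaving comment further below.  */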
3595 static bool
3596 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3597 slp_tree slp_node)
3599 tree scalar_dest;
3600 tree data_ref;
3601 tree op;
3602 tree vec_oprnd = NULL_TREE;
3603 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3604 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3605 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3606 tree elem_type;
3607 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3608 struct loop *loop = NULL;
3609 enum machine_mode vec_mode;
3610 tree dummy;
3611 enum dr_alignment_support alignment_support_scheme;
3612 tree def;
3613 gimple def_stmt;
3614 enum vect_def_type dt;
3615 stmt_vec_info prev_stmt_info = NULL;
3616 tree dataref_ptr = NULL_TREE;
3617 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3618 int ncopies;
3619 int j;
3620 gimple next_stmt, first_stmt = NULL;
3621 bool strided_store = false;
3622 bool store_lanes_p = false;
3623 unsigned int group_size, i;
3624 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3625 bool inv_p;
3626 VEC(tree,heap) *vec_oprnds = NULL;
3627 bool slp = (slp_node != NULL);
3628 unsigned int vec_num;
3629 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3630 tree aggr_type;
3632 if (loop_vinfo)
3633 loop = LOOP_VINFO_LOOP (loop_vinfo);
3635 /* Multiple types in SLP are handled by creating the appropriate number of
3636 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3637 case of SLP. */
3638 if (slp || PURE_SLP_STMT (stmt_info))
3639 ncopies = 1;
3640 else
3641 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3643 gcc_assert (ncopies >= 1);
3645 /* FORNOW. This restriction should be relaxed. */
3646 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3648 if (vect_print_dump_info (REPORT_DETAILS))
3649 fprintf (vect_dump, "multiple types in nested loop.");
3650 return false;
3653 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3654 return false;
3656 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3657 return false;
3659 /* Is vectorizable store? */
3661 if (!is_gimple_assign (stmt))
3662 return false;
3664 scalar_dest = gimple_assign_lhs (stmt);
3665 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3666 && is_pattern_stmt_p (stmt_info))
3667 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3668 if (TREE_CODE (scalar_dest) != ARRAY_REF
3669 && TREE_CODE (scalar_dest) != INDIRECT_REF
3670 && TREE_CODE (scalar_dest) != COMPONENT_REF
3671 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3672 && TREE_CODE (scalar_dest) != REALPART_EXPR
3673 && TREE_CODE (scalar_dest) != MEM_REF)
3674 return false;
3676 gcc_assert (gimple_assign_single_p (stmt));
3677 op = gimple_assign_rhs1 (stmt);
3678 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3680 if (vect_print_dump_info (REPORT_DETAILS))
3681 fprintf (vect_dump, "use not simple.");
3682 return false;
3685 elem_type = TREE_TYPE (vectype);
3686 vec_mode = TYPE_MODE (vectype);
3688 /* FORNOW. In some cases we can vectorize even if the data-type is not
3689 supported (e.g. array initialization with 0). */
3690 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3691 return false;
3693 if (!STMT_VINFO_DATA_REF (stmt_info))
3694 return false;
3696 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
3698 if (vect_print_dump_info (REPORT_DETAILS))
3699 fprintf (vect_dump, "negative step for store.");
3700 return false;
3703 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3705 strided_store = true;
3706 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3707 if (!slp && !PURE_SLP_STMT (stmt_info))
3709 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3710 if (vect_store_lanes_supported (vectype, group_size))
3711 store_lanes_p = true;
3712 else if (!vect_strided_store_supported (vectype, group_size))
3713 return false;
3716 if (first_stmt == stmt)
3718 /* STMT is the leader of the group. Check the operands of all the
3719 stmts of the group. */
3720 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3721 while (next_stmt)
3723 gcc_assert (gimple_assign_single_p (next_stmt));
3724 op = gimple_assign_rhs1 (next_stmt);
3725 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3726 &def, &dt))
3728 if (vect_print_dump_info (REPORT_DETAILS))
3729 fprintf (vect_dump, "use not simple.");
3730 return false;
3732 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3737 if (!vec_stmt) /* transformation not required. */
3739 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3740 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
3741 return true;
3744 /** Transform. **/
3746 if (strided_store)
3748 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3749 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3751 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3753 /* FORNOW */
3754 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3756 /* We vectorize all the stmts of the interleaving group when we
3757 reach the last stmt in the group. */
3758 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3759 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3760 && !slp)
3762 *vec_stmt = NULL;
3763 return true;
3766 if (slp)
3768 strided_store = false;
3769 /* VEC_NUM is the number of vect stmts to be created for this
3770 group. */
3771 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3772 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3773 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3774 op = gimple_assign_rhs1 (first_stmt);
3776 else
3777 /* VEC_NUM is the number of vect stmts to be created for this
3778 group. */
3779 vec_num = group_size;
3781 else
3783 first_stmt = stmt;
3784 first_dr = dr;
3785 group_size = vec_num = 1;
3788 if (vect_print_dump_info (REPORT_DETAILS))
3789 fprintf (vect_dump, "transform store. ncopies = %d", ncopies);
3791 dr_chain = VEC_alloc (tree, heap, group_size);
3792 oprnds = VEC_alloc (tree, heap, group_size);
3794 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3795 gcc_assert (alignment_support_scheme);
3796 /* Targets with store-lane instructions must not require explicit
3797 realignment. */
3798 gcc_assert (!store_lanes_p
3799 || alignment_support_scheme == dr_aligned
3800 || alignment_support_scheme == dr_unaligned_supported);
3802 if (store_lanes_p)
3803 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3804 else
3805 aggr_type = vectype;
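  /* For instance, assuming a V4SI vectype and an interleaving group of two
     stores, AGGR_TYPE is int[8] in the store-lanes case: the vectors are
     first collected into one array and emitted through a single
     IFN_STORE_LANES call (which targets such as ARM NEON map to an
     interleaving store, e.g. vst2).  Otherwise each vector is stored through
     an ordinary V4SI MEM_REF.  */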
3807 /* In case the vectorization factor (VF) is bigger than the number
3808 of elements that we can fit in a vectype (nunits), we have to generate
3809 more than one vector stmt - i.e., we need to "unroll" the
3810 vector stmt by a factor VF/nunits. For more details see documentation in
3811 vect_get_vec_def_for_copy_stmt. */
3813 /* In case of interleaving (non-unit strided access):
3815 S1: &base + 2 = x2
3816 S2: &base = x0
3817 S3: &base + 1 = x1
3818 S4: &base + 3 = x3
3820 We create vectorized stores starting from the base address (the access of
3821 the first stmt in the chain, S2 in the above example) when the last store
3822 stmt of the chain (S4) is reached:
3824 VS1: &base = vx2
3825 VS2: &base + vec_size*1 = vx0
3826 VS3: &base + vec_size*2 = vx1
3827 VS4: &base + vec_size*3 = vx3
3829 Then permutation statements are generated:
3831 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3832 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3835 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3836 (the order of the data-refs in the output of vect_permute_store_chain
3837 corresponds to the order of scalar stmts in the interleaving chain - see
3838 the documentation of vect_permute_store_chain()).
3840 In case of both multiple types and interleaving, above vector stores and
3841 permutation stmts are created for every copy. The result vector stmts are
3842 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3843 STMT_VINFO_RELATED_STMT for the next copies.
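   /* As a concrete example, assume V4SI and an interleaving chain of two
      stores a[2*i] = x, a[2*i+1] = y.  From vx = {x0,x1,x2,x3} and
      vy = {y0,y1,y2,y3} the permutation stmts produce {x0,y0,x1,y1} and
      {x2,y2,x3,y3}, which are then stored to two consecutive vector-sized
      locations starting at &a[2*i].  */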
3846 prev_stmt_info = NULL;
3847 for (j = 0; j < ncopies; j++)
3849 gimple new_stmt;
3850 gimple ptr_incr;
3852 if (j == 0)
3854 if (slp)
3856 /* Get vectorized arguments for SLP_NODE. */
3857 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
3858 NULL, slp_node, -1);
3860 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3862 else
3864 /* For interleaved stores we collect vectorized defs for all the
3865 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3866 used as an input to vect_permute_store_chain(), and OPRNDS as
3867 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3869 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3870 OPRNDS are of size 1. */
3871 next_stmt = first_stmt;
3872 for (i = 0; i < group_size; i++)
3874 /* Since gaps are not supported for interleaved stores,
3875 GROUP_SIZE is the exact number of stmts in the chain.
3876 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3877 there is no interleaving, GROUP_SIZE is 1, and only one
3878 iteration of the loop will be executed. */
3879 gcc_assert (next_stmt
3880 && gimple_assign_single_p (next_stmt));
3881 op = gimple_assign_rhs1 (next_stmt);
3883 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3884 NULL);
3885 VEC_quick_push (tree, dr_chain, vec_oprnd);
3886 VEC_quick_push (tree, oprnds, vec_oprnd);
3887 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3891 /* We should have caught mismatched types earlier. */
3892 gcc_assert (useless_type_conversion_p (vectype,
3893 TREE_TYPE (vec_oprnd)));
3894 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
3895 NULL_TREE, &dummy, gsi,
3896 &ptr_incr, false, &inv_p);
3897 gcc_assert (bb_vinfo || !inv_p);
3899 else
3901 /* For interleaved stores we created vectorized defs for all the
3902 defs stored in OPRNDS in the previous iteration (previous copy).
3903 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3904 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3905 next copy.
3906 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3907 OPRNDS are of size 1. */
3908 for (i = 0; i < group_size; i++)
3910 op = VEC_index (tree, oprnds, i);
3911 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3912 &dt);
3913 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3914 VEC_replace (tree, dr_chain, i, vec_oprnd);
3915 VEC_replace (tree, oprnds, i, vec_oprnd);
3917 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3918 TYPE_SIZE_UNIT (aggr_type));
3921 if (store_lanes_p)
3923 tree vec_array;
3925 /* Combine all the vectors into an array. */
3926 vec_array = create_vector_array (vectype, vec_num);
3927 for (i = 0; i < vec_num; i++)
3929 vec_oprnd = VEC_index (tree, dr_chain, i);
3930 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
3933 /* Emit:
3934 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
3935 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
3936 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
3937 gimple_call_set_lhs (new_stmt, data_ref);
3938 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3939 mark_symbols_for_renaming (new_stmt);
3941 else
3943 new_stmt = NULL;
3944 if (strided_store)
3946 result_chain = VEC_alloc (tree, heap, group_size);
3947 /* Permute. */
3948 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3949 &result_chain);
3952 next_stmt = first_stmt;
3953 for (i = 0; i < vec_num; i++)
3955 struct ptr_info_def *pi;
3957 if (i > 0)
3958 /* Bump the vector pointer. */
3959 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
3960 stmt, NULL_TREE);
3962 if (slp)
3963 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3964 else if (strided_store)
3965 /* For strided stores vectorized defs are interleaved in
3966 vect_permute_store_chain(). */
3967 vec_oprnd = VEC_index (tree, result_chain, i);
3969 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
3970 build_int_cst (reference_alias_ptr_type
3971 (DR_REF (first_dr)), 0));
3972 pi = get_ptr_info (dataref_ptr);
3973 pi->align = TYPE_ALIGN_UNIT (vectype);
3974 if (aligned_access_p (first_dr))
3975 pi->misalign = 0;
3976 else if (DR_MISALIGNMENT (first_dr) == -1)
3978 TREE_TYPE (data_ref)
3979 = build_aligned_type (TREE_TYPE (data_ref),
3980 TYPE_ALIGN (elem_type));
3981 pi->align = TYPE_ALIGN_UNIT (elem_type);
3982 pi->misalign = 0;
3984 else
3986 TREE_TYPE (data_ref)
3987 = build_aligned_type (TREE_TYPE (data_ref),
3988 TYPE_ALIGN (elem_type));
3989 pi->misalign = DR_MISALIGNMENT (first_dr);
3992 /* Arguments are ready. Create the new vector stmt. */
3993 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3994 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3995 mark_symbols_for_renaming (new_stmt);
3997 if (slp)
3998 continue;
4000 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4001 if (!next_stmt)
4002 break;
4005 if (!slp)
4007 if (j == 0)
4008 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4009 else
4010 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4011 prev_stmt_info = vinfo_for_stmt (new_stmt);
4015 VEC_free (tree, heap, dr_chain);
4016 VEC_free (tree, heap, oprnds);
4017 if (result_chain)
4018 VEC_free (tree, heap, result_chain);
4019 if (vec_oprnds)
4020 VEC_free (tree, heap, vec_oprnds);
4022 return true;
4025 /* Given a vector type VECTYPE and a permutation SEL, returns
4026 the VECTOR_CST mask that implements the permutation of the
4027 vector elements. If that is impossible to do, returns NULL. */
4029 static tree
4030 gen_perm_mask (tree vectype, unsigned char *sel)
4032 tree mask_elt_type, mask_type, mask_vec;
4033 int i, nunits;
4035 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4037 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4038 return NULL;
4040 mask_elt_type
4041 = lang_hooks.types.type_for_size
4042 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
4043 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4045 mask_vec = NULL;
4046 for (i = nunits - 1; i >= 0; i--)
4047 mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, sel[i]),
4048 mask_vec);
4049 mask_vec = build_vector (mask_type, mask_vec);
4051 return mask_vec;
4054 /* Given a vector type VECTYPE, returns the VECTOR_CST mask that implements
4055 reversal of the vector elements. If that is impossible to do,
4056 returns NULL. */
4058 static tree
4059 perm_mask_for_reverse (tree vectype)
4061 int i, nunits;
4062 unsigned char *sel;
4064 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4065 sel = XALLOCAVEC (unsigned char, nunits);
4067 for (i = 0; i < nunits; ++i)
4068 sel[i] = nunits - 1 - i;
4070 return gen_perm_mask (vectype, sel);
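/* For example, for a V4SI vectype the mask built above is {3, 2, 1, 0},
   so VEC_PERM_EXPR <v, v, {3, 2, 1, 0}> yields the elements of V in
   reverse order: {a, b, c, d} becomes {d, c, b, a}.  */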
4073 /* Given vector variables X and Y that were generated for the scalar
4074 STMT, generate instructions to permute the vector elements of X and Y
4075 using the permutation mask MASK_VEC, insert them at *GSI, and return the
4076 permuted vector variable.
4078 static tree
4079 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4080 gimple_stmt_iterator *gsi)
4082 tree vectype = TREE_TYPE (x);
4083 tree perm_dest, data_ref;
4084 gimple perm_stmt;
4086 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4087 data_ref = make_ssa_name (perm_dest, NULL);
4089 /* Generate the permute statement. */
4090 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4091 x, y, mask_vec);
4092 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4094 return data_ref;
4097 /* vectorizable_load.
4099 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4100 can be vectorized.
4101 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4102 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4103 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4105 static bool
4106 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4107 slp_tree slp_node, slp_instance slp_node_instance)
4109 tree scalar_dest;
4110 tree vec_dest = NULL;
4111 tree data_ref = NULL;
4112 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4113 stmt_vec_info prev_stmt_info;
4114 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4115 struct loop *loop = NULL;
4116 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4117 bool nested_in_vect_loop = false;
4118 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4119 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4120 tree elem_type;
4121 tree new_temp;
4122 enum machine_mode mode;
4123 gimple new_stmt = NULL;
4124 tree dummy;
4125 enum dr_alignment_support alignment_support_scheme;
4126 tree dataref_ptr = NULL_TREE;
4127 gimple ptr_incr;
4128 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4129 int ncopies;
4130 int i, j, group_size;
4131 tree msq = NULL_TREE, lsq;
4132 tree offset = NULL_TREE;
4133 tree realignment_token = NULL_TREE;
4134 gimple phi = NULL;
4135 VEC(tree,heap) *dr_chain = NULL;
4136 bool strided_load = false;
4137 bool load_lanes_p = false;
4138 gimple first_stmt;
4139 bool inv_p;
4140 bool negative;
4141 bool compute_in_loop = false;
4142 struct loop *at_loop;
4143 int vec_num;
4144 bool slp = (slp_node != NULL);
4145 bool slp_perm = false;
4146 enum tree_code code;
4147 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4148 int vf;
4149 tree aggr_type;
4150 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4151 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4152 int gather_scale = 1;
4153 enum vect_def_type gather_dt = vect_unknown_def_type;
4155 if (loop_vinfo)
4157 loop = LOOP_VINFO_LOOP (loop_vinfo);
4158 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4159 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4161 else
4162 vf = 1;
4164 /* Multiple types in SLP are handled by creating the appropriate number of
4165 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4166 case of SLP. */
4167 if (slp || PURE_SLP_STMT (stmt_info))
4168 ncopies = 1;
4169 else
4170 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4172 gcc_assert (ncopies >= 1);
4174 /* FORNOW. This restriction should be relaxed. */
4175 if (nested_in_vect_loop && ncopies > 1)
4177 if (vect_print_dump_info (REPORT_DETAILS))
4178 fprintf (vect_dump, "multiple types in nested loop.");
4179 return false;
4182 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4183 return false;
4185 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4186 return false;
4188 /* Is vectorizable load? */
4189 if (!is_gimple_assign (stmt))
4190 return false;
4192 scalar_dest = gimple_assign_lhs (stmt);
4193 if (TREE_CODE (scalar_dest) != SSA_NAME)
4194 return false;
4196 code = gimple_assign_rhs_code (stmt);
4197 if (code != ARRAY_REF
4198 && code != INDIRECT_REF
4199 && code != COMPONENT_REF
4200 && code != IMAGPART_EXPR
4201 && code != REALPART_EXPR
4202 && code != MEM_REF
4203 && TREE_CODE_CLASS (code) != tcc_declaration)
4204 return false;
4206 if (!STMT_VINFO_DATA_REF (stmt_info))
4207 return false;
4209 negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
4210 if (negative && ncopies > 1)
4212 if (vect_print_dump_info (REPORT_DETAILS))
4213 fprintf (vect_dump, "multiple types with negative step.");
4214 return false;
4217 elem_type = TREE_TYPE (vectype);
4218 mode = TYPE_MODE (vectype);
4220 /* FORNOW. In some cases we can vectorize even if the data-type is not
4221 supported (e.g. - data copies). */
4222 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4224 if (vect_print_dump_info (REPORT_DETAILS))
4225 fprintf (vect_dump, "Aligned load, but unsupported type.");
4226 return false;
4229 /* Check if the load is a part of an interleaving chain. */
4230 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4232 strided_load = true;
4233 /* FORNOW */
4234 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4236 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4237 if (!slp && !PURE_SLP_STMT (stmt_info))
4239 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4240 if (vect_load_lanes_supported (vectype, group_size))
4241 load_lanes_p = true;
4242 else if (!vect_strided_load_supported (vectype, group_size))
4243 return false;
4247 if (negative)
4249 gcc_assert (!strided_load && !STMT_VINFO_GATHER_P (stmt_info));
4250 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4251 if (alignment_support_scheme != dr_aligned
4252 && alignment_support_scheme != dr_unaligned_supported)
4254 if (vect_print_dump_info (REPORT_DETAILS))
4255 fprintf (vect_dump, "negative step but alignment required.");
4256 return false;
4258 if (!perm_mask_for_reverse (vectype))
4260 if (vect_print_dump_info (REPORT_DETAILS))
4261 fprintf (vect_dump, "negative step and reversing not supported.");
4262 return false;
4266 if (STMT_VINFO_GATHER_P (stmt_info))
4268 gimple def_stmt;
4269 tree def;
4270 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4271 &gather_off, &gather_scale);
4272 gcc_assert (gather_decl);
4273 if (!vect_is_simple_use_1 (gather_off, loop_vinfo, bb_vinfo,
4274 &def_stmt, &def, &gather_dt,
4275 &gather_off_vectype))
4277 if (vect_print_dump_info (REPORT_DETAILS))
4278 fprintf (vect_dump, "gather index use not simple.");
4279 return false;
4283 if (!vec_stmt) /* transformation not required. */
4285 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4286 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4287 return true;
4290 if (vect_print_dump_info (REPORT_DETAILS))
4291 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4293 /** Transform. **/
4295 if (STMT_VINFO_GATHER_P (stmt_info))
4297 tree vec_oprnd0 = NULL_TREE, op;
4298 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4299 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4300 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4301 edge pe = loop_preheader_edge (loop);
4302 gimple_seq seq;
4303 basic_block new_bb;
4304 enum { NARROW, NONE, WIDEN } modifier;
4305 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4307 if (nunits == gather_off_nunits)
4308 modifier = NONE;
4309 else if (nunits == gather_off_nunits / 2)
4311 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4312 modifier = WIDEN;
4314 for (i = 0; i < gather_off_nunits; ++i)
4315 sel[i] = i | nunits;
4317 perm_mask = gen_perm_mask (gather_off_vectype, sel);
4318 gcc_assert (perm_mask != NULL_TREE);
4320 else if (nunits == gather_off_nunits * 2)
4322 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4323 modifier = NARROW;
4325 for (i = 0; i < nunits; ++i)
4326 sel[i] = i < gather_off_nunits
4327 ? i : i + nunits - gather_off_nunits;
4329 perm_mask = gen_perm_mask (vectype, sel);
4330 gcc_assert (perm_mask != NULL_TREE);
4331 ncopies *= 2;
4333 else
4334 gcc_unreachable ();
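      /* Worked example: if the offset vector is half as wide as the data
         vector (nunits == 8, gather_off_nunits == 4), modifier == NARROW and
         perm_mask == {0,1,2,3, 8,9,10,11}, so the results of two consecutive
         gather calls are glued together by taking the first four elements of
         each; ncopies is doubled to compensate.  Conversely, with nunits == 4
         and gather_off_nunits == 8 (WIDEN) the mask is {4,5,6,7, 4,5,6,7},
         which moves the upper half of the offset vector into position for
         the odd-numbered copies.  */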
4336 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4337 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4338 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4339 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4340 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4341 scaletype = TREE_VALUE (arglist);
4342 gcc_checking_assert (types_compatible_p (srctype, rettype)
4343 && types_compatible_p (srctype, masktype));
4345 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4347 ptr = fold_convert (ptrtype, gather_base);
4348 if (!is_gimple_min_invariant (ptr))
4350 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4351 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4352 gcc_assert (!new_bb);
4355 /* Currently we support only unconditional gather loads,
4356 so mask should be all ones. */
4357 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4358 mask = build_int_cst (TREE_TYPE (masktype), -1);
4359 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4361 REAL_VALUE_TYPE r;
4362 long tmp[6];
4363 for (j = 0; j < 6; ++j)
4364 tmp[j] = -1;
4365 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4366 mask = build_real (TREE_TYPE (masktype), r);
4368 else
4369 gcc_unreachable ();
4370 mask = build_vector_from_val (masktype, mask);
4371 mask = vect_init_vector (stmt, mask, masktype, NULL);
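      /* Both branches above build an element whose bits are all ones (for
         floating-point mask types this is done by reinterpreting an all-ones
         bit pattern), so the broadcast MASK asks the gather to load every
         lane.  */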
4373 scale = build_int_cst (scaletype, gather_scale);
4375 prev_stmt_info = NULL;
4376 for (j = 0; j < ncopies; ++j)
4378 if (modifier == WIDEN && (j & 1))
4379 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4380 perm_mask, stmt, gsi);
4381 else if (j == 0)
4382 op = vec_oprnd0
4383 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4384 else
4385 op = vec_oprnd0
4386 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4388 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4390 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4391 == TYPE_VECTOR_SUBPARTS (idxtype));
4392 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4393 add_referenced_var (var);
4394 var = make_ssa_name (var, NULL);
4395 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4396 new_stmt
4397 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4398 op, NULL_TREE);
4399 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4400 op = var;
4403 new_stmt
4404 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4406 if (!useless_type_conversion_p (vectype, rettype))
4408 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4409 == TYPE_VECTOR_SUBPARTS (rettype));
4410 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4411 add_referenced_var (var);
4412 op = make_ssa_name (var, new_stmt);
4413 gimple_call_set_lhs (new_stmt, op);
4414 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4415 var = make_ssa_name (vec_dest, NULL);
4416 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4417 new_stmt
4418 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4419 NULL_TREE);
4421 else
4423 var = make_ssa_name (vec_dest, new_stmt);
4424 gimple_call_set_lhs (new_stmt, var);
4427 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4429 if (modifier == NARROW)
4431 if ((j & 1) == 0)
4433 prev_res = var;
4434 continue;
4436 var = permute_vec_elements (prev_res, var,
4437 perm_mask, stmt, gsi);
4438 new_stmt = SSA_NAME_DEF_STMT (var);
4441 if (prev_stmt_info == NULL)
4442 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4443 else
4444 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4445 prev_stmt_info = vinfo_for_stmt (new_stmt);
4447 return true;
4450 if (strided_load)
4452 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4453 if (slp
4454 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4455 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4456 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4458 /* Check if the chain of loads is already vectorized. */
4459 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4461 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4462 return true;
4464 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4465 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4467 /* VEC_NUM is the number of vect stmts to be created for this group. */
4468 if (slp)
4470 strided_load = false;
4471 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4472 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4473 slp_perm = true;
4475 else
4476 vec_num = group_size;
4478 else
4480 first_stmt = stmt;
4481 first_dr = dr;
4482 group_size = vec_num = 1;
4485 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4486 gcc_assert (alignment_support_scheme);
4487 /* Targets with load-lane instructions must not require explicit
4488 realignment. */
4489 gcc_assert (!load_lanes_p
4490 || alignment_support_scheme == dr_aligned
4491 || alignment_support_scheme == dr_unaligned_supported);
4493 /* In case the vectorization factor (VF) is bigger than the number
4494 of elements that we can fit in a vectype (nunits), we have to generate
4495 more than one vector stmt - i.e., we need to "unroll" the
4496 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4497 from one copy of the vector stmt to the next, in the field
4498 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4499 stages to find the correct vector defs to be used when vectorizing
4500 stmts that use the defs of the current stmt. The example below
4501 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4502 need to create 4 vectorized stmts):
4504 before vectorization:
4505 RELATED_STMT VEC_STMT
4506 S1: x = memref - -
4507 S2: z = x + 1 - -
4509 step 1: vectorize stmt S1:
4510 We first create the vector stmt VS1_0, and, as usual, record a
4511 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4512 Next, we create the vector stmt VS1_1, and record a pointer to
4513 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4514 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4515 stmts and pointers:
4516 RELATED_STMT VEC_STMT
4517 VS1_0: vx0 = memref0 VS1_1 -
4518 VS1_1: vx1 = memref1 VS1_2 -
4519 VS1_2: vx2 = memref2 VS1_3 -
4520 VS1_3: vx3 = memref3 - -
4521 S1: x = load - VS1_0
4522 S2: z = x + 1 - -
4524 See the documentation of vect_get_vec_def_for_stmt_copy for how the
4525 information recorded in the RELATED_STMT field is used to vectorize
4526 stmt S2. */
4528 /* In case of interleaving (non-unit strided access):
4530 S1: x2 = &base + 2
4531 S2: x0 = &base
4532 S3: x1 = &base + 1
4533 S4: x3 = &base + 3
4535 Vectorized loads are created in the order of memory accesses
4536 starting from the access of the first stmt of the chain:
4538 VS1: vx0 = &base
4539 VS2: vx1 = &base + vec_size*1
4540 VS3: vx3 = &base + vec_size*2
4541 VS4: vx4 = &base + vec_size*3
4543 Then permutation statements are generated:
4545 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4546 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4549 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4550 (the order of the data-refs in the output of vect_permute_load_chain
4551 corresponds to the order of scalar stmts in the interleaving chain - see
4552 the documentation of vect_permute_load_chain()).
4553 The generation of permutation stmts and recording them in
4554 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4556 In case of both multiple types and interleaving, the vector loads and
4557 permutation stmts above are created for every copy. The result vector
4558 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4559 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
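/* As a concrete example, assume V4SI and an interleaving chain of two loads
   x = a[2*i], y = a[2*i+1].  The vector loads fetch {x0,y0,x1,y1} and
   {x2,y2,x3,y3}, and the extract-even/odd permutations recover
   vx = {x0,x1,x2,x3} and vy = {y0,y1,y2,y3}.  */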
4561 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4562 on a target that supports unaligned accesses (dr_unaligned_supported)
4563 we generate the following code:
4564 p = initial_addr;
4565 indx = 0;
4566 loop {
4567 p = p + indx * vectype_size;
4568 vec_dest = *(p);
4569 indx = indx + 1;
4572 Otherwise, the data reference is potentially unaligned on a target that
4573 does not support unaligned accesses (dr_explicit_realign_optimized) -
4574 then generate the following code, in which the data in each iteration is
4575 obtained by two vector loads, one from the previous iteration, and one
4576 from the current iteration:
4577 p1 = initial_addr;
4578 msq_init = *(floor(p1))
4579 p2 = initial_addr + VS - 1;
4580 realignment_token = call target_builtin;
4581 indx = 0;
4582 loop {
4583 p2 = p2 + indx * vectype_size
4584 lsq = *(floor(p2))
4585 vec_dest = realign_load (msq, lsq, realignment_token)
4586 indx = indx + 1;
4587 msq = lsq;
4588 } */
4590 /* If the misalignment remains the same throughout the execution of the
4591 loop, we can create the init_addr and permutation mask at the loop
4592 preheader. Otherwise, they need to be created inside the loop.
4593 This can only occur when vectorizing memory accesses in the inner-loop
4594 nested within an outer-loop that is being vectorized. */
4596 if (loop && nested_in_vect_loop_p (loop, stmt)
4597 && (TREE_INT_CST_LOW (DR_STEP (dr))
4598 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4600 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4601 compute_in_loop = true;
4604 if ((alignment_support_scheme == dr_explicit_realign_optimized
4605 || alignment_support_scheme == dr_explicit_realign)
4606 && !compute_in_loop)
4608 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4609 alignment_support_scheme, NULL_TREE,
4610 &at_loop);
4611 if (alignment_support_scheme == dr_explicit_realign_optimized)
4613 phi = SSA_NAME_DEF_STMT (msq);
4614 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4617 else
4618 at_loop = loop;
4620 if (negative)
4621 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4623 if (load_lanes_p)
4624 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4625 else
4626 aggr_type = vectype;
4628 prev_stmt_info = NULL;
4629 for (j = 0; j < ncopies; j++)
4631 /* 1. Create the vector or array pointer update chain. */
4632 if (j == 0)
4633 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4634 offset, &dummy, gsi,
4635 &ptr_incr, false, &inv_p);
4636 else
4637 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4638 TYPE_SIZE_UNIT (aggr_type));
4640 if (strided_load || slp_perm)
4641 dr_chain = VEC_alloc (tree, heap, vec_num);
4643 if (load_lanes_p)
4645 tree vec_array;
4647 vec_array = create_vector_array (vectype, vec_num);
4649 /* Emit:
4650 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4651 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4652 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4653 gimple_call_set_lhs (new_stmt, vec_array);
4654 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4655 mark_symbols_for_renaming (new_stmt);
4657 /* Extract each vector into an SSA_NAME. */
4658 for (i = 0; i < vec_num; i++)
4660 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4661 vec_array, i);
4662 VEC_quick_push (tree, dr_chain, new_temp);
4665 /* Record the mapping between SSA_NAMEs and statements. */
4666 vect_record_strided_load_vectors (stmt, dr_chain);
4668 else
4670 for (i = 0; i < vec_num; i++)
4672 if (i > 0)
4673 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4674 stmt, NULL_TREE);
4676 /* 2. Create the vector-load in the loop. */
4677 switch (alignment_support_scheme)
4679 case dr_aligned:
4680 case dr_unaligned_supported:
4682 struct ptr_info_def *pi;
4683 data_ref
4684 = build2 (MEM_REF, vectype, dataref_ptr,
4685 build_int_cst (reference_alias_ptr_type
4686 (DR_REF (first_dr)), 0));
4687 pi = get_ptr_info (dataref_ptr);
4688 pi->align = TYPE_ALIGN_UNIT (vectype);
4689 if (alignment_support_scheme == dr_aligned)
4691 gcc_assert (aligned_access_p (first_dr));
4692 pi->misalign = 0;
4694 else if (DR_MISALIGNMENT (first_dr) == -1)
4696 TREE_TYPE (data_ref)
4697 = build_aligned_type (TREE_TYPE (data_ref),
4698 TYPE_ALIGN (elem_type));
4699 pi->align = TYPE_ALIGN_UNIT (elem_type);
4700 pi->misalign = 0;
4702 else
4704 TREE_TYPE (data_ref)
4705 = build_aligned_type (TREE_TYPE (data_ref),
4706 TYPE_ALIGN (elem_type));
4707 pi->misalign = DR_MISALIGNMENT (first_dr);
4709 break;
4711 case dr_explicit_realign:
4713 tree ptr, bump;
4714 tree vs_minus_1;
4716 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4718 if (compute_in_loop)
4719 msq = vect_setup_realignment (first_stmt, gsi,
4720 &realignment_token,
4721 dr_explicit_realign,
4722 dataref_ptr, NULL);
4724 new_stmt = gimple_build_assign_with_ops
4725 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4726 build_int_cst
4727 (TREE_TYPE (dataref_ptr),
4728 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4729 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4730 gimple_assign_set_lhs (new_stmt, ptr);
4731 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4732 data_ref
4733 = build2 (MEM_REF, vectype, ptr,
4734 build_int_cst (reference_alias_ptr_type
4735 (DR_REF (first_dr)), 0));
4736 vec_dest = vect_create_destination_var (scalar_dest,
4737 vectype);
4738 new_stmt = gimple_build_assign (vec_dest, data_ref);
4739 new_temp = make_ssa_name (vec_dest, new_stmt);
4740 gimple_assign_set_lhs (new_stmt, new_temp);
4741 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4742 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4743 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4744 msq = new_temp;
4746 bump = size_binop (MULT_EXPR, vs_minus_1,
4747 TYPE_SIZE_UNIT (elem_type));
4748 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4749 new_stmt = gimple_build_assign_with_ops
4750 (BIT_AND_EXPR, NULL_TREE, ptr,
4751 build_int_cst
4752 (TREE_TYPE (ptr),
4753 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4754 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4755 gimple_assign_set_lhs (new_stmt, ptr);
4756 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4757 data_ref
4758 = build2 (MEM_REF, vectype, ptr,
4759 build_int_cst (reference_alias_ptr_type
4760 (DR_REF (first_dr)), 0));
4761 break;
4763 case dr_explicit_realign_optimized:
4764 new_stmt = gimple_build_assign_with_ops
4765 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4766 build_int_cst
4767 (TREE_TYPE (dataref_ptr),
4768 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4769 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
4770 new_stmt);
4771 gimple_assign_set_lhs (new_stmt, new_temp);
4772 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4773 data_ref
4774 = build2 (MEM_REF, vectype, new_temp,
4775 build_int_cst (reference_alias_ptr_type
4776 (DR_REF (first_dr)), 0));
4777 break;
4778 default:
4779 gcc_unreachable ();
4781 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4782 new_stmt = gimple_build_assign (vec_dest, data_ref);
4783 new_temp = make_ssa_name (vec_dest, new_stmt);
4784 gimple_assign_set_lhs (new_stmt, new_temp);
4785 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4786 mark_symbols_for_renaming (new_stmt);
4788 /* 3. Handle explicit realignment if necessary/supported.
4789 Create in loop:
4790 vec_dest = realign_load (msq, lsq, realignment_token) */
4791 if (alignment_support_scheme == dr_explicit_realign_optimized
4792 || alignment_support_scheme == dr_explicit_realign)
4794 lsq = gimple_assign_lhs (new_stmt);
4795 if (!realignment_token)
4796 realignment_token = dataref_ptr;
4797 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4798 new_stmt
4799 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
4800 vec_dest, msq, lsq,
4801 realignment_token);
4802 new_temp = make_ssa_name (vec_dest, new_stmt);
4803 gimple_assign_set_lhs (new_stmt, new_temp);
4804 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4806 if (alignment_support_scheme == dr_explicit_realign_optimized)
4808 gcc_assert (phi);
4809 if (i == vec_num - 1 && j == ncopies - 1)
4810 add_phi_arg (phi, lsq,
4811 loop_latch_edge (containing_loop),
4812 UNKNOWN_LOCATION);
4813 msq = lsq;
4817 /* 4. Handle invariant-load. */
4818 if (inv_p && !bb_vinfo)
4820 tree tem, vec_inv;
4821 gimple_stmt_iterator gsi2 = *gsi;
4822 gcc_assert (!strided_load);
4823 gsi_next (&gsi2);
4824 tem = scalar_dest;
4825 if (!useless_type_conversion_p (TREE_TYPE (vectype),
4826 TREE_TYPE (tem)))
4828 tem = fold_convert (TREE_TYPE (vectype), tem);
4829 tem = force_gimple_operand_gsi (&gsi2, tem, true,
4830 NULL_TREE, true,
4831 GSI_SAME_STMT);
4833 vec_inv = build_vector_from_val (vectype, tem);
4834 new_temp = vect_init_vector (stmt, vec_inv,
4835 vectype, &gsi2);
4836 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4839 if (negative)
4841 tree perm_mask = perm_mask_for_reverse (vectype);
4842 new_temp = permute_vec_elements (new_temp, new_temp,
4843 perm_mask, stmt, gsi);
4844 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4847 /* Collect vector loads and later create their permutation in
4848 vect_transform_strided_load (). */
4849 if (strided_load || slp_perm)
4850 VEC_quick_push (tree, dr_chain, new_temp);
4852 /* Store vector loads in the corresponding SLP_NODE. */
4853 if (slp && !slp_perm)
4854 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
4855 new_stmt);
4859 if (slp && !slp_perm)
4860 continue;
4862 if (slp_perm)
4864 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
4865 slp_node_instance, false))
4867 VEC_free (tree, heap, dr_chain);
4868 return false;
4871 else
4873 if (strided_load)
4875 if (!load_lanes_p)
4876 vect_transform_strided_load (stmt, dr_chain, group_size, gsi);
4877 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4879 else
4881 if (j == 0)
4882 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4883 else
4884 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4885 prev_stmt_info = vinfo_for_stmt (new_stmt);
4888 if (dr_chain)
4889 VEC_free (tree, heap, dr_chain);
4892 return true;
4895 /* Function vect_is_simple_cond.
4897 Input:
4898 LOOP - the loop that is being vectorized.
4899 COND - Condition that is checked for simple use.
4901 Output:
4902 *COMP_VECTYPE - the vector type for the comparison.
4904 Returns whether a COND can be vectorized. Checks whether
4905 condition operands are supportable using vect_is_simple_use. */
4907 static bool
4908 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
4909 tree *comp_vectype)
4911 tree lhs, rhs;
4912 tree def;
4913 enum vect_def_type dt;
4914 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
4916 if (!COMPARISON_CLASS_P (cond))
4917 return false;
4919 lhs = TREE_OPERAND (cond, 0);
4920 rhs = TREE_OPERAND (cond, 1);
4922 if (TREE_CODE (lhs) == SSA_NAME)
4924 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4925 if (!vect_is_simple_use_1 (lhs, loop_vinfo, bb_vinfo, &lhs_def_stmt, &def,
4926 &dt, &vectype1))
4927 return false;
4929 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
4930 && TREE_CODE (lhs) != FIXED_CST)
4931 return false;
4933 if (TREE_CODE (rhs) == SSA_NAME)
4935 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4936 if (!vect_is_simple_use_1 (rhs, loop_vinfo, bb_vinfo, &rhs_def_stmt, &def,
4937 &dt, &vectype2))
4938 return false;
4940 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
4941 && TREE_CODE (rhs) != FIXED_CST)
4942 return false;
4944 *comp_vectype = vectype1 ? vectype1 : vectype2;
4945 return true;
4948 /* vectorizable_condition.
4950 Check if STMT is a conditional modify expression that can be vectorized.
4951 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4952 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4953 at GSI.
4955 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
4956 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
4957 the else clause if it is 2).
4959 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
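/* For example, the scalar stmt "x = a < b ? c : d" is vectorized (one stmt
   per copy) as vx = VEC_COND_EXPR <va < vb, vc, vd>, where va, vb, vc and vd
   are the vectorized defs of A, B, C and D.  */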
4961 bool
4962 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
4963 gimple *vec_stmt, tree reduc_def, int reduc_index,
4964 slp_tree slp_node)
4966 tree scalar_dest = NULL_TREE;
4967 tree vec_dest = NULL_TREE;
4968 tree cond_expr, then_clause, else_clause;
4969 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4970 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4971 tree comp_vectype = NULL_TREE;
4972 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
4973 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
4974 tree vec_compare, vec_cond_expr;
4975 tree new_temp;
4976 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4977 tree def;
4978 enum vect_def_type dt, dts[4];
4979 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4980 int ncopies;
4981 enum tree_code code;
4982 stmt_vec_info prev_stmt_info = NULL;
4983 int i, j;
4984 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4985 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
4986 VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
4988 if (slp_node || PURE_SLP_STMT (stmt_info))
4989 ncopies = 1;
4990 else
4991 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4993 gcc_assert (ncopies >= 1);
4994 if (reduc_index && ncopies > 1)
4995 return false; /* FORNOW */
4997 if (reduc_index && STMT_SLP_TYPE (stmt_info))
4998 return false;
5000 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5001 return false;
5003 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5004 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5005 && reduc_def))
5006 return false;
5008 /* FORNOW: not yet supported. */
5009 if (STMT_VINFO_LIVE_P (stmt_info))
5011 if (vect_print_dump_info (REPORT_DETAILS))
5012 fprintf (vect_dump, "value used after loop.");
5013 return false;
5016 /* Is vectorizable conditional operation? */
5017 if (!is_gimple_assign (stmt))
5018 return false;
5020 code = gimple_assign_rhs_code (stmt);
5022 if (code != COND_EXPR)
5023 return false;
5025 cond_expr = gimple_assign_rhs1 (stmt);
5026 then_clause = gimple_assign_rhs2 (stmt);
5027 else_clause = gimple_assign_rhs3 (stmt);
5029 if (!vect_is_simple_cond (cond_expr, loop_vinfo, bb_vinfo, &comp_vectype)
5030 || !comp_vectype)
5031 return false;
5033 if (TREE_CODE (then_clause) == SSA_NAME)
5035 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5036 if (!vect_is_simple_use (then_clause, loop_vinfo, bb_vinfo,
5037 &then_def_stmt, &def, &dt))
5038 return false;
5040 else if (TREE_CODE (then_clause) != INTEGER_CST
5041 && TREE_CODE (then_clause) != REAL_CST
5042 && TREE_CODE (then_clause) != FIXED_CST)
5043 return false;
5045 if (TREE_CODE (else_clause) == SSA_NAME)
5047 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5048 if (!vect_is_simple_use (else_clause, loop_vinfo, bb_vinfo,
5049 &else_def_stmt, &def, &dt))
5050 return false;
5052 else if (TREE_CODE (else_clause) != INTEGER_CST
5053 && TREE_CODE (else_clause) != REAL_CST
5054 && TREE_CODE (else_clause) != FIXED_CST)
5055 return false;
5057 if (!vec_stmt)
5059 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5060 return expand_vec_cond_expr_p (vectype, comp_vectype);
5063 /* Transform. */
5065 if (!slp_node)
5067 vec_oprnds0 = VEC_alloc (tree, heap, 1);
5068 vec_oprnds1 = VEC_alloc (tree, heap, 1);
5069 vec_oprnds2 = VEC_alloc (tree, heap, 1);
5070 vec_oprnds3 = VEC_alloc (tree, heap, 1);
5073 /* Handle def. */
5074 scalar_dest = gimple_assign_lhs (stmt);
5075 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5077 /* Handle cond expr. */
5078 for (j = 0; j < ncopies; j++)
5080 gimple new_stmt = NULL;
5081 if (j == 0)
5083 if (slp_node)
5085 VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
5086 VEC (slp_void_p, heap) *vec_defs;
5088 vec_defs = VEC_alloc (slp_void_p, heap, 4);
5089 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
5090 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
5091 VEC_safe_push (tree, heap, ops, then_clause);
5092 VEC_safe_push (tree, heap, ops, else_clause);
5093 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5094 vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5095 vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5096 vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5097 vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5099 VEC_free (tree, heap, ops);
5100 VEC_free (slp_void_p, heap, vec_defs);
5102 else
5104 gimple gtemp;
5105 vec_cond_lhs =
5106 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5107 stmt, NULL);
5108 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), loop_vinfo,
5109 NULL, &gtemp, &def, &dts[0]);
5111 vec_cond_rhs =
5112 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5113 stmt, NULL);
5114 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), loop_vinfo,
5115 NULL, &gtemp, &def, &dts[1]);
5116 if (reduc_index == 1)
5117 vec_then_clause = reduc_def;
5118 else
5120 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5121 stmt, NULL);
5122 vect_is_simple_use (then_clause, loop_vinfo,
5123 NULL, &gtemp, &def, &dts[2]);
5125 if (reduc_index == 2)
5126 vec_else_clause = reduc_def;
5127 else
5129 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5130 stmt, NULL);
5131 vect_is_simple_use (else_clause, loop_vinfo,
5132 NULL, &gtemp, &def, &dts[3]);
5136 else
5138 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5139 VEC_pop (tree, vec_oprnds0));
5140 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5141 VEC_pop (tree, vec_oprnds1));
5142 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5143 VEC_pop (tree, vec_oprnds2));
5144 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5145 VEC_pop (tree, vec_oprnds3));
5148 if (!slp_node)
5150 VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5151 VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5152 VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5153 VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
5156 /* Arguments are ready. Create the new vector stmt. */
5157 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5159 vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5160 vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5161 vec_else_clause = VEC_index (tree, vec_oprnds3, i);
5163 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5164 vec_cond_lhs, vec_cond_rhs);
5165 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5166 vec_compare, vec_then_clause, vec_else_clause);
5168 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5169 new_temp = make_ssa_name (vec_dest, new_stmt);
5170 gimple_assign_set_lhs (new_stmt, new_temp);
5171 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5172 if (slp_node)
5173 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5176 if (slp_node)
5177 continue;
5179 if (j == 0)
5180 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5181 else
5182 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5184 prev_stmt_info = vinfo_for_stmt (new_stmt);
5187 VEC_free (tree, heap, vec_oprnds0);
5188 VEC_free (tree, heap, vec_oprnds1);
5189 VEC_free (tree, heap, vec_oprnds2);
5190 VEC_free (tree, heap, vec_oprnds3);
5192 return true;
5196 /* Make sure the statement is vectorizable. */
5198 bool
5199 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5201 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5202 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5203 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5204 bool ok;
5205 tree scalar_type, vectype;
5206 gimple pattern_stmt, pattern_def_stmt;
5208 if (vect_print_dump_info (REPORT_DETAILS))
5210 fprintf (vect_dump, "==> examining statement: ");
5211 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5214 if (gimple_has_volatile_ops (stmt))
5216 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5217 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5219 return false;
5222 /* Skip stmts that do not need to be vectorized. In loops this is expected
5223 to include:
5224 - the COND_EXPR which is the loop exit condition
5225 - any LABEL_EXPRs in the loop
5226 - computations that are used only for array indexing or loop control.
5227 In basic blocks we only analyze statements that are a part of some SLP
5228 instance, therefore, all the statements are relevant.
5230 Pattern statement needs to be analyzed instead of the original statement
5231 if the original statement is not relevant. Otherwise, we analyze both
5232 statements. */
5234 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5235 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5236 && !STMT_VINFO_LIVE_P (stmt_info))
5238 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5239 && pattern_stmt
5240 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5241 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5243 /* Analyze PATTERN_STMT instead of the original stmt. */
5244 stmt = pattern_stmt;
5245 stmt_info = vinfo_for_stmt (pattern_stmt);
5246 if (vect_print_dump_info (REPORT_DETAILS))
5248 fprintf (vect_dump, "==> examining pattern statement: ");
5249 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5252 else
5254 if (vect_print_dump_info (REPORT_DETAILS))
5255 fprintf (vect_dump, "irrelevant.");
5257 return true;
5260 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5261 && pattern_stmt
5262 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5263 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5265 /* Analyze PATTERN_STMT too. */
5266 if (vect_print_dump_info (REPORT_DETAILS))
5268 fprintf (vect_dump, "==> examining pattern statement: ");
5269 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5272 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5273 return false;
5276 if (is_pattern_stmt_p (stmt_info)
5277 && (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
5278 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5279 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
5281 /* Analyze def stmt of STMT if it's a pattern stmt. */
5282 if (vect_print_dump_info (REPORT_DETAILS))
5284 fprintf (vect_dump, "==> examining pattern def statement: ");
5285 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5288 if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
5289 return false;
5293 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5295 case vect_internal_def:
5296 break;
5298 case vect_reduction_def:
5299 case vect_nested_cycle:
5300 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5301 || relevance == vect_used_in_outer_by_reduction
5302 || relevance == vect_unused_in_scope));
5303 break;
5305 case vect_induction_def:
5306 case vect_constant_def:
5307 case vect_external_def:
5308 case vect_unknown_def_type:
5309 default:
5310 gcc_unreachable ();
5313 if (bb_vinfo)
5315 gcc_assert (PURE_SLP_STMT (stmt_info));
5317 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5318 if (vect_print_dump_info (REPORT_DETAILS))
5320 fprintf (vect_dump, "get vectype for scalar type: ");
5321 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5324 vectype = get_vectype_for_scalar_type (scalar_type);
5325 if (!vectype)
5327 if (vect_print_dump_info (REPORT_DETAILS))
5329 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5330 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5332 return false;
5335 if (vect_print_dump_info (REPORT_DETAILS))
5337 fprintf (vect_dump, "vectype: ");
5338 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5341 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5344 if (STMT_VINFO_RELEVANT_P (stmt_info))
5346 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5347 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5348 *need_to_vectorize = true;
5351 ok = true;
5352 if (!bb_vinfo
5353 && (STMT_VINFO_RELEVANT_P (stmt_info)
5354 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5355 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5356 || vectorizable_shift (stmt, NULL, NULL, NULL)
5357 || vectorizable_operation (stmt, NULL, NULL, NULL)
5358 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5359 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5360 || vectorizable_call (stmt, NULL, NULL, NULL)
5361 || vectorizable_store (stmt, NULL, NULL, NULL)
5362 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5363 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5364 else
5366 if (bb_vinfo)
5367 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5368 || vectorizable_shift (stmt, NULL, NULL, node)
5369 || vectorizable_operation (stmt, NULL, NULL, node)
5370 || vectorizable_assignment (stmt, NULL, NULL, node)
5371 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5372 || vectorizable_call (stmt, NULL, NULL, node)
5373 || vectorizable_store (stmt, NULL, NULL, node)
5374 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5377 if (!ok)
5379 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5381 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5382 fprintf (vect_dump, "supported: ");
5383 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5386 return false;
5389 if (bb_vinfo)
5390 return true;
5392 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5393 need extra handling, except for vectorizable reductions. */
5394 if (STMT_VINFO_LIVE_P (stmt_info)
5395 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5396 ok = vectorizable_live_operation (stmt, NULL, NULL);
5398 if (!ok)
5400 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5402 fprintf (vect_dump, "not vectorized: live stmt not ");
5403 fprintf (vect_dump, "supported: ");
5404 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5407 return false;
5410 return true;
5414 /* Function vect_transform_stmt.
5416 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5418 bool
5419 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5420 bool *strided_store, slp_tree slp_node,
5421 slp_instance slp_node_instance)
5423 bool is_store = false;
5424 gimple vec_stmt = NULL;
5425 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5426 bool done;
5428 switch (STMT_VINFO_TYPE (stmt_info))
5430 case type_demotion_vec_info_type:
5431 case type_promotion_vec_info_type:
5432 case type_conversion_vec_info_type:
5433 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5434 gcc_assert (done);
5435 break;
5437 case induc_vec_info_type:
5438 gcc_assert (!slp_node);
5439 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5440 gcc_assert (done);
5441 break;
5443 case shift_vec_info_type:
5444 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5445 gcc_assert (done);
5446 break;
5448 case op_vec_info_type:
5449 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5450 gcc_assert (done);
5451 break;
5453 case assignment_vec_info_type:
5454 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5455 gcc_assert (done);
5456 break;
5458 case load_vec_info_type:
5459 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5460 slp_node_instance);
5461 gcc_assert (done);
5462 break;
5464 case store_vec_info_type:
5465 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5466 gcc_assert (done);
5467 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
5469 /* In case of interleaving, the whole chain is vectorized when the
5470 last store in the chain is reached. Store stmts before the last
5471 one are skipped, and their vec_stmt_info shouldn't be freed
5472 meanwhile. */
5473 *strided_store = true;
5474 if (STMT_VINFO_VEC_STMT (stmt_info))
5475 is_store = true;
5477 else
5478 is_store = true;
5479 break;
5481 case condition_vec_info_type:
5482 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5483 gcc_assert (done);
5484 break;
5486 case call_vec_info_type:
5487 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5488 stmt = gsi_stmt (*gsi);
5489 break;
5491 case reduc_vec_info_type:
5492 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5493 gcc_assert (done);
5494 break;
5496 default:
5497 if (!STMT_VINFO_LIVE_P (stmt_info))
5499 if (vect_print_dump_info (REPORT_DETAILS))
5500 fprintf (vect_dump, "stmt not supported.");
5501 gcc_unreachable ();
5505 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5506 is being vectorized, but outside the immediately enclosing loop. */
5507 if (vec_stmt
5508 && STMT_VINFO_LOOP_VINFO (stmt_info)
5509 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5510 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5511 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5512 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5513 || STMT_VINFO_RELEVANT (stmt_info) ==
5514 vect_used_in_outer_by_reduction))
5516 struct loop *innerloop = LOOP_VINFO_LOOP (
5517 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5518 imm_use_iterator imm_iter;
5519 use_operand_p use_p;
5520 tree scalar_dest;
5521 gimple exit_phi;
5523 if (vect_print_dump_info (REPORT_DETAILS))
5524 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5526 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5527 (to be used when vectorizing outer-loop stmts that use the DEF of
5528 STMT). */
5529 if (gimple_code (stmt) == GIMPLE_PHI)
5530 scalar_dest = PHI_RESULT (stmt);
5531 else
5532 scalar_dest = gimple_assign_lhs (stmt);
5534 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5536 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5538 exit_phi = USE_STMT (use_p);
5539 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5544 /* Handle stmts whose DEF is used outside the loop-nest that is
5545 being vectorized. */
5546 if (STMT_VINFO_LIVE_P (stmt_info)
5547 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5549 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5550 gcc_assert (done);
5553 if (vec_stmt)
5554 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5556 return is_store;
5560 /* Remove a group of stores (for SLP or interleaving), free their
5561 stmt_vec_info. */
5563 void
5564 vect_remove_stores (gimple first_stmt)
5566 gimple next = first_stmt;
5567 gimple tmp;
5568 gimple_stmt_iterator next_si;
5570 while (next)
5572 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5574 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5575 if (is_pattern_stmt_p (stmt_info))
5576 next = STMT_VINFO_RELATED_STMT (stmt_info);
5577 /* Free the attached stmt_vec_info and remove the stmt. */
5578 next_si = gsi_for_stmt (next);
5579 gsi_remove (&next_si, true);
5580 free_stmt_vec_info (next);
5581 next = tmp;
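
/* Illustrative sketch (editorial, compiled out): a caller that has just
   emitted the vectorized copy of an interleaved store group would drop
   the original scalar stores by walking the group from its first
   element.  The helper name is hypothetical.  */
#if 0
static void
example_drop_scalar_store_group (gimple last_store_in_group)
{
  gimple first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (last_store_in_group));
  vect_remove_stores (first);
}
#endif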
5586 /* Function new_stmt_vec_info.
5588 Create and initialize a new stmt_vec_info struct for STMT. */
5590 stmt_vec_info
5591 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5592 bb_vec_info bb_vinfo)
5594 stmt_vec_info res;
5595 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5597 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5598 STMT_VINFO_STMT (res) = stmt;
5599 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5600 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5601 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5602 STMT_VINFO_LIVE_P (res) = false;
5603 STMT_VINFO_VECTYPE (res) = NULL;
5604 STMT_VINFO_VEC_STMT (res) = NULL;
5605 STMT_VINFO_VECTORIZABLE (res) = true;
5606 STMT_VINFO_IN_PATTERN_P (res) = false;
5607 STMT_VINFO_RELATED_STMT (res) = NULL;
5608 STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
5609 STMT_VINFO_DATA_REF (res) = NULL;
5611 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5612 STMT_VINFO_DR_OFFSET (res) = NULL;
5613 STMT_VINFO_DR_INIT (res) = NULL;
5614 STMT_VINFO_DR_STEP (res) = NULL;
5615 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5617 if (gimple_code (stmt) == GIMPLE_PHI
5618 && is_loop_header_bb_p (gimple_bb (stmt)))
5619 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5620 else
5621 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5623 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5624 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5625 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5626 STMT_SLP_TYPE (res) = loop_vect;
5627 GROUP_FIRST_ELEMENT (res) = NULL;
5628 GROUP_NEXT_ELEMENT (res) = NULL;
5629 GROUP_SIZE (res) = 0;
5630 GROUP_STORE_COUNT (res) = 0;
5631 GROUP_GAP (res) = 0;
5632 GROUP_SAME_DR_STMT (res) = NULL;
5633 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5635 return res;
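
/* Illustrative sketch (editorial, compiled out): a stmt_vec_info is
   normally created and attached to its statement in one step; the
   hypothetical helper below shows the usual pairing with
   set_vinfo_for_stmt.  */
#if 0
static void
example_attach_stmt_vinfo (gimple stmt, loop_vec_info loop_vinfo)
{
  set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, loop_vinfo, NULL));
}
#endif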
5639 /* Create the vector that maps statement UIDs to stmt_vec_info structs. */
5641 void
5642 init_stmt_vec_info_vec (void)
5644 gcc_assert (!stmt_vec_info_vec);
5645 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5649 /* Free the stmt_vec_info vector. */
5651 void
5652 free_stmt_vec_info_vec (void)
5654 gcc_assert (stmt_vec_info_vec);
5655 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5659 /* Free stmt vectorization related info. */
5661 void
5662 free_stmt_vec_info (gimple stmt)
5664 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5666 if (!stmt_info)
5667 return;
5669 /* Check if this statement has a related "pattern stmt"
5670 (introduced by the vectorizer during the pattern recognition
5671 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
5672 too. */
5673 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5675 stmt_vec_info patt_info
5676 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5677 if (patt_info)
5679 if (STMT_VINFO_PATTERN_DEF_STMT (patt_info))
5680 free_stmt_vec_info (STMT_VINFO_PATTERN_DEF_STMT (patt_info));
5681 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
5685 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5686 set_vinfo_for_stmt (stmt, NULL);
5687 free (stmt_info);
5691 /* Function get_vectype_for_scalar_type_and_size.
5693 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5694 by the target. */
5696 static tree
5697 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5699 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5700 enum machine_mode simd_mode;
5701 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5702 int nunits;
5703 tree vectype;
5705 if (nbytes == 0)
5706 return NULL_TREE;
5708 /* We can't build a vector type of elements with alignment bigger than
5709 their size. */
5710 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5711 return NULL_TREE;
5713 /* For vector types of elements whose mode precision doesn't
5714 match their type's precision we use an element type of mode
5715 precision. The vectorization routines will have to make sure
5716 they support the proper result truncation/extension. */
5717 if (INTEGRAL_TYPE_P (scalar_type)
5718 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
5719 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5720 TYPE_UNSIGNED (scalar_type));
5722 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5723 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5724 return NULL_TREE;
5726 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5727 When the component mode passes the above test simply use a type
5728 corresponding to that mode. The theory is that any use that
5729 would cause problems with this will disable vectorization anyway. */
5730 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5731 && !INTEGRAL_TYPE_P (scalar_type)
5732 && !POINTER_TYPE_P (scalar_type))
5733 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5735 /* If no size was supplied use the mode the target prefers. Otherwise
5736 look up a vector mode of the specified size. */
5737 if (size == 0)
5738 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5739 else
5740 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5741 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
5742 if (nunits <= 1)
5743 return NULL_TREE;
5745 vectype = build_vector_type (scalar_type, nunits);
5746 if (vect_print_dump_info (REPORT_DETAILS))
5748 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
5749 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5752 if (!vectype)
5753 return NULL_TREE;
5755 if (vect_print_dump_info (REPORT_DETAILS))
5757 fprintf (vect_dump, "vectype: ");
5758 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5761 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5762 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
5764 if (vect_print_dump_info (REPORT_DETAILS))
5765 fprintf (vect_dump, "mode not supported by target.");
5766 return NULL_TREE;
5769 return vectype;
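
/* Illustrative sketch (editorial, compiled out): on a target with
   16-byte vectors, querying 'int' (4 bytes) with SIZE == 16 yields a
   4-unit vector type, and with SIZE == 8 a 2-unit one; either query
   returns NULL_TREE when no such vector mode exists.  The helper name
   is hypothetical.  */
#if 0
static void
example_vectype_queries (void)
{
  tree v16 = get_vectype_for_scalar_type_and_size (integer_type_node, 16);
  tree v8 = get_vectype_for_scalar_type_and_size (integer_type_node, 8);
  (void) v16;
  (void) v8;
}
#endif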
5772 unsigned int current_vector_size;
5774 /* Function get_vectype_for_scalar_type.
5776 Returns the vector type corresponding to SCALAR_TYPE as supported
5777 by the target. */
5779 tree
5780 get_vectype_for_scalar_type (tree scalar_type)
5782 tree vectype;
5783 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
5784 current_vector_size);
5785 if (vectype
5786 && current_vector_size == 0)
5787 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
5788 return vectype;
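
/* Illustrative sketch (editorial, compiled out): the first successful
   query latches current_vector_size, so subsequent queries for other
   scalar types return vector types of the same size in bytes.  The
   helper name is hypothetical.  */
#if 0
static void
example_vector_size_latching (void)
{
  tree vi = get_vectype_for_scalar_type (integer_type_node);
  tree vs = get_vectype_for_scalar_type (short_integer_type_node);
  /* If VI is a 16-byte vector, VS is also chosen 16 bytes wide
     (with more, narrower elements).  */
  (void) vi;
  (void) vs;
}
#endif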
5791 /* Function get_same_sized_vectype
5793 Returns a vector type corresponding to SCALAR_TYPE of size
5794 VECTOR_TYPE if supported by the target. */
5796 tree
5797 get_same_sized_vectype (tree scalar_type, tree vector_type)
5799 return get_vectype_for_scalar_type_and_size
5800 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
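
/* Illustrative sketch (editorial, compiled out): when vectorizing an
   int <-> float conversion, the integer vector type is chosen to match
   the size of the float vector type already selected for the statement.
   The helper name is hypothetical.  */
#if 0
static tree
example_matching_int_vectype (tree float_vectype)
{
  return get_same_sized_vectype (integer_type_node, float_vectype);
}
#endif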
5803 /* Function vect_is_simple_use.
5805 Input:
5806 LOOP_VINFO - the vect info of the loop that is being vectorized.
5807 BB_VINFO - the vect info of the basic block that is being vectorized.
5808 OPERAND - operand of a stmt in the loop or bb.
5809 DEF - the defining stmt in case OPERAND is an SSA_NAME.
5811 Returns whether a stmt with OPERAND can be vectorized.
5812 For loops, supportable operands are constants, loop invariants, and operands
5813 that are defined by the current iteration of the loop. Unsupportable
5814 operands are those that are defined by a previous iteration of the loop (as
5815 is the case in reduction/induction computations).
5816 For basic blocks, supportable operands are constants and bb invariants.
5817 For now, operands defined outside the basic block are not supported. */
5819 bool
5820 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
5821 bb_vec_info bb_vinfo, gimple *def_stmt,
5822 tree *def, enum vect_def_type *dt)
5824 basic_block bb;
5825 stmt_vec_info stmt_vinfo;
5826 struct loop *loop = NULL;
5828 if (loop_vinfo)
5829 loop = LOOP_VINFO_LOOP (loop_vinfo);
5831 *def_stmt = NULL;
5832 *def = NULL_TREE;
5834 if (vect_print_dump_info (REPORT_DETAILS))
5836 fprintf (vect_dump, "vect_is_simple_use: operand ");
5837 print_generic_expr (vect_dump, operand, TDF_SLIM);
5840 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
5842 *dt = vect_constant_def;
5843 return true;
5846 if (is_gimple_min_invariant (operand))
5848 *def = operand;
5849 *dt = vect_external_def;
5850 return true;
5853 if (TREE_CODE (operand) == PAREN_EXPR)
5855 if (vect_print_dump_info (REPORT_DETAILS))
5856 fprintf (vect_dump, "non-associatable copy.");
5857 operand = TREE_OPERAND (operand, 0);
5860 if (TREE_CODE (operand) != SSA_NAME)
5862 if (vect_print_dump_info (REPORT_DETAILS))
5863 fprintf (vect_dump, "not ssa-name.");
5864 return false;
5867 *def_stmt = SSA_NAME_DEF_STMT (operand);
5868 if (*def_stmt == NULL)
5870 if (vect_print_dump_info (REPORT_DETAILS))
5871 fprintf (vect_dump, "no def_stmt.");
5872 return false;
5875 if (vect_print_dump_info (REPORT_DETAILS))
5877 fprintf (vect_dump, "def_stmt: ");
5878 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
5881 /* An empty stmt is expected only in case of a function argument
5882 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
5883 if (gimple_nop_p (*def_stmt))
5885 *def = operand;
5886 *dt = vect_external_def;
5887 return true;
5890 bb = gimple_bb (*def_stmt);
5892 if ((loop && !flow_bb_inside_loop_p (loop, bb))
5893 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
5894 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
5895 *dt = vect_external_def;
5896 else
5898 stmt_vinfo = vinfo_for_stmt (*def_stmt);
5899 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
5902 if (*dt == vect_unknown_def_type)
5904 if (vect_print_dump_info (REPORT_DETAILS))
5905 fprintf (vect_dump, "Unsupported pattern.");
5906 return false;
5909 if (vect_print_dump_info (REPORT_DETAILS))
5910 fprintf (vect_dump, "type of def: %d.", *dt);
5912 switch (gimple_code (*def_stmt))
5914 case GIMPLE_PHI:
5915 *def = gimple_phi_result (*def_stmt);
5916 break;
5918 case GIMPLE_ASSIGN:
5919 *def = gimple_assign_lhs (*def_stmt);
5920 break;
5922 case GIMPLE_CALL:
5923 *def = gimple_call_lhs (*def_stmt);
5924 if (*def != NULL)
5925 break;
5926 /* FALLTHRU */
5927 default:
5928 if (vect_print_dump_info (REPORT_DETAILS))
5929 fprintf (vect_dump, "unsupported defining stmt: ");
5930 return false;
5933 return true;
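
/* Illustrative sketch (editorial, compiled out): classify the second
   operand of a GIMPLE assignment in a loop being vectorized; operands
   defined by an unsupported pattern (vect_unknown_def_type) make
   vect_is_simple_use return false.  The helper name is hypothetical.  */
#if 0
static bool
example_operand_is_vectorizable (gimple stmt, loop_vec_info loop_vinfo)
{
  tree op = gimple_assign_rhs2 (stmt);
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;

  return vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt);
}
#endif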
5936 /* Function vect_is_simple_use_1.
5938 Same as vect_is_simple_use but also determines the vector operand
5939 type of OPERAND and stores it to *VECTYPE. If the definition of
5940 OPERAND is vect_uninitialized_def, vect_constant_def or
5941 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
5942 is responsible for computing the best-suited vector type for the
5943 scalar operand. */
5945 bool
5946 vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
5947 bb_vec_info bb_vinfo, gimple *def_stmt,
5948 tree *def, enum vect_def_type *dt, tree *vectype)
5950 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
5951 return false;
5953 /* Now get a vector type if the def is internal, otherwise supply
5954 NULL_TREE and leave it up to the caller to figure out a proper
5955 type for the use stmt. */
5956 if (*dt == vect_internal_def
5957 || *dt == vect_induction_def
5958 || *dt == vect_reduction_def
5959 || *dt == vect_double_reduction_def
5960 || *dt == vect_nested_cycle)
5962 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
5964 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5965 && !STMT_VINFO_RELEVANT (stmt_info)
5966 && !STMT_VINFO_LIVE_P (stmt_info))
5967 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5969 *vectype = STMT_VINFO_VECTYPE (stmt_info);
5970 gcc_assert (*vectype != NULL_TREE);
5972 else if (*dt == vect_uninitialized_def
5973 || *dt == vect_constant_def
5974 || *dt == vect_external_def)
5975 *vectype = NULL_TREE;
5976 else
5977 gcc_unreachable ();
5979 return true;
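
/* Illustrative sketch (editorial, compiled out): like the previous
   example, but also retrieve the operand's vector type.  A NULL_TREE
   VECTYPE means the operand is constant or external and the caller
   must choose a suitable vector type itself.  The helper name is
   hypothetical.  */
#if 0
static bool
example_operand_vectype (tree op, loop_vec_info loop_vinfo, tree *vectype)
{
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;

  return vect_is_simple_use_1 (op, loop_vinfo, NULL, &def_stmt, &def,
			       &dt, vectype);
}
#endif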
5983 /* Function supportable_widening_operation
5985 Check whether an operation represented by the code CODE is a
5986 widening operation that is supported by the target platform in
5987 vector form (i.e., when operating on arguments of type VECTYPE_IN
5988 producing a result of type VECTYPE_OUT).
5990 Widening operations we currently support are NOP (CONVERT), FLOAT,
5991 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations
5992 are supported by the target platform either directly (via vector tree-codes), or via
5993 target builtins.
5995 Output:
5996 - CODE1 and CODE2 are codes of vector operations to be used when
5997 vectorizing the operation, if available.
5998 - DECL1 and DECL2 are decls of target builtin functions to be used
5999 when vectorizing the operation, if available. In this case,
6000 CODE1 and CODE2 are CALL_EXPR.
6001 - MULTI_STEP_CVT determines the number of required intermediate steps in
6002 case of multi-step conversion (like char->short->int - in that case
6003 MULTI_STEP_CVT will be 1).
6004 - INTERM_TYPES contains the intermediate type required to perform the
6005 widening operation (short in the above example). */
6007 bool
6008 supportable_widening_operation (enum tree_code code, gimple stmt,
6009 tree vectype_out, tree vectype_in,
6010 tree *decl1, tree *decl2,
6011 enum tree_code *code1, enum tree_code *code2,
6012 int *multi_step_cvt,
6013 VEC (tree, heap) **interm_types)
6015 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6016 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6017 struct loop *vect_loop = NULL;
6018 bool ordered_p;
6019 enum machine_mode vec_mode;
6020 enum insn_code icode1, icode2;
6021 optab optab1, optab2;
6022 tree vectype = vectype_in;
6023 tree wide_vectype = vectype_out;
6024 enum tree_code c1, c2;
6025 int i;
6026 tree prev_type, intermediate_type;
6027 enum machine_mode intermediate_mode, prev_mode;
6028 optab optab3, optab4;
6030 *multi_step_cvt = 0;
6031 if (loop_info)
6032 vect_loop = LOOP_VINFO_LOOP (loop_info);
6034 /* The result of a vectorized widening operation usually requires two vectors
6035 (because the widened results do not fit into one vector). The vector
6036 results are normally expected to be generated in the same
6037 order as in the original scalar computation, i.e. if 8 results are
6038 generated in each vector iteration, they are to be organized as follows:
6039 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
6041 However, in the special case that the result of the widening operation is
6042 used in a reduction computation only, the order doesn't matter (because
6043 when vectorizing a reduction we change the order of the computation).
6044 Some targets can take advantage of this and generate more efficient code.
6045 For example, targets like Altivec, that support widen_mult using a sequence
6046 of {mult_even,mult_odd} generate the following vectors:
6047 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
6049 When vectorizing outer-loops, we execute the inner-loop sequentially
6050 (each vectorized inner-loop iteration contributes to VF outer-loop
6051 iterations in parallel). We therefore don't allow changing the order
6052 of the computation in the inner-loop during outer-loop vectorization. */
6054 if (vect_loop
6055 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6056 && !nested_in_vect_loop_p (vect_loop, stmt))
6057 ordered_p = false;
6058 else
6059 ordered_p = true;
6061 if (!ordered_p
6062 && code == WIDEN_MULT_EXPR
6063 && targetm.vectorize.builtin_mul_widen_even
6064 && targetm.vectorize.builtin_mul_widen_even (vectype)
6065 && targetm.vectorize.builtin_mul_widen_odd
6066 && targetm.vectorize.builtin_mul_widen_odd (vectype))
6068 if (vect_print_dump_info (REPORT_DETAILS))
6069 fprintf (vect_dump, "Unordered widening operation detected.");
6071 *code1 = *code2 = CALL_EXPR;
6072 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
6073 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
6074 return true;
6077 switch (code)
6079 case WIDEN_MULT_EXPR:
6080 c1 = VEC_WIDEN_MULT_LO_EXPR;
6081 c2 = VEC_WIDEN_MULT_HI_EXPR;
6082 break;
6084 case WIDEN_LSHIFT_EXPR:
6085 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6086 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6087 break;
6089 CASE_CONVERT:
6090 c1 = VEC_UNPACK_LO_EXPR;
6091 c2 = VEC_UNPACK_HI_EXPR;
6092 break;
6094 case FLOAT_EXPR:
6095 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6096 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6097 break;
6099 case FIX_TRUNC_EXPR:
6100 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6101 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6102 computing the operation. */
6103 return false;
6105 default:
6106 gcc_unreachable ();
6109 if (BYTES_BIG_ENDIAN)
6111 enum tree_code ctmp = c1;
6112 c1 = c2;
6113 c2 = ctmp;
6116 if (code == FIX_TRUNC_EXPR)
6118 /* The signedness is determined from the output operand. */
6119 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6120 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6122 else
6124 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6125 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6128 if (!optab1 || !optab2)
6129 return false;
6131 vec_mode = TYPE_MODE (vectype);
6132 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6133 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6134 return false;
6136 *code1 = c1;
6137 *code2 = c2;
6139 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6140 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6141 return true;
6143 /* Check if it's a multi-step conversion that can be done using intermediate
6144 types. */
6146 prev_type = vectype;
6147 prev_mode = vec_mode;
6149 if (!CONVERT_EXPR_CODE_P (code))
6150 return false;
6152 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6153 intermediate steps in the promotion sequence. We try
6154 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6155 not. */
6156 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6157 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6159 intermediate_mode = insn_data[icode1].operand[0].mode;
6160 intermediate_type
6161 = lang_hooks.types.type_for_mode (intermediate_mode,
6162 TYPE_UNSIGNED (prev_type));
6163 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6164 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6166 if (!optab3 || !optab4
6167 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6168 || insn_data[icode1].operand[0].mode != intermediate_mode
6169 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6170 || insn_data[icode2].operand[0].mode != intermediate_mode
6171 || ((icode1 = optab_handler (optab3, intermediate_mode))
6172 == CODE_FOR_nothing)
6173 || ((icode2 = optab_handler (optab4, intermediate_mode))
6174 == CODE_FOR_nothing))
6175 break;
6177 VEC_quick_push (tree, *interm_types, intermediate_type);
6178 (*multi_step_cvt)++;
6180 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6181 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6182 return true;
6184 prev_type = intermediate_type;
6185 prev_mode = intermediate_mode;
6188 VEC_free (tree, heap, *interm_types);
6189 return false;
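
/* Illustrative sketch (editorial, compiled out): ask whether a
   NOP-conversion from VECTYPE_IN (e.g. a vector of chars) to
   VECTYPE_OUT (e.g. a vector of ints) is supported.  For char -> int
   the answer is typically yes with MULTI_STEP_CVT == 1 and the short
   vector type recorded in INTERM_TYPES.  The helper name is
   hypothetical.  */
#if 0
static bool
example_widening_query (gimple stmt, tree vectype_out, tree vectype_in)
{
  tree decl1, decl2;
  enum tree_code code1, code2;
  int multi_step_cvt;
  VEC (tree, heap) *interm_types = NULL;
  bool ok;

  ok = supportable_widening_operation (NOP_EXPR, stmt, vectype_out,
				       vectype_in, &decl1, &decl2,
				       &code1, &code2, &multi_step_cvt,
				       &interm_types);
  if (ok)
    VEC_free (tree, heap, interm_types);
  return ok;
}
#endif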
6193 /* Function supportable_narrowing_operation
6195 Check whether an operation represented by the code CODE is a
6196 narrowing operation that is supported by the target platform in
6197 vector form (i.e., when operating on arguments of type VECTYPE_IN
6198 and producing a result of type VECTYPE_OUT).
6200 Narrowing operations we currently support are NOP (CONVERT) and
6201 FIX_TRUNC. This function checks if these operations are supported by
6202 the target platform directly via vector tree-codes.
6204 Output:
6205 - CODE1 is the code of a vector operation to be used when
6206 vectorizing the operation, if available.
6207 - MULTI_STEP_CVT determines the number of required intermediate steps in
6208 case of multi-step conversion (like int->short->char - in that case
6209 MULTI_STEP_CVT will be 1).
6210 - INTERM_TYPES contains the intermediate type required to perform the
6211 narrowing operation (short in the above example). */
6213 bool
6214 supportable_narrowing_operation (enum tree_code code,
6215 tree vectype_out, tree vectype_in,
6216 enum tree_code *code1, int *multi_step_cvt,
6217 VEC (tree, heap) **interm_types)
6219 enum machine_mode vec_mode;
6220 enum insn_code icode1;
6221 optab optab1, interm_optab;
6222 tree vectype = vectype_in;
6223 tree narrow_vectype = vectype_out;
6224 enum tree_code c1;
6225 tree intermediate_type;
6226 enum machine_mode intermediate_mode, prev_mode;
6227 int i;
6228 bool uns;
6230 *multi_step_cvt = 0;
6231 switch (code)
6233 CASE_CONVERT:
6234 c1 = VEC_PACK_TRUNC_EXPR;
6235 break;
6237 case FIX_TRUNC_EXPR:
6238 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6239 break;
6241 case FLOAT_EXPR:
6242 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6243 tree code and optabs used for computing the operation. */
6244 return false;
6246 default:
6247 gcc_unreachable ();
6250 if (code == FIX_TRUNC_EXPR)
6251 /* The signedness is determined from the output operand. */
6252 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6253 else
6254 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6256 if (!optab1)
6257 return false;
6259 vec_mode = TYPE_MODE (vectype);
6260 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6261 return false;
6263 *code1 = c1;
6265 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6266 return true;
6268 /* Check if it's a multi-step conversion that can be done using intermediate
6269 types. */
6270 prev_mode = vec_mode;
6271 if (code == FIX_TRUNC_EXPR)
6272 uns = TYPE_UNSIGNED (vectype_out);
6273 else
6274 uns = TYPE_UNSIGNED (vectype);
6276 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6277 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6278 costly than signed. */
6279 if (code == FIX_TRUNC_EXPR && uns)
6281 enum insn_code icode2;
6283 intermediate_type
6284 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6285 interm_optab
6286 = optab_for_tree_code (c1, intermediate_type, optab_default);
6287 if (interm_optab != NULL
6288 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6289 && insn_data[icode1].operand[0].mode
6290 == insn_data[icode2].operand[0].mode)
6292 uns = false;
6293 optab1 = interm_optab;
6294 icode1 = icode2;
6298 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6299 intermediate steps in the demotion sequence. We try
6300 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6301 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6302 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6304 intermediate_mode = insn_data[icode1].operand[0].mode;
6305 intermediate_type
6306 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6307 interm_optab
6308 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6309 optab_default);
6310 if (!interm_optab
6311 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6312 || insn_data[icode1].operand[0].mode != intermediate_mode
6313 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6314 == CODE_FOR_nothing))
6315 break;
6317 VEC_quick_push (tree, *interm_types, intermediate_type);
6318 (*multi_step_cvt)++;
6320 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6321 return true;
6323 prev_mode = intermediate_mode;
6324 optab1 = interm_optab;
6327 VEC_free (tree, heap, *interm_types);
6328 return false;
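
/* Illustrative sketch (editorial, compiled out): ask whether truncating
   VECTYPE_IN (e.g. a vector of ints) to VECTYPE_OUT (e.g. a vector of
   chars) is supported; for int -> char this is typically a multi-step
   conversion through a short vector, i.e. MULTI_STEP_CVT == 1.  The
   helper name is hypothetical.  */
#if 0
static bool
example_narrowing_query (tree vectype_out, tree vectype_in)
{
  enum tree_code code1;
  int multi_step_cvt;
  VEC (tree, heap) *interm_types = NULL;
  bool ok;

  ok = supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
					&code1, &multi_step_cvt,
					&interm_types);
  if (ok)
    VEC_free (tree, heap, interm_types);
  return ok;
}
#endif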