PR 43726 * config/lm32/lm32.h: Remove definition of GO_IF_MODE_DEPENDENT_ADDRESS.
[official-gcc.git] / gcc / tree-vect-stmts.c
blob8f564104379a2798f13db9adec7fa5f09f100426
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
36 #include "cfgloop.h"
37 #include "cfglayout.h"
38 #include "expr.h"
39 #include "recog.h"
40 #include "optabs.h"
41 #include "toplev.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
46 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
48 /* Function vect_mark_relevant.
50 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
52 static void
53 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
54 enum vect_relevant relevant, bool live_p)
56 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
57 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
58 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
60 if (vect_print_dump_info (REPORT_DETAILS))
61 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
63 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
65 gimple pattern_stmt;
67 /* This is the last stmt in a sequence that was detected as a
68 pattern that can potentially be vectorized. Don't mark the stmt
69 as relevant/live because it's not going to be vectorized.
70 Instead mark the pattern-stmt that replaces it. */
72 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
74 if (vect_print_dump_info (REPORT_DETAILS))
75 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
76 stmt_info = vinfo_for_stmt (pattern_stmt);
77 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
78 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
79 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
80 stmt = pattern_stmt;
83 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
84 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
85 STMT_VINFO_RELEVANT (stmt_info) = relevant;
87 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
88 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
90 if (vect_print_dump_info (REPORT_DETAILS))
91 fprintf (vect_dump, "already marked relevant/live.");
92 return;
95 VEC_safe_push (gimple, heap, *worklist, stmt);
99 /* Function vect_stmt_relevant_p.
101 Return true if STMT in loop that is represented by LOOP_VINFO is
102 "relevant for vectorization".
104 A stmt is considered "relevant for vectorization" if:
105 - it has uses outside the loop.
106 - it has vdefs (it alters memory).
107 - control stmts in the loop (except for the exit condition).
109 CHECKME: what other side effects would the vectorizer allow? */
111 static bool
112 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
113 enum vect_relevant *relevant, bool *live_p)
115 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
116 ssa_op_iter op_iter;
117 imm_use_iterator imm_iter;
118 use_operand_p use_p;
119 def_operand_p def_p;
121 *relevant = vect_unused_in_scope;
122 *live_p = false;
124 /* cond stmt other than loop exit cond. */
125 if (is_ctrl_stmt (stmt)
126 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
127 != loop_exit_ctrl_vec_info_type)
128 *relevant = vect_used_in_scope;
130 /* changing memory. */
131 if (gimple_code (stmt) != GIMPLE_PHI)
132 if (gimple_vdef (stmt))
134 if (vect_print_dump_info (REPORT_DETAILS))
135 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
136 *relevant = vect_used_in_scope;
139 /* uses outside the loop. */
140 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
142 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
144 basic_block bb = gimple_bb (USE_STMT (use_p));
145 if (!flow_bb_inside_loop_p (loop, bb))
147 if (vect_print_dump_info (REPORT_DETAILS))
148 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
150 if (is_gimple_debug (USE_STMT (use_p)))
151 continue;
153 /* We expect all such uses to be in the loop exit phis
154 (because of loop closed form) */
155 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
156 gcc_assert (bb == single_exit (loop)->dest);
158 *live_p = true;
163 return (*live_p || *relevant);
167 /* Function exist_non_indexing_operands_for_use_p
169 USE is one of the uses attached to STMT. Check if USE is
170 used in STMT for anything other than indexing an array. */
172 static bool
173 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
175 tree operand;
176 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
178 /* USE corresponds to some operand in STMT. If there is no data
179 reference in STMT, then any operand that corresponds to USE
180 is not indexing an array. */
181 if (!STMT_VINFO_DATA_REF (stmt_info))
182 return true;
184 /* STMT has a data_ref. FORNOW this means that its of one of
185 the following forms:
186 -1- ARRAY_REF = var
187 -2- var = ARRAY_REF
188 (This should have been verified in analyze_data_refs).
190 'var' in the second case corresponds to a def, not a use,
191 so USE cannot correspond to any operands that are not used
192 for array indexing.
194 Therefore, all we need to check is if STMT falls into the
195 first case, and whether var corresponds to USE. */
197 if (!gimple_assign_copy_p (stmt))
198 return false;
199 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
200 return false;
201 operand = gimple_assign_rhs1 (stmt);
202 if (TREE_CODE (operand) != SSA_NAME)
203 return false;
205 if (operand == use)
206 return true;
208 return false;
213 Function process_use.
215 Inputs:
216 - a USE in STMT in a loop represented by LOOP_VINFO
217 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
218 that defined USE. This is done by calling mark_relevant and passing it
219 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
221 Outputs:
222 Generally, LIVE_P and RELEVANT are used to define the liveness and
223 relevance info of the DEF_STMT of this USE:
224 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
225 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
226 Exceptions:
227 - case 1: If USE is used only for address computations (e.g. array indexing),
228 which does not need to be directly vectorized, then the liveness/relevance
229 of the respective DEF_STMT is left unchanged.
230 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
231 skip DEF_STMT cause it had already been processed.
232 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
233 be modified accordingly.
235 Return true if everything is as expected. Return false otherwise. */
237 static bool
238 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
239 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
241 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
242 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
243 stmt_vec_info dstmt_vinfo;
244 basic_block bb, def_bb;
245 tree def;
246 gimple def_stmt;
247 enum vect_def_type dt;
249 /* case 1: we are only interested in uses that need to be vectorized. Uses
250 that are used for address computation are not considered relevant. */
251 if (!exist_non_indexing_operands_for_use_p (use, stmt))
252 return true;
254 if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
256 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
257 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
258 return false;
261 if (!def_stmt || gimple_nop_p (def_stmt))
262 return true;
264 def_bb = gimple_bb (def_stmt);
265 if (!flow_bb_inside_loop_p (loop, def_bb))
267 if (vect_print_dump_info (REPORT_DETAILS))
268 fprintf (vect_dump, "def_stmt is out of loop.");
269 return true;
272 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
273 DEF_STMT must have already been processed, because this should be the
274 only way that STMT, which is a reduction-phi, was put in the worklist,
275 as there should be no other uses for DEF_STMT in the loop. So we just
276 check that everything is as expected, and we are done. */
277 dstmt_vinfo = vinfo_for_stmt (def_stmt);
278 bb = gimple_bb (stmt);
279 if (gimple_code (stmt) == GIMPLE_PHI
280 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
281 && gimple_code (def_stmt) != GIMPLE_PHI
282 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
283 && bb->loop_father == def_bb->loop_father)
285 if (vect_print_dump_info (REPORT_DETAILS))
286 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
287 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
288 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
289 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
290 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
291 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
292 return true;
295 /* case 3a: outer-loop stmt defining an inner-loop stmt:
296 outer-loop-header-bb:
297 d = def_stmt
298 inner-loop:
299 stmt # use (d)
300 outer-loop-tail-bb:
301 ... */
302 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
304 if (vect_print_dump_info (REPORT_DETAILS))
305 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
307 switch (relevant)
309 case vect_unused_in_scope:
310 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
311 vect_used_in_scope : vect_unused_in_scope;
312 break;
314 case vect_used_in_outer_by_reduction:
315 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
316 relevant = vect_used_by_reduction;
317 break;
319 case vect_used_in_outer:
320 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
321 relevant = vect_used_in_scope;
322 break;
324 case vect_used_in_scope:
325 break;
327 default:
328 gcc_unreachable ();
332 /* case 3b: inner-loop stmt defining an outer-loop stmt:
333 outer-loop-header-bb:
335 inner-loop:
336 d = def_stmt
337 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
338 stmt # use (d) */
339 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
341 if (vect_print_dump_info (REPORT_DETAILS))
342 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
344 switch (relevant)
346 case vect_unused_in_scope:
347 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
348 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
349 vect_used_in_outer_by_reduction : vect_unused_in_scope;
350 break;
352 case vect_used_by_reduction:
353 relevant = vect_used_in_outer_by_reduction;
354 break;
356 case vect_used_in_scope:
357 relevant = vect_used_in_outer;
358 break;
360 default:
361 gcc_unreachable ();
365 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
366 return true;
370 /* Function vect_mark_stmts_to_be_vectorized.
372 Not all stmts in the loop need to be vectorized. For example:
374 for i...
375 for j...
376 1. T0 = i + j
377 2. T1 = a[T0]
379 3. j = j + 1
381 Stmt 1 and 3 do not need to be vectorized, because loop control and
382 addressing of vectorized data-refs are handled differently.
384 This pass detects such stmts. */
386 bool
387 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
389 VEC(gimple,heap) *worklist;
390 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
391 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
392 unsigned int nbbs = loop->num_nodes;
393 gimple_stmt_iterator si;
394 gimple stmt;
395 unsigned int i;
396 stmt_vec_info stmt_vinfo;
397 basic_block bb;
398 gimple phi;
399 bool live_p;
400 enum vect_relevant relevant, tmp_relevant;
401 enum vect_def_type def_type;
403 if (vect_print_dump_info (REPORT_DETAILS))
404 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
406 worklist = VEC_alloc (gimple, heap, 64);
408 /* 1. Init worklist. */
409 for (i = 0; i < nbbs; i++)
411 bb = bbs[i];
412 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
414 phi = gsi_stmt (si);
415 if (vect_print_dump_info (REPORT_DETAILS))
417 fprintf (vect_dump, "init: phi relevant? ");
418 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
421 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
422 vect_mark_relevant (&worklist, phi, relevant, live_p);
424 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
426 stmt = gsi_stmt (si);
427 if (vect_print_dump_info (REPORT_DETAILS))
429 fprintf (vect_dump, "init: stmt relevant? ");
430 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
433 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
434 vect_mark_relevant (&worklist, stmt, relevant, live_p);
438 /* 2. Process_worklist */
439 while (VEC_length (gimple, worklist) > 0)
441 use_operand_p use_p;
442 ssa_op_iter iter;
444 stmt = VEC_pop (gimple, worklist);
445 if (vect_print_dump_info (REPORT_DETAILS))
447 fprintf (vect_dump, "worklist: examine stmt: ");
448 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
451 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
452 (DEF_STMT) as relevant/irrelevant and live/dead according to the
453 liveness and relevance properties of STMT. */
454 stmt_vinfo = vinfo_for_stmt (stmt);
455 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
456 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
458 /* Generally, the liveness and relevance properties of STMT are
459 propagated as is to the DEF_STMTs of its USEs:
460 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
461 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
463 One exception is when STMT has been identified as defining a reduction
464 variable; in this case we set the liveness/relevance as follows:
465 live_p = false
466 relevant = vect_used_by_reduction
467 This is because we distinguish between two kinds of relevant stmts -
468 those that are used by a reduction computation, and those that are
469 (also) used by a regular computation. This allows us later on to
470 identify stmts that are used solely by a reduction, and therefore the
471 order of the results that they produce does not have to be kept. */
473 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
474 tmp_relevant = relevant;
475 switch (def_type)
477 case vect_reduction_def:
478 switch (tmp_relevant)
480 case vect_unused_in_scope:
481 relevant = vect_used_by_reduction;
482 break;
484 case vect_used_by_reduction:
485 if (gimple_code (stmt) == GIMPLE_PHI)
486 break;
487 /* fall through */
489 default:
490 if (vect_print_dump_info (REPORT_DETAILS))
491 fprintf (vect_dump, "unsupported use of reduction.");
493 VEC_free (gimple, heap, worklist);
494 return false;
497 live_p = false;
498 break;
500 case vect_nested_cycle:
501 if (tmp_relevant != vect_unused_in_scope
502 && tmp_relevant != vect_used_in_outer_by_reduction
503 && tmp_relevant != vect_used_in_outer)
505 if (vect_print_dump_info (REPORT_DETAILS))
506 fprintf (vect_dump, "unsupported use of nested cycle.");
508 VEC_free (gimple, heap, worklist);
509 return false;
512 live_p = false;
513 break;
515 case vect_double_reduction_def:
516 if (tmp_relevant != vect_unused_in_scope
517 && tmp_relevant != vect_used_by_reduction)
519 if (vect_print_dump_info (REPORT_DETAILS))
520 fprintf (vect_dump, "unsupported use of double reduction.");
522 VEC_free (gimple, heap, worklist);
523 return false;
526 live_p = false;
527 break;
529 default:
530 break;
533 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
535 tree op = USE_FROM_PTR (use_p);
536 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
538 VEC_free (gimple, heap, worklist);
539 return false;
542 } /* while worklist */
544 VEC_free (gimple, heap, worklist);
545 return true;
550 cost_for_stmt (gimple stmt)
552 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
554 switch (STMT_VINFO_TYPE (stmt_info))
556 case load_vec_info_type:
557 return TARG_SCALAR_LOAD_COST;
558 case store_vec_info_type:
559 return TARG_SCALAR_STORE_COST;
560 case op_vec_info_type:
561 case condition_vec_info_type:
562 case assignment_vec_info_type:
563 case reduc_vec_info_type:
564 case induc_vec_info_type:
565 case type_promotion_vec_info_type:
566 case type_demotion_vec_info_type:
567 case type_conversion_vec_info_type:
568 case call_vec_info_type:
569 return TARG_SCALAR_STMT_COST;
570 case undef_vec_info_type:
571 default:
572 gcc_unreachable ();
576 /* Function vect_model_simple_cost.
578 Models cost for simple operations, i.e. those that only emit ncopies of a
579 single op. Right now, this does not account for multiple insns that could
580 be generated for the single vector op. We will handle that shortly. */
582 void
583 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
584 enum vect_def_type *dt, slp_tree slp_node)
586 int i;
587 int inside_cost = 0, outside_cost = 0;
589 /* The SLP costs were already calculated during SLP tree build. */
590 if (PURE_SLP_STMT (stmt_info))
591 return;
593 inside_cost = ncopies * TARG_VEC_STMT_COST;
595 /* FORNOW: Assuming maximum 2 args per stmts. */
596 for (i = 0; i < 2; i++)
598 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
599 outside_cost += TARG_SCALAR_TO_VEC_COST;
602 if (vect_print_dump_info (REPORT_COST))
603 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
604 "outside_cost = %d .", inside_cost, outside_cost);
606 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
607 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
608 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
612 /* Function vect_cost_strided_group_size
614 For strided load or store, return the group_size only if it is the first
615 load or store of a group, else return 1. This ensures that group size is
616 only returned once per group. */
618 static int
619 vect_cost_strided_group_size (stmt_vec_info stmt_info)
621 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
623 if (first_stmt == STMT_VINFO_STMT (stmt_info))
624 return DR_GROUP_SIZE (stmt_info);
626 return 1;
630 /* Function vect_model_store_cost
632 Models cost for stores. In the case of strided accesses, one access
633 has the overhead of the strided access attributed to it. */
635 void
636 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
637 enum vect_def_type dt, slp_tree slp_node)
639 int group_size;
640 int inside_cost = 0, outside_cost = 0;
642 /* The SLP costs were already calculated during SLP tree build. */
643 if (PURE_SLP_STMT (stmt_info))
644 return;
646 if (dt == vect_constant_def || dt == vect_external_def)
647 outside_cost = TARG_SCALAR_TO_VEC_COST;
649 /* Strided access? */
650 if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
651 group_size = vect_cost_strided_group_size (stmt_info);
652 /* Not a strided access. */
653 else
654 group_size = 1;
656 /* Is this an access in a group of stores, which provide strided access?
657 If so, add in the cost of the permutes. */
658 if (group_size > 1)
660 /* Uses a high and low interleave operation for each needed permute. */
661 inside_cost = ncopies * exact_log2(group_size) * group_size
662 * TARG_VEC_STMT_COST;
664 if (vect_print_dump_info (REPORT_COST))
665 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
666 group_size);
670 /* Costs of the stores. */
671 inside_cost += ncopies * TARG_VEC_STORE_COST;
673 if (vect_print_dump_info (REPORT_COST))
674 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
675 "outside_cost = %d .", inside_cost, outside_cost);
677 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
678 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
679 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
683 /* Function vect_model_load_cost
685 Models cost for loads. In the case of strided accesses, the last access
686 has the overhead of the strided access attributed to it. Since unaligned
687 accesses are supported for loads, we also account for the costs of the
688 access scheme chosen. */
690 void
691 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
694 int group_size;
695 int alignment_support_cheme;
696 gimple first_stmt;
697 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
698 int inside_cost = 0, outside_cost = 0;
700 /* The SLP costs were already calculated during SLP tree build. */
701 if (PURE_SLP_STMT (stmt_info))
702 return;
704 /* Strided accesses? */
705 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
706 if (first_stmt && !slp_node)
708 group_size = vect_cost_strided_group_size (stmt_info);
709 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
711 /* Not a strided access. */
712 else
714 group_size = 1;
715 first_dr = dr;
718 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
720 /* Is this an access in a group of loads providing strided access?
721 If so, add in the cost of the permutes. */
722 if (group_size > 1)
724 /* Uses an even and odd extract operations for each needed permute. */
725 inside_cost = ncopies * exact_log2(group_size) * group_size
726 * TARG_VEC_STMT_COST;
728 if (vect_print_dump_info (REPORT_COST))
729 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
730 group_size);
734 /* The loads themselves. */
735 switch (alignment_support_cheme)
737 case dr_aligned:
739 inside_cost += ncopies * TARG_VEC_LOAD_COST;
741 if (vect_print_dump_info (REPORT_COST))
742 fprintf (vect_dump, "vect_model_load_cost: aligned.");
744 break;
746 case dr_unaligned_supported:
748 /* Here, we assign an additional cost for the unaligned load. */
749 inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
751 if (vect_print_dump_info (REPORT_COST))
752 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
753 "hardware.");
755 break;
757 case dr_explicit_realign:
759 inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
761 /* FIXME: If the misalignment remains fixed across the iterations of
762 the containing loop, the following cost should be added to the
763 outside costs. */
764 if (targetm.vectorize.builtin_mask_for_load)
765 inside_cost += TARG_VEC_STMT_COST;
767 break;
769 case dr_explicit_realign_optimized:
771 if (vect_print_dump_info (REPORT_COST))
772 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
773 "pipelined.");
775 /* Unaligned software pipeline has a load of an address, an initial
776 load, and possibly a mask operation to "prime" the loop. However,
777 if this is an access in a group of loads, which provide strided
778 access, then the above cost should only be considered for one
779 access in the group. Inside the loop, there is a load op
780 and a realignment op. */
782 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
784 outside_cost = 2*TARG_VEC_STMT_COST;
785 if (targetm.vectorize.builtin_mask_for_load)
786 outside_cost += TARG_VEC_STMT_COST;
789 inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
791 break;
794 default:
795 gcc_unreachable ();
798 if (vect_print_dump_info (REPORT_COST))
799 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
800 "outside_cost = %d .", inside_cost, outside_cost);
802 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
803 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
804 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
808 /* Function vect_init_vector.
810 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
811 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
812 is not NULL. Otherwise, place the initialization at the loop preheader.
813 Return the DEF of INIT_STMT.
814 It will be used in the vectorization of STMT. */
816 tree
817 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
818 gimple_stmt_iterator *gsi)
820 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
821 tree new_var;
822 gimple init_stmt;
823 tree vec_oprnd;
824 edge pe;
825 tree new_temp;
826 basic_block new_bb;
828 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
829 add_referenced_var (new_var);
830 init_stmt = gimple_build_assign (new_var, vector_var);
831 new_temp = make_ssa_name (new_var, init_stmt);
832 gimple_assign_set_lhs (init_stmt, new_temp);
834 if (gsi)
835 vect_finish_stmt_generation (stmt, init_stmt, gsi);
836 else
838 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
840 if (loop_vinfo)
842 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
844 if (nested_in_vect_loop_p (loop, stmt))
845 loop = loop->inner;
847 pe = loop_preheader_edge (loop);
848 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
849 gcc_assert (!new_bb);
851 else
853 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
854 basic_block bb;
855 gimple_stmt_iterator gsi_bb_start;
857 gcc_assert (bb_vinfo);
858 bb = BB_VINFO_BB (bb_vinfo);
859 gsi_bb_start = gsi_after_labels (bb);
860 gsi_insert_before (&gsi_bb_start, init_stmt, GSI_SAME_STMT);
864 if (vect_print_dump_info (REPORT_DETAILS))
866 fprintf (vect_dump, "created new init_stmt: ");
867 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
870 vec_oprnd = gimple_assign_lhs (init_stmt);
871 return vec_oprnd;
875 /* Function vect_get_vec_def_for_operand.
877 OP is an operand in STMT. This function returns a (vector) def that will be
878 used in the vectorized stmt for STMT.
880 In the case that OP is an SSA_NAME which is defined in the loop, then
881 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
883 In case OP is an invariant or constant, a new stmt that creates a vector def
884 needs to be introduced. */
886 tree
887 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
889 tree vec_oprnd;
890 gimple vec_stmt;
891 gimple def_stmt;
892 stmt_vec_info def_stmt_info = NULL;
893 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
894 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
895 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
896 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
897 tree vec_inv;
898 tree vec_cst;
899 tree t = NULL_TREE;
900 tree def;
901 int i;
902 enum vect_def_type dt;
903 bool is_simple_use;
904 tree vector_type;
906 if (vect_print_dump_info (REPORT_DETAILS))
908 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
909 print_generic_expr (vect_dump, op, TDF_SLIM);
912 is_simple_use = vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def,
913 &dt);
914 gcc_assert (is_simple_use);
915 if (vect_print_dump_info (REPORT_DETAILS))
917 if (def)
919 fprintf (vect_dump, "def = ");
920 print_generic_expr (vect_dump, def, TDF_SLIM);
922 if (def_stmt)
924 fprintf (vect_dump, " def_stmt = ");
925 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
929 switch (dt)
931 /* Case 1: operand is a constant. */
932 case vect_constant_def:
934 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
935 gcc_assert (vector_type);
937 if (scalar_def)
938 *scalar_def = op;
940 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
941 if (vect_print_dump_info (REPORT_DETAILS))
942 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
944 for (i = nunits - 1; i >= 0; --i)
946 t = tree_cons (NULL_TREE, op, t);
948 vec_cst = build_vector (vector_type, t);
949 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
952 /* Case 2: operand is defined outside the loop - loop invariant. */
953 case vect_external_def:
955 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
956 gcc_assert (vector_type);
957 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
959 if (scalar_def)
960 *scalar_def = def;
962 /* Create 'vec_inv = {inv,inv,..,inv}' */
963 if (vect_print_dump_info (REPORT_DETAILS))
964 fprintf (vect_dump, "Create vector_inv.");
966 for (i = nunits - 1; i >= 0; --i)
968 t = tree_cons (NULL_TREE, def, t);
971 /* FIXME: use build_constructor directly. */
972 vec_inv = build_constructor_from_list (vector_type, t);
973 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
976 /* Case 3: operand is defined inside the loop. */
977 case vect_internal_def:
979 if (scalar_def)
980 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
982 /* Get the def from the vectorized stmt. */
983 def_stmt_info = vinfo_for_stmt (def_stmt);
984 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
985 gcc_assert (vec_stmt);
986 if (gimple_code (vec_stmt) == GIMPLE_PHI)
987 vec_oprnd = PHI_RESULT (vec_stmt);
988 else if (is_gimple_call (vec_stmt))
989 vec_oprnd = gimple_call_lhs (vec_stmt);
990 else
991 vec_oprnd = gimple_assign_lhs (vec_stmt);
992 return vec_oprnd;
995 /* Case 4: operand is defined by a loop header phi - reduction */
996 case vect_reduction_def:
997 case vect_double_reduction_def:
998 case vect_nested_cycle:
1000 struct loop *loop;
1002 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1003 loop = (gimple_bb (def_stmt))->loop_father;
1005 /* Get the def before the loop */
1006 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1007 return get_initial_def_for_reduction (stmt, op, scalar_def);
1010 /* Case 5: operand is defined by loop-header phi - induction. */
1011 case vect_induction_def:
1013 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1015 /* Get the def from the vectorized stmt. */
1016 def_stmt_info = vinfo_for_stmt (def_stmt);
1017 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1018 gcc_assert (vec_stmt && gimple_code (vec_stmt) == GIMPLE_PHI);
1019 vec_oprnd = PHI_RESULT (vec_stmt);
1020 return vec_oprnd;
1023 default:
1024 gcc_unreachable ();
1029 /* Function vect_get_vec_def_for_stmt_copy
1031 Return a vector-def for an operand. This function is used when the
1032 vectorized stmt to be created (by the caller to this function) is a "copy"
1033 created in case the vectorized result cannot fit in one vector, and several
1034 copies of the vector-stmt are required. In this case the vector-def is
1035 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1036 of the stmt that defines VEC_OPRND.
1037 DT is the type of the vector def VEC_OPRND.
1039 Context:
1040 In case the vectorization factor (VF) is bigger than the number
1041 of elements that can fit in a vectype (nunits), we have to generate
1042 more than one vector stmt to vectorize the scalar stmt. This situation
1043 arises when there are multiple data-types operated upon in the loop; the
1044 smallest data-type determines the VF, and as a result, when vectorizing
1045 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1046 vector stmt (each computing a vector of 'nunits' results, and together
1047 computing 'VF' results in each iteration). This function is called when
1048 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1049 which VF=16 and nunits=4, so the number of copies required is 4):
1051 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1053 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1054 VS1.1: vx.1 = memref1 VS1.2
1055 VS1.2: vx.2 = memref2 VS1.3
1056 VS1.3: vx.3 = memref3
1058 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1059 VSnew.1: vz1 = vx.1 + ... VSnew.2
1060 VSnew.2: vz2 = vx.2 + ... VSnew.3
1061 VSnew.3: vz3 = vx.3 + ...
1063 The vectorization of S1 is explained in vectorizable_load.
1064 The vectorization of S2:
1065 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1066 the function 'vect_get_vec_def_for_operand' is called to
1067 get the relevant vector-def for each operand of S2. For operand x it
1068 returns the vector-def 'vx.0'.
1070 To create the remaining copies of the vector-stmt (VSnew.j), this
1071 function is called to get the relevant vector-def for each operand. It is
1072 obtained from the respective VS1.j stmt, which is recorded in the
1073 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1075 For example, to obtain the vector-def 'vx.1' in order to create the
1076 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1077 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1078 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1079 and return its def ('vx.1').
1080 Overall, to create the above sequence this function will be called 3 times:
1081 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1082 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1083 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1085 tree
1086 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1088 gimple vec_stmt_for_operand;
1089 stmt_vec_info def_stmt_info;
1091 /* Do nothing; can reuse same def. */
1092 if (dt == vect_external_def || dt == vect_constant_def )
1093 return vec_oprnd;
1095 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1096 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1097 gcc_assert (def_stmt_info);
1098 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1099 gcc_assert (vec_stmt_for_operand);
1100 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1101 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1102 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1103 else
1104 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1105 return vec_oprnd;
1109 /* Get vectorized definitions for the operands to create a copy of an original
1110 stmt. See vect_get_vec_def_for_stmt_copy() for details. */
1112 static void
1113 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1114 VEC(tree,heap) **vec_oprnds0,
1115 VEC(tree,heap) **vec_oprnds1)
1117 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1119 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1120 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1122 if (vec_oprnds1 && *vec_oprnds1)
1124 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1125 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1126 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1131 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */
1133 static void
1134 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1135 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
1136 slp_tree slp_node)
1138 if (slp_node)
1139 vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1, -1);
1140 else
1142 tree vec_oprnd;
1144 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1145 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1146 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1148 if (op1)
1150 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1151 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1152 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1158 /* Function vect_finish_stmt_generation.
1160 Insert a new stmt. */
1162 void
1163 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1164 gimple_stmt_iterator *gsi)
1166 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1167 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1168 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1170 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1172 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1174 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1175 bb_vinfo));
1177 if (vect_print_dump_info (REPORT_DETAILS))
1179 fprintf (vect_dump, "add new stmt: ");
1180 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1183 gimple_set_location (vec_stmt, gimple_location (gsi_stmt (*gsi)));
1186 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1187 a function declaration if the target has a vectorized version
1188 of the function, or NULL_TREE if the function cannot be vectorized. */
1190 tree
1191 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1193 tree fndecl = gimple_call_fndecl (call);
1195 /* We only handle functions that do not read or clobber memory -- i.e.
1196 const or novops ones. */
1197 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1198 return NULL_TREE;
1200 if (!fndecl
1201 || TREE_CODE (fndecl) != FUNCTION_DECL
1202 || !DECL_BUILT_IN (fndecl))
1203 return NULL_TREE;
1205 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1206 vectype_in);
1209 /* Function vectorizable_call.
1211 Check if STMT performs a function call that can be vectorized.
1212 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1213 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1214 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1216 static bool
1217 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
1219 tree vec_dest;
1220 tree scalar_dest;
1221 tree op, type;
1222 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1223 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1224 tree vectype_out, vectype_in;
1225 int nunits_in;
1226 int nunits_out;
1227 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1228 tree fndecl, new_temp, def, rhs_type;
1229 gimple def_stmt;
1230 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1231 gimple new_stmt = NULL;
1232 int ncopies, j;
1233 VEC(tree, heap) *vargs = NULL;
1234 enum { NARROW, NONE, WIDEN } modifier;
1235 size_t i, nargs;
1237 /* FORNOW: unsupported in basic block SLP. */
1238 gcc_assert (loop_vinfo);
1240 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1241 return false;
1243 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1244 return false;
1246 /* FORNOW: SLP not supported. */
1247 if (STMT_SLP_TYPE (stmt_info))
1248 return false;
1250 /* Is STMT a vectorizable call? */
1251 if (!is_gimple_call (stmt))
1252 return false;
1254 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1255 return false;
1257 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1259 /* Process function arguments. */
1260 rhs_type = NULL_TREE;
1261 vectype_in = NULL_TREE;
1262 nargs = gimple_call_num_args (stmt);
1264 /* Bail out if the function has more than two arguments, we
1265 do not have interesting builtin functions to vectorize with
1266 more than two arguments. No arguments is also not good. */
1267 if (nargs == 0 || nargs > 2)
1268 return false;
1270 for (i = 0; i < nargs; i++)
1272 tree opvectype;
1274 op = gimple_call_arg (stmt, i);
1276 /* We can only handle calls with arguments of the same type. */
1277 if (rhs_type
1278 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1280 if (vect_print_dump_info (REPORT_DETAILS))
1281 fprintf (vect_dump, "argument types differ.");
1282 return false;
1284 if (!rhs_type)
1285 rhs_type = TREE_TYPE (op);
1287 if (!vect_is_simple_use_1 (op, loop_vinfo, NULL,
1288 &def_stmt, &def, &dt[i], &opvectype))
1290 if (vect_print_dump_info (REPORT_DETAILS))
1291 fprintf (vect_dump, "use not simple.");
1292 return false;
1295 if (!vectype_in)
1296 vectype_in = opvectype;
1297 else if (opvectype
1298 && opvectype != vectype_in)
1300 if (vect_print_dump_info (REPORT_DETAILS))
1301 fprintf (vect_dump, "argument vector types differ.");
1302 return false;
1305 /* If all arguments are external or constant defs use a vector type with
1306 the same size as the output vector type. */
1307 if (!vectype_in)
1308 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1309 if (vec_stmt)
1310 gcc_assert (vectype_in);
1311 if (!vectype_in)
1313 if (vect_print_dump_info (REPORT_DETAILS))
1315 fprintf (vect_dump, "no vectype for scalar type ");
1316 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1319 return false;
1322 /* FORNOW */
1323 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1324 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1325 if (nunits_in == nunits_out / 2)
1326 modifier = NARROW;
1327 else if (nunits_out == nunits_in)
1328 modifier = NONE;
1329 else if (nunits_out == nunits_in / 2)
1330 modifier = WIDEN;
1331 else
1332 return false;
1334 /* For now, we only vectorize functions if a target specific builtin
1335 is available. TODO -- in some cases, it might be profitable to
1336 insert the calls for pieces of the vector, in order to be able
1337 to vectorize other operations in the loop. */
1338 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1339 if (fndecl == NULL_TREE)
1341 if (vect_print_dump_info (REPORT_DETAILS))
1342 fprintf (vect_dump, "function is not vectorizable.");
1344 return false;
1347 gcc_assert (!gimple_vuse (stmt));
1349 if (modifier == NARROW)
1350 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1351 else
1352 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1354 /* Sanity check: make sure that at least one copy of the vectorized stmt
1355 needs to be generated. */
1356 gcc_assert (ncopies >= 1);
1358 if (!vec_stmt) /* transformation not required. */
1360 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1361 if (vect_print_dump_info (REPORT_DETAILS))
1362 fprintf (vect_dump, "=== vectorizable_call ===");
1363 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1364 return true;
1367 /** Transform. **/
1369 if (vect_print_dump_info (REPORT_DETAILS))
1370 fprintf (vect_dump, "transform operation.");
1372 /* Handle def. */
1373 scalar_dest = gimple_call_lhs (stmt);
1374 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1376 prev_stmt_info = NULL;
1377 switch (modifier)
1379 case NONE:
1380 for (j = 0; j < ncopies; ++j)
1382 /* Build argument list for the vectorized call. */
1383 if (j == 0)
1384 vargs = VEC_alloc (tree, heap, nargs);
1385 else
1386 VEC_truncate (tree, vargs, 0);
1388 for (i = 0; i < nargs; i++)
1390 op = gimple_call_arg (stmt, i);
1391 if (j == 0)
1392 vec_oprnd0
1393 = vect_get_vec_def_for_operand (op, stmt, NULL);
1394 else
1396 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1397 vec_oprnd0
1398 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1401 VEC_quick_push (tree, vargs, vec_oprnd0);
1404 new_stmt = gimple_build_call_vec (fndecl, vargs);
1405 new_temp = make_ssa_name (vec_dest, new_stmt);
1406 gimple_call_set_lhs (new_stmt, new_temp);
1408 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1409 mark_symbols_for_renaming (new_stmt);
1411 if (j == 0)
1412 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1413 else
1414 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1416 prev_stmt_info = vinfo_for_stmt (new_stmt);
1419 break;
1421 case NARROW:
1422 for (j = 0; j < ncopies; ++j)
1424 /* Build argument list for the vectorized call. */
1425 if (j == 0)
1426 vargs = VEC_alloc (tree, heap, nargs * 2);
1427 else
1428 VEC_truncate (tree, vargs, 0);
1430 for (i = 0; i < nargs; i++)
1432 op = gimple_call_arg (stmt, i);
1433 if (j == 0)
1435 vec_oprnd0
1436 = vect_get_vec_def_for_operand (op, stmt, NULL);
1437 vec_oprnd1
1438 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1440 else
1442 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i);
1443 vec_oprnd0
1444 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1445 vec_oprnd1
1446 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1449 VEC_quick_push (tree, vargs, vec_oprnd0);
1450 VEC_quick_push (tree, vargs, vec_oprnd1);
1453 new_stmt = gimple_build_call_vec (fndecl, vargs);
1454 new_temp = make_ssa_name (vec_dest, new_stmt);
1455 gimple_call_set_lhs (new_stmt, new_temp);
1457 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1458 mark_symbols_for_renaming (new_stmt);
1460 if (j == 0)
1461 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1462 else
1463 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1465 prev_stmt_info = vinfo_for_stmt (new_stmt);
1468 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1470 break;
1472 case WIDEN:
1473 /* No current target implements this case. */
1474 return false;
1477 VEC_free (tree, heap, vargs);
1479 /* Update the exception handling table with the vector stmt if necessary. */
1480 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1481 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1483 /* The call in STMT might prevent it from being removed in dce.
1484 We however cannot remove it here, due to the way the ssa name
1485 it defines is mapped to the new definition. So just replace
1486 rhs of the statement with something harmless. */
1488 type = TREE_TYPE (scalar_dest);
1489 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
1490 fold_convert (type, integer_zero_node));
1491 set_vinfo_for_stmt (new_stmt, stmt_info);
1492 set_vinfo_for_stmt (stmt, NULL);
1493 STMT_VINFO_STMT (stmt_info) = new_stmt;
1494 gsi_replace (gsi, new_stmt, false);
1495 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1497 return true;
1501 /* Function vect_gen_widened_results_half
1503 Create a vector stmt whose code, type, number of arguments, and result
1504 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
1505 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
1506 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1507 needs to be created (DECL is a function-decl of a target-builtin).
1508 STMT is the original scalar stmt that we are vectorizing. */
1510 static gimple
1511 vect_gen_widened_results_half (enum tree_code code,
1512 tree decl,
1513 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1514 tree vec_dest, gimple_stmt_iterator *gsi,
1515 gimple stmt)
1517 gimple new_stmt;
1518 tree new_temp;
1520 /* Generate half of the widened result: */
1521 if (code == CALL_EXPR)
1523 /* Target specific support */
1524 if (op_type == binary_op)
1525 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1526 else
1527 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1528 new_temp = make_ssa_name (vec_dest, new_stmt);
1529 gimple_call_set_lhs (new_stmt, new_temp);
1531 else
1533 /* Generic support */
1534 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1535 if (op_type != binary_op)
1536 vec_oprnd1 = NULL;
1537 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1538 vec_oprnd1);
1539 new_temp = make_ssa_name (vec_dest, new_stmt);
1540 gimple_assign_set_lhs (new_stmt, new_temp);
1542 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1544 return new_stmt;
1548 /* Check if STMT performs a conversion operation, that can be vectorized.
1549 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1550 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1551 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1553 static bool
1554 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1555 gimple *vec_stmt, slp_tree slp_node)
1557 tree vec_dest;
1558 tree scalar_dest;
1559 tree op0;
1560 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1561 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1562 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1563 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1564 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1565 tree new_temp;
1566 tree def;
1567 gimple def_stmt;
1568 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1569 gimple new_stmt = NULL;
1570 stmt_vec_info prev_stmt_info;
1571 int nunits_in;
1572 int nunits_out;
1573 tree vectype_out, vectype_in;
1574 int ncopies, j;
1575 tree rhs_type;
1576 tree builtin_decl;
1577 enum { NARROW, NONE, WIDEN } modifier;
1578 int i;
1579 VEC(tree,heap) *vec_oprnds0 = NULL;
1580 tree vop0;
1581 VEC(tree,heap) *dummy = NULL;
1582 int dummy_int;
1584 /* Is STMT a vectorizable conversion? */
1586 /* FORNOW: unsupported in basic block SLP. */
1587 gcc_assert (loop_vinfo);
1589 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1590 return false;
1592 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1593 return false;
1595 if (!is_gimple_assign (stmt))
1596 return false;
1598 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1599 return false;
1601 code = gimple_assign_rhs_code (stmt);
1602 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1603 return false;
1605 /* Check types of lhs and rhs. */
1606 scalar_dest = gimple_assign_lhs (stmt);
1607 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1609 op0 = gimple_assign_rhs1 (stmt);
1610 rhs_type = TREE_TYPE (op0);
1611 /* Check the operands of the operation. */
1612 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
1613 &def_stmt, &def, &dt[0], &vectype_in))
1615 if (vect_print_dump_info (REPORT_DETAILS))
1616 fprintf (vect_dump, "use not simple.");
1617 return false;
1619 /* If op0 is an external or constant defs use a vector type of
1620 the same size as the output vector type. */
1621 if (!vectype_in)
1622 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1623 if (vec_stmt)
1624 gcc_assert (vectype_in);
1625 if (!vectype_in)
1627 if (vect_print_dump_info (REPORT_DETAILS))
1629 fprintf (vect_dump, "no vectype for scalar type ");
1630 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1633 return false;
1636 /* FORNOW */
1637 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1638 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1639 if (nunits_in == nunits_out / 2)
1640 modifier = NARROW;
1641 else if (nunits_out == nunits_in)
1642 modifier = NONE;
1643 else if (nunits_out == nunits_in / 2)
1644 modifier = WIDEN;
1645 else
1646 return false;
1648 if (modifier == NARROW)
1649 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1650 else
1651 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1653 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
1654 this, so we can safely override NCOPIES with 1 here. */
1655 if (slp_node)
1656 ncopies = 1;
1658 /* Sanity check: make sure that at least one copy of the vectorized stmt
1659 needs to be generated. */
1660 gcc_assert (ncopies >= 1);
1662 /* Supportable by target? */
1663 if ((modifier == NONE
1664 && !targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
1665 || (modifier == WIDEN
1666 && !supportable_widening_operation (code, stmt,
1667 vectype_out, vectype_in,
1668 &decl1, &decl2,
1669 &code1, &code2,
1670 &dummy_int, &dummy))
1671 || (modifier == NARROW
1672 && !supportable_narrowing_operation (code, vectype_out, vectype_in,
1673 &code1, &dummy_int, &dummy)))
1675 if (vect_print_dump_info (REPORT_DETAILS))
1676 fprintf (vect_dump, "conversion not supported by target.");
1677 return false;
1680 if (modifier != NONE)
1682 /* FORNOW: SLP not supported. */
1683 if (STMT_SLP_TYPE (stmt_info))
1684 return false;
1687 if (!vec_stmt) /* transformation not required. */
1689 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1690 return true;
1693 /** Transform. **/
1694 if (vect_print_dump_info (REPORT_DETAILS))
1695 fprintf (vect_dump, "transform conversion.");
1697 /* Handle def. */
1698 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1700 if (modifier == NONE && !slp_node)
1701 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1703 prev_stmt_info = NULL;
1704 switch (modifier)
1706 case NONE:
1707 for (j = 0; j < ncopies; j++)
1709 if (j == 0)
1710 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1711 else
1712 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1714 builtin_decl =
1715 targetm.vectorize.builtin_conversion (code,
1716 vectype_out, vectype_in);
1717 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
1719 /* Arguments are ready. create the new vector stmt. */
1720 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1721 new_temp = make_ssa_name (vec_dest, new_stmt);
1722 gimple_call_set_lhs (new_stmt, new_temp);
1723 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1724 if (slp_node)
1725 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1728 if (j == 0)
1729 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1730 else
1731 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1732 prev_stmt_info = vinfo_for_stmt (new_stmt);
1734 break;
1736 case WIDEN:
1737 /* In case the vectorization factor (VF) is bigger than the number
1738 of elements that we can fit in a vectype (nunits), we have to
1739 generate more than one vector stmt - i.e - we need to "unroll"
1740 the vector stmt by a factor VF/nunits. */
1741 for (j = 0; j < ncopies; j++)
1743 if (j == 0)
1744 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1745 else
1746 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1748 /* Generate first half of the widened result: */
1749 new_stmt
1750 = vect_gen_widened_results_half (code1, decl1,
1751 vec_oprnd0, vec_oprnd1,
1752 unary_op, vec_dest, gsi, stmt);
1753 if (j == 0)
1754 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1755 else
1756 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1757 prev_stmt_info = vinfo_for_stmt (new_stmt);
1759 /* Generate second half of the widened result: */
1760 new_stmt
1761 = vect_gen_widened_results_half (code2, decl2,
1762 vec_oprnd0, vec_oprnd1,
1763 unary_op, vec_dest, gsi, stmt);
1764 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1765 prev_stmt_info = vinfo_for_stmt (new_stmt);
1767 break;
1769 case NARROW:
1770 /* In case the vectorization factor (VF) is bigger than the number
1771 of elements that we can fit in a vectype (nunits), we have to
1772 generate more than one vector stmt - i.e - we need to "unroll"
1773 the vector stmt by a factor VF/nunits. */
1774 for (j = 0; j < ncopies; j++)
1776 /* Handle uses. */
1777 if (j == 0)
1779 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1780 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1782 else
1784 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
1785 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1788 /* Arguments are ready. Create the new vector stmt. */
1789 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
1790 vec_oprnd1);
1791 new_temp = make_ssa_name (vec_dest, new_stmt);
1792 gimple_assign_set_lhs (new_stmt, new_temp);
1793 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1795 if (j == 0)
1796 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1797 else
1798 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1800 prev_stmt_info = vinfo_for_stmt (new_stmt);
1803 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1806 if (vec_oprnds0)
1807 VEC_free (tree, heap, vec_oprnds0);
1809 return true;
1811 /* Function vectorizable_assignment.
1813 Check if STMT performs an assignment (copy) that can be vectorized.
1814 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1815 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1816 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1818 static bool
1819 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
1820 gimple *vec_stmt, slp_tree slp_node)
1822 tree vec_dest;
1823 tree scalar_dest;
1824 tree op;
1825 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1826 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1827 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1828 tree new_temp;
1829 tree def;
1830 gimple def_stmt;
1831 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1832 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1833 int ncopies;
1834 int i, j;
1835 VEC(tree,heap) *vec_oprnds = NULL;
1836 tree vop;
1837 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1838 gimple new_stmt = NULL;
1839 stmt_vec_info prev_stmt_info = NULL;
1840 enum tree_code code;
1841 tree vectype_in;
1843 /* Multiple types in SLP are handled by creating the appropriate number of
1844 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1845 case of SLP. */
1846 if (slp_node)
1847 ncopies = 1;
1848 else
1849 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1851 gcc_assert (ncopies >= 1);
1853 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1854 return false;
1856 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1857 return false;
1859 /* Is vectorizable assignment? */
1860 if (!is_gimple_assign (stmt))
1861 return false;
1863 scalar_dest = gimple_assign_lhs (stmt);
1864 if (TREE_CODE (scalar_dest) != SSA_NAME)
1865 return false;
1867 code = gimple_assign_rhs_code (stmt);
1868 if (gimple_assign_single_p (stmt)
1869 || code == PAREN_EXPR
1870 || CONVERT_EXPR_CODE_P (code))
1871 op = gimple_assign_rhs1 (stmt);
1872 else
1873 return false;
1875 if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
1876 &def_stmt, &def, &dt[0], &vectype_in))
1878 if (vect_print_dump_info (REPORT_DETAILS))
1879 fprintf (vect_dump, "use not simple.");
1880 return false;
1883 /* We can handle NOP_EXPR conversions that do not change the number
1884 of elements or the vector size. */
1885 if (CONVERT_EXPR_CODE_P (code)
1886 && (!vectype_in
1887 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
1888 || (GET_MODE_SIZE (TYPE_MODE (vectype))
1889 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
1890 return false;
1892 if (!vec_stmt) /* transformation not required. */
1894 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1895 if (vect_print_dump_info (REPORT_DETAILS))
1896 fprintf (vect_dump, "=== vectorizable_assignment ===");
1897 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1898 return true;
1901 /** Transform. **/
1902 if (vect_print_dump_info (REPORT_DETAILS))
1903 fprintf (vect_dump, "transform assignment.");
1905 /* Handle def. */
1906 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1908 /* Handle use. */
1909 for (j = 0; j < ncopies; j++)
1911 /* Handle uses. */
1912 if (j == 0)
1913 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
1914 else
1915 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
1917 /* Arguments are ready. create the new vector stmt. */
1918 for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
1920 if (CONVERT_EXPR_CODE_P (code))
1921 vop = build1_stat (VIEW_CONVERT_EXPR, vectype, vop);
1922 new_stmt = gimple_build_assign (vec_dest, vop);
1923 new_temp = make_ssa_name (vec_dest, new_stmt);
1924 gimple_assign_set_lhs (new_stmt, new_temp);
1925 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1926 if (slp_node)
1927 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1930 if (slp_node)
1931 continue;
1933 if (j == 0)
1934 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1935 else
1936 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1938 prev_stmt_info = vinfo_for_stmt (new_stmt);
1941 VEC_free (tree, heap, vec_oprnds);
1942 return true;
1945 /* Function vectorizable_operation.
1947 Check if STMT performs a binary or unary operation that can be vectorized.
1948 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1949 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1950 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1952 static bool
1953 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
1954 gimple *vec_stmt, slp_tree slp_node)
1956 tree vec_dest;
1957 tree scalar_dest;
1958 tree op0, op1 = NULL;
1959 tree vec_oprnd1 = NULL_TREE;
1960 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1961 tree vectype;
1962 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1963 enum tree_code code;
1964 enum machine_mode vec_mode;
1965 tree new_temp;
1966 int op_type;
1967 optab optab;
1968 int icode;
1969 enum machine_mode optab_op2_mode;
1970 tree def;
1971 gimple def_stmt;
1972 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1973 gimple new_stmt = NULL;
1974 stmt_vec_info prev_stmt_info;
1975 int nunits_in;
1976 int nunits_out;
1977 tree vectype_out;
1978 int ncopies;
1979 int j, i;
1980 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
1981 tree vop0, vop1;
1982 unsigned int k;
1983 bool scalar_shift_arg = false;
1984 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1985 int vf;
1987 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1988 return false;
1990 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1991 return false;
1993 /* Is STMT a vectorizable binary/unary operation? */
1994 if (!is_gimple_assign (stmt))
1995 return false;
1997 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1998 return false;
2000 code = gimple_assign_rhs_code (stmt);
2002 /* For pointer addition, we should use the normal plus for
2003 the vector addition. */
2004 if (code == POINTER_PLUS_EXPR)
2005 code = PLUS_EXPR;
2007 /* Support only unary or binary operations. */
2008 op_type = TREE_CODE_LENGTH (code);
2009 if (op_type != unary_op && op_type != binary_op)
2011 if (vect_print_dump_info (REPORT_DETAILS))
2012 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
2013 return false;
2016 scalar_dest = gimple_assign_lhs (stmt);
2017 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2019 op0 = gimple_assign_rhs1 (stmt);
2020 if (!vect_is_simple_use_1 (op0, loop_vinfo, bb_vinfo,
2021 &def_stmt, &def, &dt[0], &vectype))
2023 if (vect_print_dump_info (REPORT_DETAILS))
2024 fprintf (vect_dump, "use not simple.");
2025 return false;
2027 /* If op0 is an external or constant def use a vector type with
2028 the same size as the output vector type. */
2029 if (!vectype)
2030 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2031 if (vec_stmt)
2032 gcc_assert (vectype);
2033 if (!vectype)
2035 if (vect_print_dump_info (REPORT_DETAILS))
2037 fprintf (vect_dump, "no vectype for scalar type ");
2038 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2041 return false;
2044 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2045 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
2046 if (nunits_out != nunits_in)
2047 return false;
2049 if (op_type == binary_op)
2051 op1 = gimple_assign_rhs2 (stmt);
2052 if (!vect_is_simple_use (op1, loop_vinfo, bb_vinfo, &def_stmt, &def,
2053 &dt[1]))
2055 if (vect_print_dump_info (REPORT_DETAILS))
2056 fprintf (vect_dump, "use not simple.");
2057 return false;
2061 if (loop_vinfo)
2062 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2063 else
2064 vf = 1;
2066 /* Multiple types in SLP are handled by creating the appropriate number of
2067 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2068 case of SLP. */
2069 if (slp_node)
2070 ncopies = 1;
2071 else
2072 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2074 gcc_assert (ncopies >= 1);
2076 /* If this is a shift/rotate, determine whether the shift amount is a vector,
2077 or scalar. If the shift/rotate amount is a vector, use the vector/vector
2078 shift optabs. */
2079 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2080 || code == RROTATE_EXPR)
2082 /* vector shifted by vector */
2083 if (dt[1] == vect_internal_def)
2085 optab = optab_for_tree_code (code, vectype, optab_vector);
2086 if (vect_print_dump_info (REPORT_DETAILS))
2087 fprintf (vect_dump, "vector/vector shift/rotate found.");
2090 /* See if the machine has a vector shifted by scalar insn and if not
2091 then see if it has a vector shifted by vector insn */
2092 else if (dt[1] == vect_constant_def || dt[1] == vect_external_def)
2094 optab = optab_for_tree_code (code, vectype, optab_scalar);
2095 if (optab
2096 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
2097 != CODE_FOR_nothing))
2099 scalar_shift_arg = true;
2100 if (vect_print_dump_info (REPORT_DETAILS))
2101 fprintf (vect_dump, "vector/scalar shift/rotate found.");
2103 else
2105 optab = optab_for_tree_code (code, vectype, optab_vector);
2106 if (optab
2107 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
2108 != CODE_FOR_nothing))
2110 if (vect_print_dump_info (REPORT_DETAILS))
2111 fprintf (vect_dump, "vector/vector shift/rotate found.");
2113 /* Unlike the other binary operators, shifts/rotates have
2114 the rhs being int, instead of the same type as the lhs,
2115 so make sure the scalar is the right type if we are
2116 dealing with vectors of short/char. */
2117 if (dt[1] == vect_constant_def)
2118 op1 = fold_convert (TREE_TYPE (vectype), op1);
2123 else
2125 if (vect_print_dump_info (REPORT_DETAILS))
2126 fprintf (vect_dump, "operand mode requires invariant argument.");
2127 return false;
2130 else
2131 optab = optab_for_tree_code (code, vectype, optab_default);
2133 /* Supportable by target? */
2134 if (!optab)
2136 if (vect_print_dump_info (REPORT_DETAILS))
2137 fprintf (vect_dump, "no optab.");
2138 return false;
2140 vec_mode = TYPE_MODE (vectype);
2141 icode = (int) optab_handler (optab, vec_mode)->insn_code;
2142 if (icode == CODE_FOR_nothing)
2144 if (vect_print_dump_info (REPORT_DETAILS))
2145 fprintf (vect_dump, "op not supported by target.");
2146 /* Check only during analysis. */
2147 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2148 || (vf < vect_min_worthwhile_factor (code)
2149 && !vec_stmt))
2150 return false;
2151 if (vect_print_dump_info (REPORT_DETAILS))
2152 fprintf (vect_dump, "proceeding using word mode.");
2155 /* Worthwhile without SIMD support? Check only during analysis. */
2156 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2157 && vf < vect_min_worthwhile_factor (code)
2158 && !vec_stmt)
2160 if (vect_print_dump_info (REPORT_DETAILS))
2161 fprintf (vect_dump, "not worthwhile without SIMD support.");
2162 return false;
2165 if (!vec_stmt) /* transformation not required. */
2167 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2168 if (vect_print_dump_info (REPORT_DETAILS))
2169 fprintf (vect_dump, "=== vectorizable_operation ===");
2170 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2171 return true;
2174 /** Transform. **/
2176 if (vect_print_dump_info (REPORT_DETAILS))
2177 fprintf (vect_dump, "transform binary/unary operation.");
2179 /* Handle def. */
2180 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2182 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2183 created in the previous stages of the recursion, so no allocation is
2184 needed, except for the case of shift with scalar shift argument. In that
2185 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2186 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2187 In case of loop-based vectorization we allocate VECs of size 1. We
2188 allocate VEC_OPRNDS1 only in case of binary operation. */
2189 if (!slp_node)
2191 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2192 if (op_type == binary_op)
2193 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2195 else if (scalar_shift_arg)
2196 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2198 /* In case the vectorization factor (VF) is bigger than the number
2199 of elements that we can fit in a vectype (nunits), we have to generate
2200 more than one vector stmt - i.e - we need to "unroll" the
2201 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2202 from one copy of the vector stmt to the next, in the field
2203 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2204 stages to find the correct vector defs to be used when vectorizing
2205 stmts that use the defs of the current stmt. The example below illustrates
2206 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
2207 4 vectorized stmts):
2209 before vectorization:
2210 RELATED_STMT VEC_STMT
2211 S1: x = memref - -
2212 S2: z = x + 1 - -
2214 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2215 there):
2216 RELATED_STMT VEC_STMT
2217 VS1_0: vx0 = memref0 VS1_1 -
2218 VS1_1: vx1 = memref1 VS1_2 -
2219 VS1_2: vx2 = memref2 VS1_3 -
2220 VS1_3: vx3 = memref3 - -
2221 S1: x = load - VS1_0
2222 S2: z = x + 1 - -
2224 step2: vectorize stmt S2 (done here):
2225 To vectorize stmt S2 we first need to find the relevant vector
2226 def for the first operand 'x'. This is, as usual, obtained from
2227 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2228 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2229 relevant vector def 'vx0'. Having found 'vx0' we can generate
2230 the vector stmt VS2_0, and as usual, record it in the
2231 STMT_VINFO_VEC_STMT of stmt S2.
2232 When creating the second copy (VS2_1), we obtain the relevant vector
2233 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2234 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2235 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2236 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2237 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2238 chain of stmts and pointers:
2239 RELATED_STMT VEC_STMT
2240 VS1_0: vx0 = memref0 VS1_1 -
2241 VS1_1: vx1 = memref1 VS1_2 -
2242 VS1_2: vx2 = memref2 VS1_3 -
2243 VS1_3: vx3 = memref3 - -
2244 S1: x = load - VS1_0
2245 VS2_0: vz0 = vx0 + v1 VS2_1 -
2246 VS2_1: vz1 = vx1 + v1 VS2_2 -
2247 VS2_2: vz2 = vx2 + v1 VS2_3 -
2248 VS2_3: vz3 = vx3 + v1 - -
2249 S2: z = x + 1 - VS2_0 */
2251 prev_stmt_info = NULL;
2252 for (j = 0; j < ncopies; j++)
2254 /* Handle uses. */
2255 if (j == 0)
2257 if (op_type == binary_op && scalar_shift_arg)
2259 /* Vector shl and shr insn patterns can be defined with scalar
2260 operand 2 (shift operand). In this case, use constant or loop
2261 invariant op1 directly, without extending it to vector mode
2262 first. */
2263 optab_op2_mode = insn_data[icode].operand[2].mode;
2264 if (!VECTOR_MODE_P (optab_op2_mode))
2266 if (vect_print_dump_info (REPORT_DETAILS))
2267 fprintf (vect_dump, "operand 1 using scalar mode.");
2268 vec_oprnd1 = op1;
2269 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2270 if (slp_node)
2272 /* Store vec_oprnd1 for every vector stmt to be created
2273 for SLP_NODE. We check during the analysis that all the
2274 shift arguments are the same.
2275 TODO: Allow different constants for different vector
2276 stmts generated for an SLP instance. */
2277 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2278 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2283 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2284 (a special case for certain kind of vector shifts); otherwise,
2285 operand 1 should be of a vector type (the usual case). */
2286 if (op_type == binary_op && !vec_oprnd1)
2287 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2288 slp_node);
2289 else
2290 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2291 slp_node);
2293 else
2294 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2296 /* Arguments are ready. Create the new vector stmt. */
2297 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
2299 vop1 = ((op_type == binary_op)
2300 ? VEC_index (tree, vec_oprnds1, i) : NULL);
2301 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2302 new_temp = make_ssa_name (vec_dest, new_stmt);
2303 gimple_assign_set_lhs (new_stmt, new_temp);
2304 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2305 if (slp_node)
2306 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2309 if (slp_node)
2310 continue;
2312 if (j == 0)
2313 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2314 else
2315 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2316 prev_stmt_info = vinfo_for_stmt (new_stmt);
2319 VEC_free (tree, heap, vec_oprnds0);
2320 if (vec_oprnds1)
2321 VEC_free (tree, heap, vec_oprnds1);
2323 return true;
2327 /* Get vectorized definitions for loop-based vectorization. For the first
2328 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2329 scalar operand), and for the rest we get a copy with
2330 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2331 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2332 The vectors are collected into VEC_OPRNDS. */
2334 static void
2335 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2336 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2338 tree vec_oprnd;
2340 /* Get first vector operand. */
2341 /* All the vector operands except the very first one (that is scalar oprnd)
2342 are stmt copies. */
2343 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2344 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2345 else
2346 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2348 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2350 /* Get second vector operand. */
2351 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2352 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2354 *oprnd = vec_oprnd;
2356 /* For conversion in multiple steps, continue to get operands
2357 recursively. */
2358 if (multi_step_cvt)
2359 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2363 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2364 For multi-step conversions store the resulting vectors and call the function
2365 recursively. */
2367 static void
2368 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2369 int multi_step_cvt, gimple stmt,
2370 VEC (tree, heap) *vec_dsts,
2371 gimple_stmt_iterator *gsi,
2372 slp_tree slp_node, enum tree_code code,
2373 stmt_vec_info *prev_stmt_info)
2375 unsigned int i;
2376 tree vop0, vop1, new_tmp, vec_dest;
2377 gimple new_stmt;
2378 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2380 vec_dest = VEC_pop (tree, vec_dsts);
2382 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2384 /* Create demotion operation. */
2385 vop0 = VEC_index (tree, *vec_oprnds, i);
2386 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2387 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2388 new_tmp = make_ssa_name (vec_dest, new_stmt);
2389 gimple_assign_set_lhs (new_stmt, new_tmp);
2390 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2392 if (multi_step_cvt)
2393 /* Store the resulting vector for next recursive call. */
2394 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2395 else
2397 /* This is the last step of the conversion sequence. Store the
2398 vectors in SLP_NODE or in vector info of the scalar statement
2399 (or in STMT_VINFO_RELATED_STMT chain). */
2400 if (slp_node)
2401 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2402 else
2404 if (!*prev_stmt_info)
2405 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2406 else
2407 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2409 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2414 /* For multi-step demotion operations we first generate demotion operations
2415 from the source type to the intermediate types, and then combine the
2416 results (stored in VEC_OPRNDS) in demotion operation to the destination
2417 type. */
2418 if (multi_step_cvt)
2420 /* At each level of recursion we have have of the operands we had at the
2421 previous level. */
2422 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2423 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2424 stmt, vec_dsts, gsi, slp_node,
2425 code, prev_stmt_info);
2430 /* Function vectorizable_type_demotion
2432 Check if STMT performs a binary or unary operation that involves
2433 type demotion, and if it can be vectorized.
2434 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2435 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2436 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2438 static bool
2439 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
2440 gimple *vec_stmt, slp_tree slp_node)
2442 tree vec_dest;
2443 tree scalar_dest;
2444 tree op0;
2445 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2446 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2447 enum tree_code code, code1 = ERROR_MARK;
2448 tree def;
2449 gimple def_stmt;
2450 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2451 stmt_vec_info prev_stmt_info;
2452 int nunits_in;
2453 int nunits_out;
2454 tree vectype_out;
2455 int ncopies;
2456 int j, i;
2457 tree vectype_in;
2458 int multi_step_cvt = 0;
2459 VEC (tree, heap) *vec_oprnds0 = NULL;
2460 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2461 tree last_oprnd, intermediate_type;
2463 /* FORNOW: not supported by basic block SLP vectorization. */
2464 gcc_assert (loop_vinfo);
2466 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2467 return false;
2469 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2470 return false;
2472 /* Is STMT a vectorizable type-demotion operation? */
2473 if (!is_gimple_assign (stmt))
2474 return false;
2476 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2477 return false;
2479 code = gimple_assign_rhs_code (stmt);
2480 if (!CONVERT_EXPR_CODE_P (code))
2481 return false;
2483 scalar_dest = gimple_assign_lhs (stmt);
2484 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2486 /* Check the operands of the operation. */
2487 op0 = gimple_assign_rhs1 (stmt);
2488 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2489 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2490 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2491 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2492 && CONVERT_EXPR_CODE_P (code))))
2493 return false;
2494 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
2495 &def_stmt, &def, &dt[0], &vectype_in))
2497 if (vect_print_dump_info (REPORT_DETAILS))
2498 fprintf (vect_dump, "use not simple.");
2499 return false;
2501 /* If op0 is an external def use a vector type with the
2502 same size as the output vector type if possible. */
2503 if (!vectype_in)
2504 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2505 if (vec_stmt)
2506 gcc_assert (vectype_in);
2507 if (!vectype_in)
2509 if (vect_print_dump_info (REPORT_DETAILS))
2511 fprintf (vect_dump, "no vectype for scalar type ");
2512 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2515 return false;
2518 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2519 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2520 if (nunits_in >= nunits_out)
2521 return false;
2523 /* Multiple types in SLP are handled by creating the appropriate number of
2524 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2525 case of SLP. */
2526 if (slp_node)
2527 ncopies = 1;
2528 else
2529 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2530 gcc_assert (ncopies >= 1);
2532 /* Supportable by target? */
2533 if (!supportable_narrowing_operation (code, vectype_out, vectype_in,
2534 &code1, &multi_step_cvt, &interm_types))
2535 return false;
2537 if (!vec_stmt) /* transformation not required. */
2539 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2540 if (vect_print_dump_info (REPORT_DETAILS))
2541 fprintf (vect_dump, "=== vectorizable_demotion ===");
2542 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2543 return true;
2546 /** Transform. **/
2547 if (vect_print_dump_info (REPORT_DETAILS))
2548 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2549 ncopies);
2551 /* In case of multi-step demotion, we first generate demotion operations to
2552 the intermediate types, and then from that types to the final one.
2553 We create vector destinations for the intermediate type (TYPES) received
2554 from supportable_narrowing_operation, and store them in the correct order
2555 for future use in vect_create_vectorized_demotion_stmts(). */
2556 if (multi_step_cvt)
2557 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2558 else
2559 vec_dsts = VEC_alloc (tree, heap, 1);
2561 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2562 VEC_quick_push (tree, vec_dsts, vec_dest);
2564 if (multi_step_cvt)
2566 for (i = VEC_length (tree, interm_types) - 1;
2567 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2569 vec_dest = vect_create_destination_var (scalar_dest,
2570 intermediate_type);
2571 VEC_quick_push (tree, vec_dsts, vec_dest);
2575 /* In case the vectorization factor (VF) is bigger than the number
2576 of elements that we can fit in a vectype (nunits), we have to generate
2577 more than one vector stmt - i.e - we need to "unroll" the
2578 vector stmt by a factor VF/nunits. */
2579 last_oprnd = op0;
2580 prev_stmt_info = NULL;
2581 for (j = 0; j < ncopies; j++)
2583 /* Handle uses. */
2584 if (slp_node)
2585 vect_get_slp_defs (slp_node, &vec_oprnds0, NULL, -1);
2586 else
2588 VEC_free (tree, heap, vec_oprnds0);
2589 vec_oprnds0 = VEC_alloc (tree, heap,
2590 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
2591 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2592 vect_pow2 (multi_step_cvt) - 1);
2595 /* Arguments are ready. Create the new vector stmts. */
2596 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2597 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
2598 multi_step_cvt, stmt, tmp_vec_dsts,
2599 gsi, slp_node, code1,
2600 &prev_stmt_info);
2603 VEC_free (tree, heap, vec_oprnds0);
2604 VEC_free (tree, heap, vec_dsts);
2605 VEC_free (tree, heap, tmp_vec_dsts);
2606 VEC_free (tree, heap, interm_types);
2608 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2609 return true;
2613 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2614 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2615 the resulting vectors and call the function recursively. */
2617 static void
2618 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2619 VEC (tree, heap) **vec_oprnds1,
2620 int multi_step_cvt, gimple stmt,
2621 VEC (tree, heap) *vec_dsts,
2622 gimple_stmt_iterator *gsi,
2623 slp_tree slp_node, enum tree_code code1,
2624 enum tree_code code2, tree decl1,
2625 tree decl2, int op_type,
2626 stmt_vec_info *prev_stmt_info)
2628 int i;
2629 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
2630 gimple new_stmt1, new_stmt2;
2631 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2632 VEC (tree, heap) *vec_tmp;
2634 vec_dest = VEC_pop (tree, vec_dsts);
2635 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2637 for (i = 0; VEC_iterate (tree, *vec_oprnds0, i, vop0); i++)
2639 if (op_type == binary_op)
2640 vop1 = VEC_index (tree, *vec_oprnds1, i);
2641 else
2642 vop1 = NULL_TREE;
2644 /* Generate the two halves of promotion operation. */
2645 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2646 op_type, vec_dest, gsi, stmt);
2647 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2648 op_type, vec_dest, gsi, stmt);
2649 if (is_gimple_call (new_stmt1))
2651 new_tmp1 = gimple_call_lhs (new_stmt1);
2652 new_tmp2 = gimple_call_lhs (new_stmt2);
2654 else
2656 new_tmp1 = gimple_assign_lhs (new_stmt1);
2657 new_tmp2 = gimple_assign_lhs (new_stmt2);
2660 if (multi_step_cvt)
2662 /* Store the results for the recursive call. */
2663 VEC_quick_push (tree, vec_tmp, new_tmp1);
2664 VEC_quick_push (tree, vec_tmp, new_tmp2);
2666 else
2668 /* Last step of promotion sequience - store the results. */
2669 if (slp_node)
2671 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
2672 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
2674 else
2676 if (!*prev_stmt_info)
2677 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
2678 else
2679 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
2681 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
2682 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
2683 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
2688 if (multi_step_cvt)
2690 /* For multi-step promotion operation we first generate we call the
2691 function recurcively for every stage. We start from the input type,
2692 create promotion operations to the intermediate types, and then
2693 create promotions to the output type. */
2694 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
2695 VEC_free (tree, heap, vec_tmp);
2696 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
2697 multi_step_cvt - 1, stmt,
2698 vec_dsts, gsi, slp_node, code1,
2699 code2, decl2, decl2, op_type,
2700 prev_stmt_info);
2705 /* Function vectorizable_type_promotion
2707 Check if STMT performs a binary or unary operation that involves
2708 type promotion, and if it can be vectorized.
2709 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2710 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2711 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2713 static bool
2714 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
2715 gimple *vec_stmt, slp_tree slp_node)
2717 tree vec_dest;
2718 tree scalar_dest;
2719 tree op0, op1 = NULL;
2720 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
2721 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2722 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2723 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2724 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2725 int op_type;
2726 tree def;
2727 gimple def_stmt;
2728 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2729 stmt_vec_info prev_stmt_info;
2730 int nunits_in;
2731 int nunits_out;
2732 tree vectype_out;
2733 int ncopies;
2734 int j, i;
2735 tree vectype_in;
2736 tree intermediate_type = NULL_TREE;
2737 int multi_step_cvt = 0;
2738 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2739 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2741 /* FORNOW: not supported by basic block SLP vectorization. */
2742 gcc_assert (loop_vinfo);
2744 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2745 return false;
2747 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2748 return false;
2750 /* Is STMT a vectorizable type-promotion operation? */
2751 if (!is_gimple_assign (stmt))
2752 return false;
2754 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2755 return false;
2757 code = gimple_assign_rhs_code (stmt);
2758 if (!CONVERT_EXPR_CODE_P (code)
2759 && code != WIDEN_MULT_EXPR)
2760 return false;
2762 scalar_dest = gimple_assign_lhs (stmt);
2763 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2765 /* Check the operands of the operation. */
2766 op0 = gimple_assign_rhs1 (stmt);
2767 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2768 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2769 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2770 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2771 && CONVERT_EXPR_CODE_P (code))))
2772 return false;
2773 if (!vect_is_simple_use_1 (op0, loop_vinfo, NULL,
2774 &def_stmt, &def, &dt[0], &vectype_in))
2776 if (vect_print_dump_info (REPORT_DETAILS))
2777 fprintf (vect_dump, "use not simple.");
2778 return false;
2780 /* If op0 is an external or constant def use a vector type with
2781 the same size as the output vector type. */
2782 if (!vectype_in)
2783 vectype_in = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
2784 if (vec_stmt)
2785 gcc_assert (vectype_in);
2786 if (!vectype_in)
2788 if (vect_print_dump_info (REPORT_DETAILS))
2790 fprintf (vect_dump, "no vectype for scalar type ");
2791 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
2794 return false;
2797 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2798 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2799 if (nunits_in <= nunits_out)
2800 return false;
2802 /* Multiple types in SLP are handled by creating the appropriate number of
2803 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2804 case of SLP. */
2805 if (slp_node)
2806 ncopies = 1;
2807 else
2808 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2810 gcc_assert (ncopies >= 1);
2812 op_type = TREE_CODE_LENGTH (code);
2813 if (op_type == binary_op)
2815 op1 = gimple_assign_rhs2 (stmt);
2816 if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1]))
2818 if (vect_print_dump_info (REPORT_DETAILS))
2819 fprintf (vect_dump, "use not simple.");
2820 return false;
2824 /* Supportable by target? */
2825 if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2826 &decl1, &decl2, &code1, &code2,
2827 &multi_step_cvt, &interm_types))
2828 return false;
2830 /* Binary widening operation can only be supported directly by the
2831 architecture. */
2832 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2834 if (!vec_stmt) /* transformation not required. */
2836 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2837 if (vect_print_dump_info (REPORT_DETAILS))
2838 fprintf (vect_dump, "=== vectorizable_promotion ===");
2839 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
2840 return true;
2843 /** Transform. **/
2845 if (vect_print_dump_info (REPORT_DETAILS))
2846 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
2847 ncopies);
2849 /* Handle def. */
2850 /* In case of multi-step promotion, we first generate promotion operations
2851 to the intermediate types, and then from that types to the final one.
2852 We store vector destination in VEC_DSTS in the correct order for
2853 recursive creation of promotion operations in
2854 vect_create_vectorized_promotion_stmts(). Vector destinations are created
2855 according to TYPES recieved from supportable_widening_operation(). */
2856 if (multi_step_cvt)
2857 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2858 else
2859 vec_dsts = VEC_alloc (tree, heap, 1);
2861 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2862 VEC_quick_push (tree, vec_dsts, vec_dest);
2864 if (multi_step_cvt)
2866 for (i = VEC_length (tree, interm_types) - 1;
2867 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2869 vec_dest = vect_create_destination_var (scalar_dest,
2870 intermediate_type);
2871 VEC_quick_push (tree, vec_dsts, vec_dest);
2875 if (!slp_node)
2877 vec_oprnds0 = VEC_alloc (tree, heap,
2878 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2879 if (op_type == binary_op)
2880 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2883 /* In case the vectorization factor (VF) is bigger than the number
2884 of elements that we can fit in a vectype (nunits), we have to generate
2885 more than one vector stmt - i.e - we need to "unroll" the
2886 vector stmt by a factor VF/nunits. */
2888 prev_stmt_info = NULL;
2889 for (j = 0; j < ncopies; j++)
2891 /* Handle uses. */
2892 if (j == 0)
2894 if (slp_node)
2895 vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1, -1);
2896 else
2898 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2899 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2900 if (op_type == binary_op)
2902 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2903 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2907 else
2909 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2910 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
2911 if (op_type == binary_op)
2913 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
2914 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
2918 /* Arguments are ready. Create the new vector stmts. */
2919 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2920 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
2921 multi_step_cvt, stmt,
2922 tmp_vec_dsts,
2923 gsi, slp_node, code1, code2,
2924 decl1, decl2, op_type,
2925 &prev_stmt_info);
2928 VEC_free (tree, heap, vec_dsts);
2929 VEC_free (tree, heap, tmp_vec_dsts);
2930 VEC_free (tree, heap, interm_types);
2931 VEC_free (tree, heap, vec_oprnds0);
2932 VEC_free (tree, heap, vec_oprnds1);
2934 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2935 return true;
2939 /* Function vectorizable_store.
2941 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2942 can be vectorized.
2943 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2944 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2945 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2947 static bool
2948 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2949 slp_tree slp_node)
2951 tree scalar_dest;
2952 tree data_ref;
2953 tree op;
2954 tree vec_oprnd = NULL_TREE;
2955 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2956 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
2957 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2958 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2959 struct loop *loop = NULL;
2960 enum machine_mode vec_mode;
2961 tree dummy;
2962 enum dr_alignment_support alignment_support_scheme;
2963 tree def;
2964 gimple def_stmt;
2965 enum vect_def_type dt;
2966 stmt_vec_info prev_stmt_info = NULL;
2967 tree dataref_ptr = NULL_TREE;
2968 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2969 int ncopies;
2970 int j;
2971 gimple next_stmt, first_stmt = NULL;
2972 bool strided_store = false;
2973 unsigned int group_size, i;
2974 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
2975 bool inv_p;
2976 VEC(tree,heap) *vec_oprnds = NULL;
2977 bool slp = (slp_node != NULL);
2978 unsigned int vec_num;
2979 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2981 if (loop_vinfo)
2982 loop = LOOP_VINFO_LOOP (loop_vinfo);
2984 /* Multiple types in SLP are handled by creating the appropriate number of
2985 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2986 case of SLP. */
2987 if (slp)
2988 ncopies = 1;
2989 else
2990 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2992 gcc_assert (ncopies >= 1);
2994 /* FORNOW. This restriction should be relaxed. */
2995 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
2997 if (vect_print_dump_info (REPORT_DETAILS))
2998 fprintf (vect_dump, "multiple types in nested loop.");
2999 return false;
3002 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3003 return false;
3005 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3006 return false;
3008 /* Is vectorizable store? */
3010 if (!is_gimple_assign (stmt))
3011 return false;
3013 scalar_dest = gimple_assign_lhs (stmt);
3014 if (TREE_CODE (scalar_dest) != ARRAY_REF
3015 && TREE_CODE (scalar_dest) != INDIRECT_REF
3016 && TREE_CODE (scalar_dest) != COMPONENT_REF
3017 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3018 && TREE_CODE (scalar_dest) != REALPART_EXPR)
3019 return false;
3021 gcc_assert (gimple_assign_single_p (stmt));
3022 op = gimple_assign_rhs1 (stmt);
3023 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt))
3025 if (vect_print_dump_info (REPORT_DETAILS))
3026 fprintf (vect_dump, "use not simple.");
3027 return false;
3030 /* The scalar rhs type needs to be trivially convertible to the vector
3031 component type. This should always be the case. */
3032 if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
3034 if (vect_print_dump_info (REPORT_DETAILS))
3035 fprintf (vect_dump, "??? operands of different types");
3036 return false;
3039 vec_mode = TYPE_MODE (vectype);
3040 /* FORNOW. In some cases can vectorize even if data-type not supported
3041 (e.g. - array initialization with 0). */
3042 if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
3043 return false;
3045 if (!STMT_VINFO_DATA_REF (stmt_info))
3046 return false;
3048 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3050 strided_store = true;
3051 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3052 if (!vect_strided_store_supported (vectype)
3053 && !PURE_SLP_STMT (stmt_info) && !slp)
3054 return false;
3056 if (first_stmt == stmt)
3058 /* STMT is the leader of the group. Check the operands of all the
3059 stmts of the group. */
3060 next_stmt = DR_GROUP_NEXT_DR (stmt_info);
3061 while (next_stmt)
3063 gcc_assert (gimple_assign_single_p (next_stmt));
3064 op = gimple_assign_rhs1 (next_stmt);
3065 if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt,
3066 &def, &dt))
3068 if (vect_print_dump_info (REPORT_DETAILS))
3069 fprintf (vect_dump, "use not simple.");
3070 return false;
3072 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3077 if (!vec_stmt) /* transformation not required. */
3079 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3080 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
3081 return true;
3084 /** Transform. **/
3086 if (strided_store)
3088 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3089 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3091 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3093 /* FORNOW */
3094 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3096 /* We vectorize all the stmts of the interleaving group when we
3097 reach the last stmt in the group. */
3098 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3099 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
3100 && !slp)
3102 *vec_stmt = NULL;
3103 return true;
3106 if (slp)
3108 strided_store = false;
3109 /* VEC_NUM is the number of vect stmts to be created for this
3110 group. */
3111 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3112 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3113 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3115 else
3116 /* VEC_NUM is the number of vect stmts to be created for this
3117 group. */
3118 vec_num = group_size;
3120 else
3122 first_stmt = stmt;
3123 first_dr = dr;
3124 group_size = vec_num = 1;
3127 if (vect_print_dump_info (REPORT_DETAILS))
3128 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3130 dr_chain = VEC_alloc (tree, heap, group_size);
3131 oprnds = VEC_alloc (tree, heap, group_size);
3133 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3134 gcc_assert (alignment_support_scheme);
3136 /* In case the vectorization factor (VF) is bigger than the number
3137 of elements that we can fit in a vectype (nunits), we have to generate
3138 more than one vector stmt - i.e - we need to "unroll" the
3139 vector stmt by a factor VF/nunits. For more details see documentation in
3140 vect_get_vec_def_for_copy_stmt. */
3142 /* In case of interleaving (non-unit strided access):
3144 S1: &base + 2 = x2
3145 S2: &base = x0
3146 S3: &base + 1 = x1
3147 S4: &base + 3 = x3
3149 We create vectorized stores starting from base address (the access of the
3150 first stmt in the chain (S2 in the above example), when the last store stmt
3151 of the chain (S4) is reached:
3153 VS1: &base = vx2
3154 VS2: &base + vec_size*1 = vx0
3155 VS3: &base + vec_size*2 = vx1
3156 VS4: &base + vec_size*3 = vx3
3158 Then permutation statements are generated:
3160 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3161 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3164 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3165 (the order of the data-refs in the output of vect_permute_store_chain
3166 corresponds to the order of scalar stmts in the interleaving chain - see
3167 the documentation of vect_permute_store_chain()).
3169 In case of both multiple types and interleaving, above vector stores and
3170 permutation stmts are created for every copy. The result vector stmts are
3171 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3172 STMT_VINFO_RELATED_STMT for the next copies.
3175 prev_stmt_info = NULL;
3176 for (j = 0; j < ncopies; j++)
3178 gimple new_stmt;
3179 gimple ptr_incr;
3181 if (j == 0)
3183 if (slp)
3185 /* Get vectorized arguments for SLP_NODE. */
3186 vect_get_slp_defs (slp_node, &vec_oprnds, NULL, -1);
3188 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3190 else
3192 /* For interleaved stores we collect vectorized defs for all the
3193 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3194 used as an input to vect_permute_store_chain(), and OPRNDS as
3195 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3197 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3198 OPRNDS are of size 1. */
3199 next_stmt = first_stmt;
3200 for (i = 0; i < group_size; i++)
3202 /* Since gaps are not supported for interleaved stores,
3203 GROUP_SIZE is the exact number of stmts in the chain.
3204 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3205 there is no interleaving, GROUP_SIZE is 1, and only one
3206 iteration of the loop will be executed. */
3207 gcc_assert (next_stmt
3208 && gimple_assign_single_p (next_stmt));
3209 op = gimple_assign_rhs1 (next_stmt);
3211 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3212 NULL);
3213 VEC_quick_push(tree, dr_chain, vec_oprnd);
3214 VEC_quick_push(tree, oprnds, vec_oprnd);
3215 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3219 /* We should have catched mismatched types earlier. */
3220 gcc_assert (useless_type_conversion_p (vectype,
3221 TREE_TYPE (vec_oprnd)));
3222 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
3223 &dummy, &ptr_incr, false,
3224 &inv_p);
3225 gcc_assert (bb_vinfo || !inv_p);
3227 else
3229 /* For interleaved stores we created vectorized defs for all the
3230 defs stored in OPRNDS in the previous iteration (previous copy).
3231 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3232 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3233 next copy.
3234 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3235 OPRNDS are of size 1. */
3236 for (i = 0; i < group_size; i++)
3238 op = VEC_index (tree, oprnds, i);
3239 vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def,
3240 &dt);
3241 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3242 VEC_replace(tree, dr_chain, i, vec_oprnd);
3243 VEC_replace(tree, oprnds, i, vec_oprnd);
3245 dataref_ptr =
3246 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3249 if (strided_store)
3251 result_chain = VEC_alloc (tree, heap, group_size);
3252 /* Permute. */
3253 if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3254 &result_chain))
3255 return false;
3258 next_stmt = first_stmt;
3259 for (i = 0; i < vec_num; i++)
3261 if (i > 0)
3262 /* Bump the vector pointer. */
3263 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3264 NULL_TREE);
3266 if (slp)
3267 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3268 else if (strided_store)
3269 /* For strided stores vectorized defs are interleaved in
3270 vect_permute_store_chain(). */
3271 vec_oprnd = VEC_index (tree, result_chain, i);
3273 if (aligned_access_p (first_dr))
3274 data_ref = build_fold_indirect_ref (dataref_ptr);
3275 else
3277 int mis = DR_MISALIGNMENT (first_dr);
3278 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3279 tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
3280 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3283 /* If accesses through a pointer to vectype do not alias the original
3284 memory reference we have a problem. This should never happen. */
3285 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3286 get_alias_set (gimple_assign_lhs (stmt))));
3288 /* Arguments are ready. Create the new vector stmt. */
3289 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3290 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3291 mark_symbols_for_renaming (new_stmt);
3293 if (slp)
3294 continue;
3296 if (j == 0)
3297 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3298 else
3299 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3301 prev_stmt_info = vinfo_for_stmt (new_stmt);
3302 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3303 if (!next_stmt)
3304 break;
3308 VEC_free (tree, heap, dr_chain);
3309 VEC_free (tree, heap, oprnds);
3310 if (result_chain)
3311 VEC_free (tree, heap, result_chain);
3313 return true;
3316 /* vectorizable_load.
3318 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
3319 can be vectorized.
3320 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3321 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3322 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3324 static bool
3325 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3326 slp_tree slp_node, slp_instance slp_node_instance)
3328 tree scalar_dest;
3329 tree vec_dest = NULL;
3330 tree data_ref = NULL;
3331 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3332 stmt_vec_info prev_stmt_info;
3333 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3334 struct loop *loop = NULL;
3335 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3336 bool nested_in_vect_loop = false;
3337 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3338 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3339 tree new_temp;
3340 int mode;
3341 gimple new_stmt = NULL;
3342 tree dummy;
3343 enum dr_alignment_support alignment_support_scheme;
3344 tree dataref_ptr = NULL_TREE;
3345 gimple ptr_incr;
3346 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3347 int ncopies;
3348 int i, j, group_size;
3349 tree msq = NULL_TREE, lsq;
3350 tree offset = NULL_TREE;
3351 tree realignment_token = NULL_TREE;
3352 gimple phi = NULL;
3353 VEC(tree,heap) *dr_chain = NULL;
3354 bool strided_load = false;
3355 gimple first_stmt;
3356 tree scalar_type;
3357 bool inv_p;
3358 bool compute_in_loop = false;
3359 struct loop *at_loop;
3360 int vec_num;
3361 bool slp = (slp_node != NULL);
3362 bool slp_perm = false;
3363 enum tree_code code;
3364 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3365 int vf;
3367 if (loop_vinfo)
3369 loop = LOOP_VINFO_LOOP (loop_vinfo);
3370 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3371 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3373 else
3374 vf = 1;
3376 /* Multiple types in SLP are handled by creating the appropriate number of
3377 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3378 case of SLP. */
3379 if (slp)
3380 ncopies = 1;
3381 else
3382 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3384 gcc_assert (ncopies >= 1);
3386 /* FORNOW. This restriction should be relaxed. */
3387 if (nested_in_vect_loop && ncopies > 1)
3389 if (vect_print_dump_info (REPORT_DETAILS))
3390 fprintf (vect_dump, "multiple types in nested loop.");
3391 return false;
3394 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3395 return false;
3397 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3398 return false;
3400 /* Is vectorizable load? */
3401 if (!is_gimple_assign (stmt))
3402 return false;
3404 scalar_dest = gimple_assign_lhs (stmt);
3405 if (TREE_CODE (scalar_dest) != SSA_NAME)
3406 return false;
3408 code = gimple_assign_rhs_code (stmt);
3409 if (code != ARRAY_REF
3410 && code != INDIRECT_REF
3411 && code != COMPONENT_REF
3412 && code != IMAGPART_EXPR
3413 && code != REALPART_EXPR)
3414 return false;
3416 if (!STMT_VINFO_DATA_REF (stmt_info))
3417 return false;
3419 scalar_type = TREE_TYPE (DR_REF (dr));
3420 mode = (int) TYPE_MODE (vectype);
3422 /* FORNOW. In some cases can vectorize even if data-type not supported
3423 (e.g. - data copies). */
3424 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
3426 if (vect_print_dump_info (REPORT_DETAILS))
3427 fprintf (vect_dump, "Aligned load, but unsupported type.");
3428 return false;
3431 /* The vector component type needs to be trivially convertible to the
3432 scalar lhs. This should always be the case. */
3433 if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
3435 if (vect_print_dump_info (REPORT_DETAILS))
3436 fprintf (vect_dump, "??? operands of different types");
3437 return false;
3440 /* Check if the load is a part of an interleaving chain. */
3441 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3443 strided_load = true;
3444 /* FORNOW */
3445 gcc_assert (! nested_in_vect_loop);
3447 /* Check if interleaving is supported. */
3448 if (!vect_strided_load_supported (vectype)
3449 && !PURE_SLP_STMT (stmt_info) && !slp)
3450 return false;
3453 if (!vec_stmt) /* transformation not required. */
3455 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
3456 vect_model_load_cost (stmt_info, ncopies, NULL);
3457 return true;
3460 if (vect_print_dump_info (REPORT_DETAILS))
3461 fprintf (vect_dump, "transform load.");
3463 /** Transform. **/
3465 if (strided_load)
3467 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3468 /* Check if the chain of loads is already vectorized. */
3469 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
3471 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3472 return true;
3474 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3475 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3477 /* VEC_NUM is the number of vect stmts to be created for this group. */
3478 if (slp)
3480 strided_load = false;
3481 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3482 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
3483 slp_perm = true;
3485 else
3486 vec_num = group_size;
3488 dr_chain = VEC_alloc (tree, heap, vec_num);
3490 else
3492 first_stmt = stmt;
3493 first_dr = dr;
3494 group_size = vec_num = 1;
3497 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3498 gcc_assert (alignment_support_scheme);
3500 /* In case the vectorization factor (VF) is bigger than the number
3501 of elements that we can fit in a vectype (nunits), we have to generate
3502 more than one vector stmt - i.e - we need to "unroll" the
3503 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3504 from one copy of the vector stmt to the next, in the field
3505 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3506 stages to find the correct vector defs to be used when vectorizing
3507 stmts that use the defs of the current stmt. The example below illustrates
3508 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3509 4 vectorized stmts):
3511 before vectorization:
3512 RELATED_STMT VEC_STMT
3513 S1: x = memref - -
3514 S2: z = x + 1 - -
3516 step 1: vectorize stmt S1:
3517 We first create the vector stmt VS1_0, and, as usual, record a
3518 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3519 Next, we create the vector stmt VS1_1, and record a pointer to
3520 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3521 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3522 stmts and pointers:
3523 RELATED_STMT VEC_STMT
3524 VS1_0: vx0 = memref0 VS1_1 -
3525 VS1_1: vx1 = memref1 VS1_2 -
3526 VS1_2: vx2 = memref2 VS1_3 -
3527 VS1_3: vx3 = memref3 - -
3528 S1: x = load - VS1_0
3529 S2: z = x + 1 - -
3531 See in documentation in vect_get_vec_def_for_stmt_copy for how the
3532 information we recorded in RELATED_STMT field is used to vectorize
3533 stmt S2. */
3535 /* In case of interleaving (non-unit strided access):
3537 S1: x2 = &base + 2
3538 S2: x0 = &base
3539 S3: x1 = &base + 1
3540 S4: x3 = &base + 3
3542 Vectorized loads are created in the order of memory accesses
3543 starting from the access of the first stmt of the chain:
3545 VS1: vx0 = &base
3546 VS2: vx1 = &base + vec_size*1
3547 VS3: vx3 = &base + vec_size*2
3548 VS4: vx4 = &base + vec_size*3
3550 Then permutation statements are generated:
3552 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
3553 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
3556 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3557 (the order of the data-refs in the output of vect_permute_load_chain
3558 corresponds to the order of scalar stmts in the interleaving chain - see
3559 the documentation of vect_permute_load_chain()).
3560 The generation of permutation stmts and recording them in
3561 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
3563 In case of both multiple types and interleaving, the vector loads and
3564 permutation stmts above are created for every copy. The result vector stmts
3565 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3566 STMT_VINFO_RELATED_STMT for the next copies. */
3568 /* If the data reference is aligned (dr_aligned) or potentially unaligned
3569 on a target that supports unaligned accesses (dr_unaligned_supported)
3570 we generate the following code:
3571 p = initial_addr;
3572 indx = 0;
3573 loop {
3574 p = p + indx * vectype_size;
3575 vec_dest = *(p);
3576 indx = indx + 1;
3579 Otherwise, the data reference is potentially unaligned on a target that
3580 does not support unaligned accesses (dr_explicit_realign_optimized) -
3581 then generate the following code, in which the data in each iteration is
3582 obtained by two vector loads, one from the previous iteration, and one
3583 from the current iteration:
3584 p1 = initial_addr;
3585 msq_init = *(floor(p1))
3586 p2 = initial_addr + VS - 1;
3587 realignment_token = call target_builtin;
3588 indx = 0;
3589 loop {
3590 p2 = p2 + indx * vectype_size
3591 lsq = *(floor(p2))
3592 vec_dest = realign_load (msq, lsq, realignment_token)
3593 indx = indx + 1;
3594 msq = lsq;
3595 } */
3597 /* If the misalignment remains the same throughout the execution of the
3598 loop, we can create the init_addr and permutation mask at the loop
3599 preheader. Otherwise, it needs to be created inside the loop.
3600 This can only occur when vectorizing memory accesses in the inner-loop
3601 nested within an outer-loop that is being vectorized. */
3603 if (loop && nested_in_vect_loop_p (loop, stmt)
3604 && (TREE_INT_CST_LOW (DR_STEP (dr))
3605 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
3607 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
3608 compute_in_loop = true;
3611 if ((alignment_support_scheme == dr_explicit_realign_optimized
3612 || alignment_support_scheme == dr_explicit_realign)
3613 && !compute_in_loop)
3615 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
3616 alignment_support_scheme, NULL_TREE,
3617 &at_loop);
3618 if (alignment_support_scheme == dr_explicit_realign_optimized)
3620 phi = SSA_NAME_DEF_STMT (msq);
3621 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3624 else
3625 at_loop = loop;
3627 prev_stmt_info = NULL;
3628 for (j = 0; j < ncopies; j++)
3630 /* 1. Create the vector pointer update chain. */
3631 if (j == 0)
3632 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
3633 at_loop, offset,
3634 &dummy, &ptr_incr, false,
3635 &inv_p);
3636 else
3637 dataref_ptr =
3638 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3640 for (i = 0; i < vec_num; i++)
3642 if (i > 0)
3643 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3644 NULL_TREE);
3646 /* 2. Create the vector-load in the loop. */
3647 switch (alignment_support_scheme)
3649 case dr_aligned:
3650 gcc_assert (aligned_access_p (first_dr));
3651 data_ref = build_fold_indirect_ref (dataref_ptr);
3652 break;
3653 case dr_unaligned_supported:
3655 int mis = DR_MISALIGNMENT (first_dr);
3656 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3658 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
3659 data_ref =
3660 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3661 break;
3663 case dr_explicit_realign:
3665 tree ptr, bump;
3666 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3668 if (compute_in_loop)
3669 msq = vect_setup_realignment (first_stmt, gsi,
3670 &realignment_token,
3671 dr_explicit_realign,
3672 dataref_ptr, NULL);
3674 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3675 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3676 new_stmt = gimple_build_assign (vec_dest, data_ref);
3677 new_temp = make_ssa_name (vec_dest, new_stmt);
3678 gimple_assign_set_lhs (new_stmt, new_temp);
3679 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
3680 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
3681 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3682 msq = new_temp;
3684 bump = size_binop (MULT_EXPR, vs_minus_1,
3685 TYPE_SIZE_UNIT (scalar_type));
3686 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
3687 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
3688 break;
3690 case dr_explicit_realign_optimized:
3691 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3692 break;
3693 default:
3694 gcc_unreachable ();
3696 /* If accesses through a pointer to vectype do not alias the original
3697 memory reference we have a problem. This should never happen. */
3698 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3699 get_alias_set (gimple_assign_rhs1 (stmt))));
3700 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3701 new_stmt = gimple_build_assign (vec_dest, data_ref);
3702 new_temp = make_ssa_name (vec_dest, new_stmt);
3703 gimple_assign_set_lhs (new_stmt, new_temp);
3704 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3705 mark_symbols_for_renaming (new_stmt);
3707 /* 3. Handle explicit realignment if necessary/supported. Create in
3708 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
3709 if (alignment_support_scheme == dr_explicit_realign_optimized
3710 || alignment_support_scheme == dr_explicit_realign)
3712 tree tmp;
3714 lsq = gimple_assign_lhs (new_stmt);
3715 if (!realignment_token)
3716 realignment_token = dataref_ptr;
3717 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3718 tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
3719 realignment_token);
3720 new_stmt = gimple_build_assign (vec_dest, tmp);
3721 new_temp = make_ssa_name (vec_dest, new_stmt);
3722 gimple_assign_set_lhs (new_stmt, new_temp);
3723 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3725 if (alignment_support_scheme == dr_explicit_realign_optimized)
3727 gcc_assert (phi);
3728 if (i == vec_num - 1 && j == ncopies - 1)
3729 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
3730 UNKNOWN_LOCATION);
3731 msq = lsq;
3735 /* 4. Handle invariant-load. */
3736 if (inv_p && !bb_vinfo)
3738 gcc_assert (!strided_load);
3739 gcc_assert (nested_in_vect_loop_p (loop, stmt));
3740 if (j == 0)
3742 int k;
3743 tree t = NULL_TREE;
3744 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
3746 /* CHECKME: bitpos depends on endianess? */
3747 bitpos = bitsize_zero_node;
3748 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
3749 bitsize, bitpos);
3750 vec_dest =
3751 vect_create_destination_var (scalar_dest, NULL_TREE);
3752 new_stmt = gimple_build_assign (vec_dest, vec_inv);
3753 new_temp = make_ssa_name (vec_dest, new_stmt);
3754 gimple_assign_set_lhs (new_stmt, new_temp);
3755 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3757 for (k = nunits - 1; k >= 0; --k)
3758 t = tree_cons (NULL_TREE, new_temp, t);
3759 /* FIXME: use build_constructor directly. */
3760 vec_inv = build_constructor_from_list (vectype, t);
3761 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
3762 new_stmt = SSA_NAME_DEF_STMT (new_temp);
3764 else
3765 gcc_unreachable (); /* FORNOW. */
3768 /* Collect vector loads and later create their permutation in
3769 vect_transform_strided_load (). */
3770 if (strided_load || slp_perm)
3771 VEC_quick_push (tree, dr_chain, new_temp);
3773 /* Store vector loads in the corresponding SLP_NODE. */
3774 if (slp && !slp_perm)
3775 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3778 if (slp && !slp_perm)
3779 continue;
3781 if (slp_perm)
3783 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
3784 slp_node_instance, false))
3786 VEC_free (tree, heap, dr_chain);
3787 return false;
3790 else
3792 if (strided_load)
3794 if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi))
3795 return false;
3797 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3798 VEC_free (tree, heap, dr_chain);
3799 dr_chain = VEC_alloc (tree, heap, group_size);
3801 else
3803 if (j == 0)
3804 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3805 else
3806 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3807 prev_stmt_info = vinfo_for_stmt (new_stmt);
3812 if (dr_chain)
3813 VEC_free (tree, heap, dr_chain);
3815 return true;
3818 /* Function vect_is_simple_cond.
3820 Input:
3821 LOOP - the loop that is being vectorized.
3822 COND - Condition that is checked for simple use.
3824 Returns whether a COND can be vectorized. Checks whether
3825 condition operands are supportable using vec_is_simple_use. */
3827 static bool
3828 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
3830 tree lhs, rhs;
3831 tree def;
3832 enum vect_def_type dt;
3834 if (!COMPARISON_CLASS_P (cond))
3835 return false;
3837 lhs = TREE_OPERAND (cond, 0);
3838 rhs = TREE_OPERAND (cond, 1);
3840 if (TREE_CODE (lhs) == SSA_NAME)
3842 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
3843 if (!vect_is_simple_use (lhs, loop_vinfo, NULL, &lhs_def_stmt, &def,
3844 &dt))
3845 return false;
3847 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
3848 && TREE_CODE (lhs) != FIXED_CST)
3849 return false;
3851 if (TREE_CODE (rhs) == SSA_NAME)
3853 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
3854 if (!vect_is_simple_use (rhs, loop_vinfo, NULL, &rhs_def_stmt, &def,
3855 &dt))
3856 return false;
3858 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
3859 && TREE_CODE (rhs) != FIXED_CST)
3860 return false;
3862 return true;
3865 /* vectorizable_condition.
3867 Check if STMT is conditional modify expression that can be vectorized.
3868 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3869 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
3870 at GSI.
3872 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
3873 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
3874 else caluse if it is 2).
3876 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3878 bool
3879 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
3880 gimple *vec_stmt, tree reduc_def, int reduc_index)
3882 tree scalar_dest = NULL_TREE;
3883 tree vec_dest = NULL_TREE;
3884 tree op = NULL_TREE;
3885 tree cond_expr, then_clause, else_clause;
3886 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3887 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3888 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
3889 tree vec_compare, vec_cond_expr;
3890 tree new_temp;
3891 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3892 enum machine_mode vec_mode;
3893 tree def;
3894 enum vect_def_type dt;
3895 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3896 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3897 enum tree_code code;
3899 /* FORNOW: unsupported in basic block SLP. */
3900 gcc_assert (loop_vinfo);
3902 gcc_assert (ncopies >= 1);
3903 if (ncopies > 1)
3904 return false; /* FORNOW */
3906 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3907 return false;
3909 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3910 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
3911 && reduc_def))
3912 return false;
3914 /* FORNOW: SLP not supported. */
3915 if (STMT_SLP_TYPE (stmt_info))
3916 return false;
3918 /* FORNOW: not yet supported. */
3919 if (STMT_VINFO_LIVE_P (stmt_info))
3921 if (vect_print_dump_info (REPORT_DETAILS))
3922 fprintf (vect_dump, "value used after loop.");
3923 return false;
3926 /* Is vectorizable conditional operation? */
3927 if (!is_gimple_assign (stmt))
3928 return false;
3930 code = gimple_assign_rhs_code (stmt);
3932 if (code != COND_EXPR)
3933 return false;
3935 gcc_assert (gimple_assign_single_p (stmt));
3936 op = gimple_assign_rhs1 (stmt);
3937 cond_expr = TREE_OPERAND (op, 0);
3938 then_clause = TREE_OPERAND (op, 1);
3939 else_clause = TREE_OPERAND (op, 2);
3941 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
3942 return false;
3944 /* We do not handle two different vector types for the condition
3945 and the values. */
3946 if (!types_compatible_p (TREE_TYPE (TREE_OPERAND (cond_expr, 0)),
3947 TREE_TYPE (vectype)))
3948 return false;
3950 if (TREE_CODE (then_clause) == SSA_NAME)
3952 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
3953 if (!vect_is_simple_use (then_clause, loop_vinfo, NULL,
3954 &then_def_stmt, &def, &dt))
3955 return false;
3957 else if (TREE_CODE (then_clause) != INTEGER_CST
3958 && TREE_CODE (then_clause) != REAL_CST
3959 && TREE_CODE (then_clause) != FIXED_CST)
3960 return false;
3962 if (TREE_CODE (else_clause) == SSA_NAME)
3964 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
3965 if (!vect_is_simple_use (else_clause, loop_vinfo, NULL,
3966 &else_def_stmt, &def, &dt))
3967 return false;
3969 else if (TREE_CODE (else_clause) != INTEGER_CST
3970 && TREE_CODE (else_clause) != REAL_CST
3971 && TREE_CODE (else_clause) != FIXED_CST)
3972 return false;
3975 vec_mode = TYPE_MODE (vectype);
3977 if (!vec_stmt)
3979 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
3980 return expand_vec_cond_expr_p (TREE_TYPE (op), vec_mode);
3983 /* Transform */
3985 /* Handle def. */
3986 scalar_dest = gimple_assign_lhs (stmt);
3987 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3989 /* Handle cond expr. */
3990 vec_cond_lhs =
3991 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
3992 vec_cond_rhs =
3993 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
3994 if (reduc_index == 1)
3995 vec_then_clause = reduc_def;
3996 else
3997 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
3998 if (reduc_index == 2)
3999 vec_else_clause = reduc_def;
4000 else
4001 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
4003 /* Arguments are ready. Create the new vector stmt. */
4004 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4005 vec_cond_lhs, vec_cond_rhs);
4006 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4007 vec_compare, vec_then_clause, vec_else_clause);
4009 *vec_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
4010 new_temp = make_ssa_name (vec_dest, *vec_stmt);
4011 gimple_assign_set_lhs (*vec_stmt, new_temp);
4012 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
4014 return true;
4018 /* Make sure the statement is vectorizable. */
4020 bool
4021 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
4023 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4024 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4025 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
4026 bool ok;
4027 tree scalar_type, vectype;
4029 if (vect_print_dump_info (REPORT_DETAILS))
4031 fprintf (vect_dump, "==> examining statement: ");
4032 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4035 if (gimple_has_volatile_ops (stmt))
4037 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4038 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
4040 return false;
4043 /* Skip stmts that do not need to be vectorized. In loops this is expected
4044 to include:
4045 - the COND_EXPR which is the loop exit condition
4046 - any LABEL_EXPRs in the loop
4047 - computations that are used only for array indexing or loop control.
4048 In basic blocks we only analyze statements that are a part of some SLP
4049 instance, therefore, all the statements are relevant. */
4051 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4052 && !STMT_VINFO_LIVE_P (stmt_info))
4054 if (vect_print_dump_info (REPORT_DETAILS))
4055 fprintf (vect_dump, "irrelevant.");
4057 return true;
4060 switch (STMT_VINFO_DEF_TYPE (stmt_info))
4062 case vect_internal_def:
4063 break;
4065 case vect_reduction_def:
4066 case vect_nested_cycle:
4067 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
4068 || relevance == vect_used_in_outer_by_reduction
4069 || relevance == vect_unused_in_scope));
4070 break;
4072 case vect_induction_def:
4073 case vect_constant_def:
4074 case vect_external_def:
4075 case vect_unknown_def_type:
4076 default:
4077 gcc_unreachable ();
4080 if (bb_vinfo)
4082 gcc_assert (PURE_SLP_STMT (stmt_info));
4084 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
4085 if (vect_print_dump_info (REPORT_DETAILS))
4087 fprintf (vect_dump, "get vectype for scalar type: ");
4088 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4091 vectype = get_vectype_for_scalar_type (scalar_type);
4092 if (!vectype)
4094 if (vect_print_dump_info (REPORT_DETAILS))
4096 fprintf (vect_dump, "not SLPed: unsupported data-type ");
4097 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4099 return false;
4102 if (vect_print_dump_info (REPORT_DETAILS))
4104 fprintf (vect_dump, "vectype: ");
4105 print_generic_expr (vect_dump, vectype, TDF_SLIM);
4108 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4111 if (STMT_VINFO_RELEVANT_P (stmt_info))
4113 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
4114 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
4115 *need_to_vectorize = true;
4118 ok = true;
4119 if (!bb_vinfo
4120 && (STMT_VINFO_RELEVANT_P (stmt_info)
4121 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
4122 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
4123 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
4124 || vectorizable_conversion (stmt, NULL, NULL, NULL)
4125 || vectorizable_operation (stmt, NULL, NULL, NULL)
4126 || vectorizable_assignment (stmt, NULL, NULL, NULL)
4127 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
4128 || vectorizable_call (stmt, NULL, NULL)
4129 || vectorizable_store (stmt, NULL, NULL, NULL)
4130 || vectorizable_reduction (stmt, NULL, NULL, NULL)
4131 || vectorizable_condition (stmt, NULL, NULL, NULL, 0));
4132 else
4134 if (bb_vinfo)
4135 ok = (vectorizable_operation (stmt, NULL, NULL, node)
4136 || vectorizable_assignment (stmt, NULL, NULL, node)
4137 || vectorizable_load (stmt, NULL, NULL, node, NULL)
4138 || vectorizable_store (stmt, NULL, NULL, node));
4141 if (!ok)
4143 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4145 fprintf (vect_dump, "not vectorized: relevant stmt not ");
4146 fprintf (vect_dump, "supported: ");
4147 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4150 return false;
4153 if (bb_vinfo)
4154 return true;
4156 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
4157 need extra handling, except for vectorizable reductions. */
4158 if (STMT_VINFO_LIVE_P (stmt_info)
4159 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4160 ok = vectorizable_live_operation (stmt, NULL, NULL);
4162 if (!ok)
4164 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
4166 fprintf (vect_dump, "not vectorized: live stmt not ");
4167 fprintf (vect_dump, "supported: ");
4168 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4171 return false;
4174 if (!PURE_SLP_STMT (stmt_info))
4176 /* Groups of strided accesses whose size is not a power of 2 are not
4177 vectorizable yet using loop-vectorization. Therefore, if this stmt
4178 feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
4179 loop-based vectorized), the loop cannot be vectorized. */
4180 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
4181 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
4182 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
4184 if (vect_print_dump_info (REPORT_DETAILS))
4186 fprintf (vect_dump, "not vectorized: the size of group "
4187 "of strided accesses is not a power of 2");
4188 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4191 return false;
4195 return true;
4199 /* Function vect_transform_stmt.
4201 Create a vectorized stmt to replace STMT, and insert it at BSI. */
4203 bool
4204 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
4205 bool *strided_store, slp_tree slp_node,
4206 slp_instance slp_node_instance)
4208 bool is_store = false;
4209 gimple vec_stmt = NULL;
4210 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4211 gimple orig_stmt_in_pattern;
4212 bool done;
4214 switch (STMT_VINFO_TYPE (stmt_info))
4216 case type_demotion_vec_info_type:
4217 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
4218 gcc_assert (done);
4219 break;
4221 case type_promotion_vec_info_type:
4222 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
4223 gcc_assert (done);
4224 break;
4226 case type_conversion_vec_info_type:
4227 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
4228 gcc_assert (done);
4229 break;
4231 case induc_vec_info_type:
4232 gcc_assert (!slp_node);
4233 done = vectorizable_induction (stmt, gsi, &vec_stmt);
4234 gcc_assert (done);
4235 break;
4237 case op_vec_info_type:
4238 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
4239 gcc_assert (done);
4240 break;
4242 case assignment_vec_info_type:
4243 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
4244 gcc_assert (done);
4245 break;
4247 case load_vec_info_type:
4248 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
4249 slp_node_instance);
4250 gcc_assert (done);
4251 break;
4253 case store_vec_info_type:
4254 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
4255 gcc_assert (done);
4256 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
4258 /* In case of interleaving, the whole chain is vectorized when the
4259 last store in the chain is reached. Store stmts before the last
4260 one are skipped, and there vec_stmt_info shouldn't be freed
4261 meanwhile. */
4262 *strided_store = true;
4263 if (STMT_VINFO_VEC_STMT (stmt_info))
4264 is_store = true;
4266 else
4267 is_store = true;
4268 break;
4270 case condition_vec_info_type:
4271 gcc_assert (!slp_node);
4272 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0);
4273 gcc_assert (done);
4274 break;
4276 case call_vec_info_type:
4277 gcc_assert (!slp_node);
4278 done = vectorizable_call (stmt, gsi, &vec_stmt);
4279 break;
4281 case reduc_vec_info_type:
4282 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
4283 gcc_assert (done);
4284 break;
4286 default:
4287 if (!STMT_VINFO_LIVE_P (stmt_info))
4289 if (vect_print_dump_info (REPORT_DETAILS))
4290 fprintf (vect_dump, "stmt not supported.");
4291 gcc_unreachable ();
4295 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
4296 is being vectorized, but outside the immediately enclosing loop. */
4297 if (vec_stmt
4298 && STMT_VINFO_LOOP_VINFO (stmt_info)
4299 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
4300 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
4301 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
4302 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
4303 || STMT_VINFO_RELEVANT (stmt_info) ==
4304 vect_used_in_outer_by_reduction))
4306 struct loop *innerloop = LOOP_VINFO_LOOP (
4307 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
4308 imm_use_iterator imm_iter;
4309 use_operand_p use_p;
4310 tree scalar_dest;
4311 gimple exit_phi;
4313 if (vect_print_dump_info (REPORT_DETAILS))
4314 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
4316 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
4317 (to be used when vectorizing outer-loop stmts that use the DEF of
4318 STMT). */
4319 if (gimple_code (stmt) == GIMPLE_PHI)
4320 scalar_dest = PHI_RESULT (stmt);
4321 else
4322 scalar_dest = gimple_assign_lhs (stmt);
4324 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4326 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
4328 exit_phi = USE_STMT (use_p);
4329 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
4334 /* Handle stmts whose DEF is used outside the loop-nest that is
4335 being vectorized. */
4336 if (STMT_VINFO_LIVE_P (stmt_info)
4337 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4339 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
4340 gcc_assert (done);
4343 if (vec_stmt)
4345 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
4346 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
4347 if (orig_stmt_in_pattern)
4349 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
4350 /* STMT was inserted by the vectorizer to replace a computation idiom.
4351 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
4352 computed this idiom. We need to record a pointer to VEC_STMT in
4353 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
4354 documentation of vect_pattern_recog. */
4355 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4357 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4358 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
4363 return is_store;
4367 /* Remove a group of stores (for SLP or interleaving), free their
4368 stmt_vec_info. */
4370 void
4371 vect_remove_stores (gimple first_stmt)
4373 gimple next = first_stmt;
4374 gimple tmp;
4375 gimple_stmt_iterator next_si;
4377 while (next)
4379 /* Free the attached stmt_vec_info and remove the stmt. */
4380 next_si = gsi_for_stmt (next);
4381 gsi_remove (&next_si, true);
4382 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
4383 free_stmt_vec_info (next);
4384 next = tmp;
4389 /* Function new_stmt_vec_info.
4391 Create and initialize a new stmt_vec_info struct for STMT. */
4393 stmt_vec_info
4394 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
4395 bb_vec_info bb_vinfo)
4397 stmt_vec_info res;
4398 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
4400 STMT_VINFO_TYPE (res) = undef_vec_info_type;
4401 STMT_VINFO_STMT (res) = stmt;
4402 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
4403 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
4404 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
4405 STMT_VINFO_LIVE_P (res) = false;
4406 STMT_VINFO_VECTYPE (res) = NULL;
4407 STMT_VINFO_VEC_STMT (res) = NULL;
4408 STMT_VINFO_VECTORIZABLE (res) = true;
4409 STMT_VINFO_IN_PATTERN_P (res) = false;
4410 STMT_VINFO_RELATED_STMT (res) = NULL;
4411 STMT_VINFO_DATA_REF (res) = NULL;
4413 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
4414 STMT_VINFO_DR_OFFSET (res) = NULL;
4415 STMT_VINFO_DR_INIT (res) = NULL;
4416 STMT_VINFO_DR_STEP (res) = NULL;
4417 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
4419 if (gimple_code (stmt) == GIMPLE_PHI
4420 && is_loop_header_bb_p (gimple_bb (stmt)))
4421 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
4422 else
4423 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
4425 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
4426 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
4427 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
4428 STMT_SLP_TYPE (res) = loop_vect;
4429 DR_GROUP_FIRST_DR (res) = NULL;
4430 DR_GROUP_NEXT_DR (res) = NULL;
4431 DR_GROUP_SIZE (res) = 0;
4432 DR_GROUP_STORE_COUNT (res) = 0;
4433 DR_GROUP_GAP (res) = 0;
4434 DR_GROUP_SAME_DR_STMT (res) = NULL;
4435 DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;
4437 return res;
4441 /* Create a hash table for stmt_vec_info. */
4443 void
4444 init_stmt_vec_info_vec (void)
4446 gcc_assert (!stmt_vec_info_vec);
4447 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
4451 /* Free hash table for stmt_vec_info. */
4453 void
4454 free_stmt_vec_info_vec (void)
4456 gcc_assert (stmt_vec_info_vec);
4457 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
4461 /* Free stmt vectorization related info. */
4463 void
4464 free_stmt_vec_info (gimple stmt)
4466 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4468 if (!stmt_info)
4469 return;
4471 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
4472 set_vinfo_for_stmt (stmt, NULL);
4473 free (stmt_info);
4477 /* Function get_vectype_for_scalar_type.
4479 Returns the vector type corresponding to SCALAR_TYPE as supported
4480 by the target. */
4482 tree
4483 get_vectype_for_scalar_type (tree scalar_type)
4485 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
4486 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
4487 int nunits;
4488 tree vectype;
4490 if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
4491 return NULL_TREE;
4493 /* We can't build a vector type of elements with alignment bigger than
4494 their size. */
4495 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
4496 return NULL_TREE;
4498 /* If we'd build a vector type of elements whose mode precision doesn't
4499 match their types precision we'll get mismatched types on vector
4500 extracts via BIT_FIELD_REFs. This effectively means we disable
4501 vectorization of bool and/or enum types in some languages. */
4502 if (INTEGRAL_TYPE_P (scalar_type)
4503 && GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type))
4504 return NULL_TREE;
4506 /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
4507 is expected. */
4508 nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;
4510 vectype = build_vector_type (scalar_type, nunits);
4511 if (vect_print_dump_info (REPORT_DETAILS))
4513 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
4514 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4517 if (!vectype)
4518 return NULL_TREE;
4520 if (vect_print_dump_info (REPORT_DETAILS))
4522 fprintf (vect_dump, "vectype: ");
4523 print_generic_expr (vect_dump, vectype, TDF_SLIM);
4526 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4527 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
4529 if (vect_print_dump_info (REPORT_DETAILS))
4530 fprintf (vect_dump, "mode not supported by target.");
4531 return NULL_TREE;
4534 return vectype;
4537 /* Function get_same_sized_vectype
4539 Returns a vector type corresponding to SCALAR_TYPE of size
4540 VECTOR_TYPE if supported by the target. */
4542 tree
4543 get_same_sized_vectype (tree scalar_type, tree vector_type ATTRIBUTE_UNUSED)
4545 return get_vectype_for_scalar_type (scalar_type);
4548 /* Function vect_is_simple_use.
4550 Input:
4551 LOOP_VINFO - the vect info of the loop that is being vectorized.
4552 BB_VINFO - the vect info of the basic block that is being vectorized.
4553 OPERAND - operand of a stmt in the loop or bb.
4554 DEF - the defining stmt in case OPERAND is an SSA_NAME.
4556 Returns whether a stmt with OPERAND can be vectorized.
4557 For loops, supportable operands are constants, loop invariants, and operands
4558 that are defined by the current iteration of the loop. Unsupportable
4559 operands are those that are defined by a previous iteration of the loop (as
4560 is the case in reduction/induction computations).
4561 For basic blocks, supportable operands are constants and bb invariants.
4562 For now, operands defined outside the basic block are not supported. */
4564 bool
4565 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo,
4566 bb_vec_info bb_vinfo, gimple *def_stmt,
4567 tree *def, enum vect_def_type *dt)
4569 basic_block bb;
4570 stmt_vec_info stmt_vinfo;
4571 struct loop *loop = NULL;
4573 if (loop_vinfo)
4574 loop = LOOP_VINFO_LOOP (loop_vinfo);
4576 *def_stmt = NULL;
4577 *def = NULL_TREE;
4579 if (vect_print_dump_info (REPORT_DETAILS))
4581 fprintf (vect_dump, "vect_is_simple_use: operand ");
4582 print_generic_expr (vect_dump, operand, TDF_SLIM);
4585 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
4587 *dt = vect_constant_def;
4588 return true;
4591 if (is_gimple_min_invariant (operand))
4593 *def = operand;
4594 *dt = vect_external_def;
4595 return true;
4598 if (TREE_CODE (operand) == PAREN_EXPR)
4600 if (vect_print_dump_info (REPORT_DETAILS))
4601 fprintf (vect_dump, "non-associatable copy.");
4602 operand = TREE_OPERAND (operand, 0);
4605 if (TREE_CODE (operand) != SSA_NAME)
4607 if (vect_print_dump_info (REPORT_DETAILS))
4608 fprintf (vect_dump, "not ssa-name.");
4609 return false;
4612 *def_stmt = SSA_NAME_DEF_STMT (operand);
4613 if (*def_stmt == NULL)
4615 if (vect_print_dump_info (REPORT_DETAILS))
4616 fprintf (vect_dump, "no def_stmt.");
4617 return false;
4620 if (vect_print_dump_info (REPORT_DETAILS))
4622 fprintf (vect_dump, "def_stmt: ");
4623 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
4626 /* Empty stmt is expected only in case of a function argument.
4627 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
4628 if (gimple_nop_p (*def_stmt))
4630 *def = operand;
4631 *dt = vect_external_def;
4632 return true;
4635 bb = gimple_bb (*def_stmt);
4637 if ((loop && !flow_bb_inside_loop_p (loop, bb))
4638 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
4639 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
4640 *dt = vect_external_def;
4641 else
4643 stmt_vinfo = vinfo_for_stmt (*def_stmt);
4644 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
4647 if (*dt == vect_unknown_def_type)
4649 if (vect_print_dump_info (REPORT_DETAILS))
4650 fprintf (vect_dump, "Unsupported pattern.");
4651 return false;
4654 if (vect_print_dump_info (REPORT_DETAILS))
4655 fprintf (vect_dump, "type of def: %d.",*dt);
4657 switch (gimple_code (*def_stmt))
4659 case GIMPLE_PHI:
4660 *def = gimple_phi_result (*def_stmt);
4661 break;
4663 case GIMPLE_ASSIGN:
4664 *def = gimple_assign_lhs (*def_stmt);
4665 break;
4667 case GIMPLE_CALL:
4668 *def = gimple_call_lhs (*def_stmt);
4669 if (*def != NULL)
4670 break;
4671 /* FALLTHRU */
4672 default:
4673 if (vect_print_dump_info (REPORT_DETAILS))
4674 fprintf (vect_dump, "unsupported defining stmt: ");
4675 return false;
4678 return true;
4681 /* Function vect_is_simple_use_1.
4683 Same as vect_is_simple_use_1 but also determines the vector operand
4684 type of OPERAND and stores it to *VECTYPE. If the definition of
4685 OPERAND is vect_uninitialized_def, vect_constant_def or
4686 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
4687 is responsible to compute the best suited vector type for the
4688 scalar operand. */
4690 bool
4691 vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
4692 bb_vec_info bb_vinfo, gimple *def_stmt,
4693 tree *def, enum vect_def_type *dt, tree *vectype)
4695 if (!vect_is_simple_use (operand, loop_vinfo, bb_vinfo, def_stmt, def, dt))
4696 return false;
4698 /* Now get a vector type if the def is internal, otherwise supply
4699 NULL_TREE and leave it up to the caller to figure out a proper
4700 type for the use stmt. */
4701 if (*dt == vect_internal_def
4702 || *dt == vect_induction_def
4703 || *dt == vect_reduction_def
4704 || *dt == vect_double_reduction_def
4705 || *dt == vect_nested_cycle)
4707 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
4708 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
4709 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
4710 *vectype = STMT_VINFO_VECTYPE (stmt_info);
4711 gcc_assert (*vectype != NULL_TREE);
4713 else if (*dt == vect_uninitialized_def
4714 || *dt == vect_constant_def
4715 || *dt == vect_external_def)
4716 *vectype = NULL_TREE;
4717 else
4718 gcc_unreachable ();
4720 return true;
4724 /* Function supportable_widening_operation
4726 Check whether an operation represented by the code CODE is a
4727 widening operation that is supported by the target platform in
4728 vector form (i.e., when operating on arguments of type VECTYPE_IN
4729 producing a result of type VECTYPE_OUT).
4731 Widening operations we currently support are NOP (CONVERT), FLOAT
4732 and WIDEN_MULT. This function checks if these operations are supported
4733 by the target platform either directly (via vector tree-codes), or via
4734 target builtins.
4736 Output:
4737 - CODE1 and CODE2 are codes of vector operations to be used when
4738 vectorizing the operation, if available.
4739 - DECL1 and DECL2 are decls of target builtin functions to be used
4740 when vectorizing the operation, if available. In this case,
4741 CODE1 and CODE2 are CALL_EXPR.
4742 - MULTI_STEP_CVT determines the number of required intermediate steps in
4743 case of multi-step conversion (like char->short->int - in that case
4744 MULTI_STEP_CVT will be 1).
4745 - INTERM_TYPES contains the intermediate type required to perform the
4746 widening operation (short in the above example). */
4748 bool
4749 supportable_widening_operation (enum tree_code code, gimple stmt,
4750 tree vectype_out, tree vectype_in,
4751 tree *decl1, tree *decl2,
4752 enum tree_code *code1, enum tree_code *code2,
4753 int *multi_step_cvt,
4754 VEC (tree, heap) **interm_types)
4756 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4757 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4758 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
4759 bool ordered_p;
4760 enum machine_mode vec_mode;
4761 enum insn_code icode1, icode2;
4762 optab optab1, optab2;
4763 tree vectype = vectype_in;
4764 tree wide_vectype = vectype_out;
4765 enum tree_code c1, c2;
4767 /* The result of a vectorized widening operation usually requires two vectors
4768 (because the widened results do not fit int one vector). The generated
4769 vector results would normally be expected to be generated in the same
4770 order as in the original scalar computation, i.e. if 8 results are
4771 generated in each vector iteration, they are to be organized as follows:
4772 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
4774 However, in the special case that the result of the widening operation is
4775 used in a reduction computation only, the order doesn't matter (because
4776 when vectorizing a reduction we change the order of the computation).
4777 Some targets can take advantage of this and generate more efficient code.
4778 For example, targets like Altivec, that support widen_mult using a sequence
4779 of {mult_even,mult_odd} generate the following vectors:
4780 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
4782 When vectorizing outer-loops, we execute the inner-loop sequentially
4783 (each vectorized inner-loop iteration contributes to VF outer-loop
4784 iterations in parallel). We therefore don't allow to change the order
4785 of the computation in the inner-loop during outer-loop vectorization. */
4787 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
4788 && !nested_in_vect_loop_p (vect_loop, stmt))
4789 ordered_p = false;
4790 else
4791 ordered_p = true;
4793 if (!ordered_p
4794 && code == WIDEN_MULT_EXPR
4795 && targetm.vectorize.builtin_mul_widen_even
4796 && targetm.vectorize.builtin_mul_widen_even (vectype)
4797 && targetm.vectorize.builtin_mul_widen_odd
4798 && targetm.vectorize.builtin_mul_widen_odd (vectype))
4800 if (vect_print_dump_info (REPORT_DETAILS))
4801 fprintf (vect_dump, "Unordered widening operation detected.");
4803 *code1 = *code2 = CALL_EXPR;
4804 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
4805 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
4806 return true;
4809 switch (code)
4811 case WIDEN_MULT_EXPR:
4812 if (BYTES_BIG_ENDIAN)
4814 c1 = VEC_WIDEN_MULT_HI_EXPR;
4815 c2 = VEC_WIDEN_MULT_LO_EXPR;
4817 else
4819 c2 = VEC_WIDEN_MULT_HI_EXPR;
4820 c1 = VEC_WIDEN_MULT_LO_EXPR;
4822 break;
4824 CASE_CONVERT:
4825 if (BYTES_BIG_ENDIAN)
4827 c1 = VEC_UNPACK_HI_EXPR;
4828 c2 = VEC_UNPACK_LO_EXPR;
4830 else
4832 c2 = VEC_UNPACK_HI_EXPR;
4833 c1 = VEC_UNPACK_LO_EXPR;
4835 break;
4837 case FLOAT_EXPR:
4838 if (BYTES_BIG_ENDIAN)
4840 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
4841 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
4843 else
4845 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
4846 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
4848 break;
4850 case FIX_TRUNC_EXPR:
4851 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
4852 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
4853 computing the operation. */
4854 return false;
4856 default:
4857 gcc_unreachable ();
4860 if (code == FIX_TRUNC_EXPR)
4862 /* The signedness is determined from output operand. */
4863 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
4864 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
4866 else
4868 optab1 = optab_for_tree_code (c1, vectype, optab_default);
4869 optab2 = optab_for_tree_code (c2, vectype, optab_default);
4872 if (!optab1 || !optab2)
4873 return false;
4875 vec_mode = TYPE_MODE (vectype);
4876 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
4877 || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
4878 == CODE_FOR_nothing)
4879 return false;
4881 /* Check if it's a multi-step conversion that can be done using intermediate
4882 types. */
4883 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
4884 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
4886 int i;
4887 tree prev_type = vectype, intermediate_type;
4888 enum machine_mode intermediate_mode, prev_mode = vec_mode;
4889 optab optab3, optab4;
4891 if (!CONVERT_EXPR_CODE_P (code))
4892 return false;
4894 *code1 = c1;
4895 *code2 = c2;
4897 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4898 intermediate steps in promotion sequence. We try MAX_INTERM_CVT_STEPS
4899 to get to NARROW_VECTYPE, and fail if we do not. */
4900 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
4901 for (i = 0; i < 3; i++)
4903 intermediate_mode = insn_data[icode1].operand[0].mode;
4904 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4905 TYPE_UNSIGNED (prev_type));
4906 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
4907 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
4909 if (!optab3 || !optab4
4910 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
4911 == CODE_FOR_nothing
4912 || insn_data[icode1].operand[0].mode != intermediate_mode
4913 || (icode2 = optab2->handlers[(int) prev_mode].insn_code)
4914 == CODE_FOR_nothing
4915 || insn_data[icode2].operand[0].mode != intermediate_mode
4916 || (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
4917 == CODE_FOR_nothing
4918 || (icode2 = optab4->handlers[(int) intermediate_mode].insn_code)
4919 == CODE_FOR_nothing)
4920 return false;
4922 VEC_quick_push (tree, *interm_types, intermediate_type);
4923 (*multi_step_cvt)++;
4925 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
4926 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
4927 return true;
4929 prev_type = intermediate_type;
4930 prev_mode = intermediate_mode;
4933 return false;
4936 *code1 = c1;
4937 *code2 = c2;
4938 return true;
4942 /* Function supportable_narrowing_operation
4944 Check whether an operation represented by the code CODE is a
4945 narrowing operation that is supported by the target platform in
4946 vector form (i.e., when operating on arguments of type VECTYPE_IN
4947 and producing a result of type VECTYPE_OUT).
4949 Narrowing operations we currently support are NOP (CONVERT) and
4950 FIX_TRUNC. This function checks if these operations are supported by
4951 the target platform directly via vector tree-codes.
4953 Output:
4954 - CODE1 is the code of a vector operation to be used when
4955 vectorizing the operation, if available.
4956 - MULTI_STEP_CVT determines the number of required intermediate steps in
4957 case of multi-step conversion (like int->short->char - in that case
4958 MULTI_STEP_CVT will be 1).
4959 - INTERM_TYPES contains the intermediate type required to perform the
4960 narrowing operation (short in the above example). */
4962 bool
4963 supportable_narrowing_operation (enum tree_code code,
4964 tree vectype_out, tree vectype_in,
4965 enum tree_code *code1, int *multi_step_cvt,
4966 VEC (tree, heap) **interm_types)
4968 enum machine_mode vec_mode;
4969 enum insn_code icode1;
4970 optab optab1, interm_optab;
4971 tree vectype = vectype_in;
4972 tree narrow_vectype = vectype_out;
4973 enum tree_code c1;
4974 tree intermediate_type, prev_type;
4975 int i;
4977 switch (code)
4979 CASE_CONVERT:
4980 c1 = VEC_PACK_TRUNC_EXPR;
4981 break;
4983 case FIX_TRUNC_EXPR:
4984 c1 = VEC_PACK_FIX_TRUNC_EXPR;
4985 break;
4987 case FLOAT_EXPR:
4988 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
4989 tree code and optabs used for computing the operation. */
4990 return false;
4992 default:
4993 gcc_unreachable ();
4996 if (code == FIX_TRUNC_EXPR)
4997 /* The signedness is determined from output operand. */
4998 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
4999 else
5000 optab1 = optab_for_tree_code (c1, vectype, optab_default);
5002 if (!optab1)
5003 return false;
5005 vec_mode = TYPE_MODE (vectype);
5006 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code)
5007 == CODE_FOR_nothing)
5008 return false;
5010 /* Check if it's a multi-step conversion that can be done using intermediate
5011 types. */
5012 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
5014 enum machine_mode intermediate_mode, prev_mode = vec_mode;
5016 *code1 = c1;
5017 prev_type = vectype;
5018 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
5019 intermediate steps in promotion sequence. We try MAX_INTERM_CVT_STEPS
5020 to get to NARROW_VECTYPE, and fail if we do not. */
5021 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
5022 for (i = 0; i < 3; i++)
5024 intermediate_mode = insn_data[icode1].operand[0].mode;
5025 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
5026 TYPE_UNSIGNED (prev_type));
5027 interm_optab = optab_for_tree_code (c1, intermediate_type,
5028 optab_default);
5029 if (!interm_optab
5030 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
5031 == CODE_FOR_nothing
5032 || insn_data[icode1].operand[0].mode != intermediate_mode
5033 || (icode1
5034 = interm_optab->handlers[(int) intermediate_mode].insn_code)
5035 == CODE_FOR_nothing)
5036 return false;
5038 VEC_quick_push (tree, *interm_types, intermediate_type);
5039 (*multi_step_cvt)++;
5041 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
5042 return true;
5044 prev_type = intermediate_type;
5045 prev_mode = intermediate_mode;
5048 return false;
5051 *code1 = c1;
5052 return true;