gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
3 Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "cfglayout.h"
36 #include "expr.h"
37 #include "recog.h"
38 #include "optabs.h"
39 #include "toplev.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
44 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
46 /* Function vect_mark_relevant.
48 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
50 static void
51 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
52 enum vect_relevant relevant, bool live_p)
54 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
55 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
56 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
58 if (vect_print_dump_info (REPORT_DETAILS))
59 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
61 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
63 gimple pattern_stmt;
65 /* This is the last stmt in a sequence that was detected as a
66 pattern that can potentially be vectorized. Don't mark the stmt
67 as relevant/live because it's not going to be vectorized.
68 Instead mark the pattern-stmt that replaces it. */
70 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
72 if (vect_print_dump_info (REPORT_DETAILS))
73 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
74 stmt_info = vinfo_for_stmt (pattern_stmt);
75 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
76 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
77 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
78 stmt = pattern_stmt;
81 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
82 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
83 STMT_VINFO_RELEVANT (stmt_info) = relevant;
85 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
86 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
88 if (vect_print_dump_info (REPORT_DETAILS))
89 fprintf (vect_dump, "already marked relevant/live.");
90 return;
93 VEC_safe_push (gimple, heap, *worklist, stmt);
97 /* Function vect_stmt_relevant_p.
99 Return true if STMT in loop that is represented by LOOP_VINFO is
100 "relevant for vectorization".
102 A stmt is considered "relevant for vectorization" if:
103 - it has uses outside the loop.
104 - it has vdefs (it alters memory).
105 - it is a control stmt in the loop (except for the exit condition).
107 CHECKME: what other side effects would the vectorizer allow? */
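/* For example (illustrative), in

     for (i = 0; i < N; i++)
       {
         a[i] = b[i] + c[i];     <-- relevant: has a vdef (alters memory)
         t = a[i] * 2;           <-- live only if t is used after the loop
       }

   the store is always relevant, while the def of t is marked live (via
   *LIVE_P) only if it has a use outside the loop.  */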
109 static bool
110 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
111 enum vect_relevant *relevant, bool *live_p)
113 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
114 ssa_op_iter op_iter;
115 imm_use_iterator imm_iter;
116 use_operand_p use_p;
117 def_operand_p def_p;
119 *relevant = vect_unused_in_loop;
120 *live_p = false;
122 /* cond stmt other than loop exit cond. */
123 if (is_ctrl_stmt (stmt)
124 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
125 *relevant = vect_used_in_loop;
127 /* changing memory. */
128 if (gimple_code (stmt) != GIMPLE_PHI)
129 if (gimple_vdef (stmt))
131 if (vect_print_dump_info (REPORT_DETAILS))
132 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
133 *relevant = vect_used_in_loop;
136 /* uses outside the loop. */
137 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
139 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
141 basic_block bb = gimple_bb (USE_STMT (use_p));
142 if (!flow_bb_inside_loop_p (loop, bb))
144 if (vect_print_dump_info (REPORT_DETAILS))
145 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
147 /* We expect all such uses to be in the loop exit phis
148 (because of loop-closed SSA form).  */
149 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
150 gcc_assert (bb == single_exit (loop)->dest);
152 *live_p = true;
157 return (*live_p || *relevant);
161 /* Function exist_non_indexing_operands_for_use_p
163 USE is one of the uses attached to STMT. Check if USE is
164 used in STMT for anything other than indexing an array. */
166 static bool
167 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
169 tree operand;
170 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
172 /* USE corresponds to some operand in STMT. If there is no data
173 reference in STMT, then any operand that corresponds to USE
174 is not indexing an array. */
175 if (!STMT_VINFO_DATA_REF (stmt_info))
176 return true;
178 /* STMT has a data_ref. FORNOW this means that it is of one of
179 the following forms:
180 -1- ARRAY_REF = var
181 -2- var = ARRAY_REF
182 (This should have been verified in analyze_data_refs).
184 'var' in the second case corresponds to a def, not a use,
185 so USE cannot correspond to any operands that are not used
186 for array indexing.
188 Therefore, all we need to check is if STMT falls into the
189 first case, and whether var corresponds to USE. */
191 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
192 return false;
194 if (!gimple_assign_copy_p (stmt))
195 return false;
196 operand = gimple_assign_rhs1 (stmt);
198 if (TREE_CODE (operand) != SSA_NAME)
199 return false;
201 if (operand == use)
202 return true;
204 return false;
209 /* Function process_use.
211 Inputs:
212 - a USE in STMT in a loop represented by LOOP_VINFO
213 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
214 that defined USE. This is done by calling mark_relevant and passing it
215 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
217 Outputs:
218 Generally, LIVE_P and RELEVANT are used to define the liveness and
219 relevance info of the DEF_STMT of this USE:
220 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
221 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
222 Exceptions:
223 - case 1: If USE is used only for address computations (e.g. array indexing),
224 which does not need to be directly vectorized, then the liveness/relevance
225 of the respective DEF_STMT is left unchanged.
226 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
227 skip DEF_STMT because it has already been processed.
228 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
229 be modified accordingly.
231 Return true if everything is as expected. Return false otherwise. */
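/* For example (illustrative): for the scalar stmt "a[i] = x" the use of 'i'
   feeds only the address computation, so case 1 applies and the stmt that
   defines 'i' is not marked relevant/live through this use.  */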
233 static bool
234 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
235 enum vect_relevant relevant, VEC(gimple,heap) **worklist)
237 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
238 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
239 stmt_vec_info dstmt_vinfo;
240 basic_block bb, def_bb;
241 tree def;
242 gimple def_stmt;
243 enum vect_def_type dt;
245 /* case 1: we are only interested in uses that need to be vectorized. Uses
246 that are used for address computation are not considered relevant. */
247 if (!exist_non_indexing_operands_for_use_p (use, stmt))
248 return true;
250 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
252 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
253 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
254 return false;
257 if (!def_stmt || gimple_nop_p (def_stmt))
258 return true;
260 def_bb = gimple_bb (def_stmt);
261 if (!flow_bb_inside_loop_p (loop, def_bb))
263 if (vect_print_dump_info (REPORT_DETAILS))
264 fprintf (vect_dump, "def_stmt is out of loop.");
265 return true;
268 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
269 DEF_STMT must have already been processed, because this should be the
270 only way that STMT, which is a reduction-phi, was put in the worklist,
271 as there should be no other uses for DEF_STMT in the loop. So we just
272 check that everything is as expected, and we are done. */
273 dstmt_vinfo = vinfo_for_stmt (def_stmt);
274 bb = gimple_bb (stmt);
275 if (gimple_code (stmt) == GIMPLE_PHI
276 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
277 && gimple_code (def_stmt) != GIMPLE_PHI
278 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
279 && bb->loop_father == def_bb->loop_father)
281 if (vect_print_dump_info (REPORT_DETAILS))
282 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
283 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
284 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
285 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
286 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
287 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
288 return true;
291 /* case 3a: outer-loop stmt defining an inner-loop stmt:
292 outer-loop-header-bb:
293 d = def_stmt
294 inner-loop:
295 stmt # use (d)
296 outer-loop-tail-bb:
297 ... */
298 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
300 if (vect_print_dump_info (REPORT_DETAILS))
301 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
302 switch (relevant)
304 case vect_unused_in_loop:
305 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
306 vect_used_by_reduction : vect_unused_in_loop;
307 break;
308 case vect_used_in_outer_by_reduction:
309 relevant = vect_used_by_reduction;
310 break;
311 case vect_used_in_outer:
312 relevant = vect_used_in_loop;
313 break;
314 case vect_used_by_reduction:
315 case vect_used_in_loop:
316 break;
318 default:
319 gcc_unreachable ();
323 /* case 3b: inner-loop stmt defining an outer-loop stmt:
324 outer-loop-header-bb:
326 inner-loop:
327 d = def_stmt
328 outer-loop-tail-bb:
329 stmt # use (d) */
330 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
332 if (vect_print_dump_info (REPORT_DETAILS))
333 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
334 switch (relevant)
336 case vect_unused_in_loop:
337 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
338 vect_used_in_outer_by_reduction : vect_unused_in_loop;
339 break;
341 case vect_used_in_outer_by_reduction:
342 case vect_used_in_outer:
343 break;
345 case vect_used_by_reduction:
346 relevant = vect_used_in_outer_by_reduction;
347 break;
349 case vect_used_in_loop:
350 relevant = vect_used_in_outer;
351 break;
353 default:
354 gcc_unreachable ();
358 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
359 return true;
363 /* Function vect_mark_stmts_to_be_vectorized.
365 Not all stmts in the loop need to be vectorized. For example:
367 for i...
368 for j...
369 1. T0 = i + j
370 2. T1 = a[T0]
372 3. j = j + 1
374 Stmt 1 and 3 do not need to be vectorized, because loop control and
375 addressing of vectorized data-refs are handled differently.
377 This pass detects such stmts. */
379 bool
380 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
382 VEC(gimple,heap) *worklist;
383 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
384 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
385 unsigned int nbbs = loop->num_nodes;
386 gimple_stmt_iterator si;
387 gimple stmt;
388 unsigned int i;
389 stmt_vec_info stmt_vinfo;
390 basic_block bb;
391 gimple phi;
392 bool live_p;
393 enum vect_relevant relevant;
395 if (vect_print_dump_info (REPORT_DETAILS))
396 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
398 worklist = VEC_alloc (gimple, heap, 64);
400 /* 1. Init worklist. */
401 for (i = 0; i < nbbs; i++)
403 bb = bbs[i];
404 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
406 phi = gsi_stmt (si);
407 if (vect_print_dump_info (REPORT_DETAILS))
409 fprintf (vect_dump, "init: phi relevant? ");
410 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
413 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
414 vect_mark_relevant (&worklist, phi, relevant, live_p);
416 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
418 stmt = gsi_stmt (si);
419 if (vect_print_dump_info (REPORT_DETAILS))
421 fprintf (vect_dump, "init: stmt relevant? ");
422 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
425 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
426 vect_mark_relevant (&worklist, stmt, relevant, live_p);
430 /* 2. Process_worklist */
431 while (VEC_length (gimple, worklist) > 0)
433 use_operand_p use_p;
434 ssa_op_iter iter;
436 stmt = VEC_pop (gimple, worklist);
437 if (vect_print_dump_info (REPORT_DETAILS))
439 fprintf (vect_dump, "worklist: examine stmt: ");
440 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
443 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
444 (DEF_STMT) as relevant/irrelevant and live/dead according to the
445 liveness and relevance properties of STMT. */
446 stmt_vinfo = vinfo_for_stmt (stmt);
447 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
448 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
450 /* Generally, the liveness and relevance properties of STMT are
451 propagated as is to the DEF_STMTs of its USEs:
452 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
453 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
455 One exception is when STMT has been identified as defining a reduction
456 variable; in this case we set the liveness/relevance as follows:
457 live_p = false
458 relevant = vect_used_by_reduction
459 This is because we distinguish between two kinds of relevant stmts -
460 those that are used by a reduction computation, and those that are
461 (also) used by a regular computation. This allows us later on to
462 identify stmts that are used solely by a reduction, and therefore the
463 order of the results that they produce does not have to be kept.
465 Reduction phis are expected to be used by a reduction stmt, or by
466 a stmt in an outer loop; other reduction stmts are expected to be
467 unused in the loop, and possibly used by a stmt in an outer loop.
468 Here are the expected values of "relevant" for reduction phis/stmts:
470 relevance:                           phi     stmt
471 vect_unused_in_loop                          ok
472 vect_used_in_outer_by_reduction      ok      ok
473 vect_used_in_outer                   ok      ok
474 vect_used_by_reduction               ok
475 vect_used_in_loop                                    */
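/* For example (illustrative), in a simple sum reduction

     loop:
       s_1 = PHI <s_0, s_2>
       ...
       s_2 = s_1 + a[i]

   the reduction stmt s_2 reaches this point as vect_unused_in_loop (its
   only uses are the loop phi and the exit phi) and is changed below to
   vect_used_by_reduction with live_p cleared, while the reduction phi s_1
   later arrives as vect_used_by_reduction, which the switch accepts.  */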
477 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
479 enum vect_relevant tmp_relevant = relevant;
480 switch (tmp_relevant)
482 case vect_unused_in_loop:
483 gcc_assert (gimple_code (stmt) != GIMPLE_PHI);
484 relevant = vect_used_by_reduction;
485 break;
487 case vect_used_in_outer_by_reduction:
488 case vect_used_in_outer:
489 gcc_assert (gimple_code (stmt) != GIMPLE_ASSIGN
490 || (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR
491 && (gimple_assign_rhs_code (stmt)
492 != DOT_PROD_EXPR)));
493 break;
495 case vect_used_by_reduction:
496 if (gimple_code (stmt) == GIMPLE_PHI)
497 break;
498 /* fall through */
499 case vect_used_in_loop:
500 default:
501 if (vect_print_dump_info (REPORT_DETAILS))
502 fprintf (vect_dump, "unsupported use of reduction.");
503 VEC_free (gimple, heap, worklist);
504 return false;
506 live_p = false;
509 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
511 tree op = USE_FROM_PTR (use_p);
512 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
514 VEC_free (gimple, heap, worklist);
515 return false;
518 } /* while worklist */
520 VEC_free (gimple, heap, worklist);
521 return true;
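/* Function cost_for_stmt.

   Return the cost of STMT when executed as a scalar stmt, based on its
   STMT_VINFO_TYPE classification.  */

int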
526 cost_for_stmt (gimple stmt)
528 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
530 switch (STMT_VINFO_TYPE (stmt_info))
532 case load_vec_info_type:
533 return TARG_SCALAR_LOAD_COST;
534 case store_vec_info_type:
535 return TARG_SCALAR_STORE_COST;
536 case op_vec_info_type:
537 case condition_vec_info_type:
538 case assignment_vec_info_type:
539 case reduc_vec_info_type:
540 case induc_vec_info_type:
541 case type_promotion_vec_info_type:
542 case type_demotion_vec_info_type:
543 case type_conversion_vec_info_type:
544 case call_vec_info_type:
545 return TARG_SCALAR_STMT_COST;
546 case undef_vec_info_type:
547 default:
548 gcc_unreachable ();
552 /* Function vect_model_simple_cost.
554 Models cost for simple operations, i.e. those that only emit ncopies of a
555 single op. Right now, this does not account for multiple insns that could
556 be generated for the single vector op. We will handle that shortly. */
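/* For example (illustrative): with ncopies == 2 and one constant operand,
   the code below computes inside_cost = 2 * TARG_VEC_STMT_COST and
   outside_cost = TARG_SCALAR_TO_VEC_COST (for building the vector constant
   outside the loop).  */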
558 void
559 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
560 enum vect_def_type *dt, slp_tree slp_node)
562 int i;
563 int inside_cost = 0, outside_cost = 0;
565 /* The SLP costs were already calculated during SLP tree build. */
566 if (PURE_SLP_STMT (stmt_info))
567 return;
569 inside_cost = ncopies * TARG_VEC_STMT_COST;
571 /* FORNOW: Assuming maximum 2 args per stmt. */
572 for (i = 0; i < 2; i++)
574 if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def)
575 outside_cost += TARG_SCALAR_TO_VEC_COST;
578 if (vect_print_dump_info (REPORT_COST))
579 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
580 "outside_cost = %d .", inside_cost, outside_cost);
582 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
583 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
584 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
588 /* Function vect_cost_strided_group_size
590 For strided load or store, return the group_size only if it is the first
591 load or store of a group, else return 1. This ensures that group size is
592 only returned once per group. */
594 static int
595 vect_cost_strided_group_size (stmt_vec_info stmt_info)
597 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
599 if (first_stmt == STMT_VINFO_STMT (stmt_info))
600 return DR_GROUP_SIZE (stmt_info);
602 return 1;
606 /* Function vect_model_store_cost
608 Models cost for stores. In the case of strided accesses, one access
609 has the overhead of the strided access attributed to it. */
611 void
612 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
613 enum vect_def_type dt, slp_tree slp_node)
615 int group_size;
616 int inside_cost = 0, outside_cost = 0;
618 /* The SLP costs were already calculated during SLP tree build. */
619 if (PURE_SLP_STMT (stmt_info))
620 return;
622 if (dt == vect_constant_def || dt == vect_invariant_def)
623 outside_cost = TARG_SCALAR_TO_VEC_COST;
625 /* Strided access? */
626 if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
627 group_size = vect_cost_strided_group_size (stmt_info);
628 /* Not a strided access. */
629 else
630 group_size = 1;
632 /* Is this an access in a group of stores, which provide strided access?
633 If so, add in the cost of the permutes. */
634 if (group_size > 1)
636 /* Uses a high and low interleave operation for each needed permute. */
637 inside_cost = ncopies * exact_log2(group_size) * group_size
638 * TARG_VEC_STMT_COST;
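/* E.g. (illustrative) for group_size == 4: exact_log2 (4) * 4 = 8
   interleave stmts per copy, i.e. each of the two interleaving stages
   emits group_size interleave_high/low stmts.  */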
640 if (vect_print_dump_info (REPORT_COST))
641 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
642 group_size);
646 /* Costs of the stores. */
647 inside_cost += ncopies * TARG_VEC_STORE_COST;
649 if (vect_print_dump_info (REPORT_COST))
650 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
651 "outside_cost = %d .", inside_cost, outside_cost);
653 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
654 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
655 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
659 /* Function vect_model_load_cost
661 Models cost for loads. In the case of strided accesses, the last access
662 has the overhead of the strided access attributed to it. Since unaligned
663 accesses are supported for loads, we also account for the costs of the
664 access scheme chosen. */
666 void
667 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
670 int group_size;
671 int alignment_support_scheme;
672 gimple first_stmt;
673 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
674 int inside_cost = 0, outside_cost = 0;
676 /* The SLP costs were already calculated during SLP tree build. */
677 if (PURE_SLP_STMT (stmt_info))
678 return;
680 /* Strided accesses? */
681 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
682 if (first_stmt && !slp_node)
684 group_size = vect_cost_strided_group_size (stmt_info);
685 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
687 /* Not a strided access. */
688 else
690 group_size = 1;
691 first_dr = dr;
694 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
696 /* Is this an access in a group of loads providing strided access?
697 If so, add in the cost of the permutes. */
698 if (group_size > 1)
700 /* Uses even and odd extract operations for each needed permute. */
701 inside_cost = ncopies * exact_log2(group_size) * group_size
702 * TARG_VEC_STMT_COST;
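/* As in vect_model_store_cost: e.g. (illustrative) group_size == 4 costs
   exact_log2 (4) * 4 = 8 extract_even/odd stmts per copy.  */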
704 if (vect_print_dump_info (REPORT_COST))
705 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
706 group_size);
710 /* The loads themselves. */
711 switch (alignment_support_scheme)
713 case dr_aligned:
715 inside_cost += ncopies * TARG_VEC_LOAD_COST;
717 if (vect_print_dump_info (REPORT_COST))
718 fprintf (vect_dump, "vect_model_load_cost: aligned.");
720 break;
722 case dr_unaligned_supported:
724 /* Here, we assign an additional cost for the unaligned load. */
725 inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
727 if (vect_print_dump_info (REPORT_COST))
728 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
729 "hardware.");
731 break;
733 case dr_explicit_realign:
735 inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
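/* That is (per copy): two vector loads for the two aligned halves that
   straddle the misaligned address, plus one stmt for the realignment
   itself.  */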
737 /* FIXME: If the misalignment remains fixed across the iterations of
738 the containing loop, the following cost should be added to the
739 outside costs. */
740 if (targetm.vectorize.builtin_mask_for_load)
741 inside_cost += TARG_VEC_STMT_COST;
743 break;
745 case dr_explicit_realign_optimized:
747 if (vect_print_dump_info (REPORT_COST))
748 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
749 "pipelined.");
751 /* Unaligned software pipeline has a load of an address, an initial
752 load, and possibly a mask operation to "prime" the loop. However,
753 if this is an access in a group of loads, which provide strided
754 access, then the above cost should only be considered for one
755 access in the group. Inside the loop, there is a load op
756 and a realignment op. */
758 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
760 outside_cost = 2*TARG_VEC_STMT_COST;
761 if (targetm.vectorize.builtin_mask_for_load)
762 outside_cost += TARG_VEC_STMT_COST;
765 inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
767 break;
770 default:
771 gcc_unreachable ();
774 if (vect_print_dump_info (REPORT_COST))
775 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
776 "outside_cost = %d .", inside_cost, outside_cost);
778 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
779 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
780 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
784 /* Function vect_init_vector.
786 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
787 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
788 is not NULL. Otherwise, place the initialization at the loop preheader.
789 Return the DEF of INIT_STMT.
790 It will be used in the vectorization of STMT. */
792 tree
793 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
794 gimple_stmt_iterator *gsi)
796 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
797 tree new_var;
798 gimple init_stmt;
799 tree vec_oprnd;
800 edge pe;
801 tree new_temp;
802 basic_block new_bb;
804 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
805 add_referenced_var (new_var);
806 init_stmt = gimple_build_assign (new_var, vector_var);
807 new_temp = make_ssa_name (new_var, init_stmt);
808 gimple_assign_set_lhs (init_stmt, new_temp);
810 if (gsi)
811 vect_finish_stmt_generation (stmt, init_stmt, gsi);
812 else
814 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
815 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
817 if (nested_in_vect_loop_p (loop, stmt))
818 loop = loop->inner;
819 pe = loop_preheader_edge (loop);
820 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
821 gcc_assert (!new_bb);
824 if (vect_print_dump_info (REPORT_DETAILS))
826 fprintf (vect_dump, "created new init_stmt: ");
827 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
830 vec_oprnd = gimple_assign_lhs (init_stmt);
831 return vec_oprnd;
834 /* Function vect_get_vec_def_for_operand.
836 OP is an operand in STMT. This function returns a (vector) def that will be
837 used in the vectorized stmt for STMT.
839 In the case that OP is an SSA_NAME which is defined in the loop, then
840 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
842 In case OP is an invariant or constant, a new stmt that creates a vector def
843 needs to be introduced. */
845 tree
846 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
848 tree vec_oprnd;
849 gimple vec_stmt;
850 gimple def_stmt;
851 stmt_vec_info def_stmt_info = NULL;
852 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
853 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
854 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
855 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
856 tree vec_inv;
857 tree vec_cst;
858 tree t = NULL_TREE;
859 tree def;
860 int i;
861 enum vect_def_type dt;
862 bool is_simple_use;
863 tree vector_type;
865 if (vect_print_dump_info (REPORT_DETAILS))
867 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
868 print_generic_expr (vect_dump, op, TDF_SLIM);
871 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
872 gcc_assert (is_simple_use);
873 if (vect_print_dump_info (REPORT_DETAILS))
875 if (def)
877 fprintf (vect_dump, "def = ");
878 print_generic_expr (vect_dump, def, TDF_SLIM);
880 if (def_stmt)
882 fprintf (vect_dump, " def_stmt = ");
883 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
887 switch (dt)
889 /* Case 1: operand is a constant. */
890 case vect_constant_def:
892 if (scalar_def)
893 *scalar_def = op;
895 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
896 if (vect_print_dump_info (REPORT_DETAILS))
897 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
899 for (i = nunits - 1; i >= 0; --i)
901 t = tree_cons (NULL_TREE, op, t);
903 vec_cst = build_vector (vectype, t);
904 return vect_init_vector (stmt, vec_cst, vectype, NULL);
907 /* Case 2: operand is defined outside the loop - loop invariant. */
908 case vect_invariant_def:
910 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
911 gcc_assert (vector_type);
912 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
914 if (scalar_def)
915 *scalar_def = def;
917 /* Create 'vec_inv = {inv,inv,..,inv}' */
918 if (vect_print_dump_info (REPORT_DETAILS))
919 fprintf (vect_dump, "Create vector_inv.");
921 for (i = nunits - 1; i >= 0; --i)
923 t = tree_cons (NULL_TREE, def, t);
926 /* FIXME: use build_constructor directly. */
927 vec_inv = build_constructor_from_list (vector_type, t);
928 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
931 /* Case 3: operand is defined inside the loop. */
932 case vect_loop_def:
934 if (scalar_def)
935 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
937 /* Get the def from the vectorized stmt. */
938 def_stmt_info = vinfo_for_stmt (def_stmt);
939 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
940 gcc_assert (vec_stmt);
941 if (gimple_code (vec_stmt) == GIMPLE_PHI)
942 vec_oprnd = PHI_RESULT (vec_stmt);
943 else if (is_gimple_call (vec_stmt))
944 vec_oprnd = gimple_call_lhs (vec_stmt);
945 else
946 vec_oprnd = gimple_assign_lhs (vec_stmt);
947 return vec_oprnd;
950 /* Case 4: operand is defined by a loop header phi - reduction */
951 case vect_reduction_def:
953 struct loop *loop;
955 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
956 loop = (gimple_bb (def_stmt))->loop_father;
958 /* Get the def before the loop */
959 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
960 return get_initial_def_for_reduction (stmt, op, scalar_def);
963 /* Case 5: operand is defined by loop-header phi - induction. */
964 case vect_induction_def:
966 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
968 /* Get the def from the vectorized stmt. */
969 def_stmt_info = vinfo_for_stmt (def_stmt);
970 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
971 gcc_assert (vec_stmt && gimple_code (vec_stmt) == GIMPLE_PHI);
972 vec_oprnd = PHI_RESULT (vec_stmt);
973 return vec_oprnd;
976 default:
977 gcc_unreachable ();
982 /* Function vect_get_vec_def_for_stmt_copy
984 Return a vector-def for an operand. This function is used when the
985 vectorized stmt to be created (by the caller to this function) is a "copy"
986 created in case the vectorized result cannot fit in one vector, and several
987 copies of the vector-stmt are required. In this case the vector-def is
988 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
989 of the stmt that defines VEC_OPRND.
990 DT is the type of the vector def VEC_OPRND.
992 Context:
993 In case the vectorization factor (VF) is bigger than the number
994 of elements that can fit in a vectype (nunits), we have to generate
995 more than one vector stmt to vectorize the scalar stmt. This situation
996 arises when there are multiple data-types operated upon in the loop; the
997 smallest data-type determines the VF, and as a result, when vectorizing
998 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
999 vector stmt (each computing a vector of 'nunits' results, and together
1000 computing 'VF' results in each iteration). This function is called when
1001 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1002 which VF=16 and nunits=4, so the number of copies required is 4):
1004 scalar stmt:          vectorized into:             STMT_VINFO_RELATED_STMT
1006 S1: x = load          VS1.0: vx.0 = memref0        VS1.1
1007                       VS1.1: vx.1 = memref1        VS1.2
1008                       VS1.2: vx.2 = memref2        VS1.3
1009                       VS1.3: vx.3 = memref3
1011 S2: z = x + ...       VSnew.0: vz0 = vx.0 + ...    VSnew.1
1012                       VSnew.1: vz1 = vx.1 + ...    VSnew.2
1013                       VSnew.2: vz2 = vx.2 + ...    VSnew.3
1014                       VSnew.3: vz3 = vx.3 + ...
1016 The vectorization of S1 is explained in vectorizable_load.
1017 The vectorization of S2:
1018 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1019 the function 'vect_get_vec_def_for_operand' is called to
1020 get the relevant vector-def for each operand of S2. For operand x it
1021 returns the vector-def 'vx.0'.
1023 To create the remaining copies of the vector-stmt (VSnew.j), this
1024 function is called to get the relevant vector-def for each operand. It is
1025 obtained from the respective VS1.j stmt, which is recorded in the
1026 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1028 For example, to obtain the vector-def 'vx.1' in order to create the
1029 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1030 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1031 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1032 and return its def ('vx.1').
1033 Overall, to create the above sequence this function will be called 3 times:
1034 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1035 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1036 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1038 tree
1039 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1041 gimple vec_stmt_for_operand;
1042 stmt_vec_info def_stmt_info;
1044 /* Do nothing; can reuse same def. */
1045 if (dt == vect_invariant_def || dt == vect_constant_def )
1046 return vec_oprnd;
1048 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1049 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1050 gcc_assert (def_stmt_info);
1051 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1052 gcc_assert (vec_stmt_for_operand);
1054 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1055 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1056 else
1057 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1058 return vec_oprnd;
1062 /* Get vectorized definitions for the operands to create a copy of an original
1063 stmt. See vect_get_vec_def_for_stmt_copy() for details. */
1065 static void
1066 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1067 VEC(tree,heap) **vec_oprnds0,
1068 VEC(tree,heap) **vec_oprnds1)
1070 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1072 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1073 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1075 if (vec_oprnds1 && *vec_oprnds1)
1077 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1078 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1079 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1084 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */
1086 static void
1087 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1088 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
1089 slp_tree slp_node)
1091 if (slp_node)
1092 vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
1093 else
1095 tree vec_oprnd;
1097 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1098 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1099 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1101 if (op1)
1103 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1104 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1105 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1111 /* Function vect_finish_stmt_generation.
1113 Insert a new stmt. */
1115 void
1116 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1117 gimple_stmt_iterator *gsi)
1119 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1120 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1122 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1124 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1126 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo));
1128 if (vect_print_dump_info (REPORT_DETAILS))
1130 fprintf (vect_dump, "add new stmt: ");
1131 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1134 gimple_set_location (vec_stmt, gimple_location (gsi_stmt (*gsi)));
1137 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1138 a function declaration if the target has a vectorized version
1139 of the function, or NULL_TREE if the function cannot be vectorized. */
1141 tree
1142 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1144 tree fndecl = gimple_call_fndecl (call);
1145 enum built_in_function code;
1147 /* We only handle functions that do not read or clobber memory -- i.e.
1148 const or novops ones. */
1149 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1150 return NULL_TREE;
1152 if (!fndecl
1153 || TREE_CODE (fndecl) != FUNCTION_DECL
1154 || !DECL_BUILT_IN (fndecl))
1155 return NULL_TREE;
1157 code = DECL_FUNCTION_CODE (fndecl);
1158 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
1159 vectype_in);
1162 /* Function vectorizable_call.
1164 Check if STMT performs a function call that can be vectorized.
1165 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1166 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1167 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1169 static bool
1170 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
1172 tree vec_dest;
1173 tree scalar_dest;
1174 tree op, type;
1175 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1176 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1177 tree vectype_out, vectype_in;
1178 int nunits_in;
1179 int nunits_out;
1180 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1181 tree fndecl, new_temp, def, rhs_type, lhs_type;
1182 gimple def_stmt;
1183 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1184 gimple new_stmt;
1185 int ncopies, j;
1186 VEC(tree, heap) *vargs = NULL;
1187 enum { NARROW, NONE, WIDEN } modifier;
1188 size_t i, nargs;
1190 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1191 return false;
1193 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
1194 return false;
1196 /* FORNOW: SLP not supported. */
1197 if (STMT_SLP_TYPE (stmt_info))
1198 return false;
1200 /* Is STMT a vectorizable call? */
1201 if (!is_gimple_call (stmt))
1202 return false;
1204 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1205 return false;
1207 /* Process function arguments. */
1208 rhs_type = NULL_TREE;
1209 nargs = gimple_call_num_args (stmt);
1211 /* Bail out if the function has more than two arguments; we do not
1212 have interesting builtin functions to vectorize with more than two
1213 arguments. Calls with no arguments are not supported either. */
1214 if (nargs == 0 || nargs > 2)
1215 return false;
1217 for (i = 0; i < nargs; i++)
1219 op = gimple_call_arg (stmt, i);
1221 /* We can only handle calls with arguments of the same type. */
1222 if (rhs_type
1223 && rhs_type != TREE_TYPE (op))
1225 if (vect_print_dump_info (REPORT_DETAILS))
1226 fprintf (vect_dump, "argument types differ.");
1227 return false;
1229 rhs_type = TREE_TYPE (op);
1231 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[i]))
1233 if (vect_print_dump_info (REPORT_DETAILS))
1234 fprintf (vect_dump, "use not simple.");
1235 return false;
1239 vectype_in = get_vectype_for_scalar_type (rhs_type);
1240 if (!vectype_in)
1241 return false;
1242 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1244 lhs_type = TREE_TYPE (gimple_call_lhs (stmt));
1245 vectype_out = get_vectype_for_scalar_type (lhs_type);
1246 if (!vectype_out)
1247 return false;
1248 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1250 /* FORNOW */
1251 if (nunits_in == nunits_out / 2)
1252 modifier = NARROW;
1253 else if (nunits_out == nunits_in)
1254 modifier = NONE;
1255 else if (nunits_out == nunits_in / 2)
1256 modifier = WIDEN;
1257 else
1258 return false;
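/* E.g. (illustrative): a call taking V2DF arguments and producing a V4SI
   result has nunits_in == nunits_out / 2, so modifier == NARROW and two
   input vectors are consumed per vector call (see the NARROW case below).  */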
1260 /* For now, we only vectorize functions if a target specific builtin
1261 is available. TODO -- in some cases, it might be profitable to
1262 insert the calls for pieces of the vector, in order to be able
1263 to vectorize other operations in the loop. */
1264 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1265 if (fndecl == NULL_TREE)
1267 if (vect_print_dump_info (REPORT_DETAILS))
1268 fprintf (vect_dump, "function is not vectorizable.");
1270 return false;
1273 gcc_assert (!gimple_vuse (stmt));
1275 if (modifier == NARROW)
1276 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1277 else
1278 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1280 /* Sanity check: make sure that at least one copy of the vectorized stmt
1281 needs to be generated. */
1282 gcc_assert (ncopies >= 1);
1284 if (!vec_stmt) /* transformation not required. */
1286 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1287 if (vect_print_dump_info (REPORT_DETAILS))
1288 fprintf (vect_dump, "=== vectorizable_call ===");
1289 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1290 return true;
1293 /** Transform. **/
1295 if (vect_print_dump_info (REPORT_DETAILS))
1296 fprintf (vect_dump, "transform operation.");
1298 /* Handle def. */
1299 scalar_dest = gimple_call_lhs (stmt);
1300 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1302 prev_stmt_info = NULL;
1303 switch (modifier)
1305 case NONE:
1306 for (j = 0; j < ncopies; ++j)
1308 /* Build argument list for the vectorized call. */
1309 if (j == 0)
1310 vargs = VEC_alloc (tree, heap, nargs);
1311 else
1312 VEC_truncate (tree, vargs, 0);
1314 for (i = 0; i < nargs; i++)
1316 op = gimple_call_arg (stmt, i);
1317 if (j == 0)
1318 vec_oprnd0
1319 = vect_get_vec_def_for_operand (op, stmt, NULL);
1320 else
1321 vec_oprnd0
1322 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1324 VEC_quick_push (tree, vargs, vec_oprnd0);
1327 new_stmt = gimple_build_call_vec (fndecl, vargs);
1328 new_temp = make_ssa_name (vec_dest, new_stmt);
1329 gimple_call_set_lhs (new_stmt, new_temp);
1331 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1333 if (j == 0)
1334 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1335 else
1336 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1338 prev_stmt_info = vinfo_for_stmt (new_stmt);
1341 break;
1343 case NARROW:
1344 for (j = 0; j < ncopies; ++j)
1346 /* Build argument list for the vectorized call. */
1347 if (j == 0)
1348 vargs = VEC_alloc (tree, heap, nargs * 2);
1349 else
1350 VEC_truncate (tree, vargs, 0);
1352 for (i = 0; i < nargs; i++)
1354 op = gimple_call_arg (stmt, i);
1355 if (j == 0)
1357 vec_oprnd0
1358 = vect_get_vec_def_for_operand (op, stmt, NULL);
1359 vec_oprnd1
1360 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1362 else
1364 vec_oprnd0
1365 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1366 vec_oprnd1
1367 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1370 VEC_quick_push (tree, vargs, vec_oprnd0);
1371 VEC_quick_push (tree, vargs, vec_oprnd1);
1374 new_stmt = gimple_build_call_vec (fndecl, vargs);
1375 new_temp = make_ssa_name (vec_dest, new_stmt);
1376 gimple_call_set_lhs (new_stmt, new_temp);
1378 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1380 if (j == 0)
1381 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1382 else
1383 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1385 prev_stmt_info = vinfo_for_stmt (new_stmt);
1388 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1390 break;
1392 case WIDEN:
1393 /* No current target implements this case. */
1394 return false;
1397 VEC_free (tree, heap, vargs);
1399 /* Update the exception handling table with the vector stmt if necessary. */
1400 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1401 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1403 /* The call in STMT might prevent it from being removed in dce.
1404 We however cannot remove it here, due to the way the ssa name
1405 it defines is mapped to the new definition. So just replace
1406 rhs of the statement with something harmless. */
1408 type = TREE_TYPE (scalar_dest);
1409 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
1410 fold_convert (type, integer_zero_node));
1411 set_vinfo_for_stmt (new_stmt, stmt_info);
1412 set_vinfo_for_stmt (stmt, NULL);
1413 STMT_VINFO_STMT (stmt_info) = new_stmt;
1414 gsi_replace (gsi, new_stmt, false);
1415 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1417 return true;
1421 /* Function vect_gen_widened_results_half
1423 Create a vector stmt whose code, type, number of arguments, and result
1424 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
1425 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
1426 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1427 needs to be created (DECL is a function-decl of a target-builtin).
1428 STMT is the original scalar stmt that we are vectorizing. */
1430 static gimple
1431 vect_gen_widened_results_half (enum tree_code code,
1432 tree decl,
1433 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1434 tree vec_dest, gimple_stmt_iterator *gsi,
1435 gimple stmt)
1437 gimple new_stmt;
1438 tree new_temp;
1440 /* Generate half of the widened result: */
1441 if (code == CALL_EXPR)
1443 /* Target specific support */
1444 if (op_type == binary_op)
1445 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1446 else
1447 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1448 new_temp = make_ssa_name (vec_dest, new_stmt);
1449 gimple_call_set_lhs (new_stmt, new_temp);
1451 else
1453 /* Generic support */
1454 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1455 if (op_type != binary_op)
1456 vec_oprnd1 = NULL;
1457 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1458 vec_oprnd1);
1459 new_temp = make_ssa_name (vec_dest, new_stmt);
1460 gimple_assign_set_lhs (new_stmt, new_temp);
1462 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1464 return new_stmt;
1468 /* Check if STMT performs a conversion operation, that can be vectorized.
1469 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1470 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1471 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1473 static bool
1474 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
1475 gimple *vec_stmt, slp_tree slp_node)
1477 tree vec_dest;
1478 tree scalar_dest;
1479 tree op0;
1480 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1481 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1482 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1483 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
1484 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
1485 tree new_temp;
1486 tree def;
1487 gimple def_stmt;
1488 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1489 gimple new_stmt = NULL;
1490 stmt_vec_info prev_stmt_info;
1491 int nunits_in;
1492 int nunits_out;
1493 tree vectype_out, vectype_in;
1494 int ncopies, j;
1495 tree expr;
1496 tree rhs_type, lhs_type;
1497 tree builtin_decl;
1498 enum { NARROW, NONE, WIDEN } modifier;
1499 int i;
1500 VEC(tree,heap) *vec_oprnds0 = NULL;
1501 tree vop0;
1502 tree integral_type;
1503 VEC(tree,heap) *dummy = NULL;
1504 int dummy_int;
1506 /* Is STMT a vectorizable conversion? */
1508 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1509 return false;
1511 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
1512 return false;
1514 if (!is_gimple_assign (stmt))
1515 return false;
1517 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1518 return false;
1520 code = gimple_assign_rhs_code (stmt);
1521 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
1522 return false;
1524 /* Check types of lhs and rhs. */
1525 op0 = gimple_assign_rhs1 (stmt);
1526 rhs_type = TREE_TYPE (op0);
1527 vectype_in = get_vectype_for_scalar_type (rhs_type);
1528 if (!vectype_in)
1529 return false;
1530 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1532 scalar_dest = gimple_assign_lhs (stmt);
1533 lhs_type = TREE_TYPE (scalar_dest);
1534 vectype_out = get_vectype_for_scalar_type (lhs_type);
1535 if (!vectype_out)
1536 return false;
1537 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1539 /* FORNOW */
1540 if (nunits_in == nunits_out / 2)
1541 modifier = NARROW;
1542 else if (nunits_out == nunits_in)
1543 modifier = NONE;
1544 else if (nunits_out == nunits_in / 2)
1545 modifier = WIDEN;
1546 else
1547 return false;
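/* E.g. (illustrative): FLOAT_EXPR from a V4SI operand to a V2DF result
   gives nunits_out == nunits_in / 2, i.e. modifier == WIDEN; each input
   vector then yields two output vectors via the two "widened result
   halves" generated in the WIDEN case below.  */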
1549 if (modifier == NONE)
1550 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
1552 /* Bail out if the types are either both integral or both non-integral. */
1553 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
1554 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
1555 return false;
1557 integral_type = INTEGRAL_TYPE_P (rhs_type) ? vectype_in : vectype_out;
1559 if (modifier == NARROW)
1560 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1561 else
1562 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1564 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
1565 this, so we can safely override NCOPIES with 1 here. */
1566 if (slp_node)
1567 ncopies = 1;
1569 /* Sanity check: make sure that at least one copy of the vectorized stmt
1570 needs to be generated. */
1571 gcc_assert (ncopies >= 1);
1573 /* Check the operands of the operation. */
1574 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
1576 if (vect_print_dump_info (REPORT_DETAILS))
1577 fprintf (vect_dump, "use not simple.");
1578 return false;
1581 /* Supportable by target? */
1582 if ((modifier == NONE
1583 && !targetm.vectorize.builtin_conversion (code, integral_type))
1584 || (modifier == WIDEN
1585 && !supportable_widening_operation (code, stmt, vectype_in,
1586 &decl1, &decl2,
1587 &code1, &code2,
1588 &dummy_int, &dummy))
1589 || (modifier == NARROW
1590 && !supportable_narrowing_operation (code, stmt, vectype_in,
1591 &code1, &dummy_int, &dummy)))
1593 if (vect_print_dump_info (REPORT_DETAILS))
1594 fprintf (vect_dump, "conversion not supported by target.");
1595 return false;
1598 if (modifier != NONE)
1600 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1601 /* FORNOW: SLP not supported. */
1602 if (STMT_SLP_TYPE (stmt_info))
1603 return false;
1606 if (!vec_stmt) /* transformation not required. */
1608 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
1609 return true;
1612 /** Transform. **/
1613 if (vect_print_dump_info (REPORT_DETAILS))
1614 fprintf (vect_dump, "transform conversion.");
1616 /* Handle def. */
1617 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1619 if (modifier == NONE && !slp_node)
1620 vec_oprnds0 = VEC_alloc (tree, heap, 1);
1622 prev_stmt_info = NULL;
1623 switch (modifier)
1625 case NONE:
1626 for (j = 0; j < ncopies; j++)
1628 if (j == 0)
1629 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
1630 else
1631 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
1633 builtin_decl =
1634 targetm.vectorize.builtin_conversion (code, integral_type);
1635 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
1637 /* Arguments are ready. Create the new vector stmt. */
1638 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
1639 new_temp = make_ssa_name (vec_dest, new_stmt);
1640 gimple_call_set_lhs (new_stmt, new_temp);
1641 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1642 if (slp_node)
1643 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
1646 if (j == 0)
1647 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1648 else
1649 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1650 prev_stmt_info = vinfo_for_stmt (new_stmt);
1652 break;
1654 case WIDEN:
1655 /* In case the vectorization factor (VF) is bigger than the number
1656 of elements that we can fit in a vectype (nunits), we have to
1657 generate more than one vector stmt - i.e - we need to "unroll"
1658 the vector stmt by a factor VF/nunits. */
1659 for (j = 0; j < ncopies; j++)
1661 if (j == 0)
1662 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1663 else
1664 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1666 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
1668 /* Generate first half of the widened result: */
1669 new_stmt
1670 = vect_gen_widened_results_half (code1, decl1,
1671 vec_oprnd0, vec_oprnd1,
1672 unary_op, vec_dest, gsi, stmt);
1673 if (j == 0)
1674 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1675 else
1676 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1677 prev_stmt_info = vinfo_for_stmt (new_stmt);
1679 /* Generate second half of the widened result: */
1680 new_stmt
1681 = vect_gen_widened_results_half (code2, decl2,
1682 vec_oprnd0, vec_oprnd1,
1683 unary_op, vec_dest, gsi, stmt);
1684 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1685 prev_stmt_info = vinfo_for_stmt (new_stmt);
1687 break;
1689 case NARROW:
1690 /* In case the vectorization factor (VF) is bigger than the number
1691 of elements that we can fit in a vectype (nunits), we have to
1692 generate more than one vector stmt - i.e - we need to "unroll"
1693 the vector stmt by a factor VF/nunits. */
1694 for (j = 0; j < ncopies; j++)
1696 /* Handle uses. */
1697 if (j == 0)
1699 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1700 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1702 else
1704 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
1705 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
1708 /* Arguments are ready. Create the new vector stmt. */
1709 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
1710 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
1711 vec_oprnd1);
1712 new_temp = make_ssa_name (vec_dest, new_stmt);
1713 gimple_assign_set_lhs (new_stmt, new_temp);
1714 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1716 if (j == 0)
1717 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1718 else
1719 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1721 prev_stmt_info = vinfo_for_stmt (new_stmt);
1724 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1727 if (vec_oprnds0)
1728 VEC_free (tree, heap, vec_oprnds0);
1730 return true;
1732 /* Function vectorizable_assignment.
1734 Check if STMT performs an assignment (copy) that can be vectorized.
1735 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1736 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1737 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1739 static bool
1740 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
1741 gimple *vec_stmt, slp_tree slp_node)
1743 tree vec_dest;
1744 tree scalar_dest;
1745 tree op;
1746 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1747 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1748 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1749 tree new_temp;
1750 tree def;
1751 gimple def_stmt;
1752 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1753 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1754 int ncopies;
1755 int i;
1756 VEC(tree,heap) *vec_oprnds = NULL;
1757 tree vop;
1759 /* Multiple types in SLP are handled by creating the appropriate number of
1760 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1761 case of SLP. */
1762 if (slp_node)
1763 ncopies = 1;
1764 else
1765 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1767 gcc_assert (ncopies >= 1);
1768 if (ncopies > 1)
1769 return false; /* FORNOW */
1771 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1772 return false;
1774 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
1775 return false;
1777 /* Is vectorizable assignment? */
1778 if (!is_gimple_assign (stmt))
1779 return false;
1781 scalar_dest = gimple_assign_lhs (stmt);
1782 if (TREE_CODE (scalar_dest) != SSA_NAME)
1783 return false;
1785 if (gimple_assign_single_p (stmt)
1786 || gimple_assign_rhs_code (stmt) == PAREN_EXPR)
1787 op = gimple_assign_rhs1 (stmt);
1788 else
1789 return false;
1791 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
1793 if (vect_print_dump_info (REPORT_DETAILS))
1794 fprintf (vect_dump, "use not simple.");
1795 return false;
1798 if (!vec_stmt) /* transformation not required. */
1800 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1801 if (vect_print_dump_info (REPORT_DETAILS))
1802 fprintf (vect_dump, "=== vectorizable_assignment ===");
1803 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1804 return true;
1807 /** Transform. **/
1808 if (vect_print_dump_info (REPORT_DETAILS))
1809 fprintf (vect_dump, "transform assignment.");
1811 /* Handle def. */
1812 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1814 /* Handle use. */
1815 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
1817 /* Arguments are ready. Create the new vector stmt. */
1818 for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
1820 *vec_stmt = gimple_build_assign (vec_dest, vop);
1821 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1822 gimple_assign_set_lhs (*vec_stmt, new_temp);
1823 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
1824 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt;
1826 if (slp_node)
1827 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), *vec_stmt);
1830 VEC_free (tree, heap, vec_oprnds);
1831 return true;
1834 /* Function vectorizable_operation.
1836 Check if STMT performs a binary or unary operation that can be vectorized.
1837 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1838 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1839 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1841 static bool
1842 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
1843 gimple *vec_stmt, slp_tree slp_node)
1845 tree vec_dest;
1846 tree scalar_dest;
1847 tree op0, op1 = NULL;
1848 tree vec_oprnd1 = NULL_TREE;
1849 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1850 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1851 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1852 enum tree_code code;
1853 enum machine_mode vec_mode;
1854 tree new_temp;
1855 int op_type;
1856 optab optab;
1857 int icode;
1858 enum machine_mode optab_op2_mode;
1859 tree def;
1860 gimple def_stmt;
1861 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
1862 gimple new_stmt = NULL;
1863 stmt_vec_info prev_stmt_info;
1864 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
1865 int nunits_out;
1866 tree vectype_out;
1867 int ncopies;
1868 int j, i;
1869 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
1870 tree vop0, vop1;
1871 unsigned int k;
1872 bool shift_p = false;
1873 bool scalar_shift_arg = false;
1875 /* Multiple types in SLP are handled by creating the appropriate number of
1876 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
1877 case of SLP. */
1878 if (slp_node)
1879 ncopies = 1;
1880 else
1881 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
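/* For example (illustrative numbers, not taken from this file): with a
   vectorization factor of 8 and a V4SI vectype (nunits_in == 4), ncopies is 2
   and the scalar stmt is "unrolled" into two vector stmts.  */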
1883 gcc_assert (ncopies >= 1);
1885 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1886 return false;
1888 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
1889 return false;
1891 /* Is STMT a vectorizable binary/unary operation? */
1892 if (!is_gimple_assign (stmt))
1893 return false;
1895 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
1896 return false;
1898 scalar_dest = gimple_assign_lhs (stmt);
1899 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
1900 if (!vectype_out)
1901 return false;
1902 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1903 if (nunits_out != nunits_in)
1904 return false;
1906 code = gimple_assign_rhs_code (stmt);
1908 /* For pointer addition, we should use the normal plus for
1909 the vector addition. */
1910 if (code == POINTER_PLUS_EXPR)
1911 code = PLUS_EXPR;
1913 /* Support only unary or binary operations. */
1914 op_type = TREE_CODE_LENGTH (code);
1915 if (op_type != unary_op && op_type != binary_op)
1917 if (vect_print_dump_info (REPORT_DETAILS))
1918 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1919 return false;
1922 op0 = gimple_assign_rhs1 (stmt);
1923 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
1925 if (vect_print_dump_info (REPORT_DETAILS))
1926 fprintf (vect_dump, "use not simple.");
1927 return false;
1930 if (op_type == binary_op)
1932 op1 = gimple_assign_rhs2 (stmt);
1933 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
1935 if (vect_print_dump_info (REPORT_DETAILS))
1936 fprintf (vect_dump, "use not simple.");
1937 return false;
1941 /* If this is a shift/rotate, determine whether the shift amount is a vector
1942 or a scalar. If the shift/rotate amount is a vector, use the vector/vector
1943 shift optabs. */
1944 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
1945 || code == RROTATE_EXPR)
1947 shift_p = true;
1949 /* vector shifted by vector */
1950 if (dt[1] == vect_loop_def)
1952 optab = optab_for_tree_code (code, vectype, optab_vector);
1953 if (vect_print_dump_info (REPORT_DETAILS))
1954 fprintf (vect_dump, "vector/vector shift/rotate found.");
1957 /* See if the machine has a vector shifted by scalar insn, and if not,
1958 then see if it has a vector shifted by vector insn. */
1959 else if (dt[1] == vect_constant_def || dt[1] == vect_invariant_def)
1961 optab = optab_for_tree_code (code, vectype, optab_scalar);
1962 if (optab
1963 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
1964 != CODE_FOR_nothing))
1966 scalar_shift_arg = true;
1967 if (vect_print_dump_info (REPORT_DETAILS))
1968 fprintf (vect_dump, "vector/scalar shift/rotate found.");
1970 else
1972 optab = optab_for_tree_code (code, vectype, optab_vector);
1973 if (vect_print_dump_info (REPORT_DETAILS)
1974 && optab
1975 && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
1976 != CODE_FOR_nothing))
1977 fprintf (vect_dump, "vector/vector shift/rotate found.");
1981 else
1983 if (vect_print_dump_info (REPORT_DETAILS))
1984 fprintf (vect_dump, "operand mode requires invariant argument.");
1985 return false;
1988 else
1989 optab = optab_for_tree_code (code, vectype, optab_default);
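/* Illustrative summary of the optab selection above (the stmt is made up):
   for
       S: a_i = b_i >> n          (N loop-invariant)
   we first try the vector-shifted-by-scalar optab; if the target supports it,
   N is used directly as the scalar shift operand (SCALAR_SHIFT_ARG).
   Otherwise we fall back to the vector-shifted-by-vector optab, and the
   invariant N is later broadcast into a vector operand.  */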
1991 /* Supportable by target? */
1992 if (!optab)
1994 if (vect_print_dump_info (REPORT_DETAILS))
1995 fprintf (vect_dump, "no optab.");
1996 return false;
1998 vec_mode = TYPE_MODE (vectype);
1999 icode = (int) optab_handler (optab, vec_mode)->insn_code;
2000 if (icode == CODE_FOR_nothing)
2002 if (vect_print_dump_info (REPORT_DETAILS))
2003 fprintf (vect_dump, "op not supported by target.");
2004 /* Check only during analysis. */
2005 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2006 || (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2007 < vect_min_worthwhile_factor (code)
2008 && !vec_stmt))
2009 return false;
2010 if (vect_print_dump_info (REPORT_DETAILS))
2011 fprintf (vect_dump, "proceeding using word mode.");
2014 /* Worthwhile without SIMD support? Check only during analysis. */
2015 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2016 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2017 < vect_min_worthwhile_factor (code)
2018 && !vec_stmt)
2020 if (vect_print_dump_info (REPORT_DETAILS))
2021 fprintf (vect_dump, "not worthwhile without SIMD support.");
2022 return false;
2025 if (!vec_stmt) /* transformation not required. */
2027 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2028 if (vect_print_dump_info (REPORT_DETAILS))
2029 fprintf (vect_dump, "=== vectorizable_operation ===");
2030 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2031 return true;
2034 /** Transform. **/
2036 if (vect_print_dump_info (REPORT_DETAILS))
2037 fprintf (vect_dump, "transform binary/unary operation.");
2039 /* Handle def. */
2040 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2042 /* Allocate VECs for vector operands. In case of SLP, vector operands are
2043 created in the previous stages of the recursion, so no allocation is
2044 needed, except for the case of shift with scalar shift argument. In that
2045 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
2046 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
2047 In case of loop-based vectorization we allocate VECs of size 1. We
2048 allocate VEC_OPRNDS1 only in case of binary operation. */
2049 if (!slp_node)
2051 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2052 if (op_type == binary_op)
2053 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2055 else if (scalar_shift_arg)
2056 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2058 /* In case the vectorization factor (VF) is bigger than the number
2059 of elements that we can fit in a vectype (nunits), we have to generate
2060 more than one vector stmt - i.e - we need to "unroll" the
2061 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2062 from one copy of the vector stmt to the next, in the field
2063 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2064 stages to find the correct vector defs to be used when vectorizing
2065 stmts that use the defs of the current stmt. The example below illustrates
2066 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
2067 4 vectorized stmts):
2069 before vectorization:
2070 RELATED_STMT VEC_STMT
2071 S1: x = memref - -
2072 S2: z = x + 1 - -
2074 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
2075 there):
2076 RELATED_STMT VEC_STMT
2077 VS1_0: vx0 = memref0 VS1_1 -
2078 VS1_1: vx1 = memref1 VS1_2 -
2079 VS1_2: vx2 = memref2 VS1_3 -
2080 VS1_3: vx3 = memref3 - -
2081 S1: x = load - VS1_0
2082 S2: z = x + 1 - -
2084 step2: vectorize stmt S2 (done here):
2085 To vectorize stmt S2 we first need to find the relevant vector
2086 def for the first operand 'x'. This is, as usual, obtained from
2087 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
2088 that defines 'x' (S1). This way we find the stmt VS1_0, and the
2089 relevant vector def 'vx0'. Having found 'vx0' we can generate
2090 the vector stmt VS2_0, and as usual, record it in the
2091 STMT_VINFO_VEC_STMT of stmt S2.
2092 When creating the second copy (VS2_1), we obtain the relevant vector
2093 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
2094 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
2095 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
2096 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
2097 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
2098 chain of stmts and pointers:
2099 RELATED_STMT VEC_STMT
2100 VS1_0: vx0 = memref0 VS1_1 -
2101 VS1_1: vx1 = memref1 VS1_2 -
2102 VS1_2: vx2 = memref2 VS1_3 -
2103 VS1_3: vx3 = memref3 - -
2104 S1: x = load - VS1_0
2105 VS2_0: vz0 = vx0 + v1 VS2_1 -
2106 VS2_1: vz1 = vx1 + v1 VS2_2 -
2107 VS2_2: vz2 = vx2 + v1 VS2_3 -
2108 VS2_3: vz3 = vx3 + v1 - -
2109 S2: z = x + 1 - VS2_0 */
2111 prev_stmt_info = NULL;
2112 for (j = 0; j < ncopies; j++)
2114 /* Handle uses. */
2115 if (j == 0)
2117 if (op_type == binary_op && scalar_shift_arg)
2119 /* Vector shl and shr insn patterns can be defined with scalar
2120 operand 2 (shift operand). In this case, use constant or loop
2121 invariant op1 directly, without extending it to vector mode
2122 first. */
2123 optab_op2_mode = insn_data[icode].operand[2].mode;
2124 if (!VECTOR_MODE_P (optab_op2_mode))
2126 if (vect_print_dump_info (REPORT_DETAILS))
2127 fprintf (vect_dump, "operand 1 using scalar mode.");
2128 vec_oprnd1 = op1;
2129 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2130 if (slp_node)
2132 /* Store vec_oprnd1 for every vector stmt to be created
2133 for SLP_NODE. We check during the analysis that all the
2134 shift arguments are the same.
2135 TODO: Allow different constants for different vector
2136 stmts generated for an SLP instance. */
2137 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2138 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2143 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
2144 (a special case for certain kinds of vector shifts); otherwise,
2145 operand 1 should be of a vector type (the usual case). */
2146 if (op_type == binary_op && !vec_oprnd1)
2147 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
2148 slp_node);
2149 else
2150 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2151 slp_node);
2153 else
2154 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
2156 /* Arguments are ready. Create the new vector stmt. */
2157 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
2159 vop1 = ((op_type == binary_op)
2160 ? VEC_index (tree, vec_oprnds1, i) : NULL);
2161 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2162 new_temp = make_ssa_name (vec_dest, new_stmt);
2163 gimple_assign_set_lhs (new_stmt, new_temp);
2164 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2165 if (slp_node)
2166 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2169 if (slp_node)
2170 continue;
2172 if (j == 0)
2173 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2174 else
2175 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2176 prev_stmt_info = vinfo_for_stmt (new_stmt);
2179 VEC_free (tree, heap, vec_oprnds0);
2180 if (vec_oprnds1)
2181 VEC_free (tree, heap, vec_oprnds1);
2183 return true;
2187 /* Get vectorized definitions for loop-based vectorization. For the first
2188 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2189 scalar operand), and for the rest we get a copy with
2190 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2191 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2192 The vectors are collected into VEC_OPRNDS. */
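/* For example: each invocation pushes two vector defs and then recurses
   MULTI_STEP_CVT more times, so 2 * (MULTI_STEP_CVT + 1) defs end up in
   VEC_OPRNDS (e.g. 4 defs when MULTI_STEP_CVT is 1).  */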
2194 static void
2195 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2196 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2198 tree vec_oprnd;
2200 /* Get first vector operand. */
2201 /* All the vector operands except the very first one (that is, the scalar OPRND)
2202 are stmt copies. */
2203 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2204 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2205 else
2206 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2208 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2210 /* Get second vector operand. */
2211 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2212 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2214 *oprnd = vec_oprnd;
2216 /* For conversion in multiple steps, continue to get operands
2217 recursively. */
2218 if (multi_step_cvt)
2219 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2223 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2224 For multi-step conversions store the resulting vectors and call the function
2225 recursively. */
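/* Illustrative example (vector sizes are hypothetical): demoting int to char
   with 128-bit vectors packs pairs of V4SI operands into V8HI results in the
   first step and pairs of V8HI into V16QI in the second step; each step halves
   the number of vectors held in VEC_OPRNDS.  */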
2227 static void
2228 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2229 int multi_step_cvt, gimple stmt,
2230 VEC (tree, heap) *vec_dsts,
2231 gimple_stmt_iterator *gsi,
2232 slp_tree slp_node, enum tree_code code,
2233 stmt_vec_info *prev_stmt_info)
2235 unsigned int i;
2236 tree vop0, vop1, new_tmp, vec_dest;
2237 gimple new_stmt;
2238 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2240 vec_dest = VEC_pop (tree, vec_dsts);
2242 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2244 /* Create demotion operation. */
2245 vop0 = VEC_index (tree, *vec_oprnds, i);
2246 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2247 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2248 new_tmp = make_ssa_name (vec_dest, new_stmt);
2249 gimple_assign_set_lhs (new_stmt, new_tmp);
2250 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2252 if (multi_step_cvt)
2253 /* Store the resulting vector for next recursive call. */
2254 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2255 else
2257 /* This is the last step of the conversion sequence. Store the
2258 vectors in SLP_NODE or in vector info of the scalar statement
2259 (or in STMT_VINFO_RELATED_STMT chain). */
2260 if (slp_node)
2261 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2262 else
2264 if (!*prev_stmt_info)
2265 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2266 else
2267 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2269 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2274 /* For multi-step demotion operations we first generate demotion operations
2275 from the source type to the intermediate types, and then combine the
2276 results (stored in VEC_OPRNDS) in a demotion operation to the destination
2277 type. */
2278 if (multi_step_cvt)
2280 /* At each level of recursion we have half of the operands we had at the
2281 previous level. */
2282 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2283 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2284 stmt, vec_dsts, gsi, slp_node,
2285 code, prev_stmt_info);
2290 /* Function vectorizable_type_demotion
2292 Check if STMT performs a binary or unary operation that involves
2293 type demotion, and if it can be vectorized.
2294 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2295 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2296 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2298 static bool
2299 vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
2300 gimple *vec_stmt, slp_tree slp_node)
2302 tree vec_dest;
2303 tree scalar_dest;
2304 tree op0;
2305 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2306 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2307 enum tree_code code, code1 = ERROR_MARK;
2308 tree def;
2309 gimple def_stmt;
2310 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2311 stmt_vec_info prev_stmt_info;
2312 int nunits_in;
2313 int nunits_out;
2314 tree vectype_out;
2315 int ncopies;
2316 int j, i;
2317 tree vectype_in;
2318 int multi_step_cvt = 0;
2319 VEC (tree, heap) *vec_oprnds0 = NULL;
2320 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2321 tree last_oprnd, intermediate_type;
2323 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2324 return false;
2326 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2327 return false;
2329 /* Is STMT a vectorizable type-demotion operation? */
2330 if (!is_gimple_assign (stmt))
2331 return false;
2333 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2334 return false;
2336 code = gimple_assign_rhs_code (stmt);
2337 if (!CONVERT_EXPR_CODE_P (code))
2338 return false;
2340 op0 = gimple_assign_rhs1 (stmt);
2341 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2342 if (!vectype_in)
2343 return false;
2344 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2346 scalar_dest = gimple_assign_lhs (stmt);
2347 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2348 if (!vectype_out)
2349 return false;
2350 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2351 if (nunits_in >= nunits_out)
2352 return false;
2354 /* Multiple types in SLP are handled by creating the appropriate number of
2355 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2356 case of SLP. */
2357 if (slp_node)
2358 ncopies = 1;
2359 else
2360 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2362 gcc_assert (ncopies >= 1);
2364 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2365 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2366 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2367 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2368 && CONVERT_EXPR_CODE_P (code))))
2369 return false;
2371 /* Check the operands of the operation. */
2372 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
2374 if (vect_print_dump_info (REPORT_DETAILS))
2375 fprintf (vect_dump, "use not simple.");
2376 return false;
2379 /* Supportable by target? */
2380 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1,
2381 &multi_step_cvt, &interm_types))
2382 return false;
2384 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2386 if (!vec_stmt) /* transformation not required. */
2388 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2389 if (vect_print_dump_info (REPORT_DETAILS))
2390 fprintf (vect_dump, "=== vectorizable_demotion ===");
2391 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2392 return true;
2395 /** Transform. **/
2396 if (vect_print_dump_info (REPORT_DETAILS))
2397 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2398 ncopies);
2400 /* In case of multi-step demotion, we first generate demotion operations to
2401 the intermediate types, and then from those types to the final one.
2402 We create vector destinations for the intermediate type (TYPES) received
2403 from supportable_narrowing_operation, and store them in the correct order
2404 for future use in vect_create_vectorized_demotion_stmts(). */
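/* The destinations are pushed such that VEC_pop in
   vect_create_vectorized_demotion_stmts() returns them in the order the
   demotion steps are generated, with the final VECTYPE_OUT destination
   popped last.  */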
2405 if (multi_step_cvt)
2406 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2407 else
2408 vec_dsts = VEC_alloc (tree, heap, 1);
2410 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2411 VEC_quick_push (tree, vec_dsts, vec_dest);
2413 if (multi_step_cvt)
2415 for (i = VEC_length (tree, interm_types) - 1;
2416 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2418 vec_dest = vect_create_destination_var (scalar_dest,
2419 intermediate_type);
2420 VEC_quick_push (tree, vec_dsts, vec_dest);
2424 /* In case the vectorization factor (VF) is bigger than the number
2425 of elements that we can fit in a vectype (nunits), we have to generate
2426 more than one vector stmt - i.e - we need to "unroll" the
2427 vector stmt by a factor VF/nunits. */
2428 last_oprnd = op0;
2429 prev_stmt_info = NULL;
2430 for (j = 0; j < ncopies; j++)
2432 /* Handle uses. */
2433 if (slp_node)
2434 vect_get_slp_defs (slp_node, &vec_oprnds0, NULL);
2435 else
2437 VEC_free (tree, heap, vec_oprnds0);
2438 vec_oprnds0 = VEC_alloc (tree, heap,
2439 (multi_step_cvt ? vect_pow2 (multi_step_cvt) * 2 : 2));
2440 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2441 vect_pow2 (multi_step_cvt) - 1);
2444 /* Arguments are ready. Create the new vector stmts. */
2445 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2446 vect_create_vectorized_demotion_stmts (&vec_oprnds0,
2447 multi_step_cvt, stmt, tmp_vec_dsts,
2448 gsi, slp_node, code1,
2449 &prev_stmt_info);
2452 VEC_free (tree, heap, vec_oprnds0);
2453 VEC_free (tree, heap, vec_dsts);
2454 VEC_free (tree, heap, tmp_vec_dsts);
2455 VEC_free (tree, heap, interm_types);
2457 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2458 return true;
2462 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2463 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2464 the resulting vectors and call the function recursively. */
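/* Note: each input vector yields two output vectors (its low and high halves
   widened), so every step doubles the number of vectors collected in VEC_TMP
   before the recursive call.  */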
2466 static void
2467 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2468 VEC (tree, heap) **vec_oprnds1,
2469 int multi_step_cvt, gimple stmt,
2470 VEC (tree, heap) *vec_dsts,
2471 gimple_stmt_iterator *gsi,
2472 slp_tree slp_node, enum tree_code code1,
2473 enum tree_code code2, tree decl1,
2474 tree decl2, int op_type,
2475 stmt_vec_info *prev_stmt_info)
2477 int i;
2478 tree vop0, vop1, new_tmp1, new_tmp2, vec_dest;
2479 gimple new_stmt1, new_stmt2;
2480 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2481 VEC (tree, heap) *vec_tmp;
2483 vec_dest = VEC_pop (tree, vec_dsts);
2484 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2486 for (i = 0; VEC_iterate (tree, *vec_oprnds0, i, vop0); i++)
2488 if (op_type == binary_op)
2489 vop1 = VEC_index (tree, *vec_oprnds1, i);
2490 else
2491 vop1 = NULL_TREE;
2493 /* Generate the two halves of promotion operation. */
2494 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2495 op_type, vec_dest, gsi, stmt);
2496 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2497 op_type, vec_dest, gsi, stmt);
2498 if (is_gimple_call (new_stmt1))
2500 new_tmp1 = gimple_call_lhs (new_stmt1);
2501 new_tmp2 = gimple_call_lhs (new_stmt2);
2503 else
2505 new_tmp1 = gimple_assign_lhs (new_stmt1);
2506 new_tmp2 = gimple_assign_lhs (new_stmt2);
2509 if (multi_step_cvt)
2511 /* Store the results for the recursive call. */
2512 VEC_quick_push (tree, vec_tmp, new_tmp1);
2513 VEC_quick_push (tree, vec_tmp, new_tmp2);
2515 else
2517 /* Last step of the promotion sequence - store the results. */
2518 if (slp_node)
2520 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt1);
2521 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt2);
2523 else
2525 if (!*prev_stmt_info)
2526 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt1;
2527 else
2528 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt1;
2530 *prev_stmt_info = vinfo_for_stmt (new_stmt1);
2531 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt2;
2532 *prev_stmt_info = vinfo_for_stmt (new_stmt2);
2537 if (multi_step_cvt)
2539 /* For a multi-step promotion operation we call the
2540 function recursively for every stage. We start from the input type,
2541 create promotion operations to the intermediate types, and then
2542 create promotions to the output type. */
2543 *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
2544 VEC_free (tree, heap, vec_tmp);
2545 vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
2546 multi_step_cvt - 1, stmt,
2547 vec_dsts, gsi, slp_node, code1,
2548 code2, decl1, decl2, op_type,
2549 prev_stmt_info);
2554 /* Function vectorizable_type_promotion
2556 Check if STMT performs a binary or unary operation that involves
2557 type promotion, and if it can be vectorized.
2558 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2559 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2560 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2562 static bool
2563 vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
2564 gimple *vec_stmt, slp_tree slp_node)
2566 tree vec_dest;
2567 tree scalar_dest;
2568 tree op0, op1 = NULL;
2569 tree vec_oprnd0 = NULL, vec_oprnd1 = NULL;
2570 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2571 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2572 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2573 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2574 int op_type;
2575 tree def;
2576 gimple def_stmt;
2577 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2578 stmt_vec_info prev_stmt_info;
2579 int nunits_in;
2580 int nunits_out;
2581 tree vectype_out;
2582 int ncopies;
2583 int j, i;
2584 tree vectype_in;
2585 tree intermediate_type = NULL_TREE;
2586 int multi_step_cvt = 0;
2587 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2588 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
2590 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2591 return false;
2593 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2594 return false;
2596 /* Is STMT a vectorizable type-promotion operation? */
2597 if (!is_gimple_assign (stmt))
2598 return false;
2600 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2601 return false;
2603 code = gimple_assign_rhs_code (stmt);
2604 if (!CONVERT_EXPR_CODE_P (code)
2605 && code != WIDEN_MULT_EXPR)
2606 return false;
2608 op0 = gimple_assign_rhs1 (stmt);
2609 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2610 if (!vectype_in)
2611 return false;
2612 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2614 scalar_dest = gimple_assign_lhs (stmt);
2615 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2616 if (!vectype_out)
2617 return false;
2618 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2619 if (nunits_in <= nunits_out)
2620 return false;
2622 /* Multiple types in SLP are handled by creating the appropriate number of
2623 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2624 case of SLP. */
2625 if (slp_node)
2626 ncopies = 1;
2627 else
2628 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2630 gcc_assert (ncopies >= 1);
2632 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2633 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2634 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
2635 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
2636 && CONVERT_EXPR_CODE_P (code))))
2637 return false;
2639 /* Check the operands of the operation. */
2640 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
2642 if (vect_print_dump_info (REPORT_DETAILS))
2643 fprintf (vect_dump, "use not simple.");
2644 return false;
2647 op_type = TREE_CODE_LENGTH (code);
2648 if (op_type == binary_op)
2650 op1 = gimple_assign_rhs2 (stmt);
2651 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
2653 if (vect_print_dump_info (REPORT_DETAILS))
2654 fprintf (vect_dump, "use not simple.");
2655 return false;
2659 /* Supportable by target? */
2660 if (!supportable_widening_operation (code, stmt, vectype_in,
2661 &decl1, &decl2, &code1, &code2,
2662 &multi_step_cvt, &interm_types))
2663 return false;
2665 /* Binary widening operation can only be supported directly by the
2666 architecture. */
2667 gcc_assert (!(multi_step_cvt && op_type == binary_op));
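/* I.e. WIDEN_MULT_EXPR must be supported by the target in a single widening
   step; it is never synthesized through intermediate types.  */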
2669 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2671 if (!vec_stmt) /* transformation not required. */
2673 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2674 if (vect_print_dump_info (REPORT_DETAILS))
2675 fprintf (vect_dump, "=== vectorizable_promotion ===");
2676 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
2677 return true;
2680 /** Transform. **/
2682 if (vect_print_dump_info (REPORT_DETAILS))
2683 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
2684 ncopies);
2686 /* Handle def. */
2687 /* In case of multi-step promotion, we first generate promotion operations
2688 to the intermediate types, and then from those types to the final one.
2689 We store the vector destinations in VEC_DSTS in the correct order for
2690 recursive creation of promotion operations in
2691 vect_create_vectorized_promotion_stmts(). Vector destinations are created
2692 according to TYPES received from supportable_widening_operation(). */
2693 if (multi_step_cvt)
2694 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2695 else
2696 vec_dsts = VEC_alloc (tree, heap, 1);
2698 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2699 VEC_quick_push (tree, vec_dsts, vec_dest);
2701 if (multi_step_cvt)
2703 for (i = VEC_length (tree, interm_types) - 1;
2704 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2706 vec_dest = vect_create_destination_var (scalar_dest,
2707 intermediate_type);
2708 VEC_quick_push (tree, vec_dsts, vec_dest);
2712 if (!slp_node)
2714 vec_oprnds0 = VEC_alloc (tree, heap,
2715 (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2716 if (op_type == binary_op)
2717 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2720 /* In case the vectorization factor (VF) is bigger than the number
2721 of elements that we can fit in a vectype (nunits), we have to generate
2722 more than one vector stmt - i.e - we need to "unroll" the
2723 vector stmt by a factor VF/nunits. */
2725 prev_stmt_info = NULL;
2726 for (j = 0; j < ncopies; j++)
2728 /* Handle uses. */
2729 if (j == 0)
2731 if (slp_node)
2732 vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1);
2733 else
2735 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2736 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2737 if (op_type == binary_op)
2739 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2740 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2744 else
2746 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2747 VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
2748 if (op_type == binary_op)
2750 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
2751 VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
2755 /* Arguments are ready. Create the new vector stmts. */
2756 tmp_vec_dsts = VEC_copy (tree, heap, vec_dsts);
2757 vect_create_vectorized_promotion_stmts (&vec_oprnds0, &vec_oprnds1,
2758 multi_step_cvt, stmt,
2759 tmp_vec_dsts,
2760 gsi, slp_node, code1, code2,
2761 decl1, decl2, op_type,
2762 &prev_stmt_info);
2765 VEC_free (tree, heap, vec_dsts);
2766 VEC_free (tree, heap, tmp_vec_dsts);
2767 VEC_free (tree, heap, interm_types);
2768 VEC_free (tree, heap, vec_oprnds0);
2769 VEC_free (tree, heap, vec_oprnds1);
2771 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2772 return true;
2776 /* Function vectorizable_store.
2778 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2779 can be vectorized.
2780 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2781 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2782 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2784 static bool
2785 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2786 slp_tree slp_node)
2788 tree scalar_dest;
2789 tree data_ref;
2790 tree op;
2791 tree vec_oprnd = NULL_TREE;
2792 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2793 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
2794 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2795 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2796 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2797 enum machine_mode vec_mode;
2798 tree dummy;
2799 enum dr_alignment_support alignment_support_scheme;
2800 tree def;
2801 gimple def_stmt;
2802 enum vect_def_type dt;
2803 stmt_vec_info prev_stmt_info = NULL;
2804 tree dataref_ptr = NULL_TREE;
2805 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2806 int ncopies;
2807 int j;
2808 gimple next_stmt, first_stmt = NULL;
2809 bool strided_store = false;
2810 unsigned int group_size, i;
2811 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
2812 bool inv_p;
2813 VEC(tree,heap) *vec_oprnds = NULL;
2814 bool slp = (slp_node != NULL);
2815 stmt_vec_info first_stmt_vinfo;
2816 unsigned int vec_num;
2818 /* Multiple types in SLP are handled by creating the appropriate number of
2819 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2820 case of SLP. */
2821 if (slp)
2822 ncopies = 1;
2823 else
2824 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2826 gcc_assert (ncopies >= 1);
2828 /* FORNOW. This restriction should be relaxed. */
2829 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
2831 if (vect_print_dump_info (REPORT_DETAILS))
2832 fprintf (vect_dump, "multiple types in nested loop.");
2833 return false;
2836 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2837 return false;
2839 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2840 return false;
2842 /* Is vectorizable store? */
2844 if (!is_gimple_assign (stmt))
2845 return false;
2847 scalar_dest = gimple_assign_lhs (stmt);
2848 if (TREE_CODE (scalar_dest) != ARRAY_REF
2849 && TREE_CODE (scalar_dest) != INDIRECT_REF
2850 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
2851 return false;
2853 gcc_assert (gimple_assign_single_p (stmt));
2854 op = gimple_assign_rhs1 (stmt);
2855 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2857 if (vect_print_dump_info (REPORT_DETAILS))
2858 fprintf (vect_dump, "use not simple.");
2859 return false;
2862 /* The scalar rhs type needs to be trivially convertible to the vector
2863 component type. This should always be the case. */
2864 if (!useless_type_conversion_p (TREE_TYPE (vectype), TREE_TYPE (op)))
2866 if (vect_print_dump_info (REPORT_DETAILS))
2867 fprintf (vect_dump, "??? operands of different types");
2868 return false;
2871 vec_mode = TYPE_MODE (vectype);
2872 /* FORNOW. In some cases we can vectorize even if the data-type is not
2873 supported (e.g. array initialization with 0). */
2874 if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
2875 return false;
2877 if (!STMT_VINFO_DATA_REF (stmt_info))
2878 return false;
2880 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
2882 strided_store = true;
2883 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
2884 if (!vect_strided_store_supported (vectype)
2885 && !PURE_SLP_STMT (stmt_info) && !slp)
2886 return false;
2888 if (first_stmt == stmt)
2890 /* STMT is the leader of the group. Check the operands of all the
2891 stmts of the group. */
2892 next_stmt = DR_GROUP_NEXT_DR (stmt_info);
2893 while (next_stmt)
2895 gcc_assert (gimple_assign_single_p (next_stmt));
2896 op = gimple_assign_rhs1 (next_stmt);
2897 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2899 if (vect_print_dump_info (REPORT_DETAILS))
2900 fprintf (vect_dump, "use not simple.");
2901 return false;
2903 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
2908 if (!vec_stmt) /* transformation not required. */
2910 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2911 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
2912 return true;
2915 /** Transform. **/
2917 if (strided_store)
2919 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2920 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
2922 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
2924 /* FORNOW */
2925 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
2927 /* We vectorize all the stmts of the interleaving group when we
2928 reach the last stmt in the group. */
2929 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
2930 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
2931 && !slp)
2933 *vec_stmt = NULL;
2934 return true;
2937 if (slp)
2938 strided_store = false;
2940 /* VEC_NUM is the number of vect stmts to be created for this group. */
2941 if (slp)
2942 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2943 else
2944 vec_num = group_size;
2946 else
2948 first_stmt = stmt;
2949 first_dr = dr;
2950 group_size = vec_num = 1;
2951 first_stmt_vinfo = stmt_info;
2954 if (vect_print_dump_info (REPORT_DETAILS))
2955 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
2957 dr_chain = VEC_alloc (tree, heap, group_size);
2958 oprnds = VEC_alloc (tree, heap, group_size);
2960 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
2961 gcc_assert (alignment_support_scheme);
2962 gcc_assert (alignment_support_scheme == dr_aligned); /* FORNOW */
2964 /* In case the vectorization factor (VF) is bigger than the number
2965 of elements that we can fit in a vectype (nunits), we have to generate
2966 more than one vector stmt - i.e - we need to "unroll" the
2967 vector stmt by a factor VF/nunits. For more details see documentation in
2968 vect_get_vec_def_for_copy_stmt. */
2970 /* In case of interleaving (non-unit strided access):
2972 S1: &base + 2 = x2
2973 S2: &base = x0
2974 S3: &base + 1 = x1
2975 S4: &base + 3 = x3
2977 We create vectorized stores starting from base address (the access of the
2978 first stmt in the chain (S2 in the above example), when the last store stmt
2979 of the chain (S4) is reached:
2981 VS1: &base = vx2
2982 VS2: &base + vec_size*1 = vx0
2983 VS3: &base + vec_size*2 = vx1
2984 VS4: &base + vec_size*3 = vx3
2986 Then permutation statements are generated:
2988 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
2989 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
2992 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
2993 (the order of the data-refs in the output of vect_permute_store_chain
2994 corresponds to the order of scalar stmts in the interleaving chain - see
2995 the documentation of vect_permute_store_chain()).
2997 In case of both multiple types and interleaving, above vector stores and
2998 permutation stmts are created for every copy. The result vector stmts are
2999 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3000 STMT_VINFO_RELATED_STMT for the next copies.
3003 prev_stmt_info = NULL;
3004 for (j = 0; j < ncopies; j++)
3006 gimple new_stmt;
3007 gimple ptr_incr;
3009 if (j == 0)
3011 if (slp)
3013 /* Get vectorized arguments for SLP_NODE. */
3014 vect_get_slp_defs (slp_node, &vec_oprnds, NULL);
3016 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
3018 else
3020 /* For interleaved stores we collect vectorized defs for all the
3021 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
3022 used as an input to vect_permute_store_chain(), and OPRNDS as
3023 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
3025 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3026 OPRNDS are of size 1. */
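/* For example, for a group of four interleaved stores, four vector defs are
   pushed here into both DR_CHAIN and OPRNDS: DR_CHAIN is later fed to
   vect_permute_store_chain(), while OPRNDS keeps the unpermuted defs used to
   create the defs for the next copy.  */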
3027 next_stmt = first_stmt;
3028 for (i = 0; i < group_size; i++)
3030 /* Since gaps are not supported for interleaved stores,
3031 GROUP_SIZE is the exact number of stmts in the chain.
3032 Therefore, NEXT_STMT can't be NULL_TREE. In case that
3033 there is no interleaving, GROUP_SIZE is 1, and only one
3034 iteration of the loop will be executed. */
3035 gcc_assert (next_stmt
3036 && gimple_assign_single_p (next_stmt));
3037 op = gimple_assign_rhs1 (next_stmt);
3039 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
3040 NULL);
3041 VEC_quick_push (tree, dr_chain, vec_oprnd);
3042 VEC_quick_push (tree, oprnds, vec_oprnd);
3043 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3047 /* We should have caught mismatched types earlier. */
3048 gcc_assert (useless_type_conversion_p (vectype,
3049 TREE_TYPE (vec_oprnd)));
3050 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
3051 &dummy, &ptr_incr, false,
3052 &inv_p);
3053 gcc_assert (!inv_p);
3055 else
3057 /* For interleaved stores we created vectorized defs for all the
3058 defs stored in OPRNDS in the previous iteration (previous copy).
3059 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3060 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3061 next copy.
3062 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3063 OPRNDS are of size 1. */
3064 for (i = 0; i < group_size; i++)
3066 op = VEC_index (tree, oprnds, i);
3067 vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
3068 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
3069 VEC_replace (tree, dr_chain, i, vec_oprnd);
3070 VEC_replace (tree, oprnds, i, vec_oprnd);
3072 dataref_ptr =
3073 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3076 if (strided_store)
3078 result_chain = VEC_alloc (tree, heap, group_size);
3079 /* Permute. */
3080 if (!vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
3081 &result_chain))
3082 return false;
3085 next_stmt = first_stmt;
3086 for (i = 0; i < vec_num; i++)
3088 if (i > 0)
3089 /* Bump the vector pointer. */
3090 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3091 NULL_TREE);
3093 if (slp)
3094 vec_oprnd = VEC_index (tree, vec_oprnds, i);
3095 else if (strided_store)
3096 /* For strided stores vectorized defs are interleaved in
3097 vect_permute_store_chain(). */
3098 vec_oprnd = VEC_index (tree, result_chain, i);
3100 data_ref = build_fold_indirect_ref (dataref_ptr);
3101 /* If accesses through a pointer to vectype do not alias the original
3102 memory reference we have a problem. This should never happen. */
3103 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3104 get_alias_set (gimple_assign_lhs (stmt))));
3106 /* Arguments are ready. Create the new vector stmt. */
3107 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
3108 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3109 mark_symbols_for_renaming (new_stmt);
3111 if (slp)
3112 continue;
3114 if (j == 0)
3115 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3116 else
3117 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3119 prev_stmt_info = vinfo_for_stmt (new_stmt);
3120 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3121 if (!next_stmt)
3122 break;
3126 VEC_free (tree, heap, dr_chain);
3127 VEC_free (tree, heap, oprnds);
3128 if (result_chain)
3129 VEC_free (tree, heap, result_chain);
3131 return true;
3134 /* vectorizable_load.
3136 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
3137 can be vectorized.
3138 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3139 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3140 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3142 static bool
3143 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3144 slp_tree slp_node, slp_instance slp_node_instance)
3146 tree scalar_dest;
3147 tree vec_dest = NULL;
3148 tree data_ref = NULL;
3149 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3150 stmt_vec_info prev_stmt_info;
3151 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3152 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3153 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3154 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3155 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
3156 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3157 tree new_temp;
3158 int mode;
3159 gimple new_stmt = NULL;
3160 tree dummy;
3161 enum dr_alignment_support alignment_support_scheme;
3162 tree dataref_ptr = NULL_TREE;
3163 gimple ptr_incr;
3164 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3165 int ncopies;
3166 int i, j, group_size;
3167 tree msq = NULL_TREE, lsq;
3168 tree offset = NULL_TREE;
3169 tree realignment_token = NULL_TREE;
3170 gimple phi = NULL;
3171 VEC(tree,heap) *dr_chain = NULL;
3172 bool strided_load = false;
3173 gimple first_stmt;
3174 tree scalar_type;
3175 bool inv_p;
3176 bool compute_in_loop = false;
3177 struct loop *at_loop;
3178 int vec_num;
3179 bool slp = (slp_node != NULL);
3180 bool slp_perm = false;
3181 enum tree_code code;
3183 /* Multiple types in SLP are handled by creating the appropriate number of
3184 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3185 case of SLP. */
3186 if (slp)
3187 ncopies = 1;
3188 else
3189 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3191 gcc_assert (ncopies >= 1);
3193 /* FORNOW. This restriction should be relaxed. */
3194 if (nested_in_vect_loop && ncopies > 1)
3196 if (vect_print_dump_info (REPORT_DETAILS))
3197 fprintf (vect_dump, "multiple types in nested loop.");
3198 return false;
3201 if (slp && SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
3202 slp_perm = true;
3204 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3205 return false;
3207 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3208 return false;
3210 /* Is vectorizable load? */
3211 if (!is_gimple_assign (stmt))
3212 return false;
3214 scalar_dest = gimple_assign_lhs (stmt);
3215 if (TREE_CODE (scalar_dest) != SSA_NAME)
3216 return false;
3218 code = gimple_assign_rhs_code (stmt);
3219 if (code != ARRAY_REF
3220 && code != INDIRECT_REF
3221 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
3222 return false;
3224 if (!STMT_VINFO_DATA_REF (stmt_info))
3225 return false;
3227 scalar_type = TREE_TYPE (DR_REF (dr));
3228 mode = (int) TYPE_MODE (vectype);
3230 /* FORNOW. In some cases we can vectorize even if the data-type is not
3231 supported (e.g. data copies). */
3232 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
3234 if (vect_print_dump_info (REPORT_DETAILS))
3235 fprintf (vect_dump, "Aligned load, but unsupported type.");
3236 return false;
3239 /* The vector component type needs to be trivially convertible to the
3240 scalar lhs. This should always be the case. */
3241 if (!useless_type_conversion_p (TREE_TYPE (scalar_dest), TREE_TYPE (vectype)))
3243 if (vect_print_dump_info (REPORT_DETAILS))
3244 fprintf (vect_dump, "??? operands of different types");
3245 return false;
3248 /* Check if the load is a part of an interleaving chain. */
3249 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
3251 strided_load = true;
3252 /* FORNOW */
3253 gcc_assert (! nested_in_vect_loop);
3255 /* Check if interleaving is supported. */
3256 if (!vect_strided_load_supported (vectype)
3257 && !PURE_SLP_STMT (stmt_info) && !slp)
3258 return false;
3261 if (!vec_stmt) /* transformation not required. */
3263 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
3264 vect_model_load_cost (stmt_info, ncopies, NULL);
3265 return true;
3268 if (vect_print_dump_info (REPORT_DETAILS))
3269 fprintf (vect_dump, "transform load.");
3271 /** Transform. **/
3273 if (strided_load)
3275 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3276 /* Check if the chain of loads is already vectorized. */
3277 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
3279 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3280 return true;
3282 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3283 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3285 /* VEC_NUM is the number of vect stmts to be created for this group. */
3286 if (slp)
3288 strided_load = false;
3289 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3291 else
3292 vec_num = group_size;
3294 dr_chain = VEC_alloc (tree, heap, vec_num);
3296 else
3298 first_stmt = stmt;
3299 first_dr = dr;
3300 group_size = vec_num = 1;
3303 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
3304 gcc_assert (alignment_support_scheme);
3306 /* In case the vectorization factor (VF) is bigger than the number
3307 of elements that we can fit in a vectype (nunits), we have to generate
3308 more than one vector stmt - i.e - we need to "unroll" the
3309 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3310 from one copy of the vector stmt to the next, in the field
3311 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3312 stages to find the correct vector defs to be used when vectorizing
3313 stmts that use the defs of the current stmt. The example below illustrates
3314 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3315 4 vectorized stmts):
3317 before vectorization:
3318 RELATED_STMT VEC_STMT
3319 S1: x = memref - -
3320 S2: z = x + 1 - -
3322 step 1: vectorize stmt S1:
3323 We first create the vector stmt VS1_0, and, as usual, record a
3324 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
3325 Next, we create the vector stmt VS1_1, and record a pointer to
3326 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
3327 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
3328 stmts and pointers:
3329 RELATED_STMT VEC_STMT
3330 VS1_0: vx0 = memref0 VS1_1 -
3331 VS1_1: vx1 = memref1 VS1_2 -
3332 VS1_2: vx2 = memref2 VS1_3 -
3333 VS1_3: vx3 = memref3 - -
3334 S1: x = load - VS1_0
3335 S2: z = x + 1 - -
3337 See in documentation in vect_get_vec_def_for_stmt_copy for how the
3338 information we recorded in RELATED_STMT field is used to vectorize
3339 stmt S2. */
3341 /* In case of interleaving (non-unit strided access):
3343 S1: x2 = &base + 2
3344 S2: x0 = &base
3345 S3: x1 = &base + 1
3346 S4: x3 = &base + 3
3348 Vectorized loads are created in the order of memory accesses
3349 starting from the access of the first stmt of the chain:
3351 VS1: vx0 = &base
3352 VS2: vx1 = &base + vec_size*1
3353 VS3: vx3 = &base + vec_size*2
3354 VS4: vx4 = &base + vec_size*3
3356 Then permutation statements are generated:
3358 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
3359 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
3362 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3363 (the order of the data-refs in the output of vect_permute_load_chain
3364 corresponds to the order of scalar stmts in the interleaving chain - see
3365 the documentation of vect_permute_load_chain()).
3366 The generation of permutation stmts and recording them in
3367 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
3369 In case of both multiple types and interleaving, the vector loads and
3370 permutation stmts above are created for every copy. The result vector stmts
3371 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3372 STMT_VINFO_RELATED_STMT for the next copies. */
3374 /* If the data reference is aligned (dr_aligned) or potentially unaligned
3375 on a target that supports unaligned accesses (dr_unaligned_supported)
3376 we generate the following code:
3377 p = initial_addr;
3378 indx = 0;
3379 loop {
3380 p = p + indx * vectype_size;
3381 vec_dest = *(p);
3382 indx = indx + 1;
3385 Otherwise, the data reference is potentially unaligned on a target that
3386 does not support unaligned accesses (dr_explicit_realign_optimized) -
3387 then generate the following code, in which the data in each iteration is
3388 obtained by two vector loads, one from the previous iteration, and one
3389 from the current iteration:
3390 p1 = initial_addr;
3391 msq_init = *(floor(p1))
3392 p2 = initial_addr + VS - 1;
3393 realignment_token = call target_builtin;
3394 indx = 0;
3395 loop {
3396 p2 = p2 + indx * vectype_size
3397 lsq = *(floor(p2))
3398 vec_dest = realign_load (msq, lsq, realignment_token)
3399 indx = indx + 1;
3400 msq = lsq;
3401 } */
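/* In the scheme above, MSQ holds the aligned vector loaded for the previous
   iteration and LSQ the one loaded for the current iteration; REALIGN_LOAD
   combines the two, steered by REALIGNMENT_TOKEN, into the desired unaligned
   vector value.  */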
3403 /* If the misalignment remains the same throughout the execution of the
3404 loop, we can create the init_addr and permutation mask at the loop
3405 preheader. Otherwise, it needs to be created inside the loop.
3406 This can only occur when vectorizing memory accesses in the inner-loop
3407 nested within an outer-loop that is being vectorized. */
3409 if (nested_in_vect_loop_p (loop, stmt)
3410 && (TREE_INT_CST_LOW (DR_STEP (dr))
3411 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
3413 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
3414 compute_in_loop = true;
3417 if ((alignment_support_scheme == dr_explicit_realign_optimized
3418 || alignment_support_scheme == dr_explicit_realign)
3419 && !compute_in_loop)
3421 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
3422 alignment_support_scheme, NULL_TREE,
3423 &at_loop);
3424 if (alignment_support_scheme == dr_explicit_realign_optimized)
3426 phi = SSA_NAME_DEF_STMT (msq);
3427 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3430 else
3431 at_loop = loop;
3433 prev_stmt_info = NULL;
3434 for (j = 0; j < ncopies; j++)
3436 /* 1. Create the vector pointer update chain. */
3437 if (j == 0)
3438 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
3439 at_loop, offset,
3440 &dummy, &ptr_incr, false,
3441 &inv_p);
3442 else
3443 dataref_ptr =
3444 bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);
3446 for (i = 0; i < vec_num; i++)
3448 if (i > 0)
3449 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
3450 NULL_TREE);
3452 /* 2. Create the vector-load in the loop. */
3453 switch (alignment_support_scheme)
3455 case dr_aligned:
3456 gcc_assert (aligned_access_p (first_dr));
3457 data_ref = build_fold_indirect_ref (dataref_ptr);
3458 break;
3459 case dr_unaligned_supported:
3461 int mis = DR_MISALIGNMENT (first_dr);
3462 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
3464 tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
3465 data_ref =
3466 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
3467 break;
3469 case dr_explicit_realign:
3471 tree ptr, bump;
3472 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
3474 if (compute_in_loop)
3475 msq = vect_setup_realignment (first_stmt, gsi,
3476 &realignment_token,
3477 dr_explicit_realign,
3478 dataref_ptr, NULL);
3480 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3481 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3482 new_stmt = gimple_build_assign (vec_dest, data_ref);
3483 new_temp = make_ssa_name (vec_dest, new_stmt);
3484 gimple_assign_set_lhs (new_stmt, new_temp);
3485 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
3486 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
3487 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3488 msq = new_temp;
3490 bump = size_binop (MULT_EXPR, vs_minus_1,
3491 TYPE_SIZE_UNIT (scalar_type));
3492 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
3493 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
3494 break;
3496 case dr_explicit_realign_optimized:
3497 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
3498 break;
3499 default:
3500 gcc_unreachable ();
3502 /* If accesses through a pointer to vectype do not alias the original
3503 memory reference we have a problem. This should never happen. */
3504 gcc_assert (alias_sets_conflict_p (get_alias_set (data_ref),
3505 get_alias_set (gimple_assign_rhs1 (stmt))));
3506 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3507 new_stmt = gimple_build_assign (vec_dest, data_ref);
3508 new_temp = make_ssa_name (vec_dest, new_stmt);
3509 gimple_assign_set_lhs (new_stmt, new_temp);
3510 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3511 mark_symbols_for_renaming (new_stmt);
3513 /* 3. Handle explicit realignment if necessary/supported. Create in
3514 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
3515 if (alignment_support_scheme == dr_explicit_realign_optimized
3516 || alignment_support_scheme == dr_explicit_realign)
3518 tree tmp;
3520 lsq = gimple_assign_lhs (new_stmt);
3521 if (!realignment_token)
3522 realignment_token = dataref_ptr;
3523 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3524 tmp = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
3525 realignment_token);
3526 new_stmt = gimple_build_assign (vec_dest, tmp);
3527 new_temp = make_ssa_name (vec_dest, new_stmt);
3528 gimple_assign_set_lhs (new_stmt, new_temp);
3529 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3531 if (alignment_support_scheme == dr_explicit_realign_optimized)
3533 gcc_assert (phi);
3534 if (i == vec_num - 1 && j == ncopies - 1)
3535 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop));
3536 msq = lsq;
3540 /* 4. Handle invariant-load. */
3541 if (inv_p)
3543 gcc_assert (!strided_load);
3544 gcc_assert (nested_in_vect_loop_p (loop, stmt));
3545 if (j == 0)
3547 int k;
3548 tree t = NULL_TREE;
3549 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
3551 /* CHECKME: bitpos depends on endianness? */
3552 bitpos = bitsize_zero_node;
3553 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
3554 bitsize, bitpos);
3555 vec_dest =
3556 vect_create_destination_var (scalar_dest, NULL_TREE);
3557 new_stmt = gimple_build_assign (vec_dest, vec_inv);
3558 new_temp = make_ssa_name (vec_dest, new_stmt);
3559 gimple_assign_set_lhs (new_stmt, new_temp);
3560 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3562 for (k = nunits - 1; k >= 0; --k)
3563 t = tree_cons (NULL_TREE, new_temp, t);
3564 /* FIXME: use build_constructor directly. */
3565 vec_inv = build_constructor_from_list (vectype, t);
3566 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
3567 new_stmt = SSA_NAME_DEF_STMT (new_temp);
3569 else
3570 gcc_unreachable (); /* FORNOW. */
3573 /* Collect vector loads and later create their permutation in
3574 vect_transform_strided_load (). */
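/* Illustrative sketch (editor's example): for two interleaved loads
   x = a[2*i] and y = a[2*i + 1], the vectors collected in DR_CHAIN hold
   the elements in memory order ({a[0],a[1],a[2],a[3]}, ...), and
   vect_transform_strided_load later separates them with
   extract-even/extract-odd style permutations.  */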
3575 if (strided_load || slp_perm)
3576 VEC_quick_push (tree, dr_chain, new_temp);
3578 /* Store vector loads in the corresponding SLP_NODE. */
3579 if (slp && !slp_perm)
3580 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3583 if (slp && !slp_perm)
3584 continue;
3586 if (slp_perm)
3588 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi,
3589 LOOP_VINFO_VECT_FACTOR (loop_vinfo),
3590 slp_node_instance, false))
3592 VEC_free (tree, heap, dr_chain);
3593 return false;
3596 else
3598 if (strided_load)
3600 if (!vect_transform_strided_load (stmt, dr_chain, group_size, gsi))
3601 return false;
3603 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3604 VEC_free (tree, heap, dr_chain);
3605 dr_chain = VEC_alloc (tree, heap, group_size);
3607 else
3609 if (j == 0)
3610 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3611 else
3612 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3613 prev_stmt_info = vinfo_for_stmt (new_stmt);
3618 if (dr_chain)
3619 VEC_free (tree, heap, dr_chain);
3621 return true;
3624 /* Function vect_is_simple_cond.
3626 Input:
3627 LOOP - the loop that is being vectorized.
3628 COND - Condition that is checked for simple use.
3630 Returns whether a COND can be vectorized. Checks whether the
3631 condition operands are supportable using vect_is_simple_use. */
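/* For example (illustrative only): the condition 'a_5 > b_7' is simple
   when both operands are constants (INTEGER_CST/REAL_CST/FIXED_CST) or
   SSA names whose defining stmts can be analyzed by vect_is_simple_use
   below.  */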
3633 static bool
3634 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
3636 tree lhs, rhs;
3637 tree def;
3638 enum vect_def_type dt;
3640 if (!COMPARISON_CLASS_P (cond))
3641 return false;
3643 lhs = TREE_OPERAND (cond, 0);
3644 rhs = TREE_OPERAND (cond, 1);
3646 if (TREE_CODE (lhs) == SSA_NAME)
3648 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
3649 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
3650 return false;
3652 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
3653 && TREE_CODE (lhs) != FIXED_CST)
3654 return false;
3656 if (TREE_CODE (rhs) == SSA_NAME)
3658 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
3659 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
3660 return false;
3662 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
3663 && TREE_CODE (rhs) != FIXED_CST)
3664 return false;
3666 return true;
3669 /* vectorizable_condition.
3671 Check if STMT is a conditional modify expression that can be vectorized.
3672 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3673 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
3674 at BSI.
3676 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
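/* Illustrative example (editor's sketch, hypothetical names): the scalar
   statement
       x_1 = a_2 > b_3 ? c_4 : d_5;
   is replaced by a vector statement of the form
       vx_1 = VEC_COND_EXPR <va_2 > vb_3, vc_4, vd_5>;
   provided the target can expand VEC_COND_EXPR for the vector mode.  */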
3678 static bool
3679 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
3680 gimple *vec_stmt)
3682 tree scalar_dest = NULL_TREE;
3683 tree vec_dest = NULL_TREE;
3684 tree op = NULL_TREE;
3685 tree cond_expr, then_clause, else_clause;
3686 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3687 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3688 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
3689 tree vec_compare, vec_cond_expr;
3690 tree new_temp;
3691 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3692 enum machine_mode vec_mode;
3693 tree def;
3694 enum vect_def_type dt;
3695 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3696 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3697 enum tree_code code;
3699 gcc_assert (ncopies >= 1);
3700 if (ncopies > 1)
3701 return false; /* FORNOW */
3703 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3704 return false;
3706 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3707 return false;
3709 /* FORNOW: SLP not supported. */
3710 if (STMT_SLP_TYPE (stmt_info))
3711 return false;
3713 /* FORNOW: not yet supported. */
3714 if (STMT_VINFO_LIVE_P (stmt_info))
3716 if (vect_print_dump_info (REPORT_DETAILS))
3717 fprintf (vect_dump, "value used after loop.");
3718 return false;
3721 /* Is this a vectorizable conditional operation? */
3722 if (!is_gimple_assign (stmt))
3723 return false;
3725 code = gimple_assign_rhs_code (stmt);
3727 if (code != COND_EXPR)
3728 return false;
3730 gcc_assert (gimple_assign_single_p (stmt));
3731 op = gimple_assign_rhs1 (stmt);
3732 cond_expr = TREE_OPERAND (op, 0);
3733 then_clause = TREE_OPERAND (op, 1);
3734 else_clause = TREE_OPERAND (op, 2);
3736 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
3737 return false;
3739 /* We do not handle two different vector types for the condition
3740 and the values. */
3741 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
3742 return false;
3744 if (TREE_CODE (then_clause) == SSA_NAME)
3746 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
3747 if (!vect_is_simple_use (then_clause, loop_vinfo,
3748 &then_def_stmt, &def, &dt))
3749 return false;
3751 else if (TREE_CODE (then_clause) != INTEGER_CST
3752 && TREE_CODE (then_clause) != REAL_CST
3753 && TREE_CODE (then_clause) != FIXED_CST)
3754 return false;
3756 if (TREE_CODE (else_clause) == SSA_NAME)
3758 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
3759 if (!vect_is_simple_use (else_clause, loop_vinfo,
3760 &else_def_stmt, &def, &dt))
3761 return false;
3763 else if (TREE_CODE (else_clause) != INTEGER_CST
3764 && TREE_CODE (else_clause) != REAL_CST
3765 && TREE_CODE (else_clause) != FIXED_CST)
3766 return false;
3769 vec_mode = TYPE_MODE (vectype);
3771 if (!vec_stmt)
3773 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
3774 return expand_vec_cond_expr_p (op, vec_mode);
3777 /* Transform. */
3779 /* Handle def. */
3780 scalar_dest = gimple_assign_lhs (stmt);
3781 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3783 /* Handle cond expr. */
3784 vec_cond_lhs =
3785 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
3786 vec_cond_rhs =
3787 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
3788 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
3789 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
3791 /* Arguments are ready. Create the new vector stmt. */
3792 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
3793 vec_cond_lhs, vec_cond_rhs);
3794 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
3795 vec_compare, vec_then_clause, vec_else_clause);
3797 *vec_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
3798 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3799 gimple_assign_set_lhs (*vec_stmt, new_temp);
3800 vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
3802 return true;
3806 /* Function vect_analyze_operations.
3808 Scan the loop stmts and make sure they are all vectorizable. */
3810 bool
3811 vect_analyze_operations (loop_vec_info loop_vinfo)
3813 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3814 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3815 int nbbs = loop->num_nodes;
3816 gimple_stmt_iterator si;
3817 unsigned int vectorization_factor = 0;
3818 int i;
3819 bool ok;
3820 gimple phi;
3821 stmt_vec_info stmt_info;
3822 bool need_to_vectorize = false;
3823 int min_profitable_iters;
3824 int min_scalar_loop_bound;
3825 unsigned int th;
3826 bool only_slp_in_loop = true;
3828 if (vect_print_dump_info (REPORT_DETAILS))
3829 fprintf (vect_dump, "=== vect_analyze_operations ===");
3831 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
3832 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3834 for (i = 0; i < nbbs; i++)
3836 basic_block bb = bbs[i];
3838 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
3840 phi = gsi_stmt (si);
3841 ok = true;
3843 stmt_info = vinfo_for_stmt (phi);
3844 if (vect_print_dump_info (REPORT_DETAILS))
3846 fprintf (vect_dump, "examining phi: ");
3847 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
3850 if (! is_loop_header_bb_p (bb))
3852 /* Inner-loop loop-closed exit phi in outer-loop vectorization
3853 (i.e. a phi in the tail of the outer-loop).
3854 FORNOW: we currently don't support the case in which these phis
3855 are not used in the outer-loop, because that case would require
3856 us to actually do something here. */
3857 if (!STMT_VINFO_RELEVANT_P (stmt_info)
3858 || STMT_VINFO_LIVE_P (stmt_info))
3860 if (vect_print_dump_info (REPORT_DETAILS))
3861 fprintf (vect_dump,
3862 "Unsupported loop-closed phi in outer-loop.");
3863 return false;
3865 continue;
3868 gcc_assert (stmt_info);
3870 if (STMT_VINFO_LIVE_P (stmt_info))
3872 /* FORNOW: not yet supported. */
3873 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3874 fprintf (vect_dump, "not vectorized: value used after loop.");
3875 return false;
3878 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
3879 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
3881 /* A scalar-dependence cycle that we don't support. */
3882 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3883 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
3884 return false;
3887 if (STMT_VINFO_RELEVANT_P (stmt_info))
3889 need_to_vectorize = true;
3890 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
3891 ok = vectorizable_induction (phi, NULL, NULL);
3894 if (!ok)
3896 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3898 fprintf (vect_dump,
3899 "not vectorized: relevant phi not supported: ");
3900 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
3902 return false;
3906 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3908 gimple stmt = gsi_stmt (si);
3909 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3910 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
3912 if (vect_print_dump_info (REPORT_DETAILS))
3914 fprintf (vect_dump, "==> examining statement: ");
3915 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3918 gcc_assert (stmt_info);
3920 /* Skip stmts that do not need to be vectorized.
3921 These are expected to include:
3922 - the COND_EXPR which is the loop exit condition
3923 - any LABEL_EXPRs in the loop
3924 - computations that are used only for array indexing or loop
3925 control. */
3927 if (!STMT_VINFO_RELEVANT_P (stmt_info)
3928 && !STMT_VINFO_LIVE_P (stmt_info))
3930 if (vect_print_dump_info (REPORT_DETAILS))
3931 fprintf (vect_dump, "irrelevant.");
3932 continue;
3935 switch (STMT_VINFO_DEF_TYPE (stmt_info))
3937 case vect_loop_def:
3938 break;
3940 case vect_reduction_def:
3941 gcc_assert (relevance == vect_used_in_outer
3942 || relevance == vect_used_in_outer_by_reduction
3943 || relevance == vect_unused_in_loop);
3944 break;
3946 case vect_induction_def:
3947 case vect_constant_def:
3948 case vect_invariant_def:
3949 case vect_unknown_def_type:
3950 default:
3951 gcc_unreachable ();
3954 if (STMT_VINFO_RELEVANT_P (stmt_info))
3956 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
3957 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
3958 need_to_vectorize = true;
3961 ok = true;
3962 if (STMT_VINFO_RELEVANT_P (stmt_info)
3963 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
3964 ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
3965 || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
3966 || vectorizable_conversion (stmt, NULL, NULL, NULL)
3967 || vectorizable_operation (stmt, NULL, NULL, NULL)
3968 || vectorizable_assignment (stmt, NULL, NULL, NULL)
3969 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
3970 || vectorizable_call (stmt, NULL, NULL)
3971 || vectorizable_store (stmt, NULL, NULL, NULL)
3972 || vectorizable_condition (stmt, NULL, NULL)
3973 || vectorizable_reduction (stmt, NULL, NULL));
3975 if (!ok)
3977 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3979 fprintf (vect_dump, "not vectorized: relevant stmt not ");
3980 fprintf (vect_dump, "supported: ");
3981 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3983 return false;
3986 /* Stmts that are (also) "live" (i.e. used outside the loop)
3987 need extra handling, except for vectorizable reductions. */
3988 if (STMT_VINFO_LIVE_P (stmt_info)
3989 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
3990 ok = vectorizable_live_operation (stmt, NULL, NULL);
3992 if (!ok)
3994 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3996 fprintf (vect_dump, "not vectorized: live stmt not ");
3997 fprintf (vect_dump, "supported: ");
3998 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4000 return false;
4003 if (!PURE_SLP_STMT (stmt_info))
4005 /* STMT needs loop-based vectorization. */
4006 only_slp_in_loop = false;
4008 /* Groups of strided accesses whose size is not a power of 2 are
4009 not vectorizable yet using loop-vectorization. Therefore, if
4010 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
4011 both SLPed and loop-based vectorized), the loop cannot be
4012 vectorized. */
4013 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
4014 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
4015 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
4017 if (vect_print_dump_info (REPORT_DETAILS))
4019 fprintf (vect_dump, "not vectorized: the size of group "
4020 "of strided accesses is not a power of 2");
4021 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4023 return false;
4026 } /* stmts in bb */
4027 } /* bbs */
4029 /* All operations in the loop are either irrelevant (they deal with loop
4030 control, or are dead), or are only used outside the loop and can be moved
4031 out of the loop (e.g. invariants, inductions). The loop can be
4032 optimized away by scalar optimizations. We're better off not
4033 touching this loop. */
4034 if (!need_to_vectorize)
4036 if (vect_print_dump_info (REPORT_DETAILS))
4037 fprintf (vect_dump,
4038 "All the computation can be taken out of the loop.");
4039 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4040 fprintf (vect_dump,
4041 "not vectorized: redundant loop. no profit to vectorize.");
4042 return false;
4045 /* If all the stmts in the loop can be SLPed, we perform only SLP, and the
4046 vectorization factor of the loop is the unrolling factor required by the
4047 SLP instances. If that unrolling factor is 1, we say that we perform
4048 pure SLP on the loop - cross-iteration parallelism is not exploited. */
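/* E.g. (illustrative numbers): with a loop-based vectorization factor of 4
   and an SLP unrolling factor of 2, the combined factor is lcm (4, 2) = 4;
   with factors 4 and 3 it would be 12.  */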
4049 if (only_slp_in_loop)
4050 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
4051 else
4052 vectorization_factor = least_common_multiple (vectorization_factor,
4053 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
4055 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
4057 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4058 && vect_print_dump_info (REPORT_DETAILS))
4059 fprintf (vect_dump,
4060 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
4061 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
4063 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4064 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
4066 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4067 fprintf (vect_dump, "not vectorized: iteration count too small.");
4068 if (vect_print_dump_info (REPORT_DETAILS))
4069 fprintf (vect_dump, "not vectorized: iteration count smaller than "
4070 "vectorization factor.");
4071 return false;
4074 /* Analyze cost. Decide if it is worthwhile to vectorize. */
4076 /* Once VF is set, SLP costs should be updated since the number of created
4077 vector stmts depends on VF. */
4078 vect_update_slp_costs_according_to_vf (loop_vinfo);
4080 min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
4081 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
4083 if (min_profitable_iters < 0)
4085 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4086 fprintf (vect_dump, "not vectorized: vectorization not profitable.");
4087 if (vect_print_dump_info (REPORT_DETAILS))
4088 fprintf (vect_dump, "not vectorized: vector version will never be "
4089 "profitable.");
4090 return false;
4093 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
4094 * vectorization_factor) - 1);
4096 /* Use the cost model only if it is more conservative than the
4097 user-specified threshold. */
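/* E.g. (hypothetical numbers): with PARAM_MIN_VECT_LOOP_BOUND == 2 and a
   vectorization factor of 4, min_scalar_loop_bound is 2 * 4 - 1 = 7; if
   the cost model reports min_profitable_iters == 10, the threshold TH
   below becomes 10, the more conservative of the two.  */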
4099 th = (unsigned) min_scalar_loop_bound;
4100 if (min_profitable_iters
4101 && (!min_scalar_loop_bound
4102 || min_profitable_iters > min_scalar_loop_bound))
4103 th = (unsigned) min_profitable_iters;
4105 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4106 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
4108 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4109 fprintf (vect_dump, "not vectorized: vectorization not "
4110 "profitable.");
4111 if (vect_print_dump_info (REPORT_DETAILS))
4112 fprintf (vect_dump, "not vectorized: iteration count smaller than "
4113 "user specified loop bound parameter or minimum "
4114 "profitable iterations (whichever is more conservative).");
4115 return false;
4118 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4119 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
4120 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
4122 if (vect_print_dump_info (REPORT_DETAILS))
4123 fprintf (vect_dump, "epilog loop required.");
4124 if (!vect_can_advance_ivs_p (loop_vinfo))
4126 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4127 fprintf (vect_dump,
4128 "not vectorized: can't create epilog loop 1.");
4129 return false;
4131 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
4133 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4134 fprintf (vect_dump,
4135 "not vectorized: can't create epilog loop 2.");
4136 return false;
4140 return true;
4144 /* Function vect_transform_stmt.
4146 Create a vectorized stmt to replace STMT, and insert it at BSI. */
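/* For instance (illustrative, hypothetical names): when STMT_VINFO_TYPE is
   op_vec_info_type, a scalar statement 'c_1 = a_2 + b_3' is handed to
   vectorizable_operation below, which emits 'vc_1 = va_2 + vb_3' at GSI
   and this function then records it as the stmt's vectorized counterpart.  */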
4148 bool
4149 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
4150 bool *strided_store, slp_tree slp_node,
4151 slp_instance slp_node_instance)
4153 bool is_store = false;
4154 gimple vec_stmt = NULL;
4155 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4156 gimple orig_stmt_in_pattern;
4157 bool done;
4158 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4159 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4161 switch (STMT_VINFO_TYPE (stmt_info))
4163 case type_demotion_vec_info_type:
4164 done = vectorizable_type_demotion (stmt, gsi, &vec_stmt, slp_node);
4165 gcc_assert (done);
4166 break;
4168 case type_promotion_vec_info_type:
4169 done = vectorizable_type_promotion (stmt, gsi, &vec_stmt, slp_node);
4170 gcc_assert (done);
4171 break;
4173 case type_conversion_vec_info_type:
4174 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
4175 gcc_assert (done);
4176 break;
4178 case induc_vec_info_type:
4179 gcc_assert (!slp_node);
4180 done = vectorizable_induction (stmt, gsi, &vec_stmt);
4181 gcc_assert (done);
4182 break;
4184 case op_vec_info_type:
4185 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
4186 gcc_assert (done);
4187 break;
4189 case assignment_vec_info_type:
4190 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
4191 gcc_assert (done);
4192 break;
4194 case load_vec_info_type:
4195 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
4196 slp_node_instance);
4197 gcc_assert (done);
4198 break;
4200 case store_vec_info_type:
4201 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
4202 gcc_assert (done);
4203 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
4205 /* In case of interleaving, the whole chain is vectorized when the
4206 last store in the chain is reached. Store stmts before the last
4207 one are skipped, and their vec_stmt_info shouldn't be freed
4208 meanwhile. */
4209 *strided_store = true;
4210 if (STMT_VINFO_VEC_STMT (stmt_info))
4211 is_store = true;
4213 else
4214 is_store = true;
4215 break;
4217 case condition_vec_info_type:
4218 gcc_assert (!slp_node);
4219 done = vectorizable_condition (stmt, gsi, &vec_stmt);
4220 gcc_assert (done);
4221 break;
4223 case call_vec_info_type:
4224 gcc_assert (!slp_node);
4225 done = vectorizable_call (stmt, gsi, &vec_stmt);
4226 break;
4228 case reduc_vec_info_type:
4229 gcc_assert (!slp_node);
4230 done = vectorizable_reduction (stmt, gsi, &vec_stmt);
4231 gcc_assert (done);
4232 break;
4234 default:
4235 if (!STMT_VINFO_LIVE_P (stmt_info))
4237 if (vect_print_dump_info (REPORT_DETAILS))
4238 fprintf (vect_dump, "stmt not supported.");
4239 gcc_unreachable ();
4243 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
4244 is being vectorized, but outside the immediately enclosing loop. */
4245 if (vec_stmt
4246 && nested_in_vect_loop_p (loop, stmt)
4247 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
4248 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
4249 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer_by_reduction))
4251 struct loop *innerloop = loop->inner;
4252 imm_use_iterator imm_iter;
4253 use_operand_p use_p;
4254 tree scalar_dest;
4255 gimple exit_phi;
4257 if (vect_print_dump_info (REPORT_DETAILS))
4258 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
4260 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
4261 (to be used when vectorizing outer-loop stmts that use the DEF of
4262 STMT). */
4263 if (gimple_code (stmt) == GIMPLE_PHI)
4264 scalar_dest = PHI_RESULT (stmt);
4265 else
4266 scalar_dest = gimple_assign_lhs (stmt);
4268 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4270 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
4272 exit_phi = USE_STMT (use_p);
4273 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
4278 /* Handle stmts whose DEF is used outside the loop-nest that is
4279 being vectorized. */
4280 if (STMT_VINFO_LIVE_P (stmt_info)
4281 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
4283 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
4284 gcc_assert (done);
4287 if (vec_stmt)
4289 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
4290 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
4291 if (orig_stmt_in_pattern)
4293 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
4294 /* STMT was inserted by the vectorizer to replace a computation idiom.
4295 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
4296 computed this idiom. We need to record a pointer to VEC_STMT in
4297 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
4298 documentation of vect_pattern_recog. */
4299 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4301 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
4302 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
4307 return is_store;
4311 /* Remove a group of stores (for SLP or interleaving) and free their
4312 stmt_vec_info. */
4314 void
4315 vect_remove_stores (gimple first_stmt)
4317 gimple next = first_stmt;
4318 gimple tmp;
4319 gimple_stmt_iterator next_si;
4321 while (next)
4323 /* Free the attached stmt_vec_info and remove the stmt. */
4324 next_si = gsi_for_stmt (next);
4325 gsi_remove (&next_si, true);
4326 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
4327 free_stmt_vec_info (next);
4328 next = tmp;
4333 /* Function new_stmt_vec_info.
4335 Create and initialize a new stmt_vec_info struct for STMT. */
4337 stmt_vec_info
4338 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo)
4340 stmt_vec_info res;
4341 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
4343 STMT_VINFO_TYPE (res) = undef_vec_info_type;
4344 STMT_VINFO_STMT (res) = stmt;
4345 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
4346 STMT_VINFO_RELEVANT (res) = 0;
4347 STMT_VINFO_LIVE_P (res) = false;
4348 STMT_VINFO_VECTYPE (res) = NULL;
4349 STMT_VINFO_VEC_STMT (res) = NULL;
4350 STMT_VINFO_IN_PATTERN_P (res) = false;
4351 STMT_VINFO_RELATED_STMT (res) = NULL;
4352 STMT_VINFO_DATA_REF (res) = NULL;
4354 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
4355 STMT_VINFO_DR_OFFSET (res) = NULL;
4356 STMT_VINFO_DR_INIT (res) = NULL;
4357 STMT_VINFO_DR_STEP (res) = NULL;
4358 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
4360 if (gimple_code (stmt) == GIMPLE_PHI
4361 && is_loop_header_bb_p (gimple_bb (stmt)))
4362 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
4363 else
4364 STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
4365 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
4366 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
4367 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
4368 STMT_SLP_TYPE (res) = 0;
4369 DR_GROUP_FIRST_DR (res) = NULL;
4370 DR_GROUP_NEXT_DR (res) = NULL;
4371 DR_GROUP_SIZE (res) = 0;
4372 DR_GROUP_STORE_COUNT (res) = 0;
4373 DR_GROUP_GAP (res) = 0;
4374 DR_GROUP_SAME_DR_STMT (res) = NULL;
4375 DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;
4377 return res;
4381 /* Create the vector in which stmt_vec_info structs are stored. */
4383 void
4384 init_stmt_vec_info_vec (void)
4386 gcc_assert (!stmt_vec_info_vec);
4387 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
4391 /* Free the vector in which stmt_vec_info structs are stored. */
4393 void
4394 free_stmt_vec_info_vec (void)
4396 gcc_assert (stmt_vec_info_vec);
4397 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
4401 /* Free stmt vectorization related info. */
4403 void
4404 free_stmt_vec_info (gimple stmt)
4406 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4408 if (!stmt_info)
4409 return;
4411 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
4412 set_vinfo_for_stmt (stmt, NULL);
4413 free (stmt_info);
4417 /* Function get_vectype_for_scalar_type.
4419 Returns the vector type corresponding to SCALAR_TYPE as supported
4420 by the target. */
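/* E.g. (illustrative, target-dependent): on a target whose SIMD word is
   16 bytes, a 4-byte 'int' yields a vector type with 16 / 4 = 4 units
   (V4SImode), whereas a scalar as wide as the SIMD word yields no vector
   type at all.  */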
4422 tree
4423 get_vectype_for_scalar_type (tree scalar_type)
4425 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
4426 int nbytes = GET_MODE_SIZE (inner_mode);
4427 int nunits;
4428 tree vectype;
4430 if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
4431 return NULL_TREE;
4433 /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
4434 is expected. */
4435 nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;
4437 vectype = build_vector_type (scalar_type, nunits);
4438 if (vect_print_dump_info (REPORT_DETAILS))
4440 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
4441 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
4444 if (!vectype)
4445 return NULL_TREE;
4447 if (vect_print_dump_info (REPORT_DETAILS))
4449 fprintf (vect_dump, "vectype: ");
4450 print_generic_expr (vect_dump, vectype, TDF_SLIM);
4453 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4454 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
4456 if (vect_print_dump_info (REPORT_DETAILS))
4457 fprintf (vect_dump, "mode not supported by target.");
4458 return NULL_TREE;
4461 return vectype;
4464 /* Function vect_is_simple_use.
4466 Input:
4467 LOOP - the loop that is being vectorized.
4468 OPERAND - operand of a stmt in LOOP.
4469 DEF - the defining stmt in case OPERAND is an SSA_NAME.
4471 Returns whether a stmt with OPERAND can be vectorized.
4472 Supportable operands are constants, loop invariants, and operands that are
4473 defined by the current iteration of the loop. Unsupportable operands are
4474 those that are defined by a previous iteration of the loop (as is the case
4475 in reduction/induction computations). */
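/* For example (illustrative): the constant 5, a loop-invariant name n_1
   defined before the loop, and a name t_3 computed earlier in the current
   iteration are all simple uses; a value carried around the loop through
   a PHI, as in the reduction 'x_1 = PHI <x_0(preheader), x_2(latch)>',
   is the kind of operand described as unsupportable above.  */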
4477 bool
4478 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, gimple *def_stmt,
4479 tree *def, enum vect_def_type *dt)
4481 basic_block bb;
4482 stmt_vec_info stmt_vinfo;
4483 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4485 *def_stmt = NULL;
4486 *def = NULL_TREE;
4488 if (vect_print_dump_info (REPORT_DETAILS))
4490 fprintf (vect_dump, "vect_is_simple_use: operand ");
4491 print_generic_expr (vect_dump, operand, TDF_SLIM);
4494 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
4496 *dt = vect_constant_def;
4497 return true;
4499 if (is_gimple_min_invariant (operand))
4501 *def = operand;
4502 *dt = vect_invariant_def;
4503 return true;
4506 if (TREE_CODE (operand) == PAREN_EXPR)
4508 if (vect_print_dump_info (REPORT_DETAILS))
4509 fprintf (vect_dump, "non-associatable copy.");
4510 operand = TREE_OPERAND (operand, 0);
4512 if (TREE_CODE (operand) != SSA_NAME)
4514 if (vect_print_dump_info (REPORT_DETAILS))
4515 fprintf (vect_dump, "not ssa-name.");
4516 return false;
4519 *def_stmt = SSA_NAME_DEF_STMT (operand);
4520 if (*def_stmt == NULL)
4522 if (vect_print_dump_info (REPORT_DETAILS))
4523 fprintf (vect_dump, "no def_stmt.");
4524 return false;
4527 if (vect_print_dump_info (REPORT_DETAILS))
4529 fprintf (vect_dump, "def_stmt: ");
4530 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
4533 /* An empty stmt is expected only in the case of a function argument
4534 (otherwise we expect a phi_node or a GIMPLE_ASSIGN). */
4535 if (gimple_nop_p (*def_stmt))
4537 *def = operand;
4538 *dt = vect_invariant_def;
4539 return true;
4542 bb = gimple_bb (*def_stmt);
4543 if (!flow_bb_inside_loop_p (loop, bb))
4544 *dt = vect_invariant_def;
4545 else
4547 stmt_vinfo = vinfo_for_stmt (*def_stmt);
4548 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
4551 if (*dt == vect_unknown_def_type)
4553 if (vect_print_dump_info (REPORT_DETAILS))
4554 fprintf (vect_dump, "Unsupported pattern.");
4555 return false;
4558 if (vect_print_dump_info (REPORT_DETAILS))
4559 fprintf (vect_dump, "type of def: %d.", *dt);
4561 switch (gimple_code (*def_stmt))
4563 case GIMPLE_PHI:
4564 *def = gimple_phi_result (*def_stmt);
4565 break;
4567 case GIMPLE_ASSIGN:
4568 *def = gimple_assign_lhs (*def_stmt);
4569 break;
4571 case GIMPLE_CALL:
4572 *def = gimple_call_lhs (*def_stmt);
4573 if (*def != NULL)
4574 break;
4575 /* FALLTHRU */
4576 default:
4577 if (vect_print_dump_info (REPORT_DETAILS))
4578 fprintf (vect_dump, "unsupported defining stmt: ");
4579 return false;
4582 return true;
4586 /* Function supportable_widening_operation
4588 Check whether an operation represented by the code CODE is a
4589 widening operation that is supported by the target platform in
4590 vector form (i.e., when operating on arguments of type VECTYPE).
4592 Widening operations we currently support are NOP (CONVERT), FLOAT
4593 and WIDEN_MULT. This function checks if these operations are supported
4594 by the target platform either directly (via vector tree-codes), or via
4595 target builtins.
4597 Output:
4598 - CODE1 and CODE2 are codes of vector operations to be used when
4599 vectorizing the operation, if available.
4600 - DECL1 and DECL2 are decls of target builtin functions to be used
4601 when vectorizing the operation, if available. In this case,
4602 CODE1 and CODE2 are CALL_EXPR.
4603 - MULTI_STEP_CVT determines the number of required intermediate steps in
4604 case of multi-step conversion (like char->short->int - in that case
4605 MULTI_STEP_CVT will be 1).
4606 - INTERM_TYPES contains the intermediate type required to perform the
4607 widening operation (short in the above example). */
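/* Illustrative example (editor's sketch): widening a V8HI multiplication
   to int produces two V4SI result vectors, typically via the
   VEC_WIDEN_MULT_HI_EXPR / VEC_WIDEN_MULT_LO_EXPR pair chosen below, or
   via the target's mult_even/mult_odd builtins when the order of the
   results does not matter (see the reduction case below).  */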
4609 bool
4610 supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
4611 tree *decl1, tree *decl2,
4612 enum tree_code *code1, enum tree_code *code2,
4613 int *multi_step_cvt,
4614 VEC (tree, heap) **interm_types)
4616 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4617 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4618 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
4619 bool ordered_p;
4620 enum machine_mode vec_mode;
4621 enum insn_code icode1 = 0, icode2 = 0;
4622 optab optab1, optab2;
4623 tree type = gimple_expr_type (stmt);
4624 tree wide_vectype = get_vectype_for_scalar_type (type);
4625 enum tree_code c1, c2;
4627 /* The result of a vectorized widening operation usually requires two vectors
4628 (because the widened results do not fit in one vector). The generated
4629 vector results would normally be expected to be generated in the same
4630 order as in the original scalar computation, i.e. if 8 results are
4631 generated in each vector iteration, they are to be organized as follows:
4632 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
4634 However, in the special case that the result of the widening operation is
4635 used in a reduction computation only, the order doesn't matter (because
4636 when vectorizing a reduction we change the order of the computation).
4637 Some targets can take advantage of this and generate more efficient code.
4638 For example, targets like Altivec, that support widen_mult using a sequence
4639 of {mult_even,mult_odd} generate the following vectors:
4640 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
4642 When vectorizing outer-loops, we execute the inner-loop sequentially
4643 (each vectorized inner-loop iteration contributes to VF outer-loop
4644 iterations in parallel). We therefore don't allow changing the order
4645 of the computation in the inner-loop during outer-loop vectorization. */
4647 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
4648 && !nested_in_vect_loop_p (vect_loop, stmt))
4649 ordered_p = false;
4650 else
4651 ordered_p = true;
4653 if (!ordered_p
4654 && code == WIDEN_MULT_EXPR
4655 && targetm.vectorize.builtin_mul_widen_even
4656 && targetm.vectorize.builtin_mul_widen_even (vectype)
4657 && targetm.vectorize.builtin_mul_widen_odd
4658 && targetm.vectorize.builtin_mul_widen_odd (vectype))
4660 if (vect_print_dump_info (REPORT_DETAILS))
4661 fprintf (vect_dump, "Unordered widening operation detected.");
4663 *code1 = *code2 = CALL_EXPR;
4664 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
4665 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
4666 return true;
4669 switch (code)
4671 case WIDEN_MULT_EXPR:
4672 if (BYTES_BIG_ENDIAN)
4674 c1 = VEC_WIDEN_MULT_HI_EXPR;
4675 c2 = VEC_WIDEN_MULT_LO_EXPR;
4677 else
4679 c2 = VEC_WIDEN_MULT_HI_EXPR;
4680 c1 = VEC_WIDEN_MULT_LO_EXPR;
4682 break;
4684 CASE_CONVERT:
4685 if (BYTES_BIG_ENDIAN)
4687 c1 = VEC_UNPACK_HI_EXPR;
4688 c2 = VEC_UNPACK_LO_EXPR;
4690 else
4692 c2 = VEC_UNPACK_HI_EXPR;
4693 c1 = VEC_UNPACK_LO_EXPR;
4695 break;
4697 case FLOAT_EXPR:
4698 if (BYTES_BIG_ENDIAN)
4700 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
4701 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
4703 else
4705 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
4706 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
4708 break;
4710 case FIX_TRUNC_EXPR:
4711 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
4712 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
4713 computing the operation. */
4714 return false;
4716 default:
4717 gcc_unreachable ();
4720 if (code == FIX_TRUNC_EXPR)
4722 /* The signedness is determined from the output operand. */
4723 optab1 = optab_for_tree_code (c1, type, optab_default);
4724 optab2 = optab_for_tree_code (c2, type, optab_default);
4726 else
4728 optab1 = optab_for_tree_code (c1, vectype, optab_default);
4729 optab2 = optab_for_tree_code (c2, vectype, optab_default);
4732 if (!optab1 || !optab2)
4733 return false;
4735 vec_mode = TYPE_MODE (vectype);
4736 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
4737 || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
4738 == CODE_FOR_nothing)
4739 return false;
4741 /* Check if it's a multi-step conversion that can be done using intermediate
4742 types. */
4743 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
4744 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
4746 int i;
4747 tree prev_type = vectype, intermediate_type;
4748 enum machine_mode intermediate_mode, prev_mode = vec_mode;
4749 optab optab3, optab4;
4751 if (!CONVERT_EXPR_CODE_P (code))
4752 return false;
4754 *code1 = c1;
4755 *code2 = c2;
4757 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4758 intermediate steps in the promotion sequence. We try MAX_INTERM_CVT_STEPS
4759 to get to WIDE_VECTYPE, and fail if we do not. */
4760 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
4761 for (i = 0; i < 3; i++)
4763 intermediate_mode = insn_data[icode1].operand[0].mode;
4764 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4765 TYPE_UNSIGNED (prev_type));
4766 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
4767 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
4769 if (!optab3 || !optab4
4770 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
4771 == CODE_FOR_nothing
4772 || insn_data[icode1].operand[0].mode != intermediate_mode
4773 || (icode2 = optab2->handlers[(int) prev_mode].insn_code)
4774 == CODE_FOR_nothing
4775 || insn_data[icode2].operand[0].mode != intermediate_mode
4776 || (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
4777 == CODE_FOR_nothing
4778 || (icode2 = optab4->handlers[(int) intermediate_mode].insn_code)
4779 == CODE_FOR_nothing)
4780 return false;
4782 VEC_quick_push (tree, *interm_types, intermediate_type);
4783 (*multi_step_cvt)++;
4785 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
4786 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
4787 return true;
4789 prev_type = intermediate_type;
4790 prev_mode = intermediate_mode;
4793 return false;
4796 *code1 = c1;
4797 *code2 = c2;
4798 return true;
4802 /* Function supportable_narrowing_operation
4804 Check whether an operation represented by the code CODE is a
4805 narrowing operation that is supported by the target platform in
4806 vector form (i.e., when operating on arguments of type VECTYPE).
4808 Narrowing operations we currently support are NOP (CONVERT) and
4809 FIX_TRUNC. This function checks if these operations are supported by
4810 the target platform directly via vector tree-codes.
4812 Output:
4813 - CODE1 is the code of a vector operation to be used when
4814 vectorizing the operation, if available.
4815 - MULTI_STEP_CVT determines the number of required intermediate steps in
4816 case of multi-step conversion (like int->short->char - in that case
4817 MULTI_STEP_CVT will be 1).
4818 - INTERM_TYPES contains the intermediate type required to perform the
4819 narrowing operation (short in the above example). */
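/* Illustrative example (editor's sketch): narrowing int to char with V4SI
   operands needs one intermediate step - pairs of V4SI are first packed
   into V8HI (the intermediate 'short' vector type recorded in
   INTERM_TYPES), and pairs of V8HI into V16QI - so MULTI_STEP_CVT ends
   up as 1, matching the int->short->char example above.  */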
4821 bool
4822 supportable_narrowing_operation (enum tree_code code,
4823 const_gimple stmt, tree vectype,
4824 enum tree_code *code1, int *multi_step_cvt,
4825 VEC (tree, heap) **interm_types)
4827 enum machine_mode vec_mode;
4828 enum insn_code icode1;
4829 optab optab1, interm_optab;
4830 tree type = gimple_expr_type (stmt);
4831 tree narrow_vectype = get_vectype_for_scalar_type (type);
4832 enum tree_code c1;
4833 tree intermediate_type, prev_type;
4834 int i;
4836 switch (code)
4838 CASE_CONVERT:
4839 c1 = VEC_PACK_TRUNC_EXPR;
4840 break;
4842 case FIX_TRUNC_EXPR:
4843 c1 = VEC_PACK_FIX_TRUNC_EXPR;
4844 break;
4846 case FLOAT_EXPR:
4847 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
4848 tree code and optabs used for computing the operation. */
4849 return false;
4851 default:
4852 gcc_unreachable ();
4855 if (code == FIX_TRUNC_EXPR)
4856 /* The signedness is determined from the output operand. */
4857 optab1 = optab_for_tree_code (c1, type, optab_default);
4858 else
4859 optab1 = optab_for_tree_code (c1, vectype, optab_default);
4861 if (!optab1)
4862 return false;
4864 vec_mode = TYPE_MODE (vectype);
4865 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code)
4866 == CODE_FOR_nothing)
4867 return false;
4869 /* Check if it's a multi-step conversion that can be done using intermediate
4870 types. */
4871 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
4873 enum machine_mode intermediate_mode, prev_mode = vec_mode;
4875 *code1 = c1;
4876 prev_type = vectype;
4877 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
4878 intermediate steps in the demotion sequence. We try MAX_INTERM_CVT_STEPS
4879 to get to NARROW_VECTYPE, and fail if we do not. */
4880 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
4881 for (i = 0; i < 3; i++)
4883 intermediate_mode = insn_data[icode1].operand[0].mode;
4884 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
4885 TYPE_UNSIGNED (prev_type));
4886 interm_optab = optab_for_tree_code (c1, intermediate_type,
4887 optab_default);
4888 if (!interm_optab
4889 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
4890 == CODE_FOR_nothing
4891 || insn_data[icode1].operand[0].mode != intermediate_mode
4892 || (icode1
4893 = interm_optab->handlers[(int) intermediate_mode].insn_code)
4894 == CODE_FOR_nothing)
4895 return false;
4897 VEC_quick_push (tree, *interm_types, intermediate_type);
4898 (*multi_step_cvt)++;
4900 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
4901 return true;
4903 prev_type = intermediate_type;
4904 prev_mode = intermediate_mode;
4907 return false;
4910 *code1 = c1;
4911 return true;