PR c++/54021
[official-gcc.git] / gcc / tree-vect-stmts.c
blob bb42cbca57925354d1b4231f2c799240006d3c65
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
33 #include "cfgloop.h"
34 #include "expr.h"
35 #include "recog.h" /* FIXME: for insn_data */
36 #include "optabs.h"
37 #include "diagnostic-core.h"
38 #include "tree-vectorizer.h"
39 #include "dumpfile.h"
41 /* For lang_hooks.types.type_for_mode. */
42 #include "langhooks.h"
44 /* Return the vectorized type for the given statement. */
46 tree
47 stmt_vectype (struct _stmt_vec_info *stmt_info)
49 return STMT_VINFO_VECTYPE (stmt_info);
52 /* Return TRUE iff the given statement is in an inner loop relative to
53 the loop being vectorized. */
54 bool
55 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
57 gimple stmt = STMT_VINFO_STMT (stmt_info);
58 basic_block bb = gimple_bb (stmt);
59 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
60 struct loop* loop;
62 if (!loop_vinfo)
63 return false;
65 loop = LOOP_VINFO_LOOP (loop_vinfo);
67 return (bb->loop_father == loop->inner);
70 /* Record the cost of a statement, either by directly informing the
71 target model or by saving it in a vector for later processing.
72 Return a preliminary estimate of the statement's cost. */
74 unsigned
75 record_stmt_cost (stmt_vector_for_cost *stmt_cost_vec, int count,
76 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
77 int misalign)
79 if (stmt_cost_vec)
81 tree vectype = stmt_vectype (stmt_info);
82 add_stmt_info_to_vec (stmt_cost_vec, count, kind,
83 STMT_VINFO_STMT (stmt_info), misalign);
84 return (unsigned)
85 (targetm.vectorize.builtin_vectorization_cost (kind, vectype, misalign)
86 * count);
89 else
91 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
92 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
93 void *target_cost_data;
95 if (loop_vinfo)
96 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
97 else
98 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
100 return add_stmt_cost (target_cost_data, count, kind, stmt_info, misalign);
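/* Usage note: when a cost vector is supplied the statement is only
   recorded, and the value returned is the preliminary estimate
   COUNT * targetm.vectorize.builtin_vectorization_cost (KIND, vectype,
   MISALIGN); without a cost vector the target cost model is updated
   immediately through add_stmt_cost.  */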
104 /* Return a variable of type ELEM_TYPE[NELEMS]. */
106 static tree
107 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
109 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
110 "vect_array");
113 /* ARRAY is an array of vectors created by create_vector_array.
114 Return an SSA_NAME for the vector in index N. The reference
115 is part of the vectorization of STMT and the vector is associated
116 with scalar destination SCALAR_DEST. */
118 static tree
119 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
120 tree array, unsigned HOST_WIDE_INT n)
122 tree vect_type, vect, vect_name, array_ref;
123 gimple new_stmt;
125 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
126 vect_type = TREE_TYPE (TREE_TYPE (array));
127 vect = vect_create_destination_var (scalar_dest, vect_type);
128 array_ref = build4 (ARRAY_REF, vect_type, array,
129 build_int_cst (size_type_node, n),
130 NULL_TREE, NULL_TREE);
132 new_stmt = gimple_build_assign (vect, array_ref);
133 vect_name = make_ssa_name (vect, new_stmt);
134 gimple_assign_set_lhs (new_stmt, vect_name);
135 vect_finish_stmt_generation (stmt, new_stmt, gsi);
137 return vect_name;
140 /* ARRAY is an array of vectors created by create_vector_array.
141 Emit code to store SSA_NAME VECT in index N of the array.
142 The store is part of the vectorization of STMT. */
144 static void
145 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
146 tree array, unsigned HOST_WIDE_INT n)
148 tree array_ref;
149 gimple new_stmt;
151 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
152 build_int_cst (size_type_node, n),
153 NULL_TREE, NULL_TREE);
155 new_stmt = gimple_build_assign (array_ref, vect);
156 vect_finish_stmt_generation (stmt, new_stmt, gsi);
159 /* PTR is a pointer to an array of type TYPE. Return a representation
160 of *PTR. The memory reference replaces those in FIRST_DR
161 (and its group). */
163 static tree
164 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
166 tree mem_ref, alias_ptr_type;
168 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
169 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
170 /* Arrays have the same alignment as their type. */
171 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
172 return mem_ref;
175 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
177 /* Function vect_mark_relevant.
179 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
181 static void
182 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
183 enum vect_relevant relevant, bool live_p,
184 bool used_in_pattern)
186 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
187 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
188 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
189 gimple pattern_stmt;
191 if (vect_print_dump_info (REPORT_DETAILS))
192 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
194 /* If this stmt is an original stmt in a pattern, we might need to mark its
195 related pattern stmt instead of the original stmt. However, such stmts
 196    may have their own uses that are not in any pattern; in such cases the
 197    stmt itself should be marked.  */
198 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
200 bool found = false;
201 if (!used_in_pattern)
203 imm_use_iterator imm_iter;
204 use_operand_p use_p;
205 gimple use_stmt;
206 tree lhs;
207 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
208 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
210 if (is_gimple_assign (stmt))
211 lhs = gimple_assign_lhs (stmt);
212 else
213 lhs = gimple_call_lhs (stmt);
 215          /* This use is outside the pattern.  If LHS has other uses that are
 216             pattern uses, we should mark the stmt itself, and not the pattern
 217             stmt.  */
218 if (TREE_CODE (lhs) == SSA_NAME)
219 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
221 if (is_gimple_debug (USE_STMT (use_p)))
222 continue;
223 use_stmt = USE_STMT (use_p);
225 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
226 continue;
228 if (vinfo_for_stmt (use_stmt)
229 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
231 found = true;
232 break;
237 if (!found)
239 /* This is the last stmt in a sequence that was detected as a
240 pattern that can potentially be vectorized. Don't mark the stmt
241 as relevant/live because it's not going to be vectorized.
242 Instead mark the pattern-stmt that replaces it. */
244 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
246 if (vect_print_dump_info (REPORT_DETAILS))
247 fprintf (vect_dump, "last stmt in pattern. don't mark"
248 " relevant/live.");
249 stmt_info = vinfo_for_stmt (pattern_stmt);
250 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
251 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
252 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
253 stmt = pattern_stmt;
257 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
258 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
259 STMT_VINFO_RELEVANT (stmt_info) = relevant;
261 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
262 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
264 if (vect_print_dump_info (REPORT_DETAILS))
265 fprintf (vect_dump, "already marked relevant/live.");
266 return;
269 VEC_safe_push (gimple, heap, *worklist, stmt);
273 /* Function vect_stmt_relevant_p.
275 Return true if STMT in loop that is represented by LOOP_VINFO is
276 "relevant for vectorization".
278 A stmt is considered "relevant for vectorization" if:
279 - it has uses outside the loop.
280 - it has vdefs (it alters memory).
281 - control stmts in the loop (except for the exit condition).
283 CHECKME: what other side effects would the vectorizer allow? */
285 static bool
286 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
287 enum vect_relevant *relevant, bool *live_p)
289 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
290 ssa_op_iter op_iter;
291 imm_use_iterator imm_iter;
292 use_operand_p use_p;
293 def_operand_p def_p;
295 *relevant = vect_unused_in_scope;
296 *live_p = false;
298 /* cond stmt other than loop exit cond. */
299 if (is_ctrl_stmt (stmt)
300 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
301 != loop_exit_ctrl_vec_info_type)
302 *relevant = vect_used_in_scope;
304 /* changing memory. */
305 if (gimple_code (stmt) != GIMPLE_PHI)
306 if (gimple_vdef (stmt))
308 if (vect_print_dump_info (REPORT_DETAILS))
309 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
310 *relevant = vect_used_in_scope;
313 /* uses outside the loop. */
314 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
316 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
318 basic_block bb = gimple_bb (USE_STMT (use_p));
319 if (!flow_bb_inside_loop_p (loop, bb))
321 if (vect_print_dump_info (REPORT_DETAILS))
322 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
324 if (is_gimple_debug (USE_STMT (use_p)))
325 continue;
327 /* We expect all such uses to be in the loop exit phis
 328              (because of loop closed form).  */
329 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
330 gcc_assert (bb == single_exit (loop)->dest);
332 *live_p = true;
337 return (*live_p || *relevant);
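/* Illustrative example: in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + c;     <-- alters memory (has a vdef): relevant.
         s_1 = s_0 + b[i];    <-- S_1 is used after the loop: live.
       }
     ... = s_1;

   whereas the loop exit condition itself is handled separately and
   stays vect_unused_in_scope here.  */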
341 /* Function exist_non_indexing_operands_for_use_p
343 USE is one of the uses attached to STMT. Check if USE is
344 used in STMT for anything other than indexing an array. */
346 static bool
347 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
349 tree operand;
350 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
352 /* USE corresponds to some operand in STMT. If there is no data
353 reference in STMT, then any operand that corresponds to USE
354 is not indexing an array. */
355 if (!STMT_VINFO_DATA_REF (stmt_info))
356 return true;
 358   /* STMT has a data_ref.  FORNOW this means that it's of one of
359 the following forms:
360 -1- ARRAY_REF = var
361 -2- var = ARRAY_REF
362 (This should have been verified in analyze_data_refs).
364 'var' in the second case corresponds to a def, not a use,
365 so USE cannot correspond to any operands that are not used
366 for array indexing.
368 Therefore, all we need to check is if STMT falls into the
369 first case, and whether var corresponds to USE. */
371 if (!gimple_assign_copy_p (stmt))
372 return false;
373 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
374 return false;
375 operand = gimple_assign_rhs1 (stmt);
376 if (TREE_CODE (operand) != SSA_NAME)
377 return false;
379 if (operand == use)
380 return true;
382 return false;
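/* Illustrative example: for the store "a[i_1] = x_2" the use X_2 is the
   stored value and therefore a non-indexing operand, while the use I_1
   only feeds the address computation; for the load "x_3 = a[i_1]" the
   only use, I_1, is again purely for array indexing.  */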
 387 /* Function process_use.
389 Inputs:
390 - a USE in STMT in a loop represented by LOOP_VINFO
391 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
392 that defined USE. This is done by calling mark_relevant and passing it
393 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
394 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
395 be performed.
397 Outputs:
398 Generally, LIVE_P and RELEVANT are used to define the liveness and
399 relevance info of the DEF_STMT of this USE:
400 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
401 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
402 Exceptions:
403 - case 1: If USE is used only for address computations (e.g. array indexing),
404 which does not need to be directly vectorized, then the liveness/relevance
405 of the respective DEF_STMT is left unchanged.
406 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 407    skip DEF_STMT because it has already been processed.
408 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
409 be modified accordingly.
411 Return true if everything is as expected. Return false otherwise. */
413 static bool
414 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
415 enum vect_relevant relevant, VEC(gimple,heap) **worklist,
416 bool force)
418 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
419 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
420 stmt_vec_info dstmt_vinfo;
421 basic_block bb, def_bb;
422 tree def;
423 gimple def_stmt;
424 enum vect_def_type dt;
426 /* case 1: we are only interested in uses that need to be vectorized. Uses
427 that are used for address computation are not considered relevant. */
428 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
429 return true;
431 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
433 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
434 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
435 return false;
438 if (!def_stmt || gimple_nop_p (def_stmt))
439 return true;
441 def_bb = gimple_bb (def_stmt);
442 if (!flow_bb_inside_loop_p (loop, def_bb))
444 if (vect_print_dump_info (REPORT_DETAILS))
445 fprintf (vect_dump, "def_stmt is out of loop.");
446 return true;
449 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
450 DEF_STMT must have already been processed, because this should be the
451 only way that STMT, which is a reduction-phi, was put in the worklist,
452 as there should be no other uses for DEF_STMT in the loop. So we just
453 check that everything is as expected, and we are done. */
454 dstmt_vinfo = vinfo_for_stmt (def_stmt);
455 bb = gimple_bb (stmt);
456 if (gimple_code (stmt) == GIMPLE_PHI
457 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
458 && gimple_code (def_stmt) != GIMPLE_PHI
459 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
460 && bb->loop_father == def_bb->loop_father)
462 if (vect_print_dump_info (REPORT_DETAILS))
463 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
464 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
465 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
466 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
467 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
468 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
469 return true;
472 /* case 3a: outer-loop stmt defining an inner-loop stmt:
473 outer-loop-header-bb:
474 d = def_stmt
475 inner-loop:
476 stmt # use (d)
477 outer-loop-tail-bb:
478 ... */
479 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
481 if (vect_print_dump_info (REPORT_DETAILS))
482 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
484 switch (relevant)
486 case vect_unused_in_scope:
487 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
488 vect_used_in_scope : vect_unused_in_scope;
489 break;
491 case vect_used_in_outer_by_reduction:
492 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
493 relevant = vect_used_by_reduction;
494 break;
496 case vect_used_in_outer:
497 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
498 relevant = vect_used_in_scope;
499 break;
501 case vect_used_in_scope:
502 break;
504 default:
505 gcc_unreachable ();
509 /* case 3b: inner-loop stmt defining an outer-loop stmt:
510 outer-loop-header-bb:
512 inner-loop:
513 d = def_stmt
514 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
515 stmt # use (d) */
516 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
518 if (vect_print_dump_info (REPORT_DETAILS))
519 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
521 switch (relevant)
523 case vect_unused_in_scope:
524 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
525 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
526 vect_used_in_outer_by_reduction : vect_unused_in_scope;
527 break;
529 case vect_used_by_reduction:
530 relevant = vect_used_in_outer_by_reduction;
531 break;
533 case vect_used_in_scope:
534 relevant = vect_used_in_outer;
535 break;
537 default:
538 gcc_unreachable ();
542 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
543 is_pattern_stmt_p (stmt_vinfo));
544 return true;
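/* Illustrative example for case 3a: with

     outer-loop-header-bb:
       d_1 = ...
     inner-loop:
       x_2 = d_1 + 1      <-- STMT, marked vect_used_in_outer

   the relevance passed for the def-stmt of D_1 becomes
   vect_used_in_scope before it is added to the worklist; case 3b makes
   the symmetric adjustment (vect_used_in_scope becomes
   vect_used_in_outer) when the def is in the inner loop and the use in
   the outer loop.  */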
548 /* Function vect_mark_stmts_to_be_vectorized.
550 Not all stmts in the loop need to be vectorized. For example:
552 for i...
553 for j...
554 1. T0 = i + j
555 2. T1 = a[T0]
557 3. j = j + 1
 559    Stmts 1 and 3 do not need to be vectorized, because loop control and
560 addressing of vectorized data-refs are handled differently.
562 This pass detects such stmts. */
564 bool
565 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
567 VEC(gimple,heap) *worklist;
568 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
569 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
570 unsigned int nbbs = loop->num_nodes;
571 gimple_stmt_iterator si;
572 gimple stmt;
573 unsigned int i;
574 stmt_vec_info stmt_vinfo;
575 basic_block bb;
576 gimple phi;
577 bool live_p;
578 enum vect_relevant relevant, tmp_relevant;
579 enum vect_def_type def_type;
581 if (vect_print_dump_info (REPORT_DETAILS))
582 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
584 worklist = VEC_alloc (gimple, heap, 64);
586 /* 1. Init worklist. */
587 for (i = 0; i < nbbs; i++)
589 bb = bbs[i];
590 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
592 phi = gsi_stmt (si);
593 if (vect_print_dump_info (REPORT_DETAILS))
595 fprintf (vect_dump, "init: phi relevant? ");
596 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
599 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
600 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
602 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
604 stmt = gsi_stmt (si);
605 if (vect_print_dump_info (REPORT_DETAILS))
607 fprintf (vect_dump, "init: stmt relevant? ");
608 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
611 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
612 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
616 /* 2. Process_worklist */
617 while (VEC_length (gimple, worklist) > 0)
619 use_operand_p use_p;
620 ssa_op_iter iter;
622 stmt = VEC_pop (gimple, worklist);
623 if (vect_print_dump_info (REPORT_DETAILS))
625 fprintf (vect_dump, "worklist: examine stmt: ");
626 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
629 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
630 (DEF_STMT) as relevant/irrelevant and live/dead according to the
631 liveness and relevance properties of STMT. */
632 stmt_vinfo = vinfo_for_stmt (stmt);
633 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
634 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
636 /* Generally, the liveness and relevance properties of STMT are
637 propagated as is to the DEF_STMTs of its USEs:
638 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
639 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
641 One exception is when STMT has been identified as defining a reduction
642 variable; in this case we set the liveness/relevance as follows:
643 live_p = false
644 relevant = vect_used_by_reduction
645 This is because we distinguish between two kinds of relevant stmts -
646 those that are used by a reduction computation, and those that are
647 (also) used by a regular computation. This allows us later on to
648 identify stmts that are used solely by a reduction, and therefore the
649 order of the results that they produce does not have to be kept. */
651 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
652 tmp_relevant = relevant;
653 switch (def_type)
655 case vect_reduction_def:
656 switch (tmp_relevant)
658 case vect_unused_in_scope:
659 relevant = vect_used_by_reduction;
660 break;
662 case vect_used_by_reduction:
663 if (gimple_code (stmt) == GIMPLE_PHI)
664 break;
665 /* fall through */
667 default:
668 if (vect_print_dump_info (REPORT_DETAILS))
669 fprintf (vect_dump, "unsupported use of reduction.");
671 VEC_free (gimple, heap, worklist);
672 return false;
675 live_p = false;
676 break;
678 case vect_nested_cycle:
679 if (tmp_relevant != vect_unused_in_scope
680 && tmp_relevant != vect_used_in_outer_by_reduction
681 && tmp_relevant != vect_used_in_outer)
683 if (vect_print_dump_info (REPORT_DETAILS))
684 fprintf (vect_dump, "unsupported use of nested cycle.");
686 VEC_free (gimple, heap, worklist);
687 return false;
690 live_p = false;
691 break;
693 case vect_double_reduction_def:
694 if (tmp_relevant != vect_unused_in_scope
695 && tmp_relevant != vect_used_by_reduction)
697 if (vect_print_dump_info (REPORT_DETAILS))
698 fprintf (vect_dump, "unsupported use of double reduction.");
700 VEC_free (gimple, heap, worklist);
701 return false;
704 live_p = false;
705 break;
707 default:
708 break;
711 if (is_pattern_stmt_p (stmt_vinfo))
713 /* Pattern statements are not inserted into the code, so
714 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
715 have to scan the RHS or function arguments instead. */
716 if (is_gimple_assign (stmt))
718 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
719 tree op = gimple_assign_rhs1 (stmt);
721 i = 1;
722 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
724 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
725 live_p, relevant, &worklist, false)
726 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
727 live_p, relevant, &worklist, false))
729 VEC_free (gimple, heap, worklist);
730 return false;
732 i = 2;
734 for (; i < gimple_num_ops (stmt); i++)
736 op = gimple_op (stmt, i);
737 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
738 &worklist, false))
740 VEC_free (gimple, heap, worklist);
741 return false;
745 else if (is_gimple_call (stmt))
747 for (i = 0; i < gimple_call_num_args (stmt); i++)
749 tree arg = gimple_call_arg (stmt, i);
750 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
751 &worklist, false))
753 VEC_free (gimple, heap, worklist);
754 return false;
759 else
760 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
762 tree op = USE_FROM_PTR (use_p);
763 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
764 &worklist, false))
766 VEC_free (gimple, heap, worklist);
767 return false;
771 if (STMT_VINFO_GATHER_P (stmt_vinfo))
773 tree off;
774 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
775 gcc_assert (decl);
776 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
777 &worklist, true))
779 VEC_free (gimple, heap, worklist);
780 return false;
783 } /* while worklist */
785 VEC_free (gimple, heap, worklist);
786 return true;
790 /* Function vect_model_simple_cost.
792 Models cost for simple operations, i.e. those that only emit ncopies of a
793 single op. Right now, this does not account for multiple insns that could
794 be generated for the single vector op. We will handle that shortly. */
796 void
797 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
798 enum vect_def_type *dt, slp_tree slp_node,
799 stmt_vector_for_cost *stmt_cost_vec)
801 int i;
802 int inside_cost = 0, outside_cost = 0;
804 /* The SLP costs were already calculated during SLP tree build. */
805 if (PURE_SLP_STMT (stmt_info))
806 return;
808 /* FORNOW: Assuming maximum 2 args per stmts. */
809 for (i = 0; i < 2; i++)
811 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
812 outside_cost += vect_get_stmt_cost (vector_stmt);
815 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
816 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
818 /* Pass the inside-of-loop statements to the target-specific cost model. */
819 inside_cost = record_stmt_cost (stmt_cost_vec, ncopies, vector_stmt,
820 stmt_info, 0);
822 if (vect_print_dump_info (REPORT_COST))
823 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
824 "outside_cost = %d .", inside_cost, outside_cost);
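/* Worked example: for NCOPIES == 2 with one constant operand
   (dt[0] == vect_constant_def) the loop above charges one vector_stmt
   to the outside cost for building the invariant vector, while
   record_stmt_cost charges two vector_stmt copies to the
   inside-of-loop cost.  */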
828 /* Model cost for type demotion and promotion operations. PWR is normally
829 zero for single-step promotions and demotions. It will be one if
830 two-step promotion/demotion is required, and so on. Each additional
831 step doubles the number of instructions required. */
833 static void
834 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
835 enum vect_def_type *dt, int pwr)
837 int i, tmp;
838 int inside_cost = 0, outside_cost = 0;
839 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
840 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
841 void *target_cost_data;
843 /* The SLP costs were already calculated during SLP tree build. */
844 if (PURE_SLP_STMT (stmt_info))
845 return;
847 if (loop_vinfo)
848 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
849 else
850 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
852 for (i = 0; i < pwr + 1; i++)
854 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
855 (i + 1) : i;
856 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
857 vec_promote_demote, stmt_info, 0);
860 /* FORNOW: Assuming maximum 2 args per stmts. */
861 for (i = 0; i < 2; i++)
863 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
864 outside_cost += vect_get_stmt_cost (vector_stmt);
867 if (vect_print_dump_info (REPORT_COST))
868 fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
869 "outside_cost = %d .", inside_cost, outside_cost);
871 /* Set the costs in STMT_INFO. */
872 stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
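/* Worked example (assuming vect_pow2 (x) == 2**x): a two-step
   promotion has PWR == 1, so the loop above charges
   vect_pow2 (1) + vect_pow2 (2) == 2 + 4 == 6 vec_promote_demote
   operations to the inside cost; the corresponding two-step demotion
   charges vect_pow2 (0) + vect_pow2 (1) == 1 + 2 == 3.  */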
875 /* Function vect_cost_group_size
877 For grouped load or store, return the group_size only if it is the first
878 load or store of a group, else return 1. This ensures that group size is
879 only returned once per group. */
881 static int
882 vect_cost_group_size (stmt_vec_info stmt_info)
884 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
886 if (first_stmt == STMT_VINFO_STMT (stmt_info))
887 return GROUP_SIZE (stmt_info);
889 return 1;
893 /* Function vect_model_store_cost
895 Models cost for stores. In the case of grouped accesses, one access
896 has the overhead of the grouped access attributed to it. */
898 void
899 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
900 bool store_lanes_p, enum vect_def_type dt,
901 slp_tree slp_node, stmt_vector_for_cost *stmt_cost_vec)
903 int group_size;
904 unsigned int inside_cost = 0, outside_cost = 0;
905 struct data_reference *first_dr;
906 gimple first_stmt;
908 /* The SLP costs were already calculated during SLP tree build. */
909 if (PURE_SLP_STMT (stmt_info))
910 return;
912 if (dt == vect_constant_def || dt == vect_external_def)
913 outside_cost = vect_get_stmt_cost (scalar_to_vec);
915 /* Grouped access? */
916 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
918 if (slp_node)
920 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
921 group_size = 1;
923 else
925 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
926 group_size = vect_cost_group_size (stmt_info);
929 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
931 /* Not a grouped access. */
932 else
934 group_size = 1;
935 first_dr = STMT_VINFO_DATA_REF (stmt_info);
938 /* We assume that the cost of a single store-lanes instruction is
939 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
940 access is instead being provided by a permute-and-store operation,
941 include the cost of the permutes. */
942 if (!store_lanes_p && group_size > 1)
944 /* Uses a high and low interleave operation for each needed permute. */
946 int nstmts = ncopies * exact_log2 (group_size) * group_size;
947 inside_cost = record_stmt_cost (stmt_cost_vec, nstmts, vec_perm,
948 stmt_info, 0);
950 if (vect_print_dump_info (REPORT_COST))
951 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
952 group_size);
955 /* Costs of the stores. */
956 vect_get_store_cost (first_dr, ncopies, &inside_cost, stmt_cost_vec);
958 if (vect_print_dump_info (REPORT_COST))
959 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
960 "outside_cost = %d .", inside_cost, outside_cost);
962 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
963 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
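/* Worked example: an interleaved store group with GROUP_SIZE == 4 and
   NCOPIES == 1 that cannot use store-lanes needs
   NCOPIES * exact_log2 (GROUP_SIZE) * GROUP_SIZE == 1 * 2 * 4 == 8
   vec_perm operations in addition to the stores themselves.  */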
967 /* Calculate cost of DR's memory access. */
968 void
969 vect_get_store_cost (struct data_reference *dr, int ncopies,
970 unsigned int *inside_cost,
971 stmt_vector_for_cost *stmt_cost_vec)
973 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
974 gimple stmt = DR_STMT (dr);
975 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
977 switch (alignment_support_scheme)
979 case dr_aligned:
981 *inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
982 vector_store, stmt_info, 0);
984 if (vect_print_dump_info (REPORT_COST))
985 fprintf (vect_dump, "vect_model_store_cost: aligned.");
987 break;
990 case dr_unaligned_supported:
992 /* Here, we assign an additional cost for the unaligned store. */
993 *inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
994 unaligned_store, stmt_info,
995 DR_MISALIGNMENT (dr));
997 if (vect_print_dump_info (REPORT_COST))
998 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
999 "hardware.");
1001 break;
1004 case dr_unaligned_unsupported:
1006 *inside_cost = VECT_MAX_COST;
1008 if (vect_print_dump_info (REPORT_COST))
1009 fprintf (vect_dump, "vect_model_store_cost: unsupported access.");
1011 break;
1014 default:
1015 gcc_unreachable ();
1020 /* Function vect_model_load_cost
1022 Models cost for loads. In the case of grouped accesses, the last access
1023 has the overhead of the grouped access attributed to it. Since unaligned
1024 accesses are supported for loads, we also account for the costs of the
1025 access scheme chosen. */
1027 void
1028 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
1029 slp_tree slp_node, stmt_vector_for_cost *stmt_cost_vec)
1031 int group_size;
1032 gimple first_stmt;
1033 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1034 unsigned int inside_cost = 0, outside_cost = 0;
1036 /* The SLP costs were already calculated during SLP tree build. */
1037 if (PURE_SLP_STMT (stmt_info))
1038 return;
1040 /* Grouped accesses? */
1041 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1042 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1044 group_size = vect_cost_group_size (stmt_info);
1045 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1047 /* Not a grouped access. */
1048 else
1050 group_size = 1;
1051 first_dr = dr;
1054 /* We assume that the cost of a single load-lanes instruction is
1055 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1056 access is instead being provided by a load-and-permute operation,
1057 include the cost of the permutes. */
1058 if (!load_lanes_p && group_size > 1)
 1060       /* Uses even and odd extract operations for each needed permute.  */
1061 int nstmts = ncopies * exact_log2 (group_size) * group_size;
1062 inside_cost += record_stmt_cost (stmt_cost_vec, nstmts, vec_perm,
1063 stmt_info, 0);
1065 if (vect_print_dump_info (REPORT_COST))
1066 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
1067 group_size);
1070 /* The loads themselves. */
1071 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1073 /* N scalar loads plus gathering them into a vector. */
1074 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1075 inside_cost += record_stmt_cost (stmt_cost_vec,
1076 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1077 scalar_load, stmt_info, 0);
1078 inside_cost += record_stmt_cost (stmt_cost_vec, ncopies, vec_construct,
1079 stmt_info, 0);
1081 else
1082 vect_get_load_cost (first_dr, ncopies,
1083 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1084 || group_size > 1 || slp_node),
1085 &inside_cost, &outside_cost, stmt_cost_vec);
1087 if (vect_print_dump_info (REPORT_COST))
1088 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
1089 "outside_cost = %d .", inside_cost, outside_cost);
1091 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
1092 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
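/* Worked example: a load group with GROUP_SIZE == 4 and NCOPIES == 1
   that cannot use load-lanes adds 1 * 2 * 4 == 8 vec_perm operations
   to the loads; a strided load instead costs
   NCOPIES * TYPE_VECTOR_SUBPARTS (vectype) scalar_load operations
   plus one vec_construct per copy.  */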
1096 /* Calculate cost of DR's memory access. */
1097 void
1098 vect_get_load_cost (struct data_reference *dr, int ncopies,
1099 bool add_realign_cost, unsigned int *inside_cost,
1100 unsigned int *outside_cost,
1101 stmt_vector_for_cost *stmt_cost_vec)
1103 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1104 gimple stmt = DR_STMT (dr);
1105 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1107 switch (alignment_support_scheme)
1109 case dr_aligned:
1111 *inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
1112 vector_load, stmt_info, 0);
1114 if (vect_print_dump_info (REPORT_COST))
1115 fprintf (vect_dump, "vect_model_load_cost: aligned.");
1117 break;
1119 case dr_unaligned_supported:
1121 /* Here, we assign an additional cost for the unaligned load. */
1122 *inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
1123 unaligned_load, stmt_info,
1124 DR_MISALIGNMENT (dr));
1126 if (vect_print_dump_info (REPORT_COST))
1127 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1128 "hardware.");
1130 break;
1132 case dr_explicit_realign:
1134 *inside_cost += record_stmt_cost (stmt_cost_vec, ncopies * 2,
1135 vector_load, stmt_info, 0);
1136 *inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
1137 vec_perm, stmt_info, 0);
1139 /* FIXME: If the misalignment remains fixed across the iterations of
1140 the containing loop, the following cost should be added to the
1141 outside costs. */
1142 if (targetm.vectorize.builtin_mask_for_load)
1143 *inside_cost += record_stmt_cost (stmt_cost_vec, 1, vector_stmt,
1144 stmt_info, 0);
1146 if (vect_print_dump_info (REPORT_COST))
1147 fprintf (vect_dump, "vect_model_load_cost: explicit realign");
1149 break;
1151 case dr_explicit_realign_optimized:
1153 if (vect_print_dump_info (REPORT_COST))
1154 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1155 "pipelined.");
1157 /* Unaligned software pipeline has a load of an address, an initial
1158 load, and possibly a mask operation to "prime" the loop. However,
1159 if this is an access in a group of loads, which provide grouped
1160 access, then the above cost should only be considered for one
1161 access in the group. Inside the loop, there is a load op
1162 and a realignment op. */
1164 if (add_realign_cost)
1166 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
1167 if (targetm.vectorize.builtin_mask_for_load)
1168 *outside_cost += vect_get_stmt_cost (vector_stmt);
1171 *inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
1172 vector_load, stmt_info, 0);
1173 *inside_cost += record_stmt_cost (stmt_cost_vec, ncopies,
1174 vec_perm, stmt_info, 0);
1176 if (vect_print_dump_info (REPORT_COST))
1177 fprintf (vect_dump,
1178 "vect_model_load_cost: explicit realign optimized");
1180 break;
1183 case dr_unaligned_unsupported:
1185 *inside_cost = VECT_MAX_COST;
1187 if (vect_print_dump_info (REPORT_COST))
1188 fprintf (vect_dump, "vect_model_load_cost: unsupported access.");
1190 break;
1193 default:
1194 gcc_unreachable ();
1198 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1199 the loop preheader for the vectorized stmt STMT. */
1201 static void
1202 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1204 if (gsi)
1205 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1206 else
1208 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1209 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1211 if (loop_vinfo)
1213 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1214 basic_block new_bb;
1215 edge pe;
1217 if (nested_in_vect_loop_p (loop, stmt))
1218 loop = loop->inner;
1220 pe = loop_preheader_edge (loop);
1221 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1222 gcc_assert (!new_bb);
1224 else
1226 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1227 basic_block bb;
1228 gimple_stmt_iterator gsi_bb_start;
1230 gcc_assert (bb_vinfo);
1231 bb = BB_VINFO_BB (bb_vinfo);
1232 gsi_bb_start = gsi_after_labels (bb);
1233 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1237 if (vect_print_dump_info (REPORT_DETAILS))
1239 fprintf (vect_dump, "created new init_stmt: ");
1240 print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
1244 /* Function vect_init_vector.
1246 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1247 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
 1248    vector type, a vector with all elements equal to VAL is created first.
1249 Place the initialization at BSI if it is not NULL. Otherwise, place the
1250 initialization at the loop preheader.
1251 Return the DEF of INIT_STMT.
1252 It will be used in the vectorization of STMT. */
1254 tree
1255 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1257 tree new_var;
1258 gimple init_stmt;
1259 tree vec_oprnd;
1260 tree new_temp;
1262 if (TREE_CODE (type) == VECTOR_TYPE
1263 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1265 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1267 if (CONSTANT_CLASS_P (val))
1268 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1269 else
1271 new_var = create_tmp_reg (TREE_TYPE (type), NULL);
1272 add_referenced_var (new_var);
1273 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1274 new_var, val,
1275 NULL_TREE);
1276 new_temp = make_ssa_name (new_var, init_stmt);
1277 gimple_assign_set_lhs (init_stmt, new_temp);
1278 vect_init_vector_1 (stmt, init_stmt, gsi);
1279 val = new_temp;
1282 val = build_vector_from_val (type, val);
1285 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1286 init_stmt = gimple_build_assign (new_var, val);
1287 new_temp = make_ssa_name (new_var, init_stmt);
1288 gimple_assign_set_lhs (init_stmt, new_temp);
1289 vect_init_vector_1 (stmt, init_stmt, gsi);
1290 vec_oprnd = gimple_assign_lhs (init_stmt);
1291 return vec_oprnd;
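/* Illustrative example (assuming a V4SI vector type): calling
   vect_init_vector (stmt, 5, v4si_type, NULL) builds the vector
   constant { 5, 5, 5, 5 }, assigns it to a fresh "cst_" variable in
   the loop preheader (because GSI is NULL) and returns the SSA name
   holding that definition.  */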
1295 /* Function vect_get_vec_def_for_operand.
1297 OP is an operand in STMT. This function returns a (vector) def that will be
1298 used in the vectorized stmt for STMT.
1300 In the case that OP is an SSA_NAME which is defined in the loop, then
1301 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1303 In case OP is an invariant or constant, a new stmt that creates a vector def
1304 needs to be introduced. */
1306 tree
1307 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1309 tree vec_oprnd;
1310 gimple vec_stmt;
1311 gimple def_stmt;
1312 stmt_vec_info def_stmt_info = NULL;
1313 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1314 unsigned int nunits;
1315 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1316 tree def;
1317 enum vect_def_type dt;
1318 bool is_simple_use;
1319 tree vector_type;
1321 if (vect_print_dump_info (REPORT_DETAILS))
1323 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1324 print_generic_expr (vect_dump, op, TDF_SLIM);
1327 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1328 &def_stmt, &def, &dt);
1329 gcc_assert (is_simple_use);
1330 if (vect_print_dump_info (REPORT_DETAILS))
1332 if (def)
1334 fprintf (vect_dump, "def = ");
1335 print_generic_expr (vect_dump, def, TDF_SLIM);
1337 if (def_stmt)
1339 fprintf (vect_dump, " def_stmt = ");
1340 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1344 switch (dt)
1346 /* Case 1: operand is a constant. */
1347 case vect_constant_def:
1349 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1350 gcc_assert (vector_type);
1351 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1353 if (scalar_def)
1354 *scalar_def = op;
1356 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1357 if (vect_print_dump_info (REPORT_DETAILS))
1358 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1360 return vect_init_vector (stmt, op, vector_type, NULL);
1363 /* Case 2: operand is defined outside the loop - loop invariant. */
1364 case vect_external_def:
1366 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1367 gcc_assert (vector_type);
1369 if (scalar_def)
1370 *scalar_def = def;
1372 /* Create 'vec_inv = {inv,inv,..,inv}' */
1373 if (vect_print_dump_info (REPORT_DETAILS))
1374 fprintf (vect_dump, "Create vector_inv.");
1376 return vect_init_vector (stmt, def, vector_type, NULL);
1379 /* Case 3: operand is defined inside the loop. */
1380 case vect_internal_def:
1382 if (scalar_def)
1383 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1385 /* Get the def from the vectorized stmt. */
1386 def_stmt_info = vinfo_for_stmt (def_stmt);
1388 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1389 /* Get vectorized pattern statement. */
1390 if (!vec_stmt
1391 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1392 && !STMT_VINFO_RELEVANT (def_stmt_info))
1393 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1394 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1395 gcc_assert (vec_stmt);
1396 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1397 vec_oprnd = PHI_RESULT (vec_stmt);
1398 else if (is_gimple_call (vec_stmt))
1399 vec_oprnd = gimple_call_lhs (vec_stmt);
1400 else
1401 vec_oprnd = gimple_assign_lhs (vec_stmt);
1402 return vec_oprnd;
1405 /* Case 4: operand is defined by a loop header phi - reduction */
1406 case vect_reduction_def:
1407 case vect_double_reduction_def:
1408 case vect_nested_cycle:
1410 struct loop *loop;
1412 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1413 loop = (gimple_bb (def_stmt))->loop_father;
1415 /* Get the def before the loop */
1416 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1417 return get_initial_def_for_reduction (stmt, op, scalar_def);
1420 /* Case 5: operand is defined by loop-header phi - induction. */
1421 case vect_induction_def:
1423 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1425 /* Get the def from the vectorized stmt. */
1426 def_stmt_info = vinfo_for_stmt (def_stmt);
1427 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1428 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1429 vec_oprnd = PHI_RESULT (vec_stmt);
1430 else
1431 vec_oprnd = gimple_get_lhs (vec_stmt);
1432 return vec_oprnd;
1435 default:
1436 gcc_unreachable ();
1441 /* Function vect_get_vec_def_for_stmt_copy
1443 Return a vector-def for an operand. This function is used when the
1444 vectorized stmt to be created (by the caller to this function) is a "copy"
1445 created in case the vectorized result cannot fit in one vector, and several
1446 copies of the vector-stmt are required. In this case the vector-def is
1447 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1448 of the stmt that defines VEC_OPRND.
1449 DT is the type of the vector def VEC_OPRND.
1451 Context:
1452 In case the vectorization factor (VF) is bigger than the number
1453 of elements that can fit in a vectype (nunits), we have to generate
1454 more than one vector stmt to vectorize the scalar stmt. This situation
1455 arises when there are multiple data-types operated upon in the loop; the
1456 smallest data-type determines the VF, and as a result, when vectorizing
1457 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1458 vector stmt (each computing a vector of 'nunits' results, and together
1459 computing 'VF' results in each iteration). This function is called when
1460 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1461 which VF=16 and nunits=4, so the number of copies required is 4):
1463 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1465 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1466 VS1.1: vx.1 = memref1 VS1.2
1467 VS1.2: vx.2 = memref2 VS1.3
1468 VS1.3: vx.3 = memref3
1470 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1471 VSnew.1: vz1 = vx.1 + ... VSnew.2
1472 VSnew.2: vz2 = vx.2 + ... VSnew.3
1473 VSnew.3: vz3 = vx.3 + ...
1475 The vectorization of S1 is explained in vectorizable_load.
1476 The vectorization of S2:
1477 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1478 the function 'vect_get_vec_def_for_operand' is called to
1479 get the relevant vector-def for each operand of S2. For operand x it
1480 returns the vector-def 'vx.0'.
1482 To create the remaining copies of the vector-stmt (VSnew.j), this
1483 function is called to get the relevant vector-def for each operand. It is
1484 obtained from the respective VS1.j stmt, which is recorded in the
1485 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1487 For example, to obtain the vector-def 'vx.1' in order to create the
1488 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1489 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1490 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1491 and return its def ('vx.1').
1492 Overall, to create the above sequence this function will be called 3 times:
1493 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1494 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1495 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1497 tree
1498 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1500 gimple vec_stmt_for_operand;
1501 stmt_vec_info def_stmt_info;
1503 /* Do nothing; can reuse same def. */
1504 if (dt == vect_external_def || dt == vect_constant_def )
1505 return vec_oprnd;
1507 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1508 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1509 gcc_assert (def_stmt_info);
1510 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1511 gcc_assert (vec_stmt_for_operand);
1512 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1513 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1514 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1515 else
1516 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1517 return vec_oprnd;
1521 /* Get vectorized definitions for the operands to create a copy of an original
1522 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1524 static void
1525 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1526 VEC(tree,heap) **vec_oprnds0,
1527 VEC(tree,heap) **vec_oprnds1)
1529 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1531 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1532 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1534 if (vec_oprnds1 && *vec_oprnds1)
1536 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1537 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1538 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1543 /* Get vectorized definitions for OP0 and OP1.
1544 REDUC_INDEX is the index of reduction operand in case of reduction,
1545 and -1 otherwise. */
1547 void
1548 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1549 VEC (tree, heap) **vec_oprnds0,
1550 VEC (tree, heap) **vec_oprnds1,
1551 slp_tree slp_node, int reduc_index)
1553 if (slp_node)
1555 int nops = (op1 == NULL_TREE) ? 1 : 2;
1556 VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
1557 VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
1559 VEC_quick_push (tree, ops, op0);
1560 if (op1)
1561 VEC_quick_push (tree, ops, op1);
1563 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1565 *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1566 if (op1)
1567 *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
1569 VEC_free (tree, heap, ops);
1570 VEC_free (slp_void_p, heap, vec_defs);
1572 else
1574 tree vec_oprnd;
1576 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1577 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1578 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1580 if (op1)
1582 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1583 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1584 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1590 /* Function vect_finish_stmt_generation.
1592 Insert a new stmt. */
1594 void
1595 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1596 gimple_stmt_iterator *gsi)
1598 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1599 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1600 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1602 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1604 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1606 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1607 bb_vinfo));
1609 if (vect_print_dump_info (REPORT_DETAILS))
1611 fprintf (vect_dump, "add new stmt: ");
1612 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1615 gimple_set_location (vec_stmt, gimple_location (stmt));
1618 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1619 a function declaration if the target has a vectorized version
1620 of the function, or NULL_TREE if the function cannot be vectorized. */
1622 tree
1623 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1625 tree fndecl = gimple_call_fndecl (call);
1627 /* We only handle functions that do not read or clobber memory -- i.e.
1628 const or novops ones. */
1629 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1630 return NULL_TREE;
1632 if (!fndecl
1633 || TREE_CODE (fndecl) != FUNCTION_DECL
1634 || !DECL_BUILT_IN (fndecl))
1635 return NULL_TREE;
1637 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1638 vectype_in);
1641 /* Function vectorizable_call.
1643 Check if STMT performs a function call that can be vectorized.
1644 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1645 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1646 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1648 static bool
1649 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1650 slp_tree slp_node)
1652 tree vec_dest;
1653 tree scalar_dest;
1654 tree op, type;
1655 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1656 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1657 tree vectype_out, vectype_in;
1658 int nunits_in;
1659 int nunits_out;
1660 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1661 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1662 tree fndecl, new_temp, def, rhs_type;
1663 gimple def_stmt;
1664 enum vect_def_type dt[3]
1665 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1666 gimple new_stmt = NULL;
1667 int ncopies, j;
1668 VEC(tree, heap) *vargs = NULL;
1669 enum { NARROW, NONE, WIDEN } modifier;
1670 size_t i, nargs;
1671 tree lhs;
1673 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1674 return false;
1676 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1677 return false;
1679 /* Is STMT a vectorizable call? */
1680 if (!is_gimple_call (stmt))
1681 return false;
1683 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1684 return false;
1686 if (stmt_can_throw_internal (stmt))
1687 return false;
1689 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1691 /* Process function arguments. */
1692 rhs_type = NULL_TREE;
1693 vectype_in = NULL_TREE;
1694 nargs = gimple_call_num_args (stmt);
 1696   /* Bail out if the function has more than three arguments; we do not have
 1697      interesting builtin functions to vectorize with more than two arguments
 1698      except for fma.  A call with no arguments is not supported either.  */
1699 if (nargs == 0 || nargs > 3)
1700 return false;
1702 for (i = 0; i < nargs; i++)
1704 tree opvectype;
1706 op = gimple_call_arg (stmt, i);
1708 /* We can only handle calls with arguments of the same type. */
1709 if (rhs_type
1710 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1712 if (vect_print_dump_info (REPORT_DETAILS))
1713 fprintf (vect_dump, "argument types differ.");
1714 return false;
1716 if (!rhs_type)
1717 rhs_type = TREE_TYPE (op);
1719 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1720 &def_stmt, &def, &dt[i], &opvectype))
1722 if (vect_print_dump_info (REPORT_DETAILS))
1723 fprintf (vect_dump, "use not simple.");
1724 return false;
1727 if (!vectype_in)
1728 vectype_in = opvectype;
1729 else if (opvectype
1730 && opvectype != vectype_in)
1732 if (vect_print_dump_info (REPORT_DETAILS))
1733 fprintf (vect_dump, "argument vector types differ.");
1734 return false;
1737 /* If all arguments are external or constant defs use a vector type with
1738 the same size as the output vector type. */
1739 if (!vectype_in)
1740 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1741 if (vec_stmt)
1742 gcc_assert (vectype_in);
1743 if (!vectype_in)
1745 if (vect_print_dump_info (REPORT_DETAILS))
1747 fprintf (vect_dump, "no vectype for scalar type ");
1748 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1751 return false;
1754 /* FORNOW */
1755 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1756 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1757 if (nunits_in == nunits_out / 2)
1758 modifier = NARROW;
1759 else if (nunits_out == nunits_in)
1760 modifier = NONE;
1761 else if (nunits_out == nunits_in / 2)
1762 modifier = WIDEN;
1763 else
1764 return false;
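  /* Illustrative example: with VECTYPE_IN == V8HI (nunits_in == 8) and
     VECTYPE_OUT == V4SI (nunits_out == 4), nunits_out == nunits_in / 2
     selects the WIDEN modifier; the inverse ratio selects NARROW, and
     equal subparts select NONE.  */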
1766 /* For now, we only vectorize functions if a target specific builtin
1767 is available. TODO -- in some cases, it might be profitable to
1768 insert the calls for pieces of the vector, in order to be able
1769 to vectorize other operations in the loop. */
1770 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1771 if (fndecl == NULL_TREE)
1773 if (vect_print_dump_info (REPORT_DETAILS))
1774 fprintf (vect_dump, "function is not vectorizable.");
1776 return false;
1779 gcc_assert (!gimple_vuse (stmt));
1781 if (slp_node || PURE_SLP_STMT (stmt_info))
1782 ncopies = 1;
1783 else if (modifier == NARROW)
1784 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1785 else
1786 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1788 /* Sanity check: make sure that at least one copy of the vectorized stmt
1789 needs to be generated. */
1790 gcc_assert (ncopies >= 1);
1792 if (!vec_stmt) /* transformation not required. */
1794 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1795 if (vect_print_dump_info (REPORT_DETAILS))
1796 fprintf (vect_dump, "=== vectorizable_call ===");
1797 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
1798 return true;
1801 /** Transform. **/
1803 if (vect_print_dump_info (REPORT_DETAILS))
1804 fprintf (vect_dump, "transform call.");
1806 /* Handle def. */
1807 scalar_dest = gimple_call_lhs (stmt);
1808 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1810 prev_stmt_info = NULL;
1811 switch (modifier)
1813 case NONE:
1814 for (j = 0; j < ncopies; ++j)
1816 /* Build argument list for the vectorized call. */
1817 if (j == 0)
1818 vargs = VEC_alloc (tree, heap, nargs);
1819 else
1820 VEC_truncate (tree, vargs, 0);
1822 if (slp_node)
1824 VEC (slp_void_p, heap) *vec_defs
1825 = VEC_alloc (slp_void_p, heap, nargs);
1826 VEC (tree, heap) *vec_oprnds0;
1828 for (i = 0; i < nargs; i++)
1829 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1830 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1831 vec_oprnds0
1832 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1834 /* Arguments are ready. Create the new vector stmt. */
1835 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
1837 size_t k;
1838 for (k = 0; k < nargs; k++)
1840 VEC (tree, heap) *vec_oprndsk
1841 = (VEC (tree, heap) *)
1842 VEC_index (slp_void_p, vec_defs, k);
1843 VEC_replace (tree, vargs, k,
1844 VEC_index (tree, vec_oprndsk, i));
1846 new_stmt = gimple_build_call_vec (fndecl, vargs);
1847 new_temp = make_ssa_name (vec_dest, new_stmt);
1848 gimple_call_set_lhs (new_stmt, new_temp);
1849 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1850 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1851 new_stmt);
1854 for (i = 0; i < nargs; i++)
1856 VEC (tree, heap) *vec_oprndsi
1857 = (VEC (tree, heap) *)
1858 VEC_index (slp_void_p, vec_defs, i);
1859 VEC_free (tree, heap, vec_oprndsi);
1861 VEC_free (slp_void_p, heap, vec_defs);
1862 continue;
1865 for (i = 0; i < nargs; i++)
1867 op = gimple_call_arg (stmt, i);
1868 if (j == 0)
1869 vec_oprnd0
1870 = vect_get_vec_def_for_operand (op, stmt, NULL);
1871 else
1873 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1874 vec_oprnd0
1875 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1878 VEC_quick_push (tree, vargs, vec_oprnd0);
1881 new_stmt = gimple_build_call_vec (fndecl, vargs);
1882 new_temp = make_ssa_name (vec_dest, new_stmt);
1883 gimple_call_set_lhs (new_stmt, new_temp);
1884 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1886 if (j == 0)
1887 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1888 else
1889 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1891 prev_stmt_info = vinfo_for_stmt (new_stmt);
1894 break;
1896 case NARROW:
1897 for (j = 0; j < ncopies; ++j)
1899 /* Build argument list for the vectorized call. */
1900 if (j == 0)
1901 vargs = VEC_alloc (tree, heap, nargs * 2);
1902 else
1903 VEC_truncate (tree, vargs, 0);
1905 if (slp_node)
1907 VEC (slp_void_p, heap) *vec_defs
1908 = VEC_alloc (slp_void_p, heap, nargs);
1909 VEC (tree, heap) *vec_oprnds0;
1911 for (i = 0; i < nargs; i++)
1912 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1913 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1914 vec_oprnds0
1915 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1917 /* Arguments are ready. Create the new vector stmt. */
1918 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
1919 i += 2)
1921 size_t k;
1922 VEC_truncate (tree, vargs, 0);
1923 for (k = 0; k < nargs; k++)
1925 VEC (tree, heap) *vec_oprndsk
1926 = (VEC (tree, heap) *)
1927 VEC_index (slp_void_p, vec_defs, k);
1928 VEC_quick_push (tree, vargs,
1929 VEC_index (tree, vec_oprndsk, i));
1930 VEC_quick_push (tree, vargs,
1931 VEC_index (tree, vec_oprndsk, i + 1));
1933 new_stmt = gimple_build_call_vec (fndecl, vargs);
1934 new_temp = make_ssa_name (vec_dest, new_stmt);
1935 gimple_call_set_lhs (new_stmt, new_temp);
1936 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1937 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1938 new_stmt);
1941 for (i = 0; i < nargs; i++)
1943 VEC (tree, heap) *vec_oprndsi
1944 = (VEC (tree, heap) *)
1945 VEC_index (slp_void_p, vec_defs, i);
1946 VEC_free (tree, heap, vec_oprndsi);
1948 VEC_free (slp_void_p, heap, vec_defs);
1949 continue;
1952 for (i = 0; i < nargs; i++)
1954 op = gimple_call_arg (stmt, i);
1955 if (j == 0)
1957 vec_oprnd0
1958 = vect_get_vec_def_for_operand (op, stmt, NULL);
1959 vec_oprnd1
1960 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1962 else
1964 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1965 vec_oprnd0
1966 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1967 vec_oprnd1
1968 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1971 VEC_quick_push (tree, vargs, vec_oprnd0);
1972 VEC_quick_push (tree, vargs, vec_oprnd1);
1975 new_stmt = gimple_build_call_vec (fndecl, vargs);
1976 new_temp = make_ssa_name (vec_dest, new_stmt);
1977 gimple_call_set_lhs (new_stmt, new_temp);
1978 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1980 if (j == 0)
1981 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1982 else
1983 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1985 prev_stmt_info = vinfo_for_stmt (new_stmt);
1988 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1990 break;
1992 case WIDEN:
1993 /* No current target implements this case. */
1994 return false;
1997 VEC_free (tree, heap, vargs);
1999 /* Update the exception handling table with the vector stmt if necessary. */
2000 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2001 gimple_purge_dead_eh_edges (gimple_bb (stmt));
2003 /* The call in STMT might prevent it from being removed in dce.
2004 We however cannot remove it here, due to the way the ssa name
2005 it defines is mapped to the new definition. So just replace
2006    the rhs of the statement with something harmless.  */
2008 if (slp_node)
2009 return true;
2011 type = TREE_TYPE (scalar_dest);
2012 if (is_pattern_stmt_p (stmt_info))
2013 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2014 else
2015 lhs = gimple_call_lhs (stmt);
2016 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2017 set_vinfo_for_stmt (new_stmt, stmt_info);
2018 set_vinfo_for_stmt (stmt, NULL);
2019 STMT_VINFO_STMT (stmt_info) = new_stmt;
2020 gsi_replace (gsi, new_stmt, false);
2021 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2023 return true;
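/* Illustrative sketch of the transformation above for the NONE modifier,
   assuming a hypothetical target builtin VFOOF operating on V4SF and a
   vectorization factor of 8 (hence ncopies == 2); all names are made up:

     scalar loop                     vectorized copies
       a[i] = foof (b[i]);             va_0 = VFOOF (vb_0);
                                       va_1 = VFOOF (vb_1);

   The original scalar call is not deleted here; as done just above, its
   rhs is replaced by a harmless zero constant so that dce can remove the
   statement later.  */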
2027 /* Function vect_gen_widened_results_half
2029   Create a vector stmt whose code, number of arguments, and result
2030   variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
2031 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2032 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2033 needs to be created (DECL is a function-decl of a target-builtin).
2034 STMT is the original scalar stmt that we are vectorizing. */
2036 static gimple
2037 vect_gen_widened_results_half (enum tree_code code,
2038 tree decl,
2039 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2040 tree vec_dest, gimple_stmt_iterator *gsi,
2041 gimple stmt)
2043 gimple new_stmt;
2044 tree new_temp;
2046 /* Generate half of the widened result: */
2047 if (code == CALL_EXPR)
2049 /* Target specific support */
2050 if (op_type == binary_op)
2051 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2052 else
2053 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2054 new_temp = make_ssa_name (vec_dest, new_stmt);
2055 gimple_call_set_lhs (new_stmt, new_temp);
2057 else
2059 /* Generic support */
2060 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2061 if (op_type != binary_op)
2062 vec_oprnd1 = NULL;
2063 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2064 vec_oprnd1);
2065 new_temp = make_ssa_name (vec_dest, new_stmt);
2066 gimple_assign_set_lhs (new_stmt, new_temp);
2068 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2070 return new_stmt;
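/* Illustrative sketch of how the helper above is used: the caller
   (vect_create_vectorized_promotion_stmts below) invokes it twice per
   input vector, once with CODE1 and once with CODE2.  For a hypothetical
   generic (non-builtin) widening of V8HI to V4SI this yields

     lo_half = VEC_UNPACK_LO_EXPR <vx>;     built from CODE1
     hi_half = VEC_UNPACK_HI_EXPR <vx>;     built from CODE2

   i.e. the two halves of the widened result.  */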
2074 /* Get vectorized definitions for loop-based vectorization. For the first
2075 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2076 scalar operand), and for the rest we get a copy with
2077 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2078 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2079 The vectors are collected into VEC_OPRNDS. */
2081 static void
2082 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2083 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2085 tree vec_oprnd;
2087 /* Get first vector operand. */
2088 /* All the vector operands except the very first one (that is scalar oprnd)
2089 are stmt copies. */
2090 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2091 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2092 else
2093 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2095 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2097 /* Get second vector operand. */
2098 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2099 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2101 *oprnd = vec_oprnd;
2103 /* For conversion in multiple steps, continue to get operands
2104 recursively. */
2105 if (multi_step_cvt)
2106 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
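/* Illustrative note on the recursion above: each invocation pushes two
   vector defs and then recurses MULTI_STEP_CVT times, so a call with
   MULTI_STEP_CVT == 1 collects four consecutive defs of the operand in
   *VEC_OPRNDS (in general 2 * (MULTI_STEP_CVT + 1) defs), which is the
   number of input vectors a multi-step demotion consumes per final
   result vector.  */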
2110 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2111 For multi-step conversions store the resulting vectors and call the function
2112 recursively. */
2114 static void
2115 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2116 int multi_step_cvt, gimple stmt,
2117 VEC (tree, heap) *vec_dsts,
2118 gimple_stmt_iterator *gsi,
2119 slp_tree slp_node, enum tree_code code,
2120 stmt_vec_info *prev_stmt_info)
2122 unsigned int i;
2123 tree vop0, vop1, new_tmp, vec_dest;
2124 gimple new_stmt;
2125 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2127 vec_dest = VEC_pop (tree, vec_dsts);
2129 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2131 /* Create demotion operation. */
2132 vop0 = VEC_index (tree, *vec_oprnds, i);
2133 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2134 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2135 new_tmp = make_ssa_name (vec_dest, new_stmt);
2136 gimple_assign_set_lhs (new_stmt, new_tmp);
2137 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2139 if (multi_step_cvt)
2140 /* Store the resulting vector for next recursive call. */
2141 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2142 else
2144 /* This is the last step of the conversion sequence. Store the
2145 vectors in SLP_NODE or in vector info of the scalar statement
2146 (or in STMT_VINFO_RELATED_STMT chain). */
2147 if (slp_node)
2148 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2149 else
2151 if (!*prev_stmt_info)
2152 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2153 else
2154 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2156 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2161 /* For multi-step demotion operations we first generate demotion operations
2162 from the source type to the intermediate types, and then combine the
2163      results (stored in VEC_OPRNDS) in a demotion operation to the destination
2164 type. */
2165 if (multi_step_cvt)
2167 /* At each level of recursion we have half of the operands we had at the
2168 previous level. */
2169 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2170 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2171 stmt, vec_dsts, gsi, slp_node,
2172 VEC_PACK_TRUNC_EXPR,
2173 prev_stmt_info);
2176 VEC_quick_push (tree, vec_dsts, vec_dest);
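/* Illustrative sketch of a two-step demotion built by the function above
   (int -> char, hypothetical 128-bit vectors):

     step 1 (intermediate type short):
       vs_0 = VEC_PACK_TRUNC_EXPR <vi_0, vi_1>;
       vs_1 = VEC_PACK_TRUNC_EXPR <vi_2, vi_3>;
     step 2 (the recursive call, destination type char):
       vc_0 = VEC_PACK_TRUNC_EXPR <vs_0, vs_1>;

   Four V4SI operands are packed into two V8HI vectors, which the
   recursive invocation packs into the single V16QI result.  */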
2180 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2181 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2182 the resulting vectors and call the function recursively. */
2184 static void
2185 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2186 VEC (tree, heap) **vec_oprnds1,
2187 gimple stmt, tree vec_dest,
2188 gimple_stmt_iterator *gsi,
2189 enum tree_code code1,
2190 enum tree_code code2, tree decl1,
2191 tree decl2, int op_type)
2193 int i;
2194 tree vop0, vop1, new_tmp1, new_tmp2;
2195 gimple new_stmt1, new_stmt2;
2196 VEC (tree, heap) *vec_tmp = NULL;
2198 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2199 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2201 if (op_type == binary_op)
2202 vop1 = VEC_index (tree, *vec_oprnds1, i);
2203 else
2204 vop1 = NULL_TREE;
2206 /* Generate the two halves of promotion operation. */
2207 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2208 op_type, vec_dest, gsi, stmt);
2209 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2210 op_type, vec_dest, gsi, stmt);
2211 if (is_gimple_call (new_stmt1))
2213 new_tmp1 = gimple_call_lhs (new_stmt1);
2214 new_tmp2 = gimple_call_lhs (new_stmt2);
2216 else
2218 new_tmp1 = gimple_assign_lhs (new_stmt1);
2219 new_tmp2 = gimple_assign_lhs (new_stmt2);
2222 /* Store the results for the next step. */
2223 VEC_quick_push (tree, vec_tmp, new_tmp1);
2224 VEC_quick_push (tree, vec_tmp, new_tmp2);
2227 VEC_free (tree, heap, *vec_oprnds0);
2228 *vec_oprnds0 = vec_tmp;
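/* Illustrative sketch of one invocation of the function above for a
   unary promotion (short -> int, hypothetical 128-bit vectors): for
   each V8HI def vx in *VEC_OPRNDS0 two statements are generated,

     vlo = VEC_UNPACK_LO_EXPR <vx>;     via CODE1
     vhi = VEC_UNPACK_HI_EXPR <vx>;     via CODE2

   and *VEC_OPRNDS0 is replaced by a vector holding twice as many V4SI
   defs, ready either for the final statements or for a further
   promotion step.  */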
2232 /* Check if STMT performs a conversion operation, that can be vectorized.
2233 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2234 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2235 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2237 static bool
2238 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2239 gimple *vec_stmt, slp_tree slp_node)
2241 tree vec_dest;
2242 tree scalar_dest;
2243 tree op0, op1 = NULL_TREE;
2244 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2245 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2246 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2247 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2248 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2249 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2250 tree new_temp;
2251 tree def;
2252 gimple def_stmt;
2253 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2254 gimple new_stmt = NULL;
2255 stmt_vec_info prev_stmt_info;
2256 int nunits_in;
2257 int nunits_out;
2258 tree vectype_out, vectype_in;
2259 int ncopies, i, j;
2260 tree lhs_type, rhs_type;
2261 enum { NARROW, NONE, WIDEN } modifier;
2262 VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2263 tree vop0;
2264 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2265 int multi_step_cvt = 0;
2266 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2267 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2268 int op_type;
2269 enum machine_mode rhs_mode;
2270 unsigned short fltsz;
2272 /* Is STMT a vectorizable conversion? */
2274 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2275 return false;
2277 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2278 return false;
2280 if (!is_gimple_assign (stmt))
2281 return false;
2283 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2284 return false;
2286 code = gimple_assign_rhs_code (stmt);
2287 if (!CONVERT_EXPR_CODE_P (code)
2288 && code != FIX_TRUNC_EXPR
2289 && code != FLOAT_EXPR
2290 && code != WIDEN_MULT_EXPR
2291 && code != WIDEN_LSHIFT_EXPR)
2292 return false;
2294 op_type = TREE_CODE_LENGTH (code);
2296 /* Check types of lhs and rhs. */
2297 scalar_dest = gimple_assign_lhs (stmt);
2298 lhs_type = TREE_TYPE (scalar_dest);
2299 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2301 op0 = gimple_assign_rhs1 (stmt);
2302 rhs_type = TREE_TYPE (op0);
2304 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2305 && !((INTEGRAL_TYPE_P (lhs_type)
2306 && INTEGRAL_TYPE_P (rhs_type))
2307 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2308 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2309 return false;
2311 if ((INTEGRAL_TYPE_P (lhs_type)
2312 && (TYPE_PRECISION (lhs_type)
2313 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2314 || (INTEGRAL_TYPE_P (rhs_type)
2315 && (TYPE_PRECISION (rhs_type)
2316 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2318 if (vect_print_dump_info (REPORT_DETAILS))
2319 fprintf (vect_dump,
2320 "type conversion to/from bit-precision unsupported.");
2321 return false;
2324 /* Check the operands of the operation. */
2325 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2326 &def_stmt, &def, &dt[0], &vectype_in))
2328 if (vect_print_dump_info (REPORT_DETAILS))
2329 fprintf (vect_dump, "use not simple.");
2330 return false;
2332 if (op_type == binary_op)
2334 bool ok;
2336 op1 = gimple_assign_rhs2 (stmt);
2337 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2338 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2339 OP1. */
2340 if (CONSTANT_CLASS_P (op0))
2341 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
2342 &def_stmt, &def, &dt[1], &vectype_in);
2343 else
2344 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
2345 &def, &dt[1]);
2347 if (!ok)
2349 if (vect_print_dump_info (REPORT_DETAILS))
2350 fprintf (vect_dump, "use not simple.");
2351 return false;
2355  /* If op0 is an external or constant def, use a vector type of
2356 the same size as the output vector type. */
2357 if (!vectype_in)
2358 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2359 if (vec_stmt)
2360 gcc_assert (vectype_in);
2361 if (!vectype_in)
2363 if (vect_print_dump_info (REPORT_DETAILS))
2365 fprintf (vect_dump, "no vectype for scalar type ");
2366 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
2369 return false;
2372 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2373 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2374 if (nunits_in < nunits_out)
2375 modifier = NARROW;
2376 else if (nunits_out == nunits_in)
2377 modifier = NONE;
2378 else
2379 modifier = WIDEN;
2381 /* Multiple types in SLP are handled by creating the appropriate number of
2382 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2383 case of SLP. */
2384 if (slp_node || PURE_SLP_STMT (stmt_info))
2385 ncopies = 1;
2386 else if (modifier == NARROW)
2387 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2388 else
2389 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2391 /* Sanity check: make sure that at least one copy of the vectorized stmt
2392 needs to be generated. */
2393 gcc_assert (ncopies >= 1);
2395 /* Supportable by target? */
2396 switch (modifier)
2398 case NONE:
2399 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2400 return false;
2401 if (supportable_convert_operation (code, vectype_out, vectype_in,
2402 &decl1, &code1))
2403 break;
2404 /* FALLTHRU */
2405 unsupported:
2406 if (vect_print_dump_info (REPORT_DETAILS))
2407 fprintf (vect_dump, "conversion not supported by target.");
2408 return false;
2410 case WIDEN:
2411 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2412 &code1, &code2, &multi_step_cvt,
2413 &interm_types))
2415 /* Binary widening operation can only be supported directly by the
2416 architecture. */
2417 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2418 break;
2421 if (code != FLOAT_EXPR
2422 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2423 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2424 goto unsupported;
2426 rhs_mode = TYPE_MODE (rhs_type);
2427 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2428 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2429 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2430 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2432 cvt_type
2433 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2434 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2435 if (cvt_type == NULL_TREE)
2436 goto unsupported;
2438 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2440 if (!supportable_convert_operation (code, vectype_out,
2441 cvt_type, &decl1, &codecvt1))
2442 goto unsupported;
2444 else if (!supportable_widening_operation (code, stmt, vectype_out,
2445 cvt_type, &codecvt1,
2446 &codecvt2, &multi_step_cvt,
2447 &interm_types))
2448 continue;
2449 else
2450 gcc_assert (multi_step_cvt == 0);
2452 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2453 vectype_in, &code1, &code2,
2454 &multi_step_cvt, &interm_types))
2455 break;
2458 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2459 goto unsupported;
2461 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2462 codecvt2 = ERROR_MARK;
2463 else
2465 multi_step_cvt++;
2466 VEC_safe_push (tree, heap, interm_types, cvt_type);
2467 cvt_type = NULL_TREE;
2469 break;
2471 case NARROW:
2472 gcc_assert (op_type == unary_op);
2473 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2474 &code1, &multi_step_cvt,
2475 &interm_types))
2476 break;
2478 if (code != FIX_TRUNC_EXPR
2479 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2480 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2481 goto unsupported;
2483 rhs_mode = TYPE_MODE (rhs_type);
2484 cvt_type
2485 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2486 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2487 if (cvt_type == NULL_TREE)
2488 goto unsupported;
2489 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2490 &decl1, &codecvt1))
2491 goto unsupported;
2492 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2493 &code1, &multi_step_cvt,
2494 &interm_types))
2495 break;
2496 goto unsupported;
2498 default:
2499 gcc_unreachable ();
2502 if (!vec_stmt) /* transformation not required. */
2504 if (vect_print_dump_info (REPORT_DETAILS))
2505 fprintf (vect_dump, "=== vectorizable_conversion ===");
2506 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2508 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2509 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2511 else if (modifier == NARROW)
2513 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2514 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2516 else
2518 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2519 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2521 VEC_free (tree, heap, interm_types);
2522 return true;
2525 /** Transform. **/
2526 if (vect_print_dump_info (REPORT_DETAILS))
2527 fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);
2529 if (op_type == binary_op)
2531 if (CONSTANT_CLASS_P (op0))
2532 op0 = fold_convert (TREE_TYPE (op1), op0);
2533 else if (CONSTANT_CLASS_P (op1))
2534 op1 = fold_convert (TREE_TYPE (op0), op1);
2537 /* In case of multi-step conversion, we first generate conversion operations
2538     to the intermediate types, and then from those types to the final one.
2539 We create vector destinations for the intermediate type (TYPES) received
2540 from supportable_*_operation, and store them in the correct order
2541 for future use in vect_create_vectorized_*_stmts (). */
2542 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2543 vec_dest = vect_create_destination_var (scalar_dest,
2544 (cvt_type && modifier == WIDEN)
2545 ? cvt_type : vectype_out);
2546 VEC_quick_push (tree, vec_dsts, vec_dest);
2548 if (multi_step_cvt)
2550 for (i = VEC_length (tree, interm_types) - 1;
2551 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2553 vec_dest = vect_create_destination_var (scalar_dest,
2554 intermediate_type);
2555 VEC_quick_push (tree, vec_dsts, vec_dest);
2559 if (cvt_type)
2560 vec_dest = vect_create_destination_var (scalar_dest,
2561 modifier == WIDEN
2562 ? vectype_out : cvt_type);
2564 if (!slp_node)
2566 if (modifier == NONE)
2567 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2568 else if (modifier == WIDEN)
2570 vec_oprnds0 = VEC_alloc (tree, heap,
2571 (multi_step_cvt
2572 ? vect_pow2 (multi_step_cvt) : 1));
2573 if (op_type == binary_op)
2574 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2576 else
2577 vec_oprnds0 = VEC_alloc (tree, heap,
2578 2 * (multi_step_cvt
2579 ? vect_pow2 (multi_step_cvt) : 1));
2581 else if (code == WIDEN_LSHIFT_EXPR)
2582 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2584 last_oprnd = op0;
2585 prev_stmt_info = NULL;
2586 switch (modifier)
2588 case NONE:
2589 for (j = 0; j < ncopies; j++)
2591 if (j == 0)
2592 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2593 -1);
2594 else
2595 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2597 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2599 /* Arguments are ready, create the new vector stmt. */
2600 if (code1 == CALL_EXPR)
2602 new_stmt = gimple_build_call (decl1, 1, vop0);
2603 new_temp = make_ssa_name (vec_dest, new_stmt);
2604 gimple_call_set_lhs (new_stmt, new_temp);
2606 else
2608 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2609 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2610 vop0, NULL);
2611 new_temp = make_ssa_name (vec_dest, new_stmt);
2612 gimple_assign_set_lhs (new_stmt, new_temp);
2615 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2616 if (slp_node)
2617 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2618 new_stmt);
2621 if (j == 0)
2622 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2623 else
2624 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2625 prev_stmt_info = vinfo_for_stmt (new_stmt);
2627 break;
2629 case WIDEN:
2630 /* In case the vectorization factor (VF) is bigger than the number
2631 of elements that we can fit in a vectype (nunits), we have to
2632 generate more than one vector stmt - i.e - we need to "unroll"
2633 the vector stmt by a factor VF/nunits. */
2634 for (j = 0; j < ncopies; j++)
2636 /* Handle uses. */
2637 if (j == 0)
2639 if (slp_node)
2641 if (code == WIDEN_LSHIFT_EXPR)
2643 unsigned int k;
2645 vec_oprnd1 = op1;
2646 /* Store vec_oprnd1 for every vector stmt to be created
2647 for SLP_NODE. We check during the analysis that all
2648 the shift arguments are the same. */
2649 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2650 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2652 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2653 slp_node, -1);
2655 else
2656 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2657 &vec_oprnds1, slp_node, -1);
2659 else
2661 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2662 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2663 if (op_type == binary_op)
2665 if (code == WIDEN_LSHIFT_EXPR)
2666 vec_oprnd1 = op1;
2667 else
2668 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2669 NULL);
2670 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2674 else
2676 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2677 VEC_truncate (tree, vec_oprnds0, 0);
2678 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2679 if (op_type == binary_op)
2681 if (code == WIDEN_LSHIFT_EXPR)
2682 vec_oprnd1 = op1;
2683 else
2684 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2685 vec_oprnd1);
2686 VEC_truncate (tree, vec_oprnds1, 0);
2687 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2691 /* Arguments are ready. Create the new vector stmts. */
2692 for (i = multi_step_cvt; i >= 0; i--)
2694 tree this_dest = VEC_index (tree, vec_dsts, i);
2695 enum tree_code c1 = code1, c2 = code2;
2696 if (i == 0 && codecvt2 != ERROR_MARK)
2698 c1 = codecvt1;
2699 c2 = codecvt2;
2701 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2702 &vec_oprnds1,
2703 stmt, this_dest, gsi,
2704 c1, c2, decl1, decl2,
2705 op_type);
2708 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2710 if (cvt_type)
2712 if (codecvt1 == CALL_EXPR)
2714 new_stmt = gimple_build_call (decl1, 1, vop0);
2715 new_temp = make_ssa_name (vec_dest, new_stmt);
2716 gimple_call_set_lhs (new_stmt, new_temp);
2718 else
2720 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2721 new_temp = make_ssa_name (vec_dest, NULL);
2722 new_stmt = gimple_build_assign_with_ops (codecvt1,
2723 new_temp,
2724 vop0, NULL);
2727 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2729 else
2730 new_stmt = SSA_NAME_DEF_STMT (vop0);
2732 if (slp_node)
2733 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2734 new_stmt);
2735 else
2737 if (!prev_stmt_info)
2738 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2739 else
2740 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2741 prev_stmt_info = vinfo_for_stmt (new_stmt);
2746 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2747 break;
2749 case NARROW:
2750 /* In case the vectorization factor (VF) is bigger than the number
2751 of elements that we can fit in a vectype (nunits), we have to
2752 generate more than one vector stmt - i.e - we need to "unroll"
2753 the vector stmt by a factor VF/nunits. */
2754 for (j = 0; j < ncopies; j++)
2756 /* Handle uses. */
2757 if (slp_node)
2758 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2759 slp_node, -1);
2760 else
2762 VEC_truncate (tree, vec_oprnds0, 0);
2763 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2764 vect_pow2 (multi_step_cvt) - 1);
2767 /* Arguments are ready. Create the new vector stmts. */
2768 if (cvt_type)
2769 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2771 if (codecvt1 == CALL_EXPR)
2773 new_stmt = gimple_build_call (decl1, 1, vop0);
2774 new_temp = make_ssa_name (vec_dest, new_stmt);
2775 gimple_call_set_lhs (new_stmt, new_temp);
2777 else
2779 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2780 new_temp = make_ssa_name (vec_dest, NULL);
2781 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2782 vop0, NULL);
2785 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2786 VEC_replace (tree, vec_oprnds0, i, new_temp);
2789 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2790 stmt, vec_dsts, gsi,
2791 slp_node, code1,
2792 &prev_stmt_info);
2795 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2796 break;
2799 VEC_free (tree, heap, vec_oprnds0);
2800 VEC_free (tree, heap, vec_oprnds1);
2801 VEC_free (tree, heap, vec_dsts);
2802 VEC_free (tree, heap, interm_types);
2804 return true;
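/* Illustrative sketch of a multi-step WIDEN conversion handled above:
   short -> float, assuming the target has integer unpack operations and
   a direct V4SI -> V4SF conversion but no direct V8HI -> V4SF one.
   CVT_TYPE is then the same-sized integer type (int), and per V8HI
   input vector the generated chain is roughly

     vi_lo = VEC_UNPACK_LO_EXPR <vs>;     code1
     vi_hi = VEC_UNPACK_HI_EXPR <vs>;     code2
     vf_lo = FLOAT_EXPR <vi_lo>;          codecvt1
     vf_hi = FLOAT_EXPR <vi_hi>;

   i.e. first widen within integer vectors, then convert the widened
   integers to the floating-point result type.  */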
2808 /* Function vectorizable_assignment.
2810 Check if STMT performs an assignment (copy) that can be vectorized.
2811 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2812 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2813 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2815 static bool
2816 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2817 gimple *vec_stmt, slp_tree slp_node)
2819 tree vec_dest;
2820 tree scalar_dest;
2821 tree op;
2822 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2823 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2824 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2825 tree new_temp;
2826 tree def;
2827 gimple def_stmt;
2828 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2829 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2830 int ncopies;
2831 int i, j;
2832 VEC(tree,heap) *vec_oprnds = NULL;
2833 tree vop;
2834 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2835 gimple new_stmt = NULL;
2836 stmt_vec_info prev_stmt_info = NULL;
2837 enum tree_code code;
2838 tree vectype_in;
2840 /* Multiple types in SLP are handled by creating the appropriate number of
2841 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2842 case of SLP. */
2843 if (slp_node || PURE_SLP_STMT (stmt_info))
2844 ncopies = 1;
2845 else
2846 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2848 gcc_assert (ncopies >= 1);
2850 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2851 return false;
2853 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2854 return false;
2856 /* Is vectorizable assignment? */
2857 if (!is_gimple_assign (stmt))
2858 return false;
2860 scalar_dest = gimple_assign_lhs (stmt);
2861 if (TREE_CODE (scalar_dest) != SSA_NAME)
2862 return false;
2864 code = gimple_assign_rhs_code (stmt);
2865 if (gimple_assign_single_p (stmt)
2866 || code == PAREN_EXPR
2867 || CONVERT_EXPR_CODE_P (code))
2868 op = gimple_assign_rhs1 (stmt);
2869 else
2870 return false;
2872 if (code == VIEW_CONVERT_EXPR)
2873 op = TREE_OPERAND (op, 0);
2875 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2876 &def_stmt, &def, &dt[0], &vectype_in))
2878 if (vect_print_dump_info (REPORT_DETAILS))
2879 fprintf (vect_dump, "use not simple.");
2880 return false;
2883 /* We can handle NOP_EXPR conversions that do not change the number
2884 of elements or the vector size. */
2885 if ((CONVERT_EXPR_CODE_P (code)
2886 || code == VIEW_CONVERT_EXPR)
2887 && (!vectype_in
2888 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2889 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2890 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2891 return false;
2893 /* We do not handle bit-precision changes. */
2894 if ((CONVERT_EXPR_CODE_P (code)
2895 || code == VIEW_CONVERT_EXPR)
2896 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2897 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2898 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2899 || ((TYPE_PRECISION (TREE_TYPE (op))
2900 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2901 /* But a conversion that does not change the bit-pattern is ok. */
2902 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2903 > TYPE_PRECISION (TREE_TYPE (op)))
2904 && TYPE_UNSIGNED (TREE_TYPE (op))))
2906 if (vect_print_dump_info (REPORT_DETAILS))
2907 fprintf (vect_dump, "type conversion to/from bit-precision "
2908 "unsupported.");
2909 return false;
2912 if (!vec_stmt) /* transformation not required. */
2914 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2915 if (vect_print_dump_info (REPORT_DETAILS))
2916 fprintf (vect_dump, "=== vectorizable_assignment ===");
2917 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2918 return true;
2921 /** Transform. **/
2922 if (vect_print_dump_info (REPORT_DETAILS))
2923 fprintf (vect_dump, "transform assignment.");
2925 /* Handle def. */
2926 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2928 /* Handle use. */
2929 for (j = 0; j < ncopies; j++)
2931 /* Handle uses. */
2932 if (j == 0)
2933 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2934 else
2935 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2937      /* Arguments are ready.  Create the new vector stmt.  */
2938 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2940 if (CONVERT_EXPR_CODE_P (code)
2941 || code == VIEW_CONVERT_EXPR)
2942 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2943 new_stmt = gimple_build_assign (vec_dest, vop);
2944 new_temp = make_ssa_name (vec_dest, new_stmt);
2945 gimple_assign_set_lhs (new_stmt, new_temp);
2946 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2947 if (slp_node)
2948 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2951 if (slp_node)
2952 continue;
2954 if (j == 0)
2955 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2956 else
2957 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2959 prev_stmt_info = vinfo_for_stmt (new_stmt);
2962 VEC_free (tree, heap, vec_oprnds);
2963 return true;
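/* Illustrative sketch of what the function above accepts: a plain copy,
   or a conversion that changes neither the number of elements nor the
   vector size.  For example (hypothetical types)

     unsigned int a[N]; int b[N];
     for (i = 0; i < N; i++)
       a[i] = (unsigned int) b[i];

   is vectorized as a simple vector copy whose rhs is wrapped in a
   VIEW_CONVERT_EXPR, whereas an int -> short conversion changes the
   number of elements and is rejected here (vectorizable_conversion
   handles it instead).  */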
2967 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2968 either as shift by a scalar or by a vector. */
2970 bool
2971 vect_supportable_shift (enum tree_code code, tree scalar_type)
2974 enum machine_mode vec_mode;
2975 optab optab;
2976 int icode;
2977 tree vectype;
2979 vectype = get_vectype_for_scalar_type (scalar_type);
2980 if (!vectype)
2981 return false;
2983 optab = optab_for_tree_code (code, vectype, optab_scalar);
2984 if (!optab
2985 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2987 optab = optab_for_tree_code (code, vectype, optab_vector);
2988 if (!optab
2989 || (optab_handler (optab, TYPE_MODE (vectype))
2990 == CODE_FOR_nothing))
2991 return false;
2994 vec_mode = TYPE_MODE (vectype);
2995 icode = (int) optab_handler (optab, vec_mode);
2996 if (icode == CODE_FOR_nothing)
2997 return false;
2999 return true;
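/* Illustrative usage sketch for the predicate above (a hypothetical
   caller, not actual in-tree code):

     if (vect_supportable_shift (RSHIFT_EXPR, TREE_TYPE (oprnd0)))
       ... build a pattern statement that relies on a vector shift ...

   The check is deliberately permissive: either a vector-by-scalar shift
   optab or, failing that, a vector-by-vector one is accepted.  */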
3003 /* Function vectorizable_shift.
3005 Check if STMT performs a shift operation that can be vectorized.
3006 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3007 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3008 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3010 static bool
3011 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3012 gimple *vec_stmt, slp_tree slp_node)
3014 tree vec_dest;
3015 tree scalar_dest;
3016 tree op0, op1 = NULL;
3017 tree vec_oprnd1 = NULL_TREE;
3018 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3019 tree vectype;
3020 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3021 enum tree_code code;
3022 enum machine_mode vec_mode;
3023 tree new_temp;
3024 optab optab;
3025 int icode;
3026 enum machine_mode optab_op2_mode;
3027 tree def;
3028 gimple def_stmt;
3029 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3030 gimple new_stmt = NULL;
3031 stmt_vec_info prev_stmt_info;
3032 int nunits_in;
3033 int nunits_out;
3034 tree vectype_out;
3035 tree op1_vectype;
3036 int ncopies;
3037 int j, i;
3038 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3039 tree vop0, vop1;
3040 unsigned int k;
3041 bool scalar_shift_arg = true;
3042 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3043 int vf;
3045 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3046 return false;
3048 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3049 return false;
3051 /* Is STMT a vectorizable binary/unary operation? */
3052 if (!is_gimple_assign (stmt))
3053 return false;
3055 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3056 return false;
3058 code = gimple_assign_rhs_code (stmt);
3060 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3061 || code == RROTATE_EXPR))
3062 return false;
3064 scalar_dest = gimple_assign_lhs (stmt);
3065 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3066 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3067 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3069 if (vect_print_dump_info (REPORT_DETAILS))
3070 fprintf (vect_dump, "bit-precision shifts not supported.");
3071 return false;
3074 op0 = gimple_assign_rhs1 (stmt);
3075 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3076 &def_stmt, &def, &dt[0], &vectype))
3078 if (vect_print_dump_info (REPORT_DETAILS))
3079 fprintf (vect_dump, "use not simple.");
3080 return false;
3082      /* If op0 is an external or constant def, use a vector type with
3083 the same size as the output vector type. */
3084 if (!vectype)
3085 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3086 if (vec_stmt)
3087 gcc_assert (vectype);
3088 if (!vectype)
3090 if (vect_print_dump_info (REPORT_DETAILS))
3092 fprintf (vect_dump, "no vectype for scalar type ");
3093 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3096 return false;
3099 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3100 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3101 if (nunits_out != nunits_in)
3102 return false;
3104 op1 = gimple_assign_rhs2 (stmt);
3105 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3106 &def, &dt[1], &op1_vectype))
3108 if (vect_print_dump_info (REPORT_DETAILS))
3109 fprintf (vect_dump, "use not simple.");
3110 return false;
3113 if (loop_vinfo)
3114 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3115 else
3116 vf = 1;
3118 /* Multiple types in SLP are handled by creating the appropriate number of
3119 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3120 case of SLP. */
3121 if (slp_node || PURE_SLP_STMT (stmt_info))
3122 ncopies = 1;
3123 else
3124 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3126 gcc_assert (ncopies >= 1);
3128   /* Determine whether the shift amount is a vector or a scalar.  If the
3129 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3131 if (dt[1] == vect_internal_def && !slp_node)
3132 scalar_shift_arg = false;
3133 else if (dt[1] == vect_constant_def
3134 || dt[1] == vect_external_def
3135 || dt[1] == vect_internal_def)
3137      /* In SLP we need to check whether the shift count is the same in all
3138         the stmts; in loops, if it is a constant or invariant, it is always
3139         a scalar shift.  */
3140 if (slp_node)
3142 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3143 gimple slpstmt;
3145 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
3146 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3147 scalar_shift_arg = false;
3150 else
3152 if (vect_print_dump_info (REPORT_DETAILS))
3153 fprintf (vect_dump, "operand mode requires invariant argument.");
3154 return false;
3157 /* Vector shifted by vector. */
3158 if (!scalar_shift_arg)
3160 optab = optab_for_tree_code (code, vectype, optab_vector);
3161 if (vect_print_dump_info (REPORT_DETAILS))
3162 fprintf (vect_dump, "vector/vector shift/rotate found.");
3163 if (!op1_vectype)
3164 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3165 if (op1_vectype == NULL_TREE
3166 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3168 if (vect_print_dump_info (REPORT_DETAILS))
3169 fprintf (vect_dump, "unusable type for last operand in"
3170 " vector/vector shift/rotate.");
3171 return false;
3174 /* See if the machine has a vector shifted by scalar insn and if not
3175 then see if it has a vector shifted by vector insn. */
3176 else
3178 optab = optab_for_tree_code (code, vectype, optab_scalar);
3179 if (optab
3180 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3182 if (vect_print_dump_info (REPORT_DETAILS))
3183 fprintf (vect_dump, "vector/scalar shift/rotate found.");
3185 else
3187 optab = optab_for_tree_code (code, vectype, optab_vector);
3188 if (optab
3189 && (optab_handler (optab, TYPE_MODE (vectype))
3190 != CODE_FOR_nothing))
3192 scalar_shift_arg = false;
3194 if (vect_print_dump_info (REPORT_DETAILS))
3195 fprintf (vect_dump, "vector/vector shift/rotate found.");
3197 /* Unlike the other binary operators, shifts/rotates have
3198 the rhs being int, instead of the same type as the lhs,
3199 so make sure the scalar is the right type if we are
3200 dealing with vectors of long long/long/short/char. */
3201 if (dt[1] == vect_constant_def)
3202 op1 = fold_convert (TREE_TYPE (vectype), op1);
3203 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3204 TREE_TYPE (op1)))
3206 if (slp_node
3207 && TYPE_MODE (TREE_TYPE (vectype))
3208 != TYPE_MODE (TREE_TYPE (op1)))
3210 if (vect_print_dump_info (REPORT_DETAILS))
3211 fprintf (vect_dump, "unusable type for last operand in"
3212 " vector/vector shift/rotate.");
3213 return false;
3215 if (vec_stmt && !slp_node)
3217 op1 = fold_convert (TREE_TYPE (vectype), op1);
3218 op1 = vect_init_vector (stmt, op1,
3219 TREE_TYPE (vectype), NULL);
3226 /* Supportable by target? */
3227 if (!optab)
3229 if (vect_print_dump_info (REPORT_DETAILS))
3230 fprintf (vect_dump, "no optab.");
3231 return false;
3233 vec_mode = TYPE_MODE (vectype);
3234 icode = (int) optab_handler (optab, vec_mode);
3235 if (icode == CODE_FOR_nothing)
3237 if (vect_print_dump_info (REPORT_DETAILS))
3238 fprintf (vect_dump, "op not supported by target.");
3239 /* Check only during analysis. */
3240 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3241 || (vf < vect_min_worthwhile_factor (code)
3242 && !vec_stmt))
3243 return false;
3244 if (vect_print_dump_info (REPORT_DETAILS))
3245 fprintf (vect_dump, "proceeding using word mode.");
3248 /* Worthwhile without SIMD support? Check only during analysis. */
3249 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3250 && vf < vect_min_worthwhile_factor (code)
3251 && !vec_stmt)
3253 if (vect_print_dump_info (REPORT_DETAILS))
3254 fprintf (vect_dump, "not worthwhile without SIMD support.");
3255 return false;
3258 if (!vec_stmt) /* transformation not required. */
3260 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3261 if (vect_print_dump_info (REPORT_DETAILS))
3262 fprintf (vect_dump, "=== vectorizable_shift ===");
3263 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3264 return true;
3267 /** Transform. **/
3269 if (vect_print_dump_info (REPORT_DETAILS))
3270 fprintf (vect_dump, "transform binary/unary operation.");
3272 /* Handle def. */
3273 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3275 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3276 created in the previous stages of the recursion, so no allocation is
3277 needed, except for the case of shift with scalar shift argument. In that
3278 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3279 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3280 In case of loop-based vectorization we allocate VECs of size 1. We
3281 allocate VEC_OPRNDS1 only in case of binary operation. */
3282 if (!slp_node)
3284 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3285 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3287 else if (scalar_shift_arg)
3288 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3290 prev_stmt_info = NULL;
3291 for (j = 0; j < ncopies; j++)
3293 /* Handle uses. */
3294 if (j == 0)
3296 if (scalar_shift_arg)
3298 /* Vector shl and shr insn patterns can be defined with scalar
3299 operand 2 (shift operand). In this case, use constant or loop
3300 invariant op1 directly, without extending it to vector mode
3301 first. */
3302 optab_op2_mode = insn_data[icode].operand[2].mode;
3303 if (!VECTOR_MODE_P (optab_op2_mode))
3305 if (vect_print_dump_info (REPORT_DETAILS))
3306 fprintf (vect_dump, "operand 1 using scalar mode.");
3307 vec_oprnd1 = op1;
3308 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3309 if (slp_node)
3311 /* Store vec_oprnd1 for every vector stmt to be created
3312 for SLP_NODE. We check during the analysis that all
3313 the shift arguments are the same.
3314 TODO: Allow different constants for different vector
3315 stmts generated for an SLP instance. */
3316 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3317 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3322 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3323          (a special case for certain kinds of vector shifts); otherwise,
3324 operand 1 should be of a vector type (the usual case). */
3325 if (vec_oprnd1)
3326 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3327 slp_node, -1);
3328 else
3329 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3330 slp_node, -1);
3332 else
3333 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3335 /* Arguments are ready. Create the new vector stmt. */
3336 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3338 vop1 = VEC_index (tree, vec_oprnds1, i);
3339 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3340 new_temp = make_ssa_name (vec_dest, new_stmt);
3341 gimple_assign_set_lhs (new_stmt, new_temp);
3342 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3343 if (slp_node)
3344 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3347 if (slp_node)
3348 continue;
3350 if (j == 0)
3351 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3352 else
3353 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3354 prev_stmt_info = vinfo_for_stmt (new_stmt);
3357 VEC_free (tree, heap, vec_oprnds0);
3358 VEC_free (tree, heap, vec_oprnds1);
3360 return true;
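/* Illustrative sketch of the two shift-argument shapes handled above
   (hypothetical 128-bit vectors of int):

     a[i] = b[i] << 3;      scalar_shift_arg: the invariant amount is used
                            directly when the target has a vector/scalar
                            shift, or broadcast into a vector when only
                            the vector/vector optab exists.
     a[i] = b[i] << c[i];   the shift amounts are themselves vectorized
                            and the vector/vector optab is required.  */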
3364 static tree permute_vec_elements (tree, tree, tree, gimple,
3365 gimple_stmt_iterator *);
3368 /* Function vectorizable_operation.
3370 Check if STMT performs a binary, unary or ternary operation that can
3371 be vectorized.
3372 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3373 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3374 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3376 static bool
3377 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3378 gimple *vec_stmt, slp_tree slp_node)
3380 tree vec_dest;
3381 tree scalar_dest;
3382 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3383 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3384 tree vectype;
3385 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3386 enum tree_code code;
3387 enum machine_mode vec_mode;
3388 tree new_temp;
3389 int op_type;
3390 optab optab;
3391 int icode;
3392 tree def;
3393 gimple def_stmt;
3394 enum vect_def_type dt[3]
3395 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3396 gimple new_stmt = NULL;
3397 stmt_vec_info prev_stmt_info;
3398 int nunits_in;
3399 int nunits_out;
3400 tree vectype_out;
3401 int ncopies;
3402 int j, i;
3403 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
3404 tree vop0, vop1, vop2;
3405 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3406 int vf;
3408 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3409 return false;
3411 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3412 return false;
3414 /* Is STMT a vectorizable binary/unary operation? */
3415 if (!is_gimple_assign (stmt))
3416 return false;
3418 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3419 return false;
3421 code = gimple_assign_rhs_code (stmt);
3423 /* For pointer addition, we should use the normal plus for
3424 the vector addition. */
3425 if (code == POINTER_PLUS_EXPR)
3426 code = PLUS_EXPR;
3428   /* Support only unary, binary, or ternary operations.  */
3429 op_type = TREE_CODE_LENGTH (code);
3430 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3432 if (vect_print_dump_info (REPORT_DETAILS))
3433 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
3434 op_type);
3435 return false;
3438 scalar_dest = gimple_assign_lhs (stmt);
3439 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3441 /* Most operations cannot handle bit-precision types without extra
3442 truncations. */
3443 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3444 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3445      /* Exceptions are bitwise binary operations.  */
3446 && code != BIT_IOR_EXPR
3447 && code != BIT_XOR_EXPR
3448 && code != BIT_AND_EXPR)
3450 if (vect_print_dump_info (REPORT_DETAILS))
3451 fprintf (vect_dump, "bit-precision arithmetic not supported.");
3452 return false;
3455 op0 = gimple_assign_rhs1 (stmt);
3456 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3457 &def_stmt, &def, &dt[0], &vectype))
3459 if (vect_print_dump_info (REPORT_DETAILS))
3460 fprintf (vect_dump, "use not simple.");
3461 return false;
3463      /* If op0 is an external or constant def, use a vector type with
3464 the same size as the output vector type. */
3465 if (!vectype)
3466 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3467 if (vec_stmt)
3468 gcc_assert (vectype);
3469 if (!vectype)
3471 if (vect_print_dump_info (REPORT_DETAILS))
3473 fprintf (vect_dump, "no vectype for scalar type ");
3474 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3477 return false;
3480 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3481 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3482 if (nunits_out != nunits_in)
3483 return false;
3485 if (op_type == binary_op || op_type == ternary_op)
3487 op1 = gimple_assign_rhs2 (stmt);
3488 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3489 &def, &dt[1]))
3491 if (vect_print_dump_info (REPORT_DETAILS))
3492 fprintf (vect_dump, "use not simple.");
3493 return false;
3496 if (op_type == ternary_op)
3498 op2 = gimple_assign_rhs3 (stmt);
3499 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3500 &def, &dt[2]))
3502 if (vect_print_dump_info (REPORT_DETAILS))
3503 fprintf (vect_dump, "use not simple.");
3504 return false;
3508 if (loop_vinfo)
3509 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3510 else
3511 vf = 1;
3513 /* Multiple types in SLP are handled by creating the appropriate number of
3514 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3515 case of SLP. */
3516 if (slp_node || PURE_SLP_STMT (stmt_info))
3517 ncopies = 1;
3518 else
3519 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3521 gcc_assert (ncopies >= 1);
3523 /* Shifts are handled in vectorizable_shift (). */
3524 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3525 || code == RROTATE_EXPR)
3526 return false;
3528 /* Supportable by target? */
3530 vec_mode = TYPE_MODE (vectype);
3531 if (code == MULT_HIGHPART_EXPR)
3533 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
3534 icode = LAST_INSN_CODE;
3535 else
3536 icode = CODE_FOR_nothing;
3538 else
3540 optab = optab_for_tree_code (code, vectype, optab_default);
3541 if (!optab)
3543 if (vect_print_dump_info (REPORT_DETAILS))
3544 fprintf (vect_dump, "no optab.");
3545 return false;
3547 icode = (int) optab_handler (optab, vec_mode);
3550 if (icode == CODE_FOR_nothing)
3552 if (vect_print_dump_info (REPORT_DETAILS))
3553 fprintf (vect_dump, "op not supported by target.");
3554 /* Check only during analysis. */
3555 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3556 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
3557 return false;
3558 if (vect_print_dump_info (REPORT_DETAILS))
3559 fprintf (vect_dump, "proceeding using word mode.");
3562 /* Worthwhile without SIMD support? Check only during analysis. */
3563 if (!VECTOR_MODE_P (vec_mode)
3564 && !vec_stmt
3565 && vf < vect_min_worthwhile_factor (code))
3567 if (vect_print_dump_info (REPORT_DETAILS))
3568 fprintf (vect_dump, "not worthwhile without SIMD support.");
3569 return false;
3572 if (!vec_stmt) /* transformation not required. */
3574 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3575 if (vect_print_dump_info (REPORT_DETAILS))
3576 fprintf (vect_dump, "=== vectorizable_operation ===");
3577 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3578 return true;
3581 /** Transform. **/
3583 if (vect_print_dump_info (REPORT_DETAILS))
3584 fprintf (vect_dump, "transform binary/unary operation.");
3586 /* Handle def. */
3587 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3589   /* Allocate VECs for vector operands.  In case of SLP, vector operands are
3590      created in the previous stages of the recursion, so no allocation is
3591      needed.  (Unlike vectorizable_shift, there is no scalar shift operand
3592      to replicate here, so nothing extra is needed for SLP either.)
3593      In case of loop-based vectorization we allocate VECs of size 1;
3594      VEC_OPRNDS1 is allocated for binary and ternary operations, and
3595      VEC_OPRNDS2 for ternary operations.  */
3596 if (!slp_node)
3598 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3599 if (op_type == binary_op || op_type == ternary_op)
3600 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3601 if (op_type == ternary_op)
3602 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3605 /* In case the vectorization factor (VF) is bigger than the number
3606 of elements that we can fit in a vectype (nunits), we have to generate
3607 more than one vector stmt - i.e - we need to "unroll" the
3608 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3609 from one copy of the vector stmt to the next, in the field
3610 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3611 stages to find the correct vector defs to be used when vectorizing
3612 stmts that use the defs of the current stmt. The example below
3613 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3614 we need to create 4 vectorized stmts):
3616 before vectorization:
3617 RELATED_STMT VEC_STMT
3618 S1: x = memref - -
3619 S2: z = x + 1 - -
3621 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3622 there):
3623 RELATED_STMT VEC_STMT
3624 VS1_0: vx0 = memref0 VS1_1 -
3625 VS1_1: vx1 = memref1 VS1_2 -
3626 VS1_2: vx2 = memref2 VS1_3 -
3627 VS1_3: vx3 = memref3 - -
3628 S1: x = load - VS1_0
3629 S2: z = x + 1 - -
3631 step2: vectorize stmt S2 (done here):
3632 To vectorize stmt S2 we first need to find the relevant vector
3633 def for the first operand 'x'. This is, as usual, obtained from
3634 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3635 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3636 relevant vector def 'vx0'. Having found 'vx0' we can generate
3637 the vector stmt VS2_0, and as usual, record it in the
3638 STMT_VINFO_VEC_STMT of stmt S2.
3639 When creating the second copy (VS2_1), we obtain the relevant vector
3640 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3641 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3642 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3643 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3644 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3645 chain of stmts and pointers:
3646 RELATED_STMT VEC_STMT
3647 VS1_0: vx0 = memref0 VS1_1 -
3648 VS1_1: vx1 = memref1 VS1_2 -
3649 VS1_2: vx2 = memref2 VS1_3 -
3650 VS1_3: vx3 = memref3 - -
3651 S1: x = load - VS1_0
3652 VS2_0: vz0 = vx0 + v1 VS2_1 -
3653 VS2_1: vz1 = vx1 + v1 VS2_2 -
3654 VS2_2: vz2 = vx2 + v1 VS2_3 -
3655 VS2_3: vz3 = vx3 + v1 - -
3656 S2: z = x + 1 - VS2_0 */
3658 prev_stmt_info = NULL;
3659 for (j = 0; j < ncopies; j++)
3661 /* Handle uses. */
3662 if (j == 0)
3664 if (op_type == binary_op || op_type == ternary_op)
3665 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3666 slp_node, -1);
3667 else
3668 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3669 slp_node, -1);
3670 if (op_type == ternary_op)
3672 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3673 VEC_quick_push (tree, vec_oprnds2,
3674 vect_get_vec_def_for_operand (op2, stmt, NULL));
3677 else
3679 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3680 if (op_type == ternary_op)
3682 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
3683 VEC_quick_push (tree, vec_oprnds2,
3684 vect_get_vec_def_for_stmt_copy (dt[2],
3685 vec_oprnd));
3689 /* Arguments are ready. Create the new vector stmt. */
3690 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3692 vop1 = ((op_type == binary_op || op_type == ternary_op)
3693 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
3694 vop2 = ((op_type == ternary_op)
3695 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
3696 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
3697 vop0, vop1, vop2);
3698 new_temp = make_ssa_name (vec_dest, new_stmt);
3699 gimple_assign_set_lhs (new_stmt, new_temp);
3700 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3701 if (slp_node)
3702 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3705 if (slp_node)
3706 continue;
3708 if (j == 0)
3709 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3710 else
3711 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3712 prev_stmt_info = vinfo_for_stmt (new_stmt);
3715 VEC_free (tree, heap, vec_oprnds0);
3716 if (vec_oprnds1)
3717 VEC_free (tree, heap, vec_oprnds1);
3718 if (vec_oprnds2)
3719 VEC_free (tree, heap, vec_oprnds2);
3721 return true;
3725 /* Function vectorizable_store.
3727 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3728 can be vectorized.
3729 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3730 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3731 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
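/* For example (illustration only), a scalar stmt like

     a[i_3] = x_5;

   whose LHS is an ARRAY_REF (or MEM_REF, COMPONENT_REF, etc., see the
   checks below) is a candidate; the transformation emits vector stores
   through a data-ref pointer created by vect_create_data_ref_ptr.  */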
3733 static bool
3734 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3735 slp_tree slp_node)
3737 tree scalar_dest;
3738 tree data_ref;
3739 tree op;
3740 tree vec_oprnd = NULL_TREE;
3741 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3742 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3743 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3744 tree elem_type;
3745 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3746 struct loop *loop = NULL;
3747 enum machine_mode vec_mode;
3748 tree dummy;
3749 enum dr_alignment_support alignment_support_scheme;
3750 tree def;
3751 gimple def_stmt;
3752 enum vect_def_type dt;
3753 stmt_vec_info prev_stmt_info = NULL;
3754 tree dataref_ptr = NULL_TREE;
3755 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3756 int ncopies;
3757 int j;
3758 gimple next_stmt, first_stmt = NULL;
3759 bool grouped_store = false;
3760 bool store_lanes_p = false;
3761 unsigned int group_size, i;
3762 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3763 bool inv_p;
3764 VEC(tree,heap) *vec_oprnds = NULL;
3765 bool slp = (slp_node != NULL);
3766 unsigned int vec_num;
3767 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3768 tree aggr_type;
3770 if (loop_vinfo)
3771 loop = LOOP_VINFO_LOOP (loop_vinfo);
3773 /* Multiple types in SLP are handled by creating the appropriate number of
3774 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3775 case of SLP. */
3776 if (slp || PURE_SLP_STMT (stmt_info))
3777 ncopies = 1;
3778 else
3779 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3781 gcc_assert (ncopies >= 1);
3783 /* FORNOW. This restriction should be relaxed. */
3784 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3786 if (vect_print_dump_info (REPORT_DETAILS))
3787 fprintf (vect_dump, "multiple types in nested loop.");
3788 return false;
3791 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3792 return false;
3794 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3795 return false;
3797 /* Is vectorizable store? */
3799 if (!is_gimple_assign (stmt))
3800 return false;
3802 scalar_dest = gimple_assign_lhs (stmt);
3803 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3804 && is_pattern_stmt_p (stmt_info))
3805 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3806 if (TREE_CODE (scalar_dest) != ARRAY_REF
3807 && TREE_CODE (scalar_dest) != INDIRECT_REF
3808 && TREE_CODE (scalar_dest) != COMPONENT_REF
3809 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3810 && TREE_CODE (scalar_dest) != REALPART_EXPR
3811 && TREE_CODE (scalar_dest) != MEM_REF)
3812 return false;
3814 gcc_assert (gimple_assign_single_p (stmt));
3815 op = gimple_assign_rhs1 (stmt);
3816 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3817 &def, &dt))
3819 if (vect_print_dump_info (REPORT_DETAILS))
3820 fprintf (vect_dump, "use not simple.");
3821 return false;
3824 elem_type = TREE_TYPE (vectype);
3825 vec_mode = TYPE_MODE (vectype);
3827 /* FORNOW. In some cases we can vectorize even if the data-type is not
3828 supported (e.g. - array initialization with 0). */
3829 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3830 return false;
3832 if (!STMT_VINFO_DATA_REF (stmt_info))
3833 return false;
3835 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3836 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3837 size_zero_node) < 0)
3839 if (vect_print_dump_info (REPORT_DETAILS))
3840 fprintf (vect_dump, "negative step for store.");
3841 return false;
3844 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
3846 grouped_store = true;
3847 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3848 if (!slp && !PURE_SLP_STMT (stmt_info))
3850 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3851 if (vect_store_lanes_supported (vectype, group_size))
3852 store_lanes_p = true;
3853 else if (!vect_grouped_store_supported (vectype, group_size))
3854 return false;
3857 if (first_stmt == stmt)
3859 /* STMT is the leader of the group. Check the operands of all the
3860 stmts of the group. */
3861 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3862 while (next_stmt)
3864 gcc_assert (gimple_assign_single_p (next_stmt));
3865 op = gimple_assign_rhs1 (next_stmt);
3866 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3867 &def_stmt, &def, &dt))
3869 if (vect_print_dump_info (REPORT_DETAILS))
3870 fprintf (vect_dump, "use not simple.");
3871 return false;
3873 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3878 if (!vec_stmt) /* transformation not required. */
3880 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3881 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL, NULL);
3882 return true;
3885 /** Transform. **/
3887 if (grouped_store)
3889 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3890 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3892 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3894 /* FORNOW */
3895 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3897 /* We vectorize all the stmts of the interleaving group when we
3898 reach the last stmt in the group. */
3899 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3900 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3901 && !slp)
3903 *vec_stmt = NULL;
3904 return true;
3907 if (slp)
3909 grouped_store = false;
3910 /* VEC_NUM is the number of vect stmts to be created for this
3911 group. */
3912 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3913 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3914 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3915 op = gimple_assign_rhs1 (first_stmt);
3917 else
3918 /* VEC_NUM is the number of vect stmts to be created for this
3919 group. */
3920 vec_num = group_size;
3922 else
3924 first_stmt = stmt;
3925 first_dr = dr;
3926 group_size = vec_num = 1;
3929 if (vect_print_dump_info (REPORT_DETAILS))
3930 fprintf (vect_dump, "transform store. ncopies = %d", ncopies);
3932 dr_chain = VEC_alloc (tree, heap, group_size);
3933 oprnds = VEC_alloc (tree, heap, group_size);
3935 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3936 gcc_assert (alignment_support_scheme);
3937 /* Targets with store-lane instructions must not require explicit
3938 realignment. */
3939 gcc_assert (!store_lanes_p
3940 || alignment_support_scheme == dr_aligned
3941 || alignment_support_scheme == dr_unaligned_supported);
3943 if (store_lanes_p)
3944 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3945 else
3946 aggr_type = vectype;
3948 /* In case the vectorization factor (VF) is bigger than the number
3949 of elements that we can fit in a vectype (nunits), we have to generate
3950 more than one vector stmt - i.e - we need to "unroll" the
3951 vector stmt by a factor VF/nunits. For more details see documentation in
3952 vect_get_vec_def_for_stmt_copy. */
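/* For instance (illustration only), with VF == 8 and nunits == 4 we get
   NCOPIES == 2, so the loop below emits two vector stores per scalar
   store and links the second copy to the first via
   STMT_VINFO_RELATED_STMT.  */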
3954 /* In case of interleaving (non-unit grouped access):
3956 S1: &base + 2 = x2
3957 S2: &base = x0
3958 S3: &base + 1 = x1
3959 S4: &base + 3 = x3
3961 We create vectorized stores starting from the base address (the access of
3962 the first stmt in the chain - S2 in the above example) when the last store
3963 stmt of the chain (S4) is reached:
3965 VS1: &base = vx2
3966 VS2: &base + vec_size*1 = vx0
3967 VS3: &base + vec_size*2 = vx1
3968 VS4: &base + vec_size*3 = vx3
3970 Then permutation statements are generated:
3972 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
3973 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
3976 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3977 (the order of the data-refs in the output of vect_permute_store_chain
3978 corresponds to the order of scalar stmts in the interleaving chain - see
3979 the documentation of vect_permute_store_chain()).
3981 In case of both multiple types and interleaving, above vector stores and
3982 permutation stmts are created for every copy. The result vector stmts are
3983 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3984 STMT_VINFO_RELATED_STMT for the next copies. */
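/* A note on the selectors in VS5/VS6 above: the example assumes
   8-element vectors, so in a VEC_PERM_EXPR indices 0-7 select elements
   of the first input vector and indices 8-15 select elements of the
   second.  */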
3987 prev_stmt_info = NULL;
3988 for (j = 0; j < ncopies; j++)
3990 gimple new_stmt;
3991 gimple ptr_incr;
3993 if (j == 0)
3995 if (slp)
3997 /* Get vectorized arguments for SLP_NODE. */
3998 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
3999 NULL, slp_node, -1);
4001 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
4003 else
4005 /* For interleaved stores we collect vectorized defs for all the
4006 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4007 used as an input to vect_permute_store_chain(), and OPRNDS as
4008 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4010 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4011 OPRNDS are of size 1. */
4012 next_stmt = first_stmt;
4013 for (i = 0; i < group_size; i++)
4015 /* Since gaps are not supported for interleaved stores,
4016 GROUP_SIZE is the exact number of stmts in the chain.
4017 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4018 there is no interleaving, GROUP_SIZE is 1, and only one
4019 iteration of the loop will be executed. */
4020 gcc_assert (next_stmt
4021 && gimple_assign_single_p (next_stmt));
4022 op = gimple_assign_rhs1 (next_stmt);
4024 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4025 NULL);
4026 VEC_quick_push (tree, dr_chain, vec_oprnd);
4027 VEC_quick_push (tree, oprnds, vec_oprnd);
4028 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4032 /* We should have caught mismatched types earlier. */
4033 gcc_assert (useless_type_conversion_p (vectype,
4034 TREE_TYPE (vec_oprnd)));
4035 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
4036 NULL_TREE, &dummy, gsi,
4037 &ptr_incr, false, &inv_p);
4038 gcc_assert (bb_vinfo || !inv_p);
4040 else
4042 /* For interleaved stores we created vectorized defs for all the
4043 defs stored in OPRNDS in the previous iteration (previous copy).
4044 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4045 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4046 next copy.
4047 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4048 OPRNDS are of size 1. */
4049 for (i = 0; i < group_size; i++)
4051 op = VEC_index (tree, oprnds, i);
4052 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4053 &def, &dt);
4054 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4055 VEC_replace (tree, dr_chain, i, vec_oprnd);
4056 VEC_replace (tree, oprnds, i, vec_oprnd);
4058 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4059 TYPE_SIZE_UNIT (aggr_type));
4062 if (store_lanes_p)
4064 tree vec_array;
4066 /* Combine all the vectors into an array. */
4067 vec_array = create_vector_array (vectype, vec_num);
4068 for (i = 0; i < vec_num; i++)
4070 vec_oprnd = VEC_index (tree, dr_chain, i);
4071 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4074 /* Emit:
4075 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4076 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4077 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4078 gimple_call_set_lhs (new_stmt, data_ref);
4079 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4081 else
4083 new_stmt = NULL;
4084 if (grouped_store)
4086 result_chain = VEC_alloc (tree, heap, group_size);
4087 /* Permute. */
4088 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4089 &result_chain);
4092 next_stmt = first_stmt;
4093 for (i = 0; i < vec_num; i++)
4095 unsigned align, misalign;
4097 if (i > 0)
4098 /* Bump the vector pointer. */
4099 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4100 stmt, NULL_TREE);
4102 if (slp)
4103 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4104 else if (grouped_store)
4105 /* For grouped stores vectorized defs are interleaved in
4106 vect_permute_store_chain(). */
4107 vec_oprnd = VEC_index (tree, result_chain, i);
4109 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4110 build_int_cst (reference_alias_ptr_type
4111 (DR_REF (first_dr)), 0));
4112 align = TYPE_ALIGN_UNIT (vectype);
4113 if (aligned_access_p (first_dr))
4114 misalign = 0;
4115 else if (DR_MISALIGNMENT (first_dr) == -1)
4117 TREE_TYPE (data_ref)
4118 = build_aligned_type (TREE_TYPE (data_ref),
4119 TYPE_ALIGN (elem_type));
4120 align = TYPE_ALIGN_UNIT (elem_type);
4121 misalign = 0;
4123 else
4125 TREE_TYPE (data_ref)
4126 = build_aligned_type (TREE_TYPE (data_ref),
4127 TYPE_ALIGN (elem_type));
4128 misalign = DR_MISALIGNMENT (first_dr);
4130 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4131 misalign);
4133 /* Arguments are ready. Create the new vector stmt. */
4134 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4135 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4137 if (slp)
4138 continue;
4140 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4141 if (!next_stmt)
4142 break;
4145 if (!slp)
4147 if (j == 0)
4148 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4149 else
4150 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4151 prev_stmt_info = vinfo_for_stmt (new_stmt);
4155 VEC_free (tree, heap, dr_chain);
4156 VEC_free (tree, heap, oprnds);
4157 if (result_chain)
4158 VEC_free (tree, heap, result_chain);
4159 if (vec_oprnds)
4160 VEC_free (tree, heap, vec_oprnds);
4162 return true;
4165 /* Given a vector type VECTYPE and permutation SEL returns
4166 the VECTOR_CST mask that implements the permutation of the
4167 vector elements. If that is impossible to do, returns NULL. */
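/* Usage sketch (illustration only): to build a mask that swaps adjacent
   pairs of a 4-element vector, a caller could do

     unsigned char sel[4] = { 1, 0, 3, 2 };
     tree mask = vect_gen_perm_mask (vectype, sel);
     if (mask == NULL_TREE)
       ...the target cannot perform this permutation...

   assuming VECTYPE is a 4-element vector type.  */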
4169 tree
4170 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4172 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
4173 int i, nunits;
4175 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4177 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4178 return NULL;
4180 mask_elt_type = lang_hooks.types.type_for_mode
4181 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
4182 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4184 mask_elts = XALLOCAVEC (tree, nunits);
4185 for (i = nunits - 1; i >= 0; i--)
4186 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4187 mask_vec = build_vector (mask_type, mask_elts);
4189 return mask_vec;
4192 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4193 reversal of the vector elements. If that is impossible to do,
4194 returns NULL. */
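/* For example, for a 4-element vector the loop below builds the
   selector { 3, 2, 1, 0 }, so VEC_PERM_EXPR <x, x, {3, 2, 1, 0}>
   yields the elements of X in reverse order.  */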
4196 static tree
4197 perm_mask_for_reverse (tree vectype)
4199 int i, nunits;
4200 unsigned char *sel;
4202 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4203 sel = XALLOCAVEC (unsigned char, nunits);
4205 for (i = 0; i < nunits; ++i)
4206 sel[i] = nunits - 1 - i;
4208 return vect_gen_perm_mask (vectype, sel);
4211 /* Given vector variables X and Y that were generated for the scalar
4212 STMT, generate instructions to permute the vector elements of X and Y
4213 using permutation mask MASK_VEC, insert them at *GSI and return the
4214 permuted vector variable. */
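/* E.g. (illustration only) this emits a stmt of the form

     data_ref_N = VEC_PERM_EXPR <x, y, mask_vec>;

   and returns the new SSA name DATA_REF_N.  */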
4216 static tree
4217 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4218 gimple_stmt_iterator *gsi)
4220 tree vectype = TREE_TYPE (x);
4221 tree perm_dest, data_ref;
4222 gimple perm_stmt;
4224 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4225 data_ref = make_ssa_name (perm_dest, NULL);
4227 /* Generate the permute statement. */
4228 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4229 x, y, mask_vec);
4230 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4232 return data_ref;
4235 /* vectorizable_load.
4237 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4238 can be vectorized.
4239 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4240 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4241 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
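/* For example (illustration only), a scalar stmt like

     x_5 = a[i_3];

   whose LHS is an SSA_NAME and whose RHS is an ARRAY_REF (or MEM_REF,
   COMPONENT_REF, etc., see the checks below) is a candidate.  */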
4243 static bool
4244 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4245 slp_tree slp_node, slp_instance slp_node_instance)
4247 tree scalar_dest;
4248 tree vec_dest = NULL;
4249 tree data_ref = NULL;
4250 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4251 stmt_vec_info prev_stmt_info;
4252 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4253 struct loop *loop = NULL;
4254 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4255 bool nested_in_vect_loop = false;
4256 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4257 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4258 tree elem_type;
4259 tree new_temp;
4260 enum machine_mode mode;
4261 gimple new_stmt = NULL;
4262 tree dummy;
4263 enum dr_alignment_support alignment_support_scheme;
4264 tree dataref_ptr = NULL_TREE;
4265 gimple ptr_incr;
4266 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4267 int ncopies;
4268 int i, j, group_size;
4269 tree msq = NULL_TREE, lsq;
4270 tree offset = NULL_TREE;
4271 tree realignment_token = NULL_TREE;
4272 gimple phi = NULL;
4273 VEC(tree,heap) *dr_chain = NULL;
4274 bool grouped_load = false;
4275 bool load_lanes_p = false;
4276 gimple first_stmt;
4277 bool inv_p;
4278 bool negative = false;
4279 bool compute_in_loop = false;
4280 struct loop *at_loop;
4281 int vec_num;
4282 bool slp = (slp_node != NULL);
4283 bool slp_perm = false;
4284 enum tree_code code;
4285 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4286 int vf;
4287 tree aggr_type;
4288 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4289 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4290 tree stride_base, stride_step;
4291 int gather_scale = 1;
4292 enum vect_def_type gather_dt = vect_unknown_def_type;
4294 if (loop_vinfo)
4296 loop = LOOP_VINFO_LOOP (loop_vinfo);
4297 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4298 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4300 else
4301 vf = 1;
4303 /* Multiple types in SLP are handled by creating the appropriate number of
4304 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4305 case of SLP. */
4306 if (slp || PURE_SLP_STMT (stmt_info))
4307 ncopies = 1;
4308 else
4309 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4311 gcc_assert (ncopies >= 1);
4313 /* FORNOW. This restriction should be relaxed. */
4314 if (nested_in_vect_loop && ncopies > 1)
4316 if (vect_print_dump_info (REPORT_DETAILS))
4317 fprintf (vect_dump, "multiple types in nested loop.");
4318 return false;
4321 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4322 return false;
4324 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4325 return false;
4327 /* Is vectorizable load? */
4328 if (!is_gimple_assign (stmt))
4329 return false;
4331 scalar_dest = gimple_assign_lhs (stmt);
4332 if (TREE_CODE (scalar_dest) != SSA_NAME)
4333 return false;
4335 code = gimple_assign_rhs_code (stmt);
4336 if (code != ARRAY_REF
4337 && code != INDIRECT_REF
4338 && code != COMPONENT_REF
4339 && code != IMAGPART_EXPR
4340 && code != REALPART_EXPR
4341 && code != MEM_REF
4342 && TREE_CODE_CLASS (code) != tcc_declaration)
4343 return false;
4345 if (!STMT_VINFO_DATA_REF (stmt_info))
4346 return false;
4348 elem_type = TREE_TYPE (vectype);
4349 mode = TYPE_MODE (vectype);
4351 /* FORNOW. In some cases we can vectorize even if the data-type is not
4352 supported (e.g. - data copies). */
4353 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4355 if (vect_print_dump_info (REPORT_DETAILS))
4356 fprintf (vect_dump, "Aligned load, but unsupported type.");
4357 return false;
4360 /* Check if the load is a part of an interleaving chain. */
4361 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
4363 grouped_load = true;
4364 /* FORNOW */
4365 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4367 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4368 if (!slp && !PURE_SLP_STMT (stmt_info))
4370 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4371 if (vect_load_lanes_supported (vectype, group_size))
4372 load_lanes_p = true;
4373 else if (!vect_grouped_load_supported (vectype, group_size))
4374 return false;
4379 if (STMT_VINFO_GATHER_P (stmt_info))
4381 gimple def_stmt;
4382 tree def;
4383 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4384 &gather_off, &gather_scale);
4385 gcc_assert (gather_decl);
4386 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4387 &def_stmt, &def, &gather_dt,
4388 &gather_off_vectype))
4390 if (vect_print_dump_info (REPORT_DETAILS))
4391 fprintf (vect_dump, "gather index use not simple.");
4392 return false;
4395 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4397 if (!vect_check_strided_load (stmt, loop_vinfo,
4398 &stride_base, &stride_step))
4399 return false;
4401 else
4403 negative = tree_int_cst_compare (nested_in_vect_loop
4404 ? STMT_VINFO_DR_STEP (stmt_info)
4405 : DR_STEP (dr),
4406 size_zero_node) < 0;
4407 if (negative && ncopies > 1)
4409 if (vect_print_dump_info (REPORT_DETAILS))
4410 fprintf (vect_dump, "multiple types with negative step.");
4411 return false;
4414 if (negative)
4416 gcc_assert (!grouped_load);
4417 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4418 if (alignment_support_scheme != dr_aligned
4419 && alignment_support_scheme != dr_unaligned_supported)
4421 if (vect_print_dump_info (REPORT_DETAILS))
4422 fprintf (vect_dump, "negative step but alignment required.");
4423 return false;
4425 if (!perm_mask_for_reverse (vectype))
4427 if (vect_print_dump_info (REPORT_DETAILS))
4428 fprintf (vect_dump, "negative step and reversing not supported.");
4429 return false;
4434 if (!vec_stmt) /* transformation not required. */
4436 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4437 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL);
4438 return true;
4441 if (vect_print_dump_info (REPORT_DETAILS))
4442 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4444 /** Transform. **/
4446 if (STMT_VINFO_GATHER_P (stmt_info))
4448 tree vec_oprnd0 = NULL_TREE, op;
4449 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4450 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4451 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4452 edge pe = loop_preheader_edge (loop);
4453 gimple_seq seq;
4454 basic_block new_bb;
4455 enum { NARROW, NONE, WIDEN } modifier;
4456 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4458 if (nunits == gather_off_nunits)
4459 modifier = NONE;
4460 else if (nunits == gather_off_nunits / 2)
4462 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4463 modifier = WIDEN;
4465 for (i = 0; i < gather_off_nunits; ++i)
4466 sel[i] = i | nunits;
4468 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4469 gcc_assert (perm_mask != NULL_TREE);
4471 else if (nunits == gather_off_nunits * 2)
4473 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4474 modifier = NARROW;
4476 for (i = 0; i < nunits; ++i)
4477 sel[i] = i < gather_off_nunits
4478 ? i : i + nunits - gather_off_nunits;
4480 perm_mask = vect_gen_perm_mask (vectype, sel);
4481 gcc_assert (perm_mask != NULL_TREE);
4482 ncopies *= 2;
4484 else
4485 gcc_unreachable ();
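/* Worked example of the selectors above (illustration only): with
   nunits == 4 and gather_off_nunits == 8 (WIDEN), sel[i] = i | 4 gives
   { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. the upper half of the offset vector
   replicated; with nunits == 8 and gather_off_nunits == 4 (NARROW),
   the selector is { 0, 1, 2, 3, 8, 9, 10, 11 }, which concatenates the
   low halves of the two inputs of the VEC_PERM_EXPR.  */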
4487 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4488 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4489 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4490 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4491 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4492 scaletype = TREE_VALUE (arglist);
4493 gcc_checking_assert (types_compatible_p (srctype, rettype)
4494 && types_compatible_p (srctype, masktype));
4496 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4498 ptr = fold_convert (ptrtype, gather_base);
4499 if (!is_gimple_min_invariant (ptr))
4501 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4502 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4503 gcc_assert (!new_bb);
4506 /* Currently we support only unconditional gather loads,
4507 so mask should be all ones. */
4508 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4509 mask = build_int_cst (TREE_TYPE (masktype), -1);
4510 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4512 REAL_VALUE_TYPE r;
4513 long tmp[6];
4514 for (j = 0; j < 6; ++j)
4515 tmp[j] = -1;
4516 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4517 mask = build_real (TREE_TYPE (masktype), r);
4519 else
4520 gcc_unreachable ();
4521 mask = build_vector_from_val (masktype, mask);
4522 mask = vect_init_vector (stmt, mask, masktype, NULL);
4524 scale = build_int_cst (scaletype, gather_scale);
4526 prev_stmt_info = NULL;
4527 for (j = 0; j < ncopies; ++j)
4529 if (modifier == WIDEN && (j & 1))
4530 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4531 perm_mask, stmt, gsi);
4532 else if (j == 0)
4533 op = vec_oprnd0
4534 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4535 else
4536 op = vec_oprnd0
4537 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4539 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4541 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4542 == TYPE_VECTOR_SUBPARTS (idxtype));
4543 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4544 var = make_ssa_name (var, NULL);
4545 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4546 new_stmt
4547 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4548 op, NULL_TREE);
4549 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4550 op = var;
4553 new_stmt
4554 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4556 if (!useless_type_conversion_p (vectype, rettype))
4558 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4559 == TYPE_VECTOR_SUBPARTS (rettype));
4560 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4561 op = make_ssa_name (var, new_stmt);
4562 gimple_call_set_lhs (new_stmt, op);
4563 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4564 var = make_ssa_name (vec_dest, NULL);
4565 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4566 new_stmt
4567 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4568 NULL_TREE);
4570 else
4572 var = make_ssa_name (vec_dest, new_stmt);
4573 gimple_call_set_lhs (new_stmt, var);
4576 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4578 if (modifier == NARROW)
4580 if ((j & 1) == 0)
4582 prev_res = var;
4583 continue;
4585 var = permute_vec_elements (prev_res, var,
4586 perm_mask, stmt, gsi);
4587 new_stmt = SSA_NAME_DEF_STMT (var);
4590 if (prev_stmt_info == NULL)
4591 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4592 else
4593 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4594 prev_stmt_info = vinfo_for_stmt (new_stmt);
4596 return true;
4598 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4600 gimple_stmt_iterator incr_gsi;
4601 bool insert_after;
4602 gimple incr;
4603 tree offvar;
4604 tree ref = DR_REF (dr);
4605 tree ivstep;
4606 tree running_off;
4607 VEC(constructor_elt, gc) *v = NULL;
4608 gimple_seq stmts = NULL;
4610 gcc_assert (stride_base && stride_step);
4612 /* For a load with loop-invariant (but other than power-of-2)
4613 stride (i.e. not a grouped access) like so:
4615 for (i = 0; i < n; i += stride)
4616 ... = array[i];
4618 we generate a new induction variable and new accesses to
4619 form a new vector (or vectors, depending on ncopies):
4621 for (j = 0; ; j += VF*stride)
4622 tmp1 = array[j];
4623 tmp2 = array[j + stride];
4625 vectemp = {tmp1, tmp2, ...} */
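/* For instance (illustration only), with VF == nunits == 4 and a stride
   of 3, J below advances by 12 per iteration and the four elements
   gathered into one vector are array[j], array[j + 3], array[j + 6]
   and array[j + 9].  */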
4628 ivstep = stride_step;
4629 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4630 build_int_cst (TREE_TYPE (ivstep), vf));
4632 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4634 create_iv (stride_base, ivstep, NULL,
4635 loop, &incr_gsi, insert_after,
4636 &offvar, NULL);
4637 incr = gsi_stmt (incr_gsi);
4638 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4640 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4641 if (stmts)
4642 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4644 prev_stmt_info = NULL;
4645 running_off = offvar;
4646 for (j = 0; j < ncopies; j++)
4648 tree vec_inv;
4650 v = VEC_alloc (constructor_elt, gc, nunits);
4651 for (i = 0; i < nunits; i++)
4653 tree newref, newoff;
4654 gimple incr;
4655 if (TREE_CODE (ref) == ARRAY_REF)
4656 newref = build4 (ARRAY_REF, TREE_TYPE (ref),
4657 unshare_expr (TREE_OPERAND (ref, 0)),
4658 running_off,
4659 NULL_TREE, NULL_TREE);
4660 else
4661 newref = build2 (MEM_REF, TREE_TYPE (ref),
4662 running_off,
4663 TREE_OPERAND (ref, 1));
4665 newref = force_gimple_operand_gsi (gsi, newref, true,
4666 NULL_TREE, true,
4667 GSI_SAME_STMT);
4668 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
4669 newoff = SSA_NAME_VAR (running_off);
4670 if (POINTER_TYPE_P (TREE_TYPE (newoff)))
4671 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4672 running_off, stride_step);
4673 else
4674 incr = gimple_build_assign_with_ops (PLUS_EXPR, newoff,
4675 running_off, stride_step);
4676 newoff = make_ssa_name (newoff, incr);
4677 gimple_assign_set_lhs (incr, newoff);
4678 vect_finish_stmt_generation (stmt, incr, gsi);
4680 running_off = newoff;
4683 vec_inv = build_constructor (vectype, v);
4684 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4685 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4687 if (j == 0)
4688 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4689 else
4690 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4691 prev_stmt_info = vinfo_for_stmt (new_stmt);
4693 return true;
4696 if (grouped_load)
4698 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4699 if (slp
4700 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4701 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4702 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4704 /* Check if the chain of loads is already vectorized. */
4705 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4707 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4708 return true;
4710 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4711 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4713 /* VEC_NUM is the number of vect stmts to be created for this group. */
4714 if (slp)
4716 grouped_load = false;
4717 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4718 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4719 slp_perm = true;
4721 else
4722 vec_num = group_size;
4724 else
4726 first_stmt = stmt;
4727 first_dr = dr;
4728 group_size = vec_num = 1;
4731 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4732 gcc_assert (alignment_support_scheme);
4733 /* Targets with load-lane instructions must not require explicit
4734 realignment. */
4735 gcc_assert (!load_lanes_p
4736 || alignment_support_scheme == dr_aligned
4737 || alignment_support_scheme == dr_unaligned_supported);
4739 /* In case the vectorization factor (VF) is bigger than the number
4740 of elements that we can fit in a vectype (nunits), we have to generate
4741 more than one vector stmt - i.e - we need to "unroll" the
4742 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4743 from one copy of the vector stmt to the next, in the field
4744 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4745 stages to find the correct vector defs to be used when vectorizing
4746 stmts that use the defs of the current stmt. The example below
4747 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4748 need to create 4 vectorized stmts):
4750 before vectorization:
4751 RELATED_STMT VEC_STMT
4752 S1: x = memref - -
4753 S2: z = x + 1 - -
4755 step 1: vectorize stmt S1:
4756 We first create the vector stmt VS1_0, and, as usual, record a
4757 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4758 Next, we create the vector stmt VS1_1, and record a pointer to
4759 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4760 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4761 stmts and pointers:
4762 RELATED_STMT VEC_STMT
4763 VS1_0: vx0 = memref0 VS1_1 -
4764 VS1_1: vx1 = memref1 VS1_2 -
4765 VS1_2: vx2 = memref2 VS1_3 -
4766 VS1_3: vx3 = memref3 - -
4767 S1: x = load - VS1_0
4768 S2: z = x + 1 - -
4770 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4771 information we recorded in RELATED_STMT field is used to vectorize
4772 stmt S2. */
4774 /* In case of interleaving (non-unit grouped access):
4776 S1: x2 = &base + 2
4777 S2: x0 = &base
4778 S3: x1 = &base + 1
4779 S4: x3 = &base + 3
4781 Vectorized loads are created in the order of memory accesses
4782 starting from the access of the first stmt of the chain:
4784 VS1: vx0 = &base
4785 VS2: vx1 = &base + vec_size*1
4786 VS3: vx2 = &base + vec_size*2
4787 VS4: vx3 = &base + vec_size*3
4789 Then permutation statements are generated:
4791 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4792 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4795 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4796 (the order of the data-refs in the output of vect_permute_load_chain
4797 corresponds to the order of scalar stmts in the interleaving chain - see
4798 the documentation of vect_permute_load_chain()).
4799 The generation of permutation stmts and recording them in
4800 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4802 In case of both multiple types and interleaving, the vector loads and
4803 permutation stmts above are created for every copy. The result vector
4804 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4805 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4807 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4808 on a target that supports unaligned accesses (dr_unaligned_supported)
4809 we generate the following code:
4810 p = initial_addr;
4811 indx = 0;
4812 loop {
4813 p = p + indx * vectype_size;
4814 vec_dest = *(p);
4815 indx = indx + 1;
4818 Otherwise, the data reference is potentially unaligned on a target that
4819 does not support unaligned accesses (dr_explicit_realign_optimized) -
4820 then generate the following code, in which the data in each iteration is
4821 obtained by two vector loads, one from the previous iteration, and one
4822 from the current iteration:
4823 p1 = initial_addr;
4824 msq_init = *(floor(p1))
4825 p2 = initial_addr + VS - 1;
4826 realignment_token = call target_builtin;
4827 indx = 0;
4828 loop {
4829 p2 = p2 + indx * vectype_size
4830 lsq = *(floor(p2))
4831 vec_dest = realign_load (msq, lsq, realignment_token)
4832 indx = indx + 1;
4833 msq = lsq;
4834 } */
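/* In the pseudo-code above, floor(p) denotes P rounded down to the
   vector alignment; the code below computes it as
   p & -TYPE_ALIGN_UNIT (vectype) in the dr_explicit_realign cases.  */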
4836 /* If the misalignment remains the same throughout the execution of the
4837 loop, we can create the init_addr and permutation mask at the loop
4838 preheader. Otherwise, it needs to be created inside the loop.
4839 This can only occur when vectorizing memory accesses in the inner-loop
4840 nested within an outer-loop that is being vectorized. */
4842 if (nested_in_vect_loop
4843 && (TREE_INT_CST_LOW (DR_STEP (dr))
4844 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4846 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4847 compute_in_loop = true;
4850 if ((alignment_support_scheme == dr_explicit_realign_optimized
4851 || alignment_support_scheme == dr_explicit_realign)
4852 && !compute_in_loop)
4854 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4855 alignment_support_scheme, NULL_TREE,
4856 &at_loop);
4857 if (alignment_support_scheme == dr_explicit_realign_optimized)
4859 phi = SSA_NAME_DEF_STMT (msq);
4860 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4863 else
4864 at_loop = loop;
4866 if (negative)
4867 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4869 if (load_lanes_p)
4870 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4871 else
4872 aggr_type = vectype;
4874 prev_stmt_info = NULL;
4875 for (j = 0; j < ncopies; j++)
4877 /* 1. Create the vector or array pointer update chain. */
4878 if (j == 0)
4879 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4880 offset, &dummy, gsi,
4881 &ptr_incr, false, &inv_p);
4882 else
4883 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4884 TYPE_SIZE_UNIT (aggr_type));
4886 if (grouped_load || slp_perm)
4887 dr_chain = VEC_alloc (tree, heap, vec_num);
4889 if (load_lanes_p)
4891 tree vec_array;
4893 vec_array = create_vector_array (vectype, vec_num);
4895 /* Emit:
4896 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4897 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4898 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4899 gimple_call_set_lhs (new_stmt, vec_array);
4900 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4902 /* Extract each vector into an SSA_NAME. */
4903 for (i = 0; i < vec_num; i++)
4905 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4906 vec_array, i);
4907 VEC_quick_push (tree, dr_chain, new_temp);
4910 /* Record the mapping between SSA_NAMEs and statements. */
4911 vect_record_grouped_load_vectors (stmt, dr_chain);
4913 else
4915 for (i = 0; i < vec_num; i++)
4917 if (i > 0)
4918 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4919 stmt, NULL_TREE);
4921 /* 2. Create the vector-load in the loop. */
4922 switch (alignment_support_scheme)
4924 case dr_aligned:
4925 case dr_unaligned_supported:
4927 unsigned int align, misalign;
4929 data_ref
4930 = build2 (MEM_REF, vectype, dataref_ptr,
4931 build_int_cst (reference_alias_ptr_type
4932 (DR_REF (first_dr)), 0));
4933 align = TYPE_ALIGN_UNIT (vectype);
4934 if (alignment_support_scheme == dr_aligned)
4936 gcc_assert (aligned_access_p (first_dr));
4937 misalign = 0;
4939 else if (DR_MISALIGNMENT (first_dr) == -1)
4941 TREE_TYPE (data_ref)
4942 = build_aligned_type (TREE_TYPE (data_ref),
4943 TYPE_ALIGN (elem_type));
4944 align = TYPE_ALIGN_UNIT (elem_type);
4945 misalign = 0;
4947 else
4949 TREE_TYPE (data_ref)
4950 = build_aligned_type (TREE_TYPE (data_ref),
4951 TYPE_ALIGN (elem_type));
4952 misalign = DR_MISALIGNMENT (first_dr);
4954 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
4955 align, misalign);
4956 break;
4958 case dr_explicit_realign:
4960 tree ptr, bump;
4961 tree vs_minus_1;
4963 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4965 if (compute_in_loop)
4966 msq = vect_setup_realignment (first_stmt, gsi,
4967 &realignment_token,
4968 dr_explicit_realign,
4969 dataref_ptr, NULL);
4971 new_stmt = gimple_build_assign_with_ops
4972 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
4973 build_int_cst
4974 (TREE_TYPE (dataref_ptr),
4975 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4976 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
4977 gimple_assign_set_lhs (new_stmt, ptr);
4978 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4979 data_ref
4980 = build2 (MEM_REF, vectype, ptr,
4981 build_int_cst (reference_alias_ptr_type
4982 (DR_REF (first_dr)), 0));
4983 vec_dest = vect_create_destination_var (scalar_dest,
4984 vectype);
4985 new_stmt = gimple_build_assign (vec_dest, data_ref);
4986 new_temp = make_ssa_name (vec_dest, new_stmt);
4987 gimple_assign_set_lhs (new_stmt, new_temp);
4988 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
4989 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
4990 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4991 msq = new_temp;
4993 bump = size_binop (MULT_EXPR, vs_minus_1,
4994 TYPE_SIZE_UNIT (elem_type));
4995 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
4996 new_stmt = gimple_build_assign_with_ops
4997 (BIT_AND_EXPR, NULL_TREE, ptr,
4998 build_int_cst
4999 (TREE_TYPE (ptr),
5000 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5001 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
5002 gimple_assign_set_lhs (new_stmt, ptr);
5003 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5004 data_ref
5005 = build2 (MEM_REF, vectype, ptr,
5006 build_int_cst (reference_alias_ptr_type
5007 (DR_REF (first_dr)), 0));
5008 break;
5010 case dr_explicit_realign_optimized:
5011 new_stmt = gimple_build_assign_with_ops
5012 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
5013 build_int_cst
5014 (TREE_TYPE (dataref_ptr),
5015 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5016 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
5017 new_stmt);
5018 gimple_assign_set_lhs (new_stmt, new_temp);
5019 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5020 data_ref
5021 = build2 (MEM_REF, vectype, new_temp,
5022 build_int_cst (reference_alias_ptr_type
5023 (DR_REF (first_dr)), 0));
5024 break;
5025 default:
5026 gcc_unreachable ();
5028 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5029 new_stmt = gimple_build_assign (vec_dest, data_ref);
5030 new_temp = make_ssa_name (vec_dest, new_stmt);
5031 gimple_assign_set_lhs (new_stmt, new_temp);
5032 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5034 /* 3. Handle explicit realignment if necessary/supported.
5035 Create in loop:
5036 vec_dest = realign_load (msq, lsq, realignment_token) */
5037 if (alignment_support_scheme == dr_explicit_realign_optimized
5038 || alignment_support_scheme == dr_explicit_realign)
5040 lsq = gimple_assign_lhs (new_stmt);
5041 if (!realignment_token)
5042 realignment_token = dataref_ptr;
5043 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5044 new_stmt
5045 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
5046 vec_dest, msq, lsq,
5047 realignment_token);
5048 new_temp = make_ssa_name (vec_dest, new_stmt);
5049 gimple_assign_set_lhs (new_stmt, new_temp);
5050 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5052 if (alignment_support_scheme == dr_explicit_realign_optimized)
5054 gcc_assert (phi);
5055 if (i == vec_num - 1 && j == ncopies - 1)
5056 add_phi_arg (phi, lsq,
5057 loop_latch_edge (containing_loop),
5058 UNKNOWN_LOCATION);
5059 msq = lsq;
5063 /* 4. Handle invariant-load. */
5064 if (inv_p && !bb_vinfo)
5066 gimple_stmt_iterator gsi2 = *gsi;
5067 gcc_assert (!grouped_load);
5068 gsi_next (&gsi2);
5069 new_temp = vect_init_vector (stmt, scalar_dest,
5070 vectype, &gsi2);
5071 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5074 if (negative)
5076 tree perm_mask = perm_mask_for_reverse (vectype);
5077 new_temp = permute_vec_elements (new_temp, new_temp,
5078 perm_mask, stmt, gsi);
5079 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5082 /* Collect vector loads and later create their permutation in
5083 vect_transform_grouped_load (). */
5084 if (grouped_load || slp_perm)
5085 VEC_quick_push (tree, dr_chain, new_temp);
5087 /* Store vector loads in the corresponding SLP_NODE. */
5088 if (slp && !slp_perm)
5089 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
5090 new_stmt);
5094 if (slp && !slp_perm)
5095 continue;
5097 if (slp_perm)
5099 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
5100 slp_node_instance, false))
5102 VEC_free (tree, heap, dr_chain);
5103 return false;
5106 else
5108 if (grouped_load)
5110 if (!load_lanes_p)
5111 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
5112 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5114 else
5116 if (j == 0)
5117 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5118 else
5119 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5120 prev_stmt_info = vinfo_for_stmt (new_stmt);
5123 if (dr_chain)
5124 VEC_free (tree, heap, dr_chain);
5127 return true;
5130 /* Function vect_is_simple_cond.
5132 Input:
5133 LOOP - the loop that is being vectorized.
5134 COND - Condition that is checked for simple use.
5136 Output:
5137 *COMP_VECTYPE - the vector type for the comparison.
5139 Returns whether a COND can be vectorized. Checks whether the
5140 condition operands are supportable using vect_is_simple_use. */
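/* For example (illustration only), a COND like a_1 < b_2 where both
   operands are SSA_NAMEs with vectorizable defs is accepted, and
   *COMP_VECTYPE is set to the operands' vector type; constant operands
   (INTEGER_CST, REAL_CST, FIXED_CST) are also accepted, as the checks
   below show.  */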
5142 static bool
5143 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5144 bb_vec_info bb_vinfo, tree *comp_vectype)
5146 tree lhs, rhs;
5147 tree def;
5148 enum vect_def_type dt;
5149 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5151 if (!COMPARISON_CLASS_P (cond))
5152 return false;
5154 lhs = TREE_OPERAND (cond, 0);
5155 rhs = TREE_OPERAND (cond, 1);
5157 if (TREE_CODE (lhs) == SSA_NAME)
5159 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5160 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5161 &lhs_def_stmt, &def, &dt, &vectype1))
5162 return false;
5164 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5165 && TREE_CODE (lhs) != FIXED_CST)
5166 return false;
5168 if (TREE_CODE (rhs) == SSA_NAME)
5170 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5171 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5172 &rhs_def_stmt, &def, &dt, &vectype2))
5173 return false;
5175 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5176 && TREE_CODE (rhs) != FIXED_CST)
5177 return false;
5179 *comp_vectype = vectype1 ? vectype1 : vectype2;
5180 return true;
5183 /* vectorizable_condition.
5185 Check if STMT is a conditional modify expression that can be vectorized.
5186 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5187 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5188 at GSI.
5190 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
5191 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
5192 the else clause if it is 2).
5194 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
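/* For example (illustration only), a scalar stmt

     x_1 = a_2 < b_3 ? c_4 : d_5;

   is transformed below into

     vx = VEC_COND_EXPR <va < vb, vc, vd>;

   where va/vb/vc/vd stand for the vectorized defs of the respective
   operands.  */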
5196 bool
5197 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5198 gimple *vec_stmt, tree reduc_def, int reduc_index,
5199 slp_tree slp_node)
5201 tree scalar_dest = NULL_TREE;
5202 tree vec_dest = NULL_TREE;
5203 tree cond_expr, then_clause, else_clause;
5204 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5205 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5206 tree comp_vectype = NULL_TREE;
5207 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5208 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5209 tree vec_compare, vec_cond_expr;
5210 tree new_temp;
5211 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5212 tree def;
5213 enum vect_def_type dt, dts[4];
5214 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5215 int ncopies;
5216 enum tree_code code;
5217 stmt_vec_info prev_stmt_info = NULL;
5218 int i, j;
5219 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5220 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
5221 VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
5223 if (slp_node || PURE_SLP_STMT (stmt_info))
5224 ncopies = 1;
5225 else
5226 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5228 gcc_assert (ncopies >= 1);
5229 if (reduc_index && ncopies > 1)
5230 return false; /* FORNOW */
5232 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5233 return false;
5235 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5236 return false;
5238 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5239 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5240 && reduc_def))
5241 return false;
5243 /* FORNOW: not yet supported. */
5244 if (STMT_VINFO_LIVE_P (stmt_info))
5246 if (vect_print_dump_info (REPORT_DETAILS))
5247 fprintf (vect_dump, "value used after loop.");
5248 return false;
5251 /* Is vectorizable conditional operation? */
5252 if (!is_gimple_assign (stmt))
5253 return false;
5255 code = gimple_assign_rhs_code (stmt);
5257 if (code != COND_EXPR)
5258 return false;
5260 cond_expr = gimple_assign_rhs1 (stmt);
5261 then_clause = gimple_assign_rhs2 (stmt);
5262 else_clause = gimple_assign_rhs3 (stmt);
5264 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5265 &comp_vectype)
5266 || !comp_vectype)
5267 return false;
5269 if (TREE_CODE (then_clause) == SSA_NAME)
5271 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5272 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5273 &then_def_stmt, &def, &dt))
5274 return false;
5276 else if (TREE_CODE (then_clause) != INTEGER_CST
5277 && TREE_CODE (then_clause) != REAL_CST
5278 && TREE_CODE (then_clause) != FIXED_CST)
5279 return false;
5281 if (TREE_CODE (else_clause) == SSA_NAME)
5283 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5284 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5285 &else_def_stmt, &def, &dt))
5286 return false;
5288 else if (TREE_CODE (else_clause) != INTEGER_CST
5289 && TREE_CODE (else_clause) != REAL_CST
5290 && TREE_CODE (else_clause) != FIXED_CST)
5291 return false;
5293 if (!vec_stmt)
5295 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5296 return expand_vec_cond_expr_p (vectype, comp_vectype);
5299 /* Transform. */
5301 if (!slp_node)
5303 vec_oprnds0 = VEC_alloc (tree, heap, 1);
5304 vec_oprnds1 = VEC_alloc (tree, heap, 1);
5305 vec_oprnds2 = VEC_alloc (tree, heap, 1);
5306 vec_oprnds3 = VEC_alloc (tree, heap, 1);
5309 /* Handle def. */
5310 scalar_dest = gimple_assign_lhs (stmt);
5311 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5313 /* Handle cond expr. */
5314 for (j = 0; j < ncopies; j++)
5316 gimple new_stmt = NULL;
5317 if (j == 0)
5319 if (slp_node)
5321 VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
5322 VEC (slp_void_p, heap) *vec_defs;
5324 vec_defs = VEC_alloc (slp_void_p, heap, 4);
5325 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
5326 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
5327 VEC_safe_push (tree, heap, ops, then_clause);
5328 VEC_safe_push (tree, heap, ops, else_clause);
5329 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5330 vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5331 vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5332 vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5333 vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5335 VEC_free (tree, heap, ops);
5336 VEC_free (slp_void_p, heap, vec_defs);
5338 else
5340 gimple gtemp;
5341 vec_cond_lhs =
5342 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5343 stmt, NULL);
5344 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5345 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5347 vec_cond_rhs =
5348 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5349 stmt, NULL);
5350 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5351 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5352 if (reduc_index == 1)
5353 vec_then_clause = reduc_def;
5354 else
5356 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5357 stmt, NULL);
5358 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5359 NULL, &gtemp, &def, &dts[2]);
5361 if (reduc_index == 2)
5362 vec_else_clause = reduc_def;
5363 else
5365 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5366 stmt, NULL);
5367 vect_is_simple_use (else_clause, stmt, loop_vinfo,
5368 NULL, &gtemp, &def, &dts[3]);
5372 else
5374 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5375 VEC_pop (tree, vec_oprnds0));
5376 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5377 VEC_pop (tree, vec_oprnds1));
5378 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5379 VEC_pop (tree, vec_oprnds2));
5380 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5381 VEC_pop (tree, vec_oprnds3));
5384 if (!slp_node)
5386 VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5387 VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5388 VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5389 VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
5392 /* Arguments are ready. Create the new vector stmt. */
5393 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5395 vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5396 vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5397 vec_else_clause = VEC_index (tree, vec_oprnds3, i);
5399 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5400 vec_cond_lhs, vec_cond_rhs);
5401 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5402 vec_compare, vec_then_clause, vec_else_clause);
5404 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5405 new_temp = make_ssa_name (vec_dest, new_stmt);
5406 gimple_assign_set_lhs (new_stmt, new_temp);
5407 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5408 if (slp_node)
5409 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5412 if (slp_node)
5413 continue;
5415 if (j == 0)
5416 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5417 else
5418 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5420 prev_stmt_info = vinfo_for_stmt (new_stmt);
5423 VEC_free (tree, heap, vec_oprnds0);
5424 VEC_free (tree, heap, vec_oprnds1);
5425 VEC_free (tree, heap, vec_oprnds2);
5426 VEC_free (tree, heap, vec_oprnds3);
5428 return true;
5432 /* Make sure the statement is vectorizable. */
5434 bool
5435 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5437 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5438 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5439 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5440 bool ok;
5441 tree scalar_type, vectype;
5442 gimple pattern_stmt;
5443 gimple_seq pattern_def_seq;
5445 if (vect_print_dump_info (REPORT_DETAILS))
5447 fprintf (vect_dump, "==> examining statement: ");
5448 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5451 if (gimple_has_volatile_ops (stmt))
5453 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5454 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5456 return false;
5459 /* Skip stmts that do not need to be vectorized. In loops this is expected
5460 to include:
5461 - the COND_EXPR which is the loop exit condition
5462 - any LABEL_EXPRs in the loop
5463 - computations that are used only for array indexing or loop control.
5464 In basic blocks we only analyze statements that are a part of some SLP
5465 instance, therefore, all the statements are relevant.
5467 A pattern statement needs to be analyzed instead of the original statement
5468 if the original statement is not relevant. Otherwise, we analyze both
5469 statements. In basic blocks we are called from some SLP instance
5470 traversal; in that case don't analyze pattern stmts here, since the
5471 pattern stmts are already part of the SLP instance. */
5473 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5474 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5475 && !STMT_VINFO_LIVE_P (stmt_info))
5477 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5478 && pattern_stmt
5479 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5480 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5482 /* Analyze PATTERN_STMT instead of the original stmt. */
5483 stmt = pattern_stmt;
5484 stmt_info = vinfo_for_stmt (pattern_stmt);
5485 if (vect_print_dump_info (REPORT_DETAILS))
5487 fprintf (vect_dump, "==> examining pattern statement: ");
5488 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5491 else
5493 if (vect_print_dump_info (REPORT_DETAILS))
5494 fprintf (vect_dump, "irrelevant.");
5496 return true;
5499 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5500 && node == NULL
5501 && pattern_stmt
5502 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5503 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5505 /* Analyze PATTERN_STMT too. */
5506 if (vect_print_dump_info (REPORT_DETAILS))
5508 fprintf (vect_dump, "==> examining pattern statement: ");
5509 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5512 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5513 return false;
5516 if (is_pattern_stmt_p (stmt_info)
5517 && node == NULL
5518 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5520 gimple_stmt_iterator si;
5522 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5524 gimple pattern_def_stmt = gsi_stmt (si);
5525 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5526 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5528 /* Analyze def stmt of STMT if it's a pattern stmt. */
5529 if (vect_print_dump_info (REPORT_DETAILS))
5531 fprintf (vect_dump, "==> examining pattern def statement: ");
5532 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5535 if (!vect_analyze_stmt (pattern_def_stmt,
5536 need_to_vectorize, node))
5537 return false;
5542 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5544 case vect_internal_def:
5545 break;
5547 case vect_reduction_def:
5548 case vect_nested_cycle:
5549 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5550 || relevance == vect_used_in_outer_by_reduction
5551 || relevance == vect_unused_in_scope));
5552 break;
5554 case vect_induction_def:
5555 case vect_constant_def:
5556 case vect_external_def:
5557 case vect_unknown_def_type:
5558 default:
5559 gcc_unreachable ();
5562 if (bb_vinfo)
5564 gcc_assert (PURE_SLP_STMT (stmt_info));
5566 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5567 if (vect_print_dump_info (REPORT_DETAILS))
5569 fprintf (vect_dump, "get vectype for scalar type: ");
5570 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5573 vectype = get_vectype_for_scalar_type (scalar_type);
5574 if (!vectype)
5576 if (vect_print_dump_info (REPORT_DETAILS))
5578 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5579 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5581 return false;
5584 if (vect_print_dump_info (REPORT_DETAILS))
5586 fprintf (vect_dump, "vectype: ");
5587 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5590 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5593 if (STMT_VINFO_RELEVANT_P (stmt_info))
5595 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5596 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5597 *need_to_vectorize = true;
5600 ok = true;
5601 if (!bb_vinfo
5602 && (STMT_VINFO_RELEVANT_P (stmt_info)
5603 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5604 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5605 || vectorizable_shift (stmt, NULL, NULL, NULL)
5606 || vectorizable_operation (stmt, NULL, NULL, NULL)
5607 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5608 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5609 || vectorizable_call (stmt, NULL, NULL, NULL)
5610 || vectorizable_store (stmt, NULL, NULL, NULL)
5611 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5612 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5613 else
5615 if (bb_vinfo)
5616 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5617 || vectorizable_shift (stmt, NULL, NULL, node)
5618 || vectorizable_operation (stmt, NULL, NULL, node)
5619 || vectorizable_assignment (stmt, NULL, NULL, node)
5620 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5621 || vectorizable_call (stmt, NULL, NULL, node)
5622 || vectorizable_store (stmt, NULL, NULL, node)
5623 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5626 if (!ok)
5628 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5630 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5631 fprintf (vect_dump, "supported: ");
5632 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5635 return false;
5638 if (bb_vinfo)
5639 return true;
5641 /* Stmts that are (also) "live" (i.e., that are used outside of the loop)
5642 need extra handling, except for vectorizable reductions. */
5643 if (STMT_VINFO_LIVE_P (stmt_info)
5644 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5645 ok = vectorizable_live_operation (stmt, NULL, NULL);
5647 if (!ok)
5649 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5651 fprintf (vect_dump, "not vectorized: live stmt not ");
5652 fprintf (vect_dump, "supported: ");
5653 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5656 return false;
5659 return true;
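/* A minimal sketch (not part of the vectorizer sources; the helper name is
   hypothetical) of how a driver might invoke vect_analyze_stmt above for
   every statement of a loop-body basic block BB.  Passing NULL as the SLP
   node corresponds to plain loop analysis.  */
static bool
check_all_stmts_vectorizable (basic_block bb, bool *need_to_vectorize)
{
  gimple_stmt_iterator si;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    /* A single unsupported statement is enough to give up.  */
    if (!vect_analyze_stmt (gsi_stmt (si), need_to_vectorize, NULL))
      return false;

  return true;
}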
5663 /* Function vect_transform_stmt.
5665 Create a vectorized stmt to replace STMT, and insert it at GSI. */
5667 bool
5668 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5669 bool *grouped_store, slp_tree slp_node,
5670 slp_instance slp_node_instance)
5672 bool is_store = false;
5673 gimple vec_stmt = NULL;
5674 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5675 bool done;
5677 switch (STMT_VINFO_TYPE (stmt_info))
5679 case type_demotion_vec_info_type:
5680 case type_promotion_vec_info_type:
5681 case type_conversion_vec_info_type:
5682 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5683 gcc_assert (done);
5684 break;
5686 case induc_vec_info_type:
5687 gcc_assert (!slp_node);
5688 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5689 gcc_assert (done);
5690 break;
5692 case shift_vec_info_type:
5693 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5694 gcc_assert (done);
5695 break;
5697 case op_vec_info_type:
5698 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5699 gcc_assert (done);
5700 break;
5702 case assignment_vec_info_type:
5703 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5704 gcc_assert (done);
5705 break;
5707 case load_vec_info_type:
5708 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5709 slp_node_instance);
5710 gcc_assert (done);
5711 break;
5713 case store_vec_info_type:
5714 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5715 gcc_assert (done);
5716 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5718 /* In case of interleaving, the whole chain is vectorized when the
5719 last store in the chain is reached. Store stmts before the last
5720 one are skipped, and their vec_stmt_info shouldn't be freed
5721 meanwhile. */
5722 *grouped_store = true;
5723 if (STMT_VINFO_VEC_STMT (stmt_info))
5724 is_store = true;
5726 else
5727 is_store = true;
5728 break;
5730 case condition_vec_info_type:
5731 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5732 gcc_assert (done);
5733 break;
5735 case call_vec_info_type:
5736 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5737 stmt = gsi_stmt (*gsi);
5738 break;
5740 case reduc_vec_info_type:
5741 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5742 gcc_assert (done);
5743 break;
5745 default:
5746 if (!STMT_VINFO_LIVE_P (stmt_info))
5748 if (vect_print_dump_info (REPORT_DETAILS))
5749 fprintf (vect_dump, "stmt not supported.");
5750 gcc_unreachable ();
5754 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5755 is being vectorized, but outside the immediately enclosing loop. */
5756 if (vec_stmt
5757 && STMT_VINFO_LOOP_VINFO (stmt_info)
5758 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5759 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5760 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5761 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5762 || STMT_VINFO_RELEVANT (stmt_info) ==
5763 vect_used_in_outer_by_reduction))
5765 struct loop *innerloop = LOOP_VINFO_LOOP (
5766 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5767 imm_use_iterator imm_iter;
5768 use_operand_p use_p;
5769 tree scalar_dest;
5770 gimple exit_phi;
5772 if (vect_print_dump_info (REPORT_DETAILS))
5773 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5775 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5776 (to be used when vectorizing outer-loop stmts that use the DEF of
5777 STMT). */
5778 if (gimple_code (stmt) == GIMPLE_PHI)
5779 scalar_dest = PHI_RESULT (stmt);
5780 else
5781 scalar_dest = gimple_assign_lhs (stmt);
5783 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5785 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5787 exit_phi = USE_STMT (use_p);
5788 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5793 /* Handle stmts whose DEF is used outside the loop-nest that is
5794 being vectorized. */
5795 if (STMT_VINFO_LIVE_P (stmt_info)
5796 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5798 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5799 gcc_assert (done);
5802 if (vec_stmt)
5803 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5805 return is_store;
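/* A hedged sketch (hypothetical helper, not the actual transformation loop)
   of how a caller might consume the IS_STORE result and the GROUPED_STORE
   flag of vect_transform_stmt above: an interleaving chain is only emitted
   when its last store is reached, at which point the scalar stores of the
   whole chain can be removed via vect_remove_stores below.  */
static void
transform_one_stmt (gimple stmt, gimple_stmt_iterator *gsi)
{
  bool grouped_store = false;

  if (vect_transform_stmt (stmt, gsi, &grouped_store, NULL, NULL)
      && grouped_store)
    {
      /* The vectorized code for the chain exists now; release the
	 scalar stores starting from the first element of the group.  */
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
    }
}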
5809 /* Remove a group of stores (for SLP or interleaving) and free their
5810 stmt_vec_infos. */
5812 void
5813 vect_remove_stores (gimple first_stmt)
5815 gimple next = first_stmt;
5816 gimple tmp;
5817 gimple_stmt_iterator next_si;
5819 while (next)
5821 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5823 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5824 if (is_pattern_stmt_p (stmt_info))
5825 next = STMT_VINFO_RELATED_STMT (stmt_info);
5826 /* Free the attached stmt_vec_info and remove the stmt. */
5827 next_si = gsi_for_stmt (next);
5828 unlink_stmt_vdef (next);
5829 gsi_remove (&next_si, true);
5830 release_defs (next);
5831 free_stmt_vec_info (next);
5832 next = tmp;
5837 /* Function new_stmt_vec_info.
5839 Create and initialize a new stmt_vec_info struct for STMT. */
5841 stmt_vec_info
5842 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5843 bb_vec_info bb_vinfo)
5845 stmt_vec_info res;
5846 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5848 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5849 STMT_VINFO_STMT (res) = stmt;
5850 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5851 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5852 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5853 STMT_VINFO_LIVE_P (res) = false;
5854 STMT_VINFO_VECTYPE (res) = NULL;
5855 STMT_VINFO_VEC_STMT (res) = NULL;
5856 STMT_VINFO_VECTORIZABLE (res) = true;
5857 STMT_VINFO_IN_PATTERN_P (res) = false;
5858 STMT_VINFO_RELATED_STMT (res) = NULL;
5859 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5860 STMT_VINFO_DATA_REF (res) = NULL;
5862 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5863 STMT_VINFO_DR_OFFSET (res) = NULL;
5864 STMT_VINFO_DR_INIT (res) = NULL;
5865 STMT_VINFO_DR_STEP (res) = NULL;
5866 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5868 if (gimple_code (stmt) == GIMPLE_PHI
5869 && is_loop_header_bb_p (gimple_bb (stmt)))
5870 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5871 else
5872 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5874 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5875 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5876 STMT_SLP_TYPE (res) = loop_vect;
5877 GROUP_FIRST_ELEMENT (res) = NULL;
5878 GROUP_NEXT_ELEMENT (res) = NULL;
5879 GROUP_SIZE (res) = 0;
5880 GROUP_STORE_COUNT (res) = 0;
5881 GROUP_GAP (res) = 0;
5882 GROUP_SAME_DR_STMT (res) = NULL;
5883 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5885 return res;
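/* Hedged usage sketch (hypothetical helper): a freshly created stmt_vec_info
   is normally attached to its statement with set_vinfo_for_stmt right away,
   so that the vinfo_for_stmt lookups used throughout this file can find it.  */
static void
attach_stmt_info (gimple stmt, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, loop_vinfo, bb_vinfo));
}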
5889 /* Create the vector holding stmt_vec_info structs. */
5891 void
5892 init_stmt_vec_info_vec (void)
5894 gcc_assert (!stmt_vec_info_vec);
5895 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5899 /* Free the vector holding stmt_vec_info structs. */
5901 void
5902 free_stmt_vec_info_vec (void)
5904 gcc_assert (stmt_vec_info_vec);
5905 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5909 /* Free stmt vectorization related info. */
5911 void
5912 free_stmt_vec_info (gimple stmt)
5914 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5916 if (!stmt_info)
5917 return;
5919 /* Check if this statement has a related "pattern stmt"
5920 (introduced by the vectorizer during the pattern recognition
5921 pass). Free the pattern stmt's stmt_vec_info and its def stmts'
5922 stmt_vec_infos too. */
5923 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5925 stmt_vec_info patt_info
5926 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5927 if (patt_info)
5929 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
5930 if (seq)
5932 gimple_stmt_iterator si;
5933 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
5934 free_stmt_vec_info (gsi_stmt (si));
5936 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
5940 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
5941 set_vinfo_for_stmt (stmt, NULL);
5942 free (stmt_info);
5946 /* Function get_vectype_for_scalar_type_and_size.
5948 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
5949 by the target. */
5951 static tree
5952 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
5954 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
5955 enum machine_mode simd_mode;
5956 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
5957 int nunits;
5958 tree vectype;
5960 if (nbytes == 0)
5961 return NULL_TREE;
5963 if (GET_MODE_CLASS (inner_mode) != MODE_INT
5964 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
5965 return NULL_TREE;
5967 /* We can't build a vector type of elements with alignment bigger than
5968 their size. */
5969 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
5970 return NULL_TREE;
5972 /* For vector types of elements whose mode precision doesn't
5973 match their type's precision we use an element type of mode
5974 precision. The vectorization routines will have to make sure
5975 they support the proper result truncation/extension.
5976 We also make sure to build vector types with INTEGER_TYPE
5977 component type only. */
5978 if (INTEGRAL_TYPE_P (scalar_type)
5979 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
5980 || TREE_CODE (scalar_type) != INTEGER_TYPE))
5981 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
5982 TYPE_UNSIGNED (scalar_type));
5984 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
5985 When the component mode passes the above test simply use a type
5986 corresponding to that mode. The theory is that any use that
5987 would cause problems with this will disable vectorization anyway. */
5988 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
5989 && !INTEGRAL_TYPE_P (scalar_type)
5990 && !POINTER_TYPE_P (scalar_type))
5991 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
5993 /* If no size was supplied, use the mode the target prefers. Otherwise
5994 look up a vector mode of the specified size. */
5995 if (size == 0)
5996 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
5997 else
5998 simd_mode = mode_for_vector (inner_mode, size / nbytes);
5999 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6000 if (nunits <= 1)
6001 return NULL_TREE;
6003 vectype = build_vector_type (scalar_type, nunits);
6004 if (vect_print_dump_info (REPORT_DETAILS))
6006 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
6007 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
6010 if (!vectype)
6011 return NULL_TREE;
6013 if (vect_print_dump_info (REPORT_DETAILS))
6015 fprintf (vect_dump, "vectype: ");
6016 print_generic_expr (vect_dump, vectype, TDF_SLIM);
6019 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6020 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6022 if (vect_print_dump_info (REPORT_DETAILS))
6023 fprintf (vect_dump, "mode not supported by target.");
6024 return NULL_TREE;
6027 return vectype;
6030 unsigned int current_vector_size;
6032 /* Function get_vectype_for_scalar_type.
6034 Returns the vector type corresponding to SCALAR_TYPE as supported
6035 by the target. */
6037 tree
6038 get_vectype_for_scalar_type (tree scalar_type)
6040 tree vectype;
6041 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6042 current_vector_size);
6043 if (vectype
6044 && current_vector_size == 0)
6045 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6046 return vectype;
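/* An illustrative sketch (hypothetical helper) of the size arithmetic in
   get_vectype_for_scalar_type_and_size: with 4-byte ints and a 16-byte SIMD
   width, nunits = 16 / 4 = 4, so the query below would be expected to yield
   a four-unit vector type such as V4SI, or NULL_TREE when the target has no
   suitable vector mode.  */
static bool
int_vectorizes_to_four_units_p (void)
{
  tree vectype = get_vectype_for_scalar_type (integer_type_node);
  return vectype != NULL_TREE && TYPE_VECTOR_SUBPARTS (vectype) == 4;
}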
6049 /* Function get_same_sized_vectype
6051 Returns a vector type corresponding to SCALAR_TYPE with the same size
6052 as VECTOR_TYPE, if supported by the target. */
6054 tree
6055 get_same_sized_vectype (tree scalar_type, tree vector_type)
6057 return get_vectype_for_scalar_type_and_size
6058 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6061 /* Function vect_is_simple_use.
6063 Input:
6064 LOOP_VINFO - the vect info of the loop that is being vectorized.
6065 BB_VINFO - the vect info of the basic block that is being vectorized.
6066 OPERAND - operand of STMT in the loop or bb.
6067 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6069 Returns whether a stmt with OPERAND can be vectorized.
6070 For loops, supportable operands are constants, loop invariants, and operands
6071 that are defined by the current iteration of the loop. Unsupportable
6072 operands are those that are defined by a previous iteration of the loop (as
6073 is the case in reduction/induction computations).
6074 For basic blocks, supportable operands are constants and bb invariants.
6075 For now, operands defined outside the basic block are not supported. */
6077 bool
6078 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6079 bb_vec_info bb_vinfo, gimple *def_stmt,
6080 tree *def, enum vect_def_type *dt)
6082 basic_block bb;
6083 stmt_vec_info stmt_vinfo;
6084 struct loop *loop = NULL;
6086 if (loop_vinfo)
6087 loop = LOOP_VINFO_LOOP (loop_vinfo);
6089 *def_stmt = NULL;
6090 *def = NULL_TREE;
6092 if (vect_print_dump_info (REPORT_DETAILS))
6094 fprintf (vect_dump, "vect_is_simple_use: operand ");
6095 print_generic_expr (vect_dump, operand, TDF_SLIM);
6098 if (CONSTANT_CLASS_P (operand))
6100 *dt = vect_constant_def;
6101 return true;
6104 if (is_gimple_min_invariant (operand))
6106 *def = operand;
6107 *dt = vect_external_def;
6108 return true;
6111 if (TREE_CODE (operand) == PAREN_EXPR)
6113 if (vect_print_dump_info (REPORT_DETAILS))
6114 fprintf (vect_dump, "non-associatable copy.");
6115 operand = TREE_OPERAND (operand, 0);
6118 if (TREE_CODE (operand) != SSA_NAME)
6120 if (vect_print_dump_info (REPORT_DETAILS))
6121 fprintf (vect_dump, "not ssa-name.");
6122 return false;
6125 *def_stmt = SSA_NAME_DEF_STMT (operand);
6126 if (*def_stmt == NULL)
6128 if (vect_print_dump_info (REPORT_DETAILS))
6129 fprintf (vect_dump, "no def_stmt.");
6130 return false;
6133 if (vect_print_dump_info (REPORT_DETAILS))
6135 fprintf (vect_dump, "def_stmt: ");
6136 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
6139 /* An empty stmt is expected only in the case of a function argument
6140 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
6141 if (gimple_nop_p (*def_stmt))
6143 *def = operand;
6144 *dt = vect_external_def;
6145 return true;
6148 bb = gimple_bb (*def_stmt);
6150 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6151 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
6152 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
6153 *dt = vect_external_def;
6154 else
6156 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6157 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6160 if (*dt == vect_unknown_def_type
6161 || (stmt
6162 && *dt == vect_double_reduction_def
6163 && gimple_code (stmt) != GIMPLE_PHI))
6165 if (vect_print_dump_info (REPORT_DETAILS))
6166 fprintf (vect_dump, "Unsupported pattern.");
6167 return false;
6170 if (vect_print_dump_info (REPORT_DETAILS))
6171 fprintf (vect_dump, "type of def: %d.",*dt);
6173 switch (gimple_code (*def_stmt))
6175 case GIMPLE_PHI:
6176 *def = gimple_phi_result (*def_stmt);
6177 break;
6179 case GIMPLE_ASSIGN:
6180 *def = gimple_assign_lhs (*def_stmt);
6181 break;
6183 case GIMPLE_CALL:
6184 *def = gimple_call_lhs (*def_stmt);
6185 if (*def != NULL)
6186 break;
6187 /* FALLTHRU */
6188 default:
6189 if (vect_print_dump_info (REPORT_DETAILS))
6190 fprintf (vect_dump, "unsupported defining stmt: ");
6191 return false;
6194 return true;
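/* A hedged sketch (hypothetical helper) of the typical calling pattern for
   vect_is_simple_use above, assuming STMT is a GIMPLE_ASSIGN: the first
   operand is classified, and analysis fails when its definition is of an
   unsupported kind.  */
static bool
first_operand_is_simple_p (gimple stmt, loop_vec_info loop_vinfo)
{
  tree op = gimple_assign_rhs1 (stmt);
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;

  if (!vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "use not simple.");
      return false;
    }

  return true;
}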
6197 /* Function vect_is_simple_use_1.
6199 Same as vect_is_simple_use but also determines the vector operand
6200 type of OPERAND and stores it to *VECTYPE. If the definition of
6201 OPERAND is vect_uninitialized_def, vect_constant_def or
6202 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
6203 is responsible for computing the best-suited vector type for the
6204 scalar operand. */
6206 bool
6207 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6208 bb_vec_info bb_vinfo, gimple *def_stmt,
6209 tree *def, enum vect_def_type *dt, tree *vectype)
6211 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6212 def, dt))
6213 return false;
6215 /* Now get a vector type if the def is internal, otherwise supply
6216 NULL_TREE and leave it up to the caller to figure out a proper
6217 type for the use stmt. */
6218 if (*dt == vect_internal_def
6219 || *dt == vect_induction_def
6220 || *dt == vect_reduction_def
6221 || *dt == vect_double_reduction_def
6222 || *dt == vect_nested_cycle)
6224 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6226 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6227 && !STMT_VINFO_RELEVANT (stmt_info)
6228 && !STMT_VINFO_LIVE_P (stmt_info))
6229 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6231 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6232 gcc_assert (*vectype != NULL_TREE);
6234 else if (*dt == vect_uninitialized_def
6235 || *dt == vect_constant_def
6236 || *dt == vect_external_def)
6237 *vectype = NULL_TREE;
6238 else
6239 gcc_unreachable ();
6241 return true;
6245 /* Function supportable_widening_operation
6247 Check whether an operation represented by the code CODE is a
6248 widening operation that is supported by the target platform in
6249 vector form (i.e., when operating on arguments of type VECTYPE_IN
6250 producing a result of type VECTYPE_OUT).
6252 Widening operations we currently support are NOP (CONVERT), FLOAT,
6253 WIDEN_MULT and WIDEN_LSHIFT. This function checks whether these
6254 operations are supported by the target platform either directly
6255 (via vector tree-codes), or via target builtins.
6257 Output:
6258 - CODE1 and CODE2 are codes of vector operations to be used when
6259 vectorizing the operation, if available.
6260 - MULTI_STEP_CVT determines the number of required intermediate steps in
6261 case of multi-step conversion (like char->short->int - in that case
6262 MULTI_STEP_CVT will be 1).
6263 - INTERM_TYPES contains the intermediate type required to perform the
6264 widening operation (short in the above example). */
6266 bool
6267 supportable_widening_operation (enum tree_code code, gimple stmt,
6268 tree vectype_out, tree vectype_in,
6269 enum tree_code *code1, enum tree_code *code2,
6270 int *multi_step_cvt,
6271 VEC (tree, heap) **interm_types)
6273 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6274 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6275 struct loop *vect_loop = NULL;
6276 enum machine_mode vec_mode;
6277 enum insn_code icode1, icode2;
6278 optab optab1, optab2;
6279 tree vectype = vectype_in;
6280 tree wide_vectype = vectype_out;
6281 enum tree_code c1, c2;
6282 int i;
6283 tree prev_type, intermediate_type;
6284 enum machine_mode intermediate_mode, prev_mode;
6285 optab optab3, optab4;
6287 *multi_step_cvt = 0;
6288 if (loop_info)
6289 vect_loop = LOOP_VINFO_LOOP (loop_info);
6291 switch (code)
6293 case WIDEN_MULT_EXPR:
6294 /* The result of a vectorized widening operation usually requires
6295 two vectors (because the widened results do not fit into one vector).
6296 The generated vector results would normally be expected to be
6297 generated in the same order as in the original scalar computation,
6298 i.e. if 8 results are generated in each vector iteration, they are
6299 to be organized as follows:
6300 vect1: [res1,res2,res3,res4],
6301 vect2: [res5,res6,res7,res8].
6303 However, in the special case that the result of the widening
6304 operation is used in a reduction computation only, the order doesn't
6305 matter (because when vectorizing a reduction we change the order of
6306 the computation). Some targets can take advantage of this and
6307 generate more efficient code. For example, targets like Altivec,
6308 that support widen_mult using a sequence of {mult_even,mult_odd}
6309 generate the following vectors:
6310 vect1: [res1,res3,res5,res7],
6311 vect2: [res2,res4,res6,res8].
6313 When vectorizing outer-loops, we execute the inner-loop sequentially
6314 (each vectorized inner-loop iteration contributes to VF outer-loop
6315 iterations in parallel). We therefore don't allow changing the
6316 order of the computation in the inner-loop during outer-loop
6317 vectorization. */
6318 /* TODO: Another case in which order doesn't *really* matter is when we
6319 widen and then contract again, e.g. (short)((int)x * y >> 8).
6320 Normally, pack_trunc performs an even/odd permute, whereas the
6321 repack from an even/odd expansion would be an interleave, which
6322 would be significantly simpler for e.g. AVX2. */
6323 /* In any case, in order to avoid duplicating the code below, recurse
6324 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6325 are properly set up for the caller. If we fail, we'll continue with
6326 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6327 if (vect_loop
6328 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6329 && !nested_in_vect_loop_p (vect_loop, stmt)
6330 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6331 stmt, vectype_out, vectype_in,
6332 code1, code2, multi_step_cvt,
6333 interm_types))
6334 return true;
6335 c1 = VEC_WIDEN_MULT_LO_EXPR;
6336 c2 = VEC_WIDEN_MULT_HI_EXPR;
6337 break;
6339 case VEC_WIDEN_MULT_EVEN_EXPR:
6340 /* Support the recursion induced just above. */
6341 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6342 c2 = VEC_WIDEN_MULT_ODD_EXPR;
6343 break;
6345 case WIDEN_LSHIFT_EXPR:
6346 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6347 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6348 break;
6350 CASE_CONVERT:
6351 c1 = VEC_UNPACK_LO_EXPR;
6352 c2 = VEC_UNPACK_HI_EXPR;
6353 break;
6355 case FLOAT_EXPR:
6356 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6357 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6358 break;
6360 case FIX_TRUNC_EXPR:
6361 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6362 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6363 computing the operation. */
6364 return false;
6366 default:
6367 gcc_unreachable ();
6370 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6372 enum tree_code ctmp = c1;
6373 c1 = c2;
6374 c2 = ctmp;
6377 if (code == FIX_TRUNC_EXPR)
6379 /* The signedness is determined from the output operand. */
6380 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6381 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6383 else
6385 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6386 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6389 if (!optab1 || !optab2)
6390 return false;
6392 vec_mode = TYPE_MODE (vectype);
6393 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6394 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6395 return false;
6397 *code1 = c1;
6398 *code2 = c2;
6400 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6401 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6402 return true;
6404 /* Check if it's a multi-step conversion that can be done using intermediate
6405 types. */
6407 prev_type = vectype;
6408 prev_mode = vec_mode;
6410 if (!CONVERT_EXPR_CODE_P (code))
6411 return false;
6413 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6414 intermediate steps in the promotion sequence. We try
6415 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6416 not. */
6417 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6418 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6420 intermediate_mode = insn_data[icode1].operand[0].mode;
6421 intermediate_type
6422 = lang_hooks.types.type_for_mode (intermediate_mode,
6423 TYPE_UNSIGNED (prev_type));
6424 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6425 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6427 if (!optab3 || !optab4
6428 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6429 || insn_data[icode1].operand[0].mode != intermediate_mode
6430 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6431 || insn_data[icode2].operand[0].mode != intermediate_mode
6432 || ((icode1 = optab_handler (optab3, intermediate_mode))
6433 == CODE_FOR_nothing)
6434 || ((icode2 = optab_handler (optab4, intermediate_mode))
6435 == CODE_FOR_nothing))
6436 break;
6438 VEC_quick_push (tree, *interm_types, intermediate_type);
6439 (*multi_step_cvt)++;
6441 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6442 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6443 return true;
6445 prev_type = intermediate_type;
6446 prev_mode = intermediate_mode;
6449 VEC_free (tree, heap, *interm_types);
6450 return false;
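/* A hedged sketch (hypothetical helper and parameter names) of a query for
   a char -> int widening conversion.  Assuming V16QI_TYPE and V4SI_TYPE are
   the char and int vector types of a 128-bit SIMD target with the unpack
   optabs, the expected outcome is CODE1/CODE2 == VEC_UNPACK_LO/HI_EXPR and
   one intermediate step through a short vector type (*MULTI_STEP_CVT == 1).  */
static bool
char_to_int_widening_supported_p (gimple stmt, tree v4si_type, tree v16qi_type)
{
  enum tree_code code1, code2;
  int multi_step_cvt;
  VEC (tree, heap) *interm_types = NULL;
  bool ok;

  ok = supportable_widening_operation (NOP_EXPR, stmt, v4si_type, v16qi_type,
				       &code1, &code2, &multi_step_cvt,
				       &interm_types);
  /* On success the caller owns INTERM_TYPES; release it here since this
     sketch only asks whether the conversion is supported.  */
  if (ok)
    VEC_free (tree, heap, interm_types);
  return ok;
}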
6454 /* Function supportable_narrowing_operation
6456 Check whether an operation represented by the code CODE is a
6457 narrowing operation that is supported by the target platform in
6458 vector form (i.e., when operating on arguments of type VECTYPE_IN
6459 and producing a result of type VECTYPE_OUT).
6461 Narrowing operations we currently support are NOP (CONVERT) and
6462 FIX_TRUNC. This function checks if these operations are supported by
6463 the target platform directly via vector tree-codes.
6465 Output:
6466 - CODE1 is the code of a vector operation to be used when
6467 vectorizing the operation, if available.
6468 - MULTI_STEP_CVT determines the number of required intermediate steps in
6469 case of multi-step conversion (like int->short->char - in that case
6470 MULTI_STEP_CVT will be 1).
6471 - INTERM_TYPES contains the intermediate type required to perform the
6472 narrowing operation (short in the above example). */
6474 bool
6475 supportable_narrowing_operation (enum tree_code code,
6476 tree vectype_out, tree vectype_in,
6477 enum tree_code *code1, int *multi_step_cvt,
6478 VEC (tree, heap) **interm_types)
6480 enum machine_mode vec_mode;
6481 enum insn_code icode1;
6482 optab optab1, interm_optab;
6483 tree vectype = vectype_in;
6484 tree narrow_vectype = vectype_out;
6485 enum tree_code c1;
6486 tree intermediate_type;
6487 enum machine_mode intermediate_mode, prev_mode;
6488 int i;
6489 bool uns;
6491 *multi_step_cvt = 0;
6492 switch (code)
6494 CASE_CONVERT:
6495 c1 = VEC_PACK_TRUNC_EXPR;
6496 break;
6498 case FIX_TRUNC_EXPR:
6499 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6500 break;
6502 case FLOAT_EXPR:
6503 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6504 tree code and optabs used for computing the operation. */
6505 return false;
6507 default:
6508 gcc_unreachable ();
6511 if (code == FIX_TRUNC_EXPR)
6512 /* The signedness is determined from the output operand. */
6513 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6514 else
6515 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6517 if (!optab1)
6518 return false;
6520 vec_mode = TYPE_MODE (vectype);
6521 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6522 return false;
6524 *code1 = c1;
6526 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6527 return true;
6529 /* Check if it's a multi-step conversion that can be done using intermediate
6530 types. */
6531 prev_mode = vec_mode;
6532 if (code == FIX_TRUNC_EXPR)
6533 uns = TYPE_UNSIGNED (vectype_out);
6534 else
6535 uns = TYPE_UNSIGNED (vectype);
6537 /* For a multi-step FIX_TRUNC_EXPR prefer a signed floating-to-integer
6538 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
6539 more costly than signed. */
6540 if (code == FIX_TRUNC_EXPR && uns)
6542 enum insn_code icode2;
6544 intermediate_type
6545 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6546 interm_optab
6547 = optab_for_tree_code (c1, intermediate_type, optab_default);
6548 if (interm_optab != NULL
6549 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6550 && insn_data[icode1].operand[0].mode
6551 == insn_data[icode2].operand[0].mode)
6553 uns = false;
6554 optab1 = interm_optab;
6555 icode1 = icode2;
6559 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6560 intermediate steps in the demotion sequence. We try
6561 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6562 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6563 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6565 intermediate_mode = insn_data[icode1].operand[0].mode;
6566 intermediate_type
6567 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6568 interm_optab
6569 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6570 optab_default);
6571 if (!interm_optab
6572 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6573 || insn_data[icode1].operand[0].mode != intermediate_mode
6574 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6575 == CODE_FOR_nothing))
6576 break;
6578 VEC_quick_push (tree, *interm_types, intermediate_type);
6579 (*multi_step_cvt)++;
6581 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6582 return true;
6584 prev_mode = intermediate_mode;
6585 optab1 = interm_optab;
6588 VEC_free (tree, heap, *interm_types);
6589 return false;
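/* The matching hedged sketch (hypothetical helper and parameter names) for
   the narrowing direction, int -> char.  For V4SI input and V16QI output
   the expected outcome on targets with the pack_trunc optabs is
   *CODE1 == VEC_PACK_TRUNC_EXPR with one intermediate short vector type,
   i.e. *MULTI_STEP_CVT == 1.  */
static bool
int_to_char_narrowing_supported_p (tree v16qi_type, tree v4si_type)
{
  enum tree_code code1;
  int multi_step_cvt;
  VEC (tree, heap) *interm_types = NULL;
  bool ok;

  ok = supportable_narrowing_operation (NOP_EXPR, v16qi_type, v4si_type,
					&code1, &multi_step_cvt,
					&interm_types);
  if (ok)
    VEC_free (tree, heap, interm_types);
  return ok;
}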