1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "expr.h"
37 #include "recog.h"
38 #include "optabs.h"
39 #include "diagnostic-core.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
44 /* Return a variable of type ELEM_TYPE[NELEMS]. */
46 static tree
47 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
49 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
50 "vect_array");
53 /* ARRAY is an array of vectors created by create_vector_array.
54 Return an SSA_NAME for the vector in index N. The reference
55 is part of the vectorization of STMT and the vector is associated
56 with scalar destination SCALAR_DEST. */
58 static tree
59 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
60 tree array, unsigned HOST_WIDE_INT n)
62 tree vect_type, vect, vect_name, array_ref;
63 gimple new_stmt;
65 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
66 vect_type = TREE_TYPE (TREE_TYPE (array));
67 vect = vect_create_destination_var (scalar_dest, vect_type);
68 array_ref = build4 (ARRAY_REF, vect_type, array,
69 build_int_cst (size_type_node, n),
70 NULL_TREE, NULL_TREE);
72 new_stmt = gimple_build_assign (vect, array_ref);
73 vect_name = make_ssa_name (vect, new_stmt);
74 gimple_assign_set_lhs (new_stmt, vect_name);
75 vect_finish_stmt_generation (stmt, new_stmt, gsi);
77 return vect_name;
80 /* ARRAY is an array of vectors created by create_vector_array.
81 Emit code to store SSA_NAME VECT in index N of the array.
82 The store is part of the vectorization of STMT. */
84 static void
85 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
86 tree array, unsigned HOST_WIDE_INT n)
88 tree array_ref;
89 gimple new_stmt;
91 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
92 build_int_cst (size_type_node, n),
93 NULL_TREE, NULL_TREE);
95 new_stmt = gimple_build_assign (array_ref, vect);
96 vect_finish_stmt_generation (stmt, new_stmt, gsi);
99 /* PTR is a pointer to an array of type TYPE. Return a representation
100 of *PTR. The memory reference replaces those in FIRST_DR
101 (and its group). */
103 static tree
104 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
106 tree mem_ref, alias_ptr_type;
108 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
109 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
110 /* Arrays have the same alignment as their type. */
111 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
112 return mem_ref;
115 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
117 /* Function vect_mark_relevant.
119 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
121 static void
122 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
123 enum vect_relevant relevant, bool live_p,
124 bool used_in_pattern)
126 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
127 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
128 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
129 gimple pattern_stmt;
131 if (vect_print_dump_info (REPORT_DETAILS))
132 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
134 /* If this stmt is an original stmt in a pattern, we might need to mark its
135 related pattern stmt instead of the original stmt. However, such stmts
 136 may have their own uses that are not in any pattern; in such cases the
137 stmt itself should be marked. */
138 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
140 bool found = false;
141 if (!used_in_pattern)
143 imm_use_iterator imm_iter;
144 use_operand_p use_p;
145 gimple use_stmt;
146 tree lhs;
147 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
148 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
150 if (is_gimple_assign (stmt))
151 lhs = gimple_assign_lhs (stmt);
152 else
153 lhs = gimple_call_lhs (stmt);
 155 /* This use is outside the pattern; if LHS has other uses that are
156 pattern uses, we should mark the stmt itself, and not the pattern
157 stmt. */
158 if (TREE_CODE (lhs) == SSA_NAME)
159 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
161 if (is_gimple_debug (USE_STMT (use_p)))
162 continue;
163 use_stmt = USE_STMT (use_p);
165 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
166 continue;
168 if (vinfo_for_stmt (use_stmt)
169 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
171 found = true;
172 break;
177 if (!found)
179 /* This is the last stmt in a sequence that was detected as a
180 pattern that can potentially be vectorized. Don't mark the stmt
181 as relevant/live because it's not going to be vectorized.
182 Instead mark the pattern-stmt that replaces it. */
184 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
186 if (vect_print_dump_info (REPORT_DETAILS))
187 fprintf (vect_dump, "last stmt in pattern. don't mark"
188 " relevant/live.");
189 stmt_info = vinfo_for_stmt (pattern_stmt);
190 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
191 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
192 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
193 stmt = pattern_stmt;
197 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
198 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
199 STMT_VINFO_RELEVANT (stmt_info) = relevant;
201 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
202 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
204 if (vect_print_dump_info (REPORT_DETAILS))
205 fprintf (vect_dump, "already marked relevant/live.");
206 return;
209 VEC_safe_push (gimple, heap, *worklist, stmt);
213 /* Function vect_stmt_relevant_p.
 215 Return true if STMT, in the loop represented by LOOP_VINFO, is
216 "relevant for vectorization".
218 A stmt is considered "relevant for vectorization" if:
219 - it has uses outside the loop.
220 - it has vdefs (it alters memory).
 221 - it is a control stmt in the loop (other than the exit condition).
223 CHECKME: what other side effects would the vectorizer allow? */
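/* A rough illustration (hypothetical scalar loop, not from the sources):

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + c[i];    <-- has a vdef, hence relevant
         s = s + b[i];          <-- s used after the loop, hence live
       }

   The store alters memory and is therefore marked relevant; the stmt
   computing s is marked live when s is used by code after the loop.  */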
225 static bool
226 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
227 enum vect_relevant *relevant, bool *live_p)
229 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
230 ssa_op_iter op_iter;
231 imm_use_iterator imm_iter;
232 use_operand_p use_p;
233 def_operand_p def_p;
235 *relevant = vect_unused_in_scope;
236 *live_p = false;
238 /* cond stmt other than loop exit cond. */
239 if (is_ctrl_stmt (stmt)
240 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
241 != loop_exit_ctrl_vec_info_type)
242 *relevant = vect_used_in_scope;
244 /* changing memory. */
245 if (gimple_code (stmt) != GIMPLE_PHI)
246 if (gimple_vdef (stmt))
248 if (vect_print_dump_info (REPORT_DETAILS))
249 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
250 *relevant = vect_used_in_scope;
253 /* uses outside the loop. */
254 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
256 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
258 basic_block bb = gimple_bb (USE_STMT (use_p));
259 if (!flow_bb_inside_loop_p (loop, bb))
261 if (vect_print_dump_info (REPORT_DETAILS))
262 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
264 if (is_gimple_debug (USE_STMT (use_p)))
265 continue;
267 /* We expect all such uses to be in the loop exit phis
 268 (because of loop closed form)  */
269 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
270 gcc_assert (bb == single_exit (loop)->dest);
272 *live_p = true;
277 return (*live_p || *relevant);
281 /* Function exist_non_indexing_operands_for_use_p
283 USE is one of the uses attached to STMT. Check if USE is
284 used in STMT for anything other than indexing an array. */
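/* For example (illustrative): in a store of the form  a[i_1] = x_2,
   the use of i_1 appears only inside the ARRAY_REF, i.e. it is used
   for address computation only, whereas x_2 is a non-indexing operand
   for which this function returns true.  */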
286 static bool
287 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
289 tree operand;
290 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
292 /* USE corresponds to some operand in STMT. If there is no data
293 reference in STMT, then any operand that corresponds to USE
294 is not indexing an array. */
295 if (!STMT_VINFO_DATA_REF (stmt_info))
296 return true;
 298 /* STMT has a data_ref. FORNOW this means that it is one of
299 the following forms:
300 -1- ARRAY_REF = var
301 -2- var = ARRAY_REF
302 (This should have been verified in analyze_data_refs).
304 'var' in the second case corresponds to a def, not a use,
305 so USE cannot correspond to any operands that are not used
306 for array indexing.
308 Therefore, all we need to check is if STMT falls into the
309 first case, and whether var corresponds to USE. */
311 if (!gimple_assign_copy_p (stmt))
312 return false;
313 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
314 return false;
315 operand = gimple_assign_rhs1 (stmt);
316 if (TREE_CODE (operand) != SSA_NAME)
317 return false;
319 if (operand == use)
320 return true;
322 return false;
 327 /* Function process_use.
329 Inputs:
330 - a USE in STMT in a loop represented by LOOP_VINFO
331 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
332 that defined USE. This is done by calling mark_relevant and passing it
333 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
334 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
335 be performed.
337 Outputs:
338 Generally, LIVE_P and RELEVANT are used to define the liveness and
339 relevance info of the DEF_STMT of this USE:
340 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
341 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
342 Exceptions:
343 - case 1: If USE is used only for address computations (e.g. array indexing),
344 which does not need to be directly vectorized, then the liveness/relevance
345 of the respective DEF_STMT is left unchanged.
346 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 347 skip DEF_STMT because it has already been processed.
348 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
349 be modified accordingly.
351 Return true if everything is as expected. Return false otherwise. */
353 static bool
354 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
355 enum vect_relevant relevant, VEC(gimple,heap) **worklist,
356 bool force)
358 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
359 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
360 stmt_vec_info dstmt_vinfo;
361 basic_block bb, def_bb;
362 tree def;
363 gimple def_stmt;
364 enum vect_def_type dt;
366 /* case 1: we are only interested in uses that need to be vectorized. Uses
367 that are used for address computation are not considered relevant. */
368 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
369 return true;
371 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
373 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
374 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
375 return false;
378 if (!def_stmt || gimple_nop_p (def_stmt))
379 return true;
381 def_bb = gimple_bb (def_stmt);
382 if (!flow_bb_inside_loop_p (loop, def_bb))
384 if (vect_print_dump_info (REPORT_DETAILS))
385 fprintf (vect_dump, "def_stmt is out of loop.");
386 return true;
389 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
390 DEF_STMT must have already been processed, because this should be the
391 only way that STMT, which is a reduction-phi, was put in the worklist,
392 as there should be no other uses for DEF_STMT in the loop. So we just
393 check that everything is as expected, and we are done. */
394 dstmt_vinfo = vinfo_for_stmt (def_stmt);
395 bb = gimple_bb (stmt);
396 if (gimple_code (stmt) == GIMPLE_PHI
397 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
398 && gimple_code (def_stmt) != GIMPLE_PHI
399 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
400 && bb->loop_father == def_bb->loop_father)
402 if (vect_print_dump_info (REPORT_DETAILS))
403 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
404 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
405 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
406 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
407 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
408 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
409 return true;
412 /* case 3a: outer-loop stmt defining an inner-loop stmt:
413 outer-loop-header-bb:
414 d = def_stmt
415 inner-loop:
416 stmt # use (d)
417 outer-loop-tail-bb:
418 ... */
419 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
421 if (vect_print_dump_info (REPORT_DETAILS))
422 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
424 switch (relevant)
426 case vect_unused_in_scope:
427 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
428 vect_used_in_scope : vect_unused_in_scope;
429 break;
431 case vect_used_in_outer_by_reduction:
432 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
433 relevant = vect_used_by_reduction;
434 break;
436 case vect_used_in_outer:
437 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
438 relevant = vect_used_in_scope;
439 break;
441 case vect_used_in_scope:
442 break;
444 default:
445 gcc_unreachable ();
449 /* case 3b: inner-loop stmt defining an outer-loop stmt:
450 outer-loop-header-bb:
452 inner-loop:
453 d = def_stmt
454 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
455 stmt # use (d) */
456 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
458 if (vect_print_dump_info (REPORT_DETAILS))
459 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
461 switch (relevant)
463 case vect_unused_in_scope:
464 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
465 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
466 vect_used_in_outer_by_reduction : vect_unused_in_scope;
467 break;
469 case vect_used_by_reduction:
470 relevant = vect_used_in_outer_by_reduction;
471 break;
473 case vect_used_in_scope:
474 relevant = vect_used_in_outer;
475 break;
477 default:
478 gcc_unreachable ();
482 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
483 is_pattern_stmt_p (stmt_vinfo));
484 return true;
488 /* Function vect_mark_stmts_to_be_vectorized.
490 Not all stmts in the loop need to be vectorized. For example:
492 for i...
493 for j...
494 1. T0 = i + j
495 2. T1 = a[T0]
497 3. j = j + 1
499 Stmt 1 and 3 do not need to be vectorized, because loop control and
500 addressing of vectorized data-refs are handled differently.
502 This pass detects such stmts. */
504 bool
505 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
507 VEC(gimple,heap) *worklist;
508 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
509 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
510 unsigned int nbbs = loop->num_nodes;
511 gimple_stmt_iterator si;
512 gimple stmt;
513 unsigned int i;
514 stmt_vec_info stmt_vinfo;
515 basic_block bb;
516 gimple phi;
517 bool live_p;
518 enum vect_relevant relevant, tmp_relevant;
519 enum vect_def_type def_type;
521 if (vect_print_dump_info (REPORT_DETAILS))
522 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
524 worklist = VEC_alloc (gimple, heap, 64);
526 /* 1. Init worklist. */
527 for (i = 0; i < nbbs; i++)
529 bb = bbs[i];
530 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
532 phi = gsi_stmt (si);
533 if (vect_print_dump_info (REPORT_DETAILS))
535 fprintf (vect_dump, "init: phi relevant? ");
536 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
539 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
540 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
542 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
544 stmt = gsi_stmt (si);
545 if (vect_print_dump_info (REPORT_DETAILS))
547 fprintf (vect_dump, "init: stmt relevant? ");
548 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
551 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
552 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
556 /* 2. Process_worklist */
557 while (VEC_length (gimple, worklist) > 0)
559 use_operand_p use_p;
560 ssa_op_iter iter;
562 stmt = VEC_pop (gimple, worklist);
563 if (vect_print_dump_info (REPORT_DETAILS))
565 fprintf (vect_dump, "worklist: examine stmt: ");
566 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
569 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
570 (DEF_STMT) as relevant/irrelevant and live/dead according to the
571 liveness and relevance properties of STMT. */
572 stmt_vinfo = vinfo_for_stmt (stmt);
573 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
574 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
576 /* Generally, the liveness and relevance properties of STMT are
577 propagated as is to the DEF_STMTs of its USEs:
578 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
579 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
581 One exception is when STMT has been identified as defining a reduction
582 variable; in this case we set the liveness/relevance as follows:
583 live_p = false
584 relevant = vect_used_by_reduction
585 This is because we distinguish between two kinds of relevant stmts -
586 those that are used by a reduction computation, and those that are
587 (also) used by a regular computation. This allows us later on to
588 identify stmts that are used solely by a reduction, and therefore the
589 order of the results that they produce does not have to be kept. */
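/* A sketch (hypothetical reduction, not from the sources):

     for (i = 0; i < n; i++)
       sum_1 = sum_0 + a[i];    <-- STMT defines the reduction variable

   The stmts feeding the reduction (e.g. the load of a[i]) are marked
   vect_used_by_reduction rather than vect_used_in_scope, and live_p
   is propagated to them as false, as described above.  */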
591 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
592 tmp_relevant = relevant;
593 switch (def_type)
595 case vect_reduction_def:
596 switch (tmp_relevant)
598 case vect_unused_in_scope:
599 relevant = vect_used_by_reduction;
600 break;
602 case vect_used_by_reduction:
603 if (gimple_code (stmt) == GIMPLE_PHI)
604 break;
605 /* fall through */
607 default:
608 if (vect_print_dump_info (REPORT_DETAILS))
609 fprintf (vect_dump, "unsupported use of reduction.");
611 VEC_free (gimple, heap, worklist);
612 return false;
615 live_p = false;
616 break;
618 case vect_nested_cycle:
619 if (tmp_relevant != vect_unused_in_scope
620 && tmp_relevant != vect_used_in_outer_by_reduction
621 && tmp_relevant != vect_used_in_outer)
623 if (vect_print_dump_info (REPORT_DETAILS))
624 fprintf (vect_dump, "unsupported use of nested cycle.");
626 VEC_free (gimple, heap, worklist);
627 return false;
630 live_p = false;
631 break;
633 case vect_double_reduction_def:
634 if (tmp_relevant != vect_unused_in_scope
635 && tmp_relevant != vect_used_by_reduction)
637 if (vect_print_dump_info (REPORT_DETAILS))
638 fprintf (vect_dump, "unsupported use of double reduction.");
640 VEC_free (gimple, heap, worklist);
641 return false;
644 live_p = false;
645 break;
647 default:
648 break;
651 if (is_pattern_stmt_p (stmt_vinfo))
653 /* Pattern statements are not inserted into the code, so
654 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
655 have to scan the RHS or function arguments instead. */
656 if (is_gimple_assign (stmt))
658 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
659 tree op = gimple_assign_rhs1 (stmt);
661 i = 1;
662 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
664 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
665 live_p, relevant, &worklist, false)
666 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
667 live_p, relevant, &worklist, false))
669 VEC_free (gimple, heap, worklist);
670 return false;
672 i = 2;
674 for (; i < gimple_num_ops (stmt); i++)
676 op = gimple_op (stmt, i);
677 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
678 &worklist, false))
680 VEC_free (gimple, heap, worklist);
681 return false;
685 else if (is_gimple_call (stmt))
687 for (i = 0; i < gimple_call_num_args (stmt); i++)
689 tree arg = gimple_call_arg (stmt, i);
690 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
691 &worklist, false))
693 VEC_free (gimple, heap, worklist);
694 return false;
699 else
700 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
702 tree op = USE_FROM_PTR (use_p);
703 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
704 &worklist, false))
706 VEC_free (gimple, heap, worklist);
707 return false;
711 if (STMT_VINFO_GATHER_P (stmt_vinfo))
713 tree off;
714 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
715 gcc_assert (decl);
716 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
717 &worklist, true))
719 VEC_free (gimple, heap, worklist);
720 return false;
723 } /* while worklist */
725 VEC_free (gimple, heap, worklist);
726 return true;
730 /* Function vect_model_simple_cost.
732 Models cost for simple operations, i.e. those that only emit ncopies of a
733 single op. Right now, this does not account for multiple insns that could
734 be generated for the single vector op. We will handle that shortly. */
736 void
737 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
738 enum vect_def_type *dt, slp_tree slp_node)
740 int i;
741 int inside_cost = 0, outside_cost = 0;
743 /* The SLP costs were already calculated during SLP tree build. */
744 if (PURE_SLP_STMT (stmt_info))
745 return;
747 inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
 749 /* FORNOW: Assuming a maximum of 2 args per stmt. */
750 for (i = 0; i < 2; i++)
752 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
753 outside_cost += vect_get_stmt_cost (vector_stmt);
756 if (vect_print_dump_info (REPORT_COST))
757 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
758 "outside_cost = %d .", inside_cost, outside_cost);
760 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
761 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
762 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
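/* Illustrative numbers only (unit costs come from the target's
   builtin_vectorization_cost hook): with ncopies == 2 and one constant
   operand, the function above computes inside_cost == 2 * the vector_stmt
   cost and outside_cost == 1 * the vector_stmt cost, the latter covering
   materialization of the invariant operand.  */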
766 /* Model cost for type demotion and promotion operations. PWR is normally
767 zero for single-step promotions and demotions. It will be one if
768 two-step promotion/demotion is required, and so on. Each additional
769 step doubles the number of instructions required. */
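/* Worked example (illustrative): with PWR == 1, a two-step promotion
   costs (2 + 4) times the vec_promote_demote cost inside the loop, while
   a two-step demotion costs (1 + 2) times that cost, following the
   vect_pow2 computation below.  */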
771 static void
772 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
773 enum vect_def_type *dt, int pwr)
775 int i, tmp;
776 int inside_cost = 0, outside_cost = 0, single_stmt_cost;
778 /* The SLP costs were already calculated during SLP tree build. */
779 if (PURE_SLP_STMT (stmt_info))
780 return;
782 single_stmt_cost = vect_get_stmt_cost (vec_promote_demote);
783 for (i = 0; i < pwr + 1; i++)
785 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
786 (i + 1) : i;
787 inside_cost += vect_pow2 (tmp) * single_stmt_cost;
 790 /* FORNOW: Assuming a maximum of 2 args per stmt. */
791 for (i = 0; i < 2; i++)
793 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
794 outside_cost += vect_get_stmt_cost (vector_stmt);
797 if (vect_print_dump_info (REPORT_COST))
798 fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
799 "outside_cost = %d .", inside_cost, outside_cost);
801 /* Set the costs in STMT_INFO. */
802 stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost);
803 stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
806 /* Function vect_cost_group_size
808 For grouped load or store, return the group_size only if it is the first
809 load or store of a group, else return 1. This ensures that group size is
810 only returned once per group. */
812 static int
813 vect_cost_group_size (stmt_vec_info stmt_info)
815 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
817 if (first_stmt == STMT_VINFO_STMT (stmt_info))
818 return GROUP_SIZE (stmt_info);
820 return 1;
824 /* Function vect_model_store_cost
826 Models cost for stores. In the case of grouped accesses, one access
827 has the overhead of the grouped access attributed to it. */
829 void
830 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
831 bool store_lanes_p, enum vect_def_type dt,
832 slp_tree slp_node)
834 int group_size;
835 unsigned int inside_cost = 0, outside_cost = 0;
836 struct data_reference *first_dr;
837 gimple first_stmt;
839 /* The SLP costs were already calculated during SLP tree build. */
840 if (PURE_SLP_STMT (stmt_info))
841 return;
843 if (dt == vect_constant_def || dt == vect_external_def)
844 outside_cost = vect_get_stmt_cost (scalar_to_vec);
846 /* Grouped access? */
847 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
849 if (slp_node)
851 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
852 group_size = 1;
854 else
856 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
857 group_size = vect_cost_group_size (stmt_info);
860 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
862 /* Not a grouped access. */
863 else
865 group_size = 1;
866 first_dr = STMT_VINFO_DATA_REF (stmt_info);
869 /* We assume that the cost of a single store-lanes instruction is
870 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
871 access is instead being provided by a permute-and-store operation,
872 include the cost of the permutes. */
873 if (!store_lanes_p && group_size > 1)
875 /* Uses a high and low interleave operation for each needed permute. */
876 inside_cost = ncopies * exact_log2(group_size) * group_size
877 * vect_get_stmt_cost (vec_perm);
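/* Illustrative numbers: ncopies == 2 and group_size == 4 give
   2 * log2 (4) * 4 == 16 interleave (vec_perm) operations here.  */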
879 if (vect_print_dump_info (REPORT_COST))
880 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
881 group_size);
884 /* Costs of the stores. */
885 vect_get_store_cost (first_dr, ncopies, &inside_cost);
887 if (vect_print_dump_info (REPORT_COST))
888 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
889 "outside_cost = %d .", inside_cost, outside_cost);
891 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
892 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
893 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
897 /* Calculate cost of DR's memory access. */
898 void
899 vect_get_store_cost (struct data_reference *dr, int ncopies,
900 unsigned int *inside_cost)
902 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
904 switch (alignment_support_scheme)
906 case dr_aligned:
908 *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
910 if (vect_print_dump_info (REPORT_COST))
911 fprintf (vect_dump, "vect_model_store_cost: aligned.");
913 break;
916 case dr_unaligned_supported:
918 gimple stmt = DR_STMT (dr);
919 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
920 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
922 /* Here, we assign an additional cost for the unaligned store. */
923 *inside_cost += ncopies
924 * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
925 vectype, DR_MISALIGNMENT (dr));
927 if (vect_print_dump_info (REPORT_COST))
928 fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
929 "hardware.");
931 break;
934 case dr_unaligned_unsupported:
936 *inside_cost = VECT_MAX_COST;
938 if (vect_print_dump_info (REPORT_COST))
939 fprintf (vect_dump, "vect_model_store_cost: unsupported access.");
941 break;
944 default:
945 gcc_unreachable ();
950 /* Function vect_model_load_cost
952 Models cost for loads. In the case of grouped accesses, the last access
953 has the overhead of the grouped access attributed to it. Since unaligned
954 accesses are supported for loads, we also account for the costs of the
955 access scheme chosen. */
957 void
958 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
959 slp_tree slp_node)
961 int group_size;
962 gimple first_stmt;
963 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
964 unsigned int inside_cost = 0, outside_cost = 0;
966 /* The SLP costs were already calculated during SLP tree build. */
967 if (PURE_SLP_STMT (stmt_info))
968 return;
970 /* Grouped accesses? */
971 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
972 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
974 group_size = vect_cost_group_size (stmt_info);
975 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
977 /* Not a grouped access. */
978 else
980 group_size = 1;
981 first_dr = dr;
984 /* We assume that the cost of a single load-lanes instruction is
985 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
986 access is instead being provided by a load-and-permute operation,
987 include the cost of the permutes. */
988 if (!load_lanes_p && group_size > 1)
 990 /* Uses even and odd extract operations for each needed permute. */
991 inside_cost = ncopies * exact_log2(group_size) * group_size
992 * vect_get_stmt_cost (vec_perm);
994 if (vect_print_dump_info (REPORT_COST))
995 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
996 group_size);
999 /* The loads themselves. */
1000 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1002 /* N scalar loads plus gathering them into a vector. */
1003 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1004 inside_cost += (vect_get_stmt_cost (scalar_load) * ncopies
1005 * TYPE_VECTOR_SUBPARTS (vectype));
1006 inside_cost += ncopies
1007 * targetm.vectorize.builtin_vectorization_cost (vec_construct,
1008 vectype, 0);
1010 else
1011 vect_get_load_cost (first_dr, ncopies,
1012 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1013 || group_size > 1 || slp_node),
1014 &inside_cost, &outside_cost);
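/* Illustrative numbers for the strided case above: with a 4-element
   vectype and ncopies == 2, this amounts to 2 * 4 scalar_load operations
   plus 2 vec_construct operations to assemble the vectors.  */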
1016 if (vect_print_dump_info (REPORT_COST))
1017 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
1018 "outside_cost = %d .", inside_cost, outside_cost);
1020 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
1021 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
1022 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
1026 /* Calculate cost of DR's memory access. */
1027 void
1028 vect_get_load_cost (struct data_reference *dr, int ncopies,
1029 bool add_realign_cost, unsigned int *inside_cost,
1030 unsigned int *outside_cost)
1032 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1034 switch (alignment_support_scheme)
1036 case dr_aligned:
1038 *inside_cost += ncopies * vect_get_stmt_cost (vector_load);
1040 if (vect_print_dump_info (REPORT_COST))
1041 fprintf (vect_dump, "vect_model_load_cost: aligned.");
1043 break;
1045 case dr_unaligned_supported:
1047 gimple stmt = DR_STMT (dr);
1048 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1049 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1051 /* Here, we assign an additional cost for the unaligned load. */
1052 *inside_cost += ncopies
1053 * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
1054 vectype, DR_MISALIGNMENT (dr));
1055 if (vect_print_dump_info (REPORT_COST))
1056 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
1057 "hardware.");
1059 break;
1061 case dr_explicit_realign:
1063 *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
1064 + vect_get_stmt_cost (vec_perm));
1066 /* FIXME: If the misalignment remains fixed across the iterations of
1067 the containing loop, the following cost should be added to the
1068 outside costs. */
1069 if (targetm.vectorize.builtin_mask_for_load)
1070 *inside_cost += vect_get_stmt_cost (vector_stmt);
1072 if (vect_print_dump_info (REPORT_COST))
1073 fprintf (vect_dump, "vect_model_load_cost: explicit realign");
1075 break;
1077 case dr_explicit_realign_optimized:
1079 if (vect_print_dump_info (REPORT_COST))
1080 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
1081 "pipelined.");
1083 /* Unaligned software pipeline has a load of an address, an initial
1084 load, and possibly a mask operation to "prime" the loop. However,
1085 if this is an access in a group of loads, which provide grouped
1086 access, then the above cost should only be considered for one
1087 access in the group. Inside the loop, there is a load op
1088 and a realignment op. */
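/* Illustrative numbers: with ncopies == 2 this adds 2 vector_load and
   2 vec_perm operations inside the loop; when ADD_REALIGN_COST is true
   the prologue additionally pays for 2 vector_stmt operations (3 if the
   target provides builtin_mask_for_load).  */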
1090 if (add_realign_cost)
1092 *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
1093 if (targetm.vectorize.builtin_mask_for_load)
1094 *outside_cost += vect_get_stmt_cost (vector_stmt);
1097 *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
1098 + vect_get_stmt_cost (vec_perm));
1100 if (vect_print_dump_info (REPORT_COST))
1101 fprintf (vect_dump,
1102 "vect_model_load_cost: explicit realign optimized");
1104 break;
1107 case dr_unaligned_unsupported:
1109 *inside_cost = VECT_MAX_COST;
1111 if (vect_print_dump_info (REPORT_COST))
1112 fprintf (vect_dump, "vect_model_load_cost: unsupported access.");
1114 break;
1117 default:
1118 gcc_unreachable ();
1122 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1123 the loop preheader for the vectorized stmt STMT. */
1125 static void
1126 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1128 if (gsi)
1129 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1130 else
1132 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1133 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1135 if (loop_vinfo)
1137 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1138 basic_block new_bb;
1139 edge pe;
1141 if (nested_in_vect_loop_p (loop, stmt))
1142 loop = loop->inner;
1144 pe = loop_preheader_edge (loop);
1145 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1146 gcc_assert (!new_bb);
1148 else
1150 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1151 basic_block bb;
1152 gimple_stmt_iterator gsi_bb_start;
1154 gcc_assert (bb_vinfo);
1155 bb = BB_VINFO_BB (bb_vinfo);
1156 gsi_bb_start = gsi_after_labels (bb);
1157 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1161 if (vect_print_dump_info (REPORT_DETAILS))
1163 fprintf (vect_dump, "created new init_stmt: ");
1164 print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
1168 /* Function vect_init_vector.
1170 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1171 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
 1172 a vector type, a vector with all elements equal to VAL is created first.
1173 Place the initialization at BSI if it is not NULL. Otherwise, place the
1174 initialization at the loop preheader.
1175 Return the DEF of INIT_STMT.
1176 It will be used in the vectorization of STMT. */
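/* For example (illustrative): with a 4-element integer vector TYPE and
   VAL == 3, an init stmt along the lines of

     vect_cst_.N = { 3, 3, 3, 3 };

   is emitted in the loop preheader (or at GSI when it is non-NULL) and
   the SSA name it defines is returned.  */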
1178 tree
1179 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1181 tree new_var;
1182 gimple init_stmt;
1183 tree vec_oprnd;
1184 tree new_temp;
1186 if (TREE_CODE (type) == VECTOR_TYPE
1187 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1189 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1191 if (CONSTANT_CLASS_P (val))
1192 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1193 else
1195 new_var = create_tmp_reg (TREE_TYPE (type), NULL);
1196 add_referenced_var (new_var);
1197 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1198 new_var, val,
1199 NULL_TREE);
1200 new_temp = make_ssa_name (new_var, init_stmt);
1201 gimple_assign_set_lhs (init_stmt, new_temp);
1202 vect_init_vector_1 (stmt, init_stmt, gsi);
1203 val = new_temp;
1206 val = build_vector_from_val (type, val);
1209 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1210 add_referenced_var (new_var);
1211 init_stmt = gimple_build_assign (new_var, val);
1212 new_temp = make_ssa_name (new_var, init_stmt);
1213 gimple_assign_set_lhs (init_stmt, new_temp);
1214 vect_init_vector_1 (stmt, init_stmt, gsi);
1215 vec_oprnd = gimple_assign_lhs (init_stmt);
1216 return vec_oprnd;
1220 /* Function vect_get_vec_def_for_operand.
1222 OP is an operand in STMT. This function returns a (vector) def that will be
1223 used in the vectorized stmt for STMT.
1225 In the case that OP is an SSA_NAME which is defined in the loop, then
1226 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1228 In case OP is an invariant or constant, a new stmt that creates a vector def
1229 needs to be introduced. */
1231 tree
1232 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1234 tree vec_oprnd;
1235 gimple vec_stmt;
1236 gimple def_stmt;
1237 stmt_vec_info def_stmt_info = NULL;
1238 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1239 unsigned int nunits;
1240 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1241 tree def;
1242 enum vect_def_type dt;
1243 bool is_simple_use;
1244 tree vector_type;
1246 if (vect_print_dump_info (REPORT_DETAILS))
1248 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1249 print_generic_expr (vect_dump, op, TDF_SLIM);
1252 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1253 &def_stmt, &def, &dt);
1254 gcc_assert (is_simple_use);
1255 if (vect_print_dump_info (REPORT_DETAILS))
1257 if (def)
1259 fprintf (vect_dump, "def = ");
1260 print_generic_expr (vect_dump, def, TDF_SLIM);
1262 if (def_stmt)
1264 fprintf (vect_dump, " def_stmt = ");
1265 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1269 switch (dt)
1271 /* Case 1: operand is a constant. */
1272 case vect_constant_def:
1274 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1275 gcc_assert (vector_type);
1276 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1278 if (scalar_def)
1279 *scalar_def = op;
1281 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1282 if (vect_print_dump_info (REPORT_DETAILS))
1283 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1285 return vect_init_vector (stmt, op, vector_type, NULL);
1288 /* Case 2: operand is defined outside the loop - loop invariant. */
1289 case vect_external_def:
1291 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1292 gcc_assert (vector_type);
1294 if (scalar_def)
1295 *scalar_def = def;
1297 /* Create 'vec_inv = {inv,inv,..,inv}' */
1298 if (vect_print_dump_info (REPORT_DETAILS))
1299 fprintf (vect_dump, "Create vector_inv.");
1301 return vect_init_vector (stmt, def, vector_type, NULL);
1304 /* Case 3: operand is defined inside the loop. */
1305 case vect_internal_def:
1307 if (scalar_def)
1308 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1310 /* Get the def from the vectorized stmt. */
1311 def_stmt_info = vinfo_for_stmt (def_stmt);
1313 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1314 /* Get vectorized pattern statement. */
1315 if (!vec_stmt
1316 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1317 && !STMT_VINFO_RELEVANT (def_stmt_info))
1318 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1319 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1320 gcc_assert (vec_stmt);
1321 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1322 vec_oprnd = PHI_RESULT (vec_stmt);
1323 else if (is_gimple_call (vec_stmt))
1324 vec_oprnd = gimple_call_lhs (vec_stmt);
1325 else
1326 vec_oprnd = gimple_assign_lhs (vec_stmt);
1327 return vec_oprnd;
1330 /* Case 4: operand is defined by a loop header phi - reduction */
1331 case vect_reduction_def:
1332 case vect_double_reduction_def:
1333 case vect_nested_cycle:
1335 struct loop *loop;
1337 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1338 loop = (gimple_bb (def_stmt))->loop_father;
1340 /* Get the def before the loop */
1341 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1342 return get_initial_def_for_reduction (stmt, op, scalar_def);
1345 /* Case 5: operand is defined by loop-header phi - induction. */
1346 case vect_induction_def:
1348 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1350 /* Get the def from the vectorized stmt. */
1351 def_stmt_info = vinfo_for_stmt (def_stmt);
1352 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1353 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1354 vec_oprnd = PHI_RESULT (vec_stmt);
1355 else
1356 vec_oprnd = gimple_get_lhs (vec_stmt);
1357 return vec_oprnd;
1360 default:
1361 gcc_unreachable ();
1366 /* Function vect_get_vec_def_for_stmt_copy
1368 Return a vector-def for an operand. This function is used when the
1369 vectorized stmt to be created (by the caller to this function) is a "copy"
1370 created in case the vectorized result cannot fit in one vector, and several
1371 copies of the vector-stmt are required. In this case the vector-def is
1372 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1373 of the stmt that defines VEC_OPRND.
1374 DT is the type of the vector def VEC_OPRND.
1376 Context:
1377 In case the vectorization factor (VF) is bigger than the number
1378 of elements that can fit in a vectype (nunits), we have to generate
1379 more than one vector stmt to vectorize the scalar stmt. This situation
1380 arises when there are multiple data-types operated upon in the loop; the
1381 smallest data-type determines the VF, and as a result, when vectorizing
1382 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1383 vector stmt (each computing a vector of 'nunits' results, and together
1384 computing 'VF' results in each iteration). This function is called when
1385 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1386 which VF=16 and nunits=4, so the number of copies required is 4):
1388 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1390 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1391 VS1.1: vx.1 = memref1 VS1.2
1392 VS1.2: vx.2 = memref2 VS1.3
1393 VS1.3: vx.3 = memref3
1395 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1396 VSnew.1: vz1 = vx.1 + ... VSnew.2
1397 VSnew.2: vz2 = vx.2 + ... VSnew.3
1398 VSnew.3: vz3 = vx.3 + ...
1400 The vectorization of S1 is explained in vectorizable_load.
1401 The vectorization of S2:
1402 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1403 the function 'vect_get_vec_def_for_operand' is called to
1404 get the relevant vector-def for each operand of S2. For operand x it
1405 returns the vector-def 'vx.0'.
1407 To create the remaining copies of the vector-stmt (VSnew.j), this
1408 function is called to get the relevant vector-def for each operand. It is
1409 obtained from the respective VS1.j stmt, which is recorded in the
1410 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1412 For example, to obtain the vector-def 'vx.1' in order to create the
1413 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1414 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1415 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1416 and return its def ('vx.1').
1417 Overall, to create the above sequence this function will be called 3 times:
1418 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1419 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1420 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1422 tree
1423 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1425 gimple vec_stmt_for_operand;
1426 stmt_vec_info def_stmt_info;
1428 /* Do nothing; can reuse same def. */
1429 if (dt == vect_external_def || dt == vect_constant_def )
1430 return vec_oprnd;
1432 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1433 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1434 gcc_assert (def_stmt_info);
1435 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1436 gcc_assert (vec_stmt_for_operand);
1437 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1438 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1439 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1440 else
1441 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1442 return vec_oprnd;
1446 /* Get vectorized definitions for the operands to create a copy of an original
1447 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1449 static void
1450 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1451 VEC(tree,heap) **vec_oprnds0,
1452 VEC(tree,heap) **vec_oprnds1)
1454 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1456 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1457 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1459 if (vec_oprnds1 && *vec_oprnds1)
1461 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1462 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1463 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1468 /* Get vectorized definitions for OP0 and OP1.
1469 REDUC_INDEX is the index of reduction operand in case of reduction,
1470 and -1 otherwise. */
1472 void
1473 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1474 VEC (tree, heap) **vec_oprnds0,
1475 VEC (tree, heap) **vec_oprnds1,
1476 slp_tree slp_node, int reduc_index)
1478 if (slp_node)
1480 int nops = (op1 == NULL_TREE) ? 1 : 2;
1481 VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
1482 VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
1484 VEC_quick_push (tree, ops, op0);
1485 if (op1)
1486 VEC_quick_push (tree, ops, op1);
1488 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1490 *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1491 if (op1)
1492 *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
1494 VEC_free (tree, heap, ops);
1495 VEC_free (slp_void_p, heap, vec_defs);
1497 else
1499 tree vec_oprnd;
1501 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1502 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1503 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1505 if (op1)
1507 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1508 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1509 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1515 /* Function vect_finish_stmt_generation.
1517 Insert a new stmt. */
1519 void
1520 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1521 gimple_stmt_iterator *gsi)
1523 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1524 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1525 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1527 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1529 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1531 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1532 bb_vinfo));
1534 if (vect_print_dump_info (REPORT_DETAILS))
1536 fprintf (vect_dump, "add new stmt: ");
1537 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
1540 gimple_set_location (vec_stmt, gimple_location (stmt));
1543 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1544 a function declaration if the target has a vectorized version
1545 of the function, or NULL_TREE if the function cannot be vectorized. */
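/* For instance, a call to a const math builtin such as sqrt may be
   mapped by the target hook to a machine-specific vector-sqrt builtin
   operating on VECTYPE_OUT/VECTYPE_IN, when the target provides one;
   the exact mapping is entirely target-dependent.  */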
1547 tree
1548 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1550 tree fndecl = gimple_call_fndecl (call);
1552 /* We only handle functions that do not read or clobber memory -- i.e.
1553 const or novops ones. */
1554 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1555 return NULL_TREE;
1557 if (!fndecl
1558 || TREE_CODE (fndecl) != FUNCTION_DECL
1559 || !DECL_BUILT_IN (fndecl))
1560 return NULL_TREE;
1562 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1563 vectype_in);
1566 /* Function vectorizable_call.
1568 Check if STMT performs a function call that can be vectorized.
1569 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1570 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1571 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1573 static bool
1574 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1575 slp_tree slp_node)
1577 tree vec_dest;
1578 tree scalar_dest;
1579 tree op, type;
1580 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1581 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1582 tree vectype_out, vectype_in;
1583 int nunits_in;
1584 int nunits_out;
1585 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1586 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1587 tree fndecl, new_temp, def, rhs_type;
1588 gimple def_stmt;
1589 enum vect_def_type dt[3]
1590 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1591 gimple new_stmt = NULL;
1592 int ncopies, j;
1593 VEC(tree, heap) *vargs = NULL;
1594 enum { NARROW, NONE, WIDEN } modifier;
1595 size_t i, nargs;
1596 tree lhs;
1598 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1599 return false;
1601 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1602 return false;
1604 /* Is STMT a vectorizable call? */
1605 if (!is_gimple_call (stmt))
1606 return false;
1608 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1609 return false;
1611 if (stmt_can_throw_internal (stmt))
1612 return false;
1614 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1616 /* Process function arguments. */
1617 rhs_type = NULL_TREE;
1618 vectype_in = NULL_TREE;
1619 nargs = gimple_call_num_args (stmt);
 1621 /* Bail out if the function has more than three arguments; we do not have
1622 interesting builtin functions to vectorize with more than two arguments
1623 except for fma. No arguments is also not good. */
1624 if (nargs == 0 || nargs > 3)
1625 return false;
1627 for (i = 0; i < nargs; i++)
1629 tree opvectype;
1631 op = gimple_call_arg (stmt, i);
1633 /* We can only handle calls with arguments of the same type. */
1634 if (rhs_type
1635 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1637 if (vect_print_dump_info (REPORT_DETAILS))
1638 fprintf (vect_dump, "argument types differ.");
1639 return false;
1641 if (!rhs_type)
1642 rhs_type = TREE_TYPE (op);
1644 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1645 &def_stmt, &def, &dt[i], &opvectype))
1647 if (vect_print_dump_info (REPORT_DETAILS))
1648 fprintf (vect_dump, "use not simple.");
1649 return false;
1652 if (!vectype_in)
1653 vectype_in = opvectype;
1654 else if (opvectype
1655 && opvectype != vectype_in)
1657 if (vect_print_dump_info (REPORT_DETAILS))
1658 fprintf (vect_dump, "argument vector types differ.");
1659 return false;
1662 /* If all arguments are external or constant defs use a vector type with
1663 the same size as the output vector type. */
1664 if (!vectype_in)
1665 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1666 if (vec_stmt)
1667 gcc_assert (vectype_in);
1668 if (!vectype_in)
1670 if (vect_print_dump_info (REPORT_DETAILS))
1672 fprintf (vect_dump, "no vectype for scalar type ");
1673 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
1676 return false;
1679 /* FORNOW */
1680 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1681 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1682 if (nunits_in == nunits_out / 2)
1683 modifier = NARROW;
1684 else if (nunits_out == nunits_in)
1685 modifier = NONE;
1686 else if (nunits_out == nunits_in / 2)
1687 modifier = WIDEN;
1688 else
1689 return false;
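/* Illustrative example: a call consuming 8-element vectors
   (nunits_in == 8) and producing 4-element vectors (nunits_out == 4)
   selects WIDEN; equal unit counts select NONE; the reverse ratio
   selects NARROW.  */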
1691 /* For now, we only vectorize functions if a target specific builtin
1692 is available. TODO -- in some cases, it might be profitable to
1693 insert the calls for pieces of the vector, in order to be able
1694 to vectorize other operations in the loop. */
1695 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1696 if (fndecl == NULL_TREE)
1698 if (vect_print_dump_info (REPORT_DETAILS))
1699 fprintf (vect_dump, "function is not vectorizable.");
1701 return false;
1704 gcc_assert (!gimple_vuse (stmt));
1706 if (slp_node || PURE_SLP_STMT (stmt_info))
1707 ncopies = 1;
1708 else if (modifier == NARROW)
1709 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1710 else
1711 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
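/* Illustrative numbers: with a vectorization factor of 8 and
   nunits_in == 4, two copies of the vectorized call are generated.  */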
1713 /* Sanity check: make sure that at least one copy of the vectorized stmt
1714 needs to be generated. */
1715 gcc_assert (ncopies >= 1);
1717 if (!vec_stmt) /* transformation not required. */
1719 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1720 if (vect_print_dump_info (REPORT_DETAILS))
1721 fprintf (vect_dump, "=== vectorizable_call ===");
1722 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
1723 return true;
1726 /** Transform. **/
1728 if (vect_print_dump_info (REPORT_DETAILS))
1729 fprintf (vect_dump, "transform call.");
1731 /* Handle def. */
1732 scalar_dest = gimple_call_lhs (stmt);
1733 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1735 prev_stmt_info = NULL;
1736 switch (modifier)
1738 case NONE:
1739 for (j = 0; j < ncopies; ++j)
1741 /* Build argument list for the vectorized call. */
1742 if (j == 0)
1743 vargs = VEC_alloc (tree, heap, nargs);
1744 else
1745 VEC_truncate (tree, vargs, 0);
1747 if (slp_node)
1749 VEC (slp_void_p, heap) *vec_defs
1750 = VEC_alloc (slp_void_p, heap, nargs);
1751 VEC (tree, heap) *vec_oprnds0;
1753 for (i = 0; i < nargs; i++)
1754 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1755 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1756 vec_oprnds0
1757 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1759 /* Arguments are ready. Create the new vector stmt. */
1760 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
1762 size_t k;
1763 for (k = 0; k < nargs; k++)
1765 VEC (tree, heap) *vec_oprndsk
1766 = (VEC (tree, heap) *)
1767 VEC_index (slp_void_p, vec_defs, k);
1768 VEC_replace (tree, vargs, k,
1769 VEC_index (tree, vec_oprndsk, i));
1771 new_stmt = gimple_build_call_vec (fndecl, vargs);
1772 new_temp = make_ssa_name (vec_dest, new_stmt);
1773 gimple_call_set_lhs (new_stmt, new_temp);
1774 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1775 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1776 new_stmt);
1779 for (i = 0; i < nargs; i++)
1781 VEC (tree, heap) *vec_oprndsi
1782 = (VEC (tree, heap) *)
1783 VEC_index (slp_void_p, vec_defs, i);
1784 VEC_free (tree, heap, vec_oprndsi);
1786 VEC_free (slp_void_p, heap, vec_defs);
1787 continue;
1790 for (i = 0; i < nargs; i++)
1792 op = gimple_call_arg (stmt, i);
1793 if (j == 0)
1794 vec_oprnd0
1795 = vect_get_vec_def_for_operand (op, stmt, NULL);
1796 else
1798 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1799 vec_oprnd0
1800 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1803 VEC_quick_push (tree, vargs, vec_oprnd0);
1806 new_stmt = gimple_build_call_vec (fndecl, vargs);
1807 new_temp = make_ssa_name (vec_dest, new_stmt);
1808 gimple_call_set_lhs (new_stmt, new_temp);
1809 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1811 if (j == 0)
1812 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1813 else
1814 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1816 prev_stmt_info = vinfo_for_stmt (new_stmt);
1819 break;
1821 case NARROW:
1822 for (j = 0; j < ncopies; ++j)
1824 /* Build argument list for the vectorized call. */
1825 if (j == 0)
1826 vargs = VEC_alloc (tree, heap, nargs * 2);
1827 else
1828 VEC_truncate (tree, vargs, 0);
1830 if (slp_node)
1832 VEC (slp_void_p, heap) *vec_defs
1833 = VEC_alloc (slp_void_p, heap, nargs);
1834 VEC (tree, heap) *vec_oprnds0;
1836 for (i = 0; i < nargs; i++)
1837 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1838 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1839 vec_oprnds0
1840 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1842 /* Arguments are ready. Create the new vector stmt. */
1843 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
1844 i += 2)
1846 size_t k;
1847 VEC_truncate (tree, vargs, 0);
1848 for (k = 0; k < nargs; k++)
1850 VEC (tree, heap) *vec_oprndsk
1851 = (VEC (tree, heap) *)
1852 VEC_index (slp_void_p, vec_defs, k);
1853 VEC_quick_push (tree, vargs,
1854 VEC_index (tree, vec_oprndsk, i));
1855 VEC_quick_push (tree, vargs,
1856 VEC_index (tree, vec_oprndsk, i + 1));
1858 new_stmt = gimple_build_call_vec (fndecl, vargs);
1859 new_temp = make_ssa_name (vec_dest, new_stmt);
1860 gimple_call_set_lhs (new_stmt, new_temp);
1861 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1862 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1863 new_stmt);
1866 for (i = 0; i < nargs; i++)
1868 VEC (tree, heap) *vec_oprndsi
1869 = (VEC (tree, heap) *)
1870 VEC_index (slp_void_p, vec_defs, i);
1871 VEC_free (tree, heap, vec_oprndsi);
1873 VEC_free (slp_void_p, heap, vec_defs);
1874 continue;
1877 for (i = 0; i < nargs; i++)
1879 op = gimple_call_arg (stmt, i);
1880 if (j == 0)
1882 vec_oprnd0
1883 = vect_get_vec_def_for_operand (op, stmt, NULL);
1884 vec_oprnd1
1885 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1887 else
1889 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
1890 vec_oprnd0
1891 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
1892 vec_oprnd1
1893 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1896 VEC_quick_push (tree, vargs, vec_oprnd0);
1897 VEC_quick_push (tree, vargs, vec_oprnd1);
1900 new_stmt = gimple_build_call_vec (fndecl, vargs);
1901 new_temp = make_ssa_name (vec_dest, new_stmt);
1902 gimple_call_set_lhs (new_stmt, new_temp);
1903 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1905 if (j == 0)
1906 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
1907 else
1908 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1910 prev_stmt_info = vinfo_for_stmt (new_stmt);
1913 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
1915 break;
1917 case WIDEN:
1918 /* No current target implements this case. */
1919 return false;
1922 VEC_free (tree, heap, vargs);
1924 /* Update the exception handling table with the vector stmt if necessary. */
1925 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
1926 gimple_purge_dead_eh_edges (gimple_bb (stmt));
1928 /* The call in STMT might prevent it from being removed in dce.
1929 We however cannot remove it here, due to the way the ssa name
1930 it defines is mapped to the new definition. So just replace
1931 the rhs of the statement with something harmless. */
1933 if (slp_node)
1934 return true;
1936 type = TREE_TYPE (scalar_dest);
1937 if (is_pattern_stmt_p (stmt_info))
1938 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
1939 else
1940 lhs = gimple_call_lhs (stmt);
1941 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
1942 set_vinfo_for_stmt (new_stmt, stmt_info);
1943 set_vinfo_for_stmt (stmt, NULL);
1944 STMT_VINFO_STMT (stmt_info) = new_stmt;
1945 gsi_replace (gsi, new_stmt, false);
1946 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
1948 return true;
1952 /* Function vect_gen_widened_results_half
1954 Create a vector stmt whose code, number of arguments, and result
1955 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
1956 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
1957 In the case that CODE is a CALL_EXPR, this means that a call to DECL
1958 needs to be created (DECL is a function-decl of a target-builtin).
1959 STMT is the original scalar stmt that we are vectorizing. */
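/* For illustration only (not tied to any particular target): widening a
   V8HI operand vx to V4SI results is done by calling this function once
   per half, e.g.

     vlo = VEC_UNPACK_LO_EXPR <vx>      CODE = VEC_UNPACK_LO_EXPR, unary_op
     vhi = VEC_UNPACK_HI_EXPR <vx>      CODE = VEC_UNPACK_HI_EXPR, unary_op

   or, when only a target builtin is available, by building the
   corresponding CALL_EXPRs to DECL instead.  */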
1961 static gimple
1962 vect_gen_widened_results_half (enum tree_code code,
1963 tree decl,
1964 tree vec_oprnd0, tree vec_oprnd1, int op_type,
1965 tree vec_dest, gimple_stmt_iterator *gsi,
1966 gimple stmt)
1968 gimple new_stmt;
1969 tree new_temp;
1971 /* Generate half of the widened result: */
1972 if (code == CALL_EXPR)
1974 /* Target specific support */
1975 if (op_type == binary_op)
1976 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
1977 else
1978 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
1979 new_temp = make_ssa_name (vec_dest, new_stmt);
1980 gimple_call_set_lhs (new_stmt, new_temp);
1982 else
1984 /* Generic support */
1985 gcc_assert (op_type == TREE_CODE_LENGTH (code));
1986 if (op_type != binary_op)
1987 vec_oprnd1 = NULL;
1988 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
1989 vec_oprnd1);
1990 new_temp = make_ssa_name (vec_dest, new_stmt);
1991 gimple_assign_set_lhs (new_stmt, new_temp);
1993 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1995 return new_stmt;
1999 /* Get vectorized definitions for loop-based vectorization. For the first
2000 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2001 scalar operand), and for the rest we get a copy with
2002 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2003 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2004 The vectors are collected into VEC_OPRNDS. */
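/* For illustration only: with MULTI_STEP_CVT == 1 and a scalar operand x,
   the calls below collect four vector defs,

     vx0 = <def of x>          vect_get_vec_def_for_operand
     vx1 = <copy of vx0>       vect_get_vec_def_for_stmt_copy
     vx2 = <copy of vx1>       recursive call, first operand
     vx3 = <copy of vx2>       recursive call, second operand

   so VEC_OPRNDS ends up as {vx0, vx1, vx2, vx3}.  */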
2006 static void
2007 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2008 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2010 tree vec_oprnd;
2012 /* Get first vector operand. */
2013 /* All the vector operands except the very first one (which is the scalar
2014 operand) are stmt copies. */
2015 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2016 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2017 else
2018 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2020 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2022 /* Get second vector operand. */
2023 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2024 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2026 *oprnd = vec_oprnd;
2028 /* For conversion in multiple steps, continue to get operands
2029 recursively. */
2030 if (multi_step_cvt)
2031 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2035 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2036 For multi-step conversions store the resulting vectors and call the function
2037 recursively. */
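/* For illustration only, assuming V4SI operands packed into V8HI results:
   each iteration of the loop below combines one pair of input vectors,

     vres0 = VEC_PACK_TRUNC_EXPR <vop0, vop1>;
     vres1 = VEC_PACK_TRUNC_EXPR <vop2, vop3>;

   For a multi-step conversion (e.g. int -> char) the intermediate results
   are stored back into VEC_OPRNDS and the function recurses to pack them
   once more into the final V16QI vectors.  */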
2039 static void
2040 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2041 int multi_step_cvt, gimple stmt,
2042 VEC (tree, heap) *vec_dsts,
2043 gimple_stmt_iterator *gsi,
2044 slp_tree slp_node, enum tree_code code,
2045 stmt_vec_info *prev_stmt_info)
2047 unsigned int i;
2048 tree vop0, vop1, new_tmp, vec_dest;
2049 gimple new_stmt;
2050 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2052 vec_dest = VEC_pop (tree, vec_dsts);
2054 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2056 /* Create demotion operation. */
2057 vop0 = VEC_index (tree, *vec_oprnds, i);
2058 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2059 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2060 new_tmp = make_ssa_name (vec_dest, new_stmt);
2061 gimple_assign_set_lhs (new_stmt, new_tmp);
2062 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2064 if (multi_step_cvt)
2065 /* Store the resulting vector for the next recursive call. */
2066 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2067 else
2069 /* This is the last step of the conversion sequence. Store the
2070 vectors in SLP_NODE or in vector info of the scalar statement
2071 (or in STMT_VINFO_RELATED_STMT chain). */
2072 if (slp_node)
2073 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2074 else
2076 if (!*prev_stmt_info)
2077 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2078 else
2079 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2081 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2086 /* For multi-step demotion operations we first generate demotion operations
2087 from the source type to the intermediate types, and then combine the
2088 results (stored in VEC_OPRNDS) with a demotion operation to the destination
2089 type. */
2090 if (multi_step_cvt)
2092 /* At each level of recursion we have half of the operands we had at the
2093 previous level. */
2094 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2095 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2096 stmt, vec_dsts, gsi, slp_node,
2097 VEC_PACK_TRUNC_EXPR,
2098 prev_stmt_info);
2101 VEC_quick_push (tree, vec_dsts, vec_dest);
2105 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2106 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2107 the resulting vectors and call the function recursively. */
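/* For illustration only: each (vop0 [, vop1]) pair taken from VEC_OPRNDS0
   and VEC_OPRNDS1 produces two result vectors,

     vlo = CODE1 <vop0 [, vop1]>      low half of the widened result
     vhi = CODE2 <vop0 [, vop1]>      high half of the widened result

   and VEC_OPRNDS0 is replaced by {vlo0, vhi0, vlo1, vhi1, ...} so that a
   further widening step can widen the halves again.  */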
2109 static void
2110 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2111 VEC (tree, heap) **vec_oprnds1,
2112 gimple stmt, tree vec_dest,
2113 gimple_stmt_iterator *gsi,
2114 enum tree_code code1,
2115 enum tree_code code2, tree decl1,
2116 tree decl2, int op_type)
2118 int i;
2119 tree vop0, vop1, new_tmp1, new_tmp2;
2120 gimple new_stmt1, new_stmt2;
2121 VEC (tree, heap) *vec_tmp = NULL;
2123 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2124 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2126 if (op_type == binary_op)
2127 vop1 = VEC_index (tree, *vec_oprnds1, i);
2128 else
2129 vop1 = NULL_TREE;
2131 /* Generate the two halves of the promotion operation. */
2132 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2133 op_type, vec_dest, gsi, stmt);
2134 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2135 op_type, vec_dest, gsi, stmt);
2136 if (is_gimple_call (new_stmt1))
2138 new_tmp1 = gimple_call_lhs (new_stmt1);
2139 new_tmp2 = gimple_call_lhs (new_stmt2);
2141 else
2143 new_tmp1 = gimple_assign_lhs (new_stmt1);
2144 new_tmp2 = gimple_assign_lhs (new_stmt2);
2147 /* Store the results for the next step. */
2148 VEC_quick_push (tree, vec_tmp, new_tmp1);
2149 VEC_quick_push (tree, vec_tmp, new_tmp2);
2152 VEC_free (tree, heap, *vec_oprnds0);
2153 *vec_oprnds0 = vec_tmp;
2157 /* Check if STMT performs a conversion operation that can be vectorized.
2158 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2159 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2160 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
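/* For illustration only, assuming 128-bit vector modes:

     int -> float    V4SI -> V4SF   nunits_in == nunits_out     (NONE)
     short -> int    V8HI -> V4SI   nunits_in >  nunits_out     (WIDEN)
     int -> short    V4SI -> V8HI   nunits_in <  nunits_out     (NARROW)

   are the three shapes of conversion handled below.  */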
2162 static bool
2163 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2164 gimple *vec_stmt, slp_tree slp_node)
2166 tree vec_dest;
2167 tree scalar_dest;
2168 tree op0, op1 = NULL_TREE;
2169 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2170 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2171 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2172 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2173 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2174 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2175 tree new_temp;
2176 tree def;
2177 gimple def_stmt;
2178 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2179 gimple new_stmt = NULL;
2180 stmt_vec_info prev_stmt_info;
2181 int nunits_in;
2182 int nunits_out;
2183 tree vectype_out, vectype_in;
2184 int ncopies, i, j;
2185 tree lhs_type, rhs_type;
2186 enum { NARROW, NONE, WIDEN } modifier;
2187 VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2188 tree vop0;
2189 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2190 int multi_step_cvt = 0;
2191 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2192 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2193 int op_type;
2194 enum machine_mode rhs_mode;
2195 unsigned short fltsz;
2197 /* Is STMT a vectorizable conversion? */
2199 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2200 return false;
2202 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2203 return false;
2205 if (!is_gimple_assign (stmt))
2206 return false;
2208 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2209 return false;
2211 code = gimple_assign_rhs_code (stmt);
2212 if (!CONVERT_EXPR_CODE_P (code)
2213 && code != FIX_TRUNC_EXPR
2214 && code != FLOAT_EXPR
2215 && code != WIDEN_MULT_EXPR
2216 && code != WIDEN_LSHIFT_EXPR)
2217 return false;
2219 op_type = TREE_CODE_LENGTH (code);
2221 /* Check types of lhs and rhs. */
2222 scalar_dest = gimple_assign_lhs (stmt);
2223 lhs_type = TREE_TYPE (scalar_dest);
2224 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2226 op0 = gimple_assign_rhs1 (stmt);
2227 rhs_type = TREE_TYPE (op0);
2229 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2230 && !((INTEGRAL_TYPE_P (lhs_type)
2231 && INTEGRAL_TYPE_P (rhs_type))
2232 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2233 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2234 return false;
2236 if ((INTEGRAL_TYPE_P (lhs_type)
2237 && (TYPE_PRECISION (lhs_type)
2238 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2239 || (INTEGRAL_TYPE_P (rhs_type)
2240 && (TYPE_PRECISION (rhs_type)
2241 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2243 if (vect_print_dump_info (REPORT_DETAILS))
2244 fprintf (vect_dump,
2245 "type conversion to/from bit-precision unsupported.");
2246 return false;
2249 /* Check the operands of the operation. */
2250 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2251 &def_stmt, &def, &dt[0], &vectype_in))
2253 if (vect_print_dump_info (REPORT_DETAILS))
2254 fprintf (vect_dump, "use not simple.");
2255 return false;
2257 if (op_type == binary_op)
2259 bool ok;
2261 op1 = gimple_assign_rhs2 (stmt);
2262 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2263 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2264 OP1. */
2265 if (CONSTANT_CLASS_P (op0))
2266 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
2267 &def_stmt, &def, &dt[1], &vectype_in);
2268 else
2269 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
2270 &def, &dt[1]);
2272 if (!ok)
2274 if (vect_print_dump_info (REPORT_DETAILS))
2275 fprintf (vect_dump, "use not simple.");
2276 return false;
2280 /* If op0 is an external or constant def, use a vector type of
2281 the same size as the output vector type. */
2282 if (!vectype_in)
2283 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2284 if (vec_stmt)
2285 gcc_assert (vectype_in);
2286 if (!vectype_in)
2288 if (vect_print_dump_info (REPORT_DETAILS))
2290 fprintf (vect_dump, "no vectype for scalar type ");
2291 print_generic_expr (vect_dump, rhs_type, TDF_SLIM);
2294 return false;
2297 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2298 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2299 if (nunits_in < nunits_out)
2300 modifier = NARROW;
2301 else if (nunits_out == nunits_in)
2302 modifier = NONE;
2303 else
2304 modifier = WIDEN;
2306 /* Multiple types in SLP are handled by creating the appropriate number of
2307 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2308 case of SLP. */
2309 if (slp_node || PURE_SLP_STMT (stmt_info))
2310 ncopies = 1;
2311 else if (modifier == NARROW)
2312 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2313 else
2314 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2316 /* Sanity check: make sure that at least one copy of the vectorized stmt
2317 needs to be generated. */
2318 gcc_assert (ncopies >= 1);
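/* For illustration only: with a vectorization factor of 8 and V4SI vectors
   (4 units), ncopies == 8 / 4 == 2 copies of the vectorized stmt.  For the
   NARROW case the count is based on nunits_out, since each vectorized stmt
   produces one output vector from several input vectors.  */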
2320 /* Supportable by target? */
2321 switch (modifier)
2323 case NONE:
2324 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2325 return false;
2326 if (supportable_convert_operation (code, vectype_out, vectype_in,
2327 &decl1, &code1))
2328 break;
2329 /* FALLTHRU */
2330 unsupported:
2331 if (vect_print_dump_info (REPORT_DETAILS))
2332 fprintf (vect_dump, "conversion not supported by target.");
2333 return false;
2335 case WIDEN:
2336 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2337 &decl1, &decl2, &code1, &code2,
2338 &multi_step_cvt, &interm_types))
2340 /* Binary widening operation can only be supported directly by the
2341 architecture. */
2342 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2343 break;
2346 if (code != FLOAT_EXPR
2347 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2348 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2349 goto unsupported;
2351 rhs_mode = TYPE_MODE (rhs_type);
2352 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2353 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2354 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2355 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2357 cvt_type
2358 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2359 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2360 if (cvt_type == NULL_TREE)
2361 goto unsupported;
2363 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2365 if (!supportable_convert_operation (code, vectype_out,
2366 cvt_type, &decl1, &codecvt1))
2367 goto unsupported;
2369 else if (!supportable_widening_operation (code, stmt, vectype_out,
2370 cvt_type, &decl1, &decl2,
2371 &codecvt1, &codecvt2,
2372 &multi_step_cvt,
2373 &interm_types))
2374 continue;
2375 else
2376 gcc_assert (multi_step_cvt == 0);
2378 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2379 vectype_in, NULL, NULL, &code1,
2380 &code2, &multi_step_cvt,
2381 &interm_types))
2382 break;
2385 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2386 goto unsupported;
2388 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2389 codecvt2 = ERROR_MARK;
2390 else
2392 multi_step_cvt++;
2393 VEC_safe_push (tree, heap, interm_types, cvt_type);
2394 cvt_type = NULL_TREE;
2396 break;
2398 case NARROW:
2399 gcc_assert (op_type == unary_op);
2400 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2401 &code1, &multi_step_cvt,
2402 &interm_types))
2403 break;
2405 if (code != FIX_TRUNC_EXPR
2406 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2407 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2408 goto unsupported;
2410 rhs_mode = TYPE_MODE (rhs_type);
2411 cvt_type
2412 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2413 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2414 if (cvt_type == NULL_TREE)
2415 goto unsupported;
2416 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2417 &decl1, &codecvt1))
2418 goto unsupported;
2419 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2420 &code1, &multi_step_cvt,
2421 &interm_types))
2422 break;
2423 goto unsupported;
2425 default:
2426 gcc_unreachable ();
2429 if (!vec_stmt) /* transformation not required. */
2431 if (vect_print_dump_info (REPORT_DETAILS))
2432 fprintf (vect_dump, "=== vectorizable_conversion ===");
2433 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2435 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2436 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2438 else if (modifier == NARROW)
2440 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2441 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2443 else
2445 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2446 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2448 VEC_free (tree, heap, interm_types);
2449 return true;
2452 /** Transform. **/
2453 if (vect_print_dump_info (REPORT_DETAILS))
2454 fprintf (vect_dump, "transform conversion. ncopies = %d.", ncopies);
2456 if (op_type == binary_op)
2458 if (CONSTANT_CLASS_P (op0))
2459 op0 = fold_convert (TREE_TYPE (op1), op0);
2460 else if (CONSTANT_CLASS_P (op1))
2461 op1 = fold_convert (TREE_TYPE (op0), op1);
2464 /* In case of multi-step conversion, we first generate conversion operations
2465 to the intermediate types, and then from those types to the final one.
2466 We create vector destinations for the intermediate types (TYPES) received
2467 from supportable_*_operation, and store them in the correct order
2468 for future use in vect_create_vectorized_*_stmts (). */
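/* For illustration only: for a two-step widening such as char -> int
   (V16QI -> V8HI -> V4SI) the destination for the final V4SI type is
   pushed first and the intermediate V8HI destination last, so the
   transformation loops below, which walk VEC_DSTS from the highest index
   down to 0, convert to the intermediate type before the final one.  */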
2469 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2470 vec_dest = vect_create_destination_var (scalar_dest,
2471 (cvt_type && modifier == WIDEN)
2472 ? cvt_type : vectype_out);
2473 VEC_quick_push (tree, vec_dsts, vec_dest);
2475 if (multi_step_cvt)
2477 for (i = VEC_length (tree, interm_types) - 1;
2478 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2480 vec_dest = vect_create_destination_var (scalar_dest,
2481 intermediate_type);
2482 VEC_quick_push (tree, vec_dsts, vec_dest);
2486 if (cvt_type)
2487 vec_dest = vect_create_destination_var (scalar_dest,
2488 modifier == WIDEN
2489 ? vectype_out : cvt_type);
2491 if (!slp_node)
2493 if (modifier == NONE)
2494 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2495 else if (modifier == WIDEN)
2497 vec_oprnds0 = VEC_alloc (tree, heap,
2498 (multi_step_cvt
2499 ? vect_pow2 (multi_step_cvt) : 1));
2500 if (op_type == binary_op)
2501 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2503 else
2504 vec_oprnds0 = VEC_alloc (tree, heap,
2505 2 * (multi_step_cvt
2506 ? vect_pow2 (multi_step_cvt) : 1));
2508 else if (code == WIDEN_LSHIFT_EXPR)
2509 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2511 last_oprnd = op0;
2512 prev_stmt_info = NULL;
2513 switch (modifier)
2515 case NONE:
2516 for (j = 0; j < ncopies; j++)
2518 if (j == 0)
2519 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2520 -1);
2521 else
2522 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2524 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2526 /* Arguments are ready. Create the new vector stmt. */
2527 if (code1 == CALL_EXPR)
2529 new_stmt = gimple_build_call (decl1, 1, vop0);
2530 new_temp = make_ssa_name (vec_dest, new_stmt);
2531 gimple_call_set_lhs (new_stmt, new_temp);
2533 else
2535 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2536 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2537 vop0, NULL);
2538 new_temp = make_ssa_name (vec_dest, new_stmt);
2539 gimple_assign_set_lhs (new_stmt, new_temp);
2542 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2543 if (slp_node)
2544 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2545 new_stmt);
2548 if (j == 0)
2549 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2550 else
2551 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2552 prev_stmt_info = vinfo_for_stmt (new_stmt);
2554 break;
2556 case WIDEN:
2557 /* In case the vectorization factor (VF) is bigger than the number
2558 of elements that we can fit in a vectype (nunits), we have to
2559 generate more than one vector stmt, i.e., we need to "unroll"
2560 the vector stmt by a factor VF/nunits. */
2561 for (j = 0; j < ncopies; j++)
2563 /* Handle uses. */
2564 if (j == 0)
2566 if (slp_node)
2568 if (code == WIDEN_LSHIFT_EXPR)
2570 unsigned int k;
2572 vec_oprnd1 = op1;
2573 /* Store vec_oprnd1 for every vector stmt to be created
2574 for SLP_NODE. We check during the analysis that all
2575 the shift arguments are the same. */
2576 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2577 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2579 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2580 slp_node, -1);
2582 else
2583 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2584 &vec_oprnds1, slp_node, -1);
2586 else
2588 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2589 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2590 if (op_type == binary_op)
2592 if (code == WIDEN_LSHIFT_EXPR)
2593 vec_oprnd1 = op1;
2594 else
2595 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2596 NULL);
2597 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2601 else
2603 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2604 VEC_truncate (tree, vec_oprnds0, 0);
2605 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2606 if (op_type == binary_op)
2608 if (code == WIDEN_LSHIFT_EXPR)
2609 vec_oprnd1 = op1;
2610 else
2611 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2612 vec_oprnd1);
2613 VEC_truncate (tree, vec_oprnds1, 0);
2614 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2618 /* Arguments are ready. Create the new vector stmts. */
2619 for (i = multi_step_cvt; i >= 0; i--)
2621 tree this_dest = VEC_index (tree, vec_dsts, i);
2622 enum tree_code c1 = code1, c2 = code2;
2623 if (i == 0 && codecvt2 != ERROR_MARK)
2625 c1 = codecvt1;
2626 c2 = codecvt2;
2628 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2629 &vec_oprnds1,
2630 stmt, this_dest, gsi,
2631 c1, c2, decl1, decl2,
2632 op_type);
2635 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2637 if (cvt_type)
2639 if (codecvt1 == CALL_EXPR)
2641 new_stmt = gimple_build_call (decl1, 1, vop0);
2642 new_temp = make_ssa_name (vec_dest, new_stmt);
2643 gimple_call_set_lhs (new_stmt, new_temp);
2645 else
2647 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2648 new_temp = make_ssa_name (vec_dest, NULL);
2649 new_stmt = gimple_build_assign_with_ops (codecvt1,
2650 new_temp,
2651 vop0, NULL);
2654 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2656 else
2657 new_stmt = SSA_NAME_DEF_STMT (vop0);
2659 if (slp_node)
2660 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2661 new_stmt);
2662 else
2664 if (!prev_stmt_info)
2665 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2666 else
2667 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2668 prev_stmt_info = vinfo_for_stmt (new_stmt);
2673 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2674 break;
2676 case NARROW:
2677 /* In case the vectorization factor (VF) is bigger than the number
2678 of elements that we can fit in a vectype (nunits), we have to
2679 generate more than one vector stmt - i.e - we need to "unroll"
2680 the vector stmt by a factor VF/nunits. */
2681 for (j = 0; j < ncopies; j++)
2683 /* Handle uses. */
2684 if (slp_node)
2685 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2686 slp_node, -1);
2687 else
2689 VEC_truncate (tree, vec_oprnds0, 0);
2690 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2691 vect_pow2 (multi_step_cvt) - 1);
2694 /* Arguments are ready. Create the new vector stmts. */
2695 if (cvt_type)
2696 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2698 if (codecvt1 == CALL_EXPR)
2700 new_stmt = gimple_build_call (decl1, 1, vop0);
2701 new_temp = make_ssa_name (vec_dest, new_stmt);
2702 gimple_call_set_lhs (new_stmt, new_temp);
2704 else
2706 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2707 new_temp = make_ssa_name (vec_dest, NULL);
2708 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2709 vop0, NULL);
2712 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2713 VEC_replace (tree, vec_oprnds0, i, new_temp);
2716 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2717 stmt, vec_dsts, gsi,
2718 slp_node, code1,
2719 &prev_stmt_info);
2722 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2723 break;
2726 VEC_free (tree, heap, vec_oprnds0);
2727 VEC_free (tree, heap, vec_oprnds1);
2728 VEC_free (tree, heap, vec_dsts);
2729 VEC_free (tree, heap, interm_types);
2731 return true;
2735 /* Function vectorizable_assignment.
2737 Check if STMT performs an assignment (copy) that can be vectorized.
2738 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2739 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2740 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2742 static bool
2743 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2744 gimple *vec_stmt, slp_tree slp_node)
2746 tree vec_dest;
2747 tree scalar_dest;
2748 tree op;
2749 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2750 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2751 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2752 tree new_temp;
2753 tree def;
2754 gimple def_stmt;
2755 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2756 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2757 int ncopies;
2758 int i, j;
2759 VEC(tree,heap) *vec_oprnds = NULL;
2760 tree vop;
2761 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2762 gimple new_stmt = NULL;
2763 stmt_vec_info prev_stmt_info = NULL;
2764 enum tree_code code;
2765 tree vectype_in;
2767 /* Multiple types in SLP are handled by creating the appropriate number of
2768 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2769 case of SLP. */
2770 if (slp_node || PURE_SLP_STMT (stmt_info))
2771 ncopies = 1;
2772 else
2773 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2775 gcc_assert (ncopies >= 1);
2777 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2778 return false;
2780 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2781 return false;
2783 /* Is vectorizable assignment? */
2784 if (!is_gimple_assign (stmt))
2785 return false;
2787 scalar_dest = gimple_assign_lhs (stmt);
2788 if (TREE_CODE (scalar_dest) != SSA_NAME)
2789 return false;
2791 code = gimple_assign_rhs_code (stmt);
2792 if (gimple_assign_single_p (stmt)
2793 || code == PAREN_EXPR
2794 || CONVERT_EXPR_CODE_P (code))
2795 op = gimple_assign_rhs1 (stmt);
2796 else
2797 return false;
2799 if (code == VIEW_CONVERT_EXPR)
2800 op = TREE_OPERAND (op, 0);
2802 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2803 &def_stmt, &def, &dt[0], &vectype_in))
2805 if (vect_print_dump_info (REPORT_DETAILS))
2806 fprintf (vect_dump, "use not simple.");
2807 return false;
2810 /* We can handle NOP_EXPR conversions that do not change the number
2811 of elements or the vector size. */
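/* E.g. a conversion between int and unsigned int vectors (V4SI to V4SI)
   changes neither the element count nor the vector size and is vectorized
   here as a plain copy through VIEW_CONVERT_EXPR; short -> int changes the
   element count and is left to vectorizable_conversion instead.  */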
2812 if ((CONVERT_EXPR_CODE_P (code)
2813 || code == VIEW_CONVERT_EXPR)
2814 && (!vectype_in
2815 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2816 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2817 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2818 return false;
2820 /* We do not handle bit-precision changes. */
2821 if ((CONVERT_EXPR_CODE_P (code)
2822 || code == VIEW_CONVERT_EXPR)
2823 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2824 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2825 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2826 || ((TYPE_PRECISION (TREE_TYPE (op))
2827 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2828 /* But a conversion that does not change the bit-pattern is ok. */
2829 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2830 > TYPE_PRECISION (TREE_TYPE (op)))
2831 && TYPE_UNSIGNED (TREE_TYPE (op))))
2833 if (vect_print_dump_info (REPORT_DETAILS))
2834 fprintf (vect_dump, "type conversion to/from bit-precision "
2835 "unsupported.");
2836 return false;
2839 if (!vec_stmt) /* transformation not required. */
2841 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2842 if (vect_print_dump_info (REPORT_DETAILS))
2843 fprintf (vect_dump, "=== vectorizable_assignment ===");
2844 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
2845 return true;
2848 /** Transform. **/
2849 if (vect_print_dump_info (REPORT_DETAILS))
2850 fprintf (vect_dump, "transform assignment.");
2852 /* Handle def. */
2853 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2855 /* Handle use. */
2856 for (j = 0; j < ncopies; j++)
2858 /* Handle uses. */
2859 if (j == 0)
2860 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2861 else
2862 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2864 /* Arguments are ready. Create the new vector stmt. */
2865 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
2867 if (CONVERT_EXPR_CODE_P (code)
2868 || code == VIEW_CONVERT_EXPR)
2869 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2870 new_stmt = gimple_build_assign (vec_dest, vop);
2871 new_temp = make_ssa_name (vec_dest, new_stmt);
2872 gimple_assign_set_lhs (new_stmt, new_temp);
2873 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2874 if (slp_node)
2875 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2878 if (slp_node)
2879 continue;
2881 if (j == 0)
2882 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2883 else
2884 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2886 prev_stmt_info = vinfo_for_stmt (new_stmt);
2889 VEC_free (tree, heap, vec_oprnds);
2890 return true;
2894 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
2895 either as shift by a scalar or by a vector. */
2897 bool
2898 vect_supportable_shift (enum tree_code code, tree scalar_type)
2901 enum machine_mode vec_mode;
2902 optab optab;
2903 int icode;
2904 tree vectype;
2906 vectype = get_vectype_for_scalar_type (scalar_type);
2907 if (!vectype)
2908 return false;
2910 optab = optab_for_tree_code (code, vectype, optab_scalar);
2911 if (!optab
2912 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2914 optab = optab_for_tree_code (code, vectype, optab_vector);
2915 if (!optab
2916 || (optab_handler (optab, TYPE_MODE (vectype))
2917 == CODE_FOR_nothing))
2918 return false;
2921 vec_mode = TYPE_MODE (vectype);
2922 icode = (int) optab_handler (optab, vec_mode);
2923 if (icode == CODE_FOR_nothing)
2924 return false;
2926 return true;
2930 /* Function vectorizable_shift.
2932 Check if STMT performs a shift operation that can be vectorized.
2933 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2934 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2935 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2937 static bool
2938 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
2939 gimple *vec_stmt, slp_tree slp_node)
2941 tree vec_dest;
2942 tree scalar_dest;
2943 tree op0, op1 = NULL;
2944 tree vec_oprnd1 = NULL_TREE;
2945 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2946 tree vectype;
2947 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2948 enum tree_code code;
2949 enum machine_mode vec_mode;
2950 tree new_temp;
2951 optab optab;
2952 int icode;
2953 enum machine_mode optab_op2_mode;
2954 tree def;
2955 gimple def_stmt;
2956 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2957 gimple new_stmt = NULL;
2958 stmt_vec_info prev_stmt_info;
2959 int nunits_in;
2960 int nunits_out;
2961 tree vectype_out;
2962 tree op1_vectype;
2963 int ncopies;
2964 int j, i;
2965 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2966 tree vop0, vop1;
2967 unsigned int k;
2968 bool scalar_shift_arg = true;
2969 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2970 int vf;
2972 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2973 return false;
2975 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2976 return false;
2978 /* Is STMT a vectorizable binary/unary operation? */
2979 if (!is_gimple_assign (stmt))
2980 return false;
2982 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2983 return false;
2985 code = gimple_assign_rhs_code (stmt);
2987 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
2988 || code == RROTATE_EXPR))
2989 return false;
2991 scalar_dest = gimple_assign_lhs (stmt);
2992 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2993 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
2994 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2996 if (vect_print_dump_info (REPORT_DETAILS))
2997 fprintf (vect_dump, "bit-precision shifts not supported.");
2998 return false;
3001 op0 = gimple_assign_rhs1 (stmt);
3002 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3003 &def_stmt, &def, &dt[0], &vectype))
3005 if (vect_print_dump_info (REPORT_DETAILS))
3006 fprintf (vect_dump, "use not simple.");
3007 return false;
3009 /* If op0 is an external or constant def, use a vector type with
3010 the same size as the output vector type. */
3011 if (!vectype)
3012 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3013 if (vec_stmt)
3014 gcc_assert (vectype);
3015 if (!vectype)
3017 if (vect_print_dump_info (REPORT_DETAILS))
3019 fprintf (vect_dump, "no vectype for scalar type ");
3020 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3023 return false;
3026 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3027 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3028 if (nunits_out != nunits_in)
3029 return false;
3031 op1 = gimple_assign_rhs2 (stmt);
3032 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3033 &def, &dt[1], &op1_vectype))
3035 if (vect_print_dump_info (REPORT_DETAILS))
3036 fprintf (vect_dump, "use not simple.");
3037 return false;
3040 if (loop_vinfo)
3041 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3042 else
3043 vf = 1;
3045 /* Multiple types in SLP are handled by creating the appropriate number of
3046 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3047 case of SLP. */
3048 if (slp_node || PURE_SLP_STMT (stmt_info))
3049 ncopies = 1;
3050 else
3051 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3053 gcc_assert (ncopies >= 1);
3055 /* Determine whether the shift amount is a vector or a scalar. If the
3056 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3058 if (dt[1] == vect_internal_def && !slp_node)
3059 scalar_shift_arg = false;
3060 else if (dt[1] == vect_constant_def
3061 || dt[1] == vect_external_def
3062 || dt[1] == vect_internal_def)
3064 /* In SLP, we need to check whether the shift count is the same;
3065 in loops, if it is a constant or invariant, it is always
3066 a scalar shift. */
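/* For illustration only: an SLP group in which every statement computes
   x << 3 keeps SCALAR_SHIFT_ARG set and the vector/scalar shift optab is
   tried first below; a shift amount defined inside the loop (a
   vect_internal_def outside SLP) forces the vector/vector shift optabs.  */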
3067 if (slp_node)
3069 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3070 gimple slpstmt;
3072 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
3073 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3074 scalar_shift_arg = false;
3077 else
3079 if (vect_print_dump_info (REPORT_DETAILS))
3080 fprintf (vect_dump, "operand mode requires invariant argument.");
3081 return false;
3084 /* Vector shifted by vector. */
3085 if (!scalar_shift_arg)
3087 optab = optab_for_tree_code (code, vectype, optab_vector);
3088 if (vect_print_dump_info (REPORT_DETAILS))
3089 fprintf (vect_dump, "vector/vector shift/rotate found.");
3090 if (!op1_vectype)
3091 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3092 if (op1_vectype == NULL_TREE
3093 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3095 if (vect_print_dump_info (REPORT_DETAILS))
3096 fprintf (vect_dump, "unusable type for last operand in"
3097 " vector/vector shift/rotate.");
3098 return false;
3101 /* See if the machine has a vector-shifted-by-scalar insn, and if not,
3102 then see if it has a vector-shifted-by-vector insn. */
3103 else
3105 optab = optab_for_tree_code (code, vectype, optab_scalar);
3106 if (optab
3107 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3109 if (vect_print_dump_info (REPORT_DETAILS))
3110 fprintf (vect_dump, "vector/scalar shift/rotate found.");
3112 else
3114 optab = optab_for_tree_code (code, vectype, optab_vector);
3115 if (optab
3116 && (optab_handler (optab, TYPE_MODE (vectype))
3117 != CODE_FOR_nothing))
3119 scalar_shift_arg = false;
3121 if (vect_print_dump_info (REPORT_DETAILS))
3122 fprintf (vect_dump, "vector/vector shift/rotate found.");
3124 /* Unlike the other binary operators, shifts/rotates have
3125 an int rhs rather than one of the same type as the lhs,
3126 so make sure the scalar is of the right type if we are
3127 dealing with vectors of long long/long/short/char. */
3128 if (dt[1] == vect_constant_def)
3129 op1 = fold_convert (TREE_TYPE (vectype), op1);
3130 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3131 TREE_TYPE (op1)))
3133 if (slp_node
3134 && TYPE_MODE (TREE_TYPE (vectype))
3135 != TYPE_MODE (TREE_TYPE (op1)))
3137 if (vect_print_dump_info (REPORT_DETAILS))
3138 fprintf (vect_dump, "unusable type for last operand in"
3139 " vector/vector shift/rotate.");
3140 return false;
3142 if (vec_stmt && !slp_node)
3144 op1 = fold_convert (TREE_TYPE (vectype), op1);
3145 op1 = vect_init_vector (stmt, op1,
3146 TREE_TYPE (vectype), NULL);
3153 /* Supportable by target? */
3154 if (!optab)
3156 if (vect_print_dump_info (REPORT_DETAILS))
3157 fprintf (vect_dump, "no optab.");
3158 return false;
3160 vec_mode = TYPE_MODE (vectype);
3161 icode = (int) optab_handler (optab, vec_mode);
3162 if (icode == CODE_FOR_nothing)
3164 if (vect_print_dump_info (REPORT_DETAILS))
3165 fprintf (vect_dump, "op not supported by target.");
3166 /* Check only during analysis. */
3167 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3168 || (vf < vect_min_worthwhile_factor (code)
3169 && !vec_stmt))
3170 return false;
3171 if (vect_print_dump_info (REPORT_DETAILS))
3172 fprintf (vect_dump, "proceeding using word mode.");
3175 /* Worthwhile without SIMD support? Check only during analysis. */
3176 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3177 && vf < vect_min_worthwhile_factor (code)
3178 && !vec_stmt)
3180 if (vect_print_dump_info (REPORT_DETAILS))
3181 fprintf (vect_dump, "not worthwhile without SIMD support.");
3182 return false;
3185 if (!vec_stmt) /* transformation not required. */
3187 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3188 if (vect_print_dump_info (REPORT_DETAILS))
3189 fprintf (vect_dump, "=== vectorizable_shift ===");
3190 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3191 return true;
3194 /** Transform. **/
3196 if (vect_print_dump_info (REPORT_DETAILS))
3197 fprintf (vect_dump, "transform binary/unary operation.");
3199 /* Handle def. */
3200 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3202 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3203 created in the previous stages of the recursion, so no allocation is
3204 needed, except for the case of shift with scalar shift argument. In that
3205 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3206 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3207 In case of loop-based vectorization we allocate VECs of size 1. We
3208 allocate VEC_OPRNDS1 only in case of binary operation. */
3209 if (!slp_node)
3211 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3212 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3214 else if (scalar_shift_arg)
3215 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3217 prev_stmt_info = NULL;
3218 for (j = 0; j < ncopies; j++)
3220 /* Handle uses. */
3221 if (j == 0)
3223 if (scalar_shift_arg)
3225 /* Vector shl and shr insn patterns can be defined with scalar
3226 operand 2 (shift operand). In this case, use constant or loop
3227 invariant op1 directly, without extending it to vector mode
3228 first. */
3229 optab_op2_mode = insn_data[icode].operand[2].mode;
3230 if (!VECTOR_MODE_P (optab_op2_mode))
3232 if (vect_print_dump_info (REPORT_DETAILS))
3233 fprintf (vect_dump, "operand 1 using scalar mode.");
3234 vec_oprnd1 = op1;
3235 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3236 if (slp_node)
3238 /* Store vec_oprnd1 for every vector stmt to be created
3239 for SLP_NODE. We check during the analysis that all
3240 the shift arguments are the same.
3241 TODO: Allow different constants for different vector
3242 stmts generated for an SLP instance. */
3243 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3244 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3249 /* vec_oprnd1 is available if operand 1 should be of a scalar type
3250 (a special case for certain kinds of vector shifts); otherwise,
3251 operand 1 should be of a vector type (the usual case). */
3252 if (vec_oprnd1)
3253 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3254 slp_node, -1);
3255 else
3256 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3257 slp_node, -1);
3259 else
3260 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3262 /* Arguments are ready. Create the new vector stmt. */
3263 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3265 vop1 = VEC_index (tree, vec_oprnds1, i);
3266 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3267 new_temp = make_ssa_name (vec_dest, new_stmt);
3268 gimple_assign_set_lhs (new_stmt, new_temp);
3269 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3270 if (slp_node)
3271 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3274 if (slp_node)
3275 continue;
3277 if (j == 0)
3278 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3279 else
3280 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3281 prev_stmt_info = vinfo_for_stmt (new_stmt);
3284 VEC_free (tree, heap, vec_oprnds0);
3285 VEC_free (tree, heap, vec_oprnds1);
3287 return true;
3291 static tree permute_vec_elements (tree, tree, tree, gimple,
3292 gimple_stmt_iterator *);
3295 /* Function vectorizable_operation.
3297 Check if STMT performs a binary, unary or ternary operation that can
3298 be vectorized.
3299 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3300 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3301 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3303 static bool
3304 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3305 gimple *vec_stmt, slp_tree slp_node)
3307 tree vec_dest, vec_dest2 = NULL_TREE;
3308 tree vec_dest3 = NULL_TREE, vec_dest4 = NULL_TREE;
3309 tree scalar_dest;
3310 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3311 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3312 tree vectype, wide_vectype = NULL_TREE;
3313 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3314 enum tree_code code;
3315 enum machine_mode vec_mode;
3316 tree new_temp;
3317 int op_type;
3318 optab optab, optab2 = NULL;
3319 int icode;
3320 tree def;
3321 gimple def_stmt;
3322 enum vect_def_type dt[3]
3323 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3324 gimple new_stmt = NULL;
3325 stmt_vec_info prev_stmt_info;
3326 int nunits_in;
3327 int nunits_out;
3328 tree vectype_out;
3329 int ncopies;
3330 int j, i;
3331 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
3332 tree vop0, vop1, vop2;
3333 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3334 int vf;
3335 unsigned char *sel = NULL;
3336 tree decl1 = NULL_TREE, decl2 = NULL_TREE, perm_mask = NULL_TREE;
3338 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3339 return false;
3341 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3342 return false;
3344 /* Is STMT a vectorizable binary/unary/ternary operation? */
3345 if (!is_gimple_assign (stmt))
3346 return false;
3348 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3349 return false;
3351 code = gimple_assign_rhs_code (stmt);
3353 /* For pointer addition, we should use the normal plus for
3354 the vector addition. */
3355 if (code == POINTER_PLUS_EXPR)
3356 code = PLUS_EXPR;
3358 /* Support only unary, binary and ternary operations. */
3359 op_type = TREE_CODE_LENGTH (code);
3360 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3362 if (vect_print_dump_info (REPORT_DETAILS))
3363 fprintf (vect_dump, "num. args = %d (not unary/binary/ternary op).",
3364 op_type);
3365 return false;
3368 scalar_dest = gimple_assign_lhs (stmt);
3369 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3371 /* Most operations cannot handle bit-precision types without extra
3372 truncations. */
3373 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3374 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3375 /* Exceptions are the bitwise binary operations. */
3376 && code != BIT_IOR_EXPR
3377 && code != BIT_XOR_EXPR
3378 && code != BIT_AND_EXPR)
3380 if (vect_print_dump_info (REPORT_DETAILS))
3381 fprintf (vect_dump, "bit-precision arithmetic not supported.");
3382 return false;
3385 op0 = gimple_assign_rhs1 (stmt);
3386 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3387 &def_stmt, &def, &dt[0], &vectype))
3389 if (vect_print_dump_info (REPORT_DETAILS))
3390 fprintf (vect_dump, "use not simple.");
3391 return false;
3393 /* If op0 is an external or constant def, use a vector type with
3394 the same size as the output vector type. */
3395 if (!vectype)
3396 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3397 if (vec_stmt)
3398 gcc_assert (vectype);
3399 if (!vectype)
3401 if (vect_print_dump_info (REPORT_DETAILS))
3403 fprintf (vect_dump, "no vectype for scalar type ");
3404 print_generic_expr (vect_dump, TREE_TYPE (op0), TDF_SLIM);
3407 return false;
3410 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3411 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3412 if (nunits_out != nunits_in)
3413 return false;
3415 if (op_type == binary_op || op_type == ternary_op)
3417 op1 = gimple_assign_rhs2 (stmt);
3418 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3419 &def, &dt[1]))
3421 if (vect_print_dump_info (REPORT_DETAILS))
3422 fprintf (vect_dump, "use not simple.");
3423 return false;
3426 if (op_type == ternary_op)
3428 op2 = gimple_assign_rhs3 (stmt);
3429 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3430 &def, &dt[2]))
3432 if (vect_print_dump_info (REPORT_DETAILS))
3433 fprintf (vect_dump, "use not simple.");
3434 return false;
3438 if (loop_vinfo)
3439 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3440 else
3441 vf = 1;
3443 /* Multiple types in SLP are handled by creating the appropriate number of
3444 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3445 case of SLP. */
3446 if (slp_node || PURE_SLP_STMT (stmt_info))
3447 ncopies = 1;
3448 else
3449 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3451 gcc_assert (ncopies >= 1);
3453 /* Shifts are handled in vectorizable_shift (). */
3454 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3455 || code == RROTATE_EXPR)
3456 return false;
3458 optab = optab_for_tree_code (code, vectype, optab_default);
3460 /* Supportable by target? */
3461 if (!optab && code != MULT_HIGHPART_EXPR)
3463 if (vect_print_dump_info (REPORT_DETAILS))
3464 fprintf (vect_dump, "no optab.");
3465 return false;
3467 vec_mode = TYPE_MODE (vectype);
3468 icode = optab ? (int) optab_handler (optab, vec_mode) : CODE_FOR_nothing;
3470 if (icode == CODE_FOR_nothing
3471 && code == MULT_HIGHPART_EXPR
3472 && VECTOR_MODE_P (vec_mode)
3473 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
3475 /* If MULT_HIGHPART_EXPR isn't supported by the backend, see
3476 if we can emit VEC_WIDEN_MULT_{LO,HI}_EXPR followed by VEC_PERM_EXPR
3477 or builtin_mul_widen_{even,odd} followed by VEC_PERM_EXPR. */
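/* For illustration only, for a V8HI highpart multiply on a little-endian
   target using the VEC_WIDEN_MULT_{LO,HI}_EXPR path: the two V4SI product
   vectors are VIEW_CONVERTed back to V8HI and combined with a
   VEC_PERM_EXPR whose selector {1,3,5,7,9,11,13,15} picks the high
   halfword of each 32-bit product.  */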
3478 unsigned int prec = TYPE_PRECISION (TREE_TYPE (scalar_dest));
3479 unsigned int unsignedp = TYPE_UNSIGNED (TREE_TYPE (scalar_dest));
3480 tree wide_type
3481 = build_nonstandard_integer_type (prec * 2, unsignedp);
3482 wide_vectype
3483 = get_same_sized_vectype (wide_type, vectype);
3485 sel = XALLOCAVEC (unsigned char, nunits_in);
3486 if (VECTOR_MODE_P (TYPE_MODE (wide_vectype))
3487 && GET_MODE_SIZE (TYPE_MODE (wide_vectype))
3488 == GET_MODE_SIZE (vec_mode))
3490 if (targetm.vectorize.builtin_mul_widen_even
3491 && (decl1 = targetm.vectorize.builtin_mul_widen_even (vectype))
3492 && targetm.vectorize.builtin_mul_widen_odd
3493 && (decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype))
3494 && TYPE_MODE (TREE_TYPE (TREE_TYPE (decl1)))
3495 == TYPE_MODE (wide_vectype))
3497 for (i = 0; i < nunits_in; i++)
3498 sel[i] = !BYTES_BIG_ENDIAN + (i & ~1)
3499 + ((i & 1) ? nunits_in : 0);
3500 if (can_vec_perm_p (vec_mode, false, sel))
3501 icode = 0;
3503 if (icode == CODE_FOR_nothing)
3505 decl1 = NULL_TREE;
3506 decl2 = NULL_TREE;
3507 optab = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR,
3508 vectype, optab_default);
3509 optab2 = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR,
3510 vectype, optab_default);
3511 if (optab != NULL
3512 && optab2 != NULL
3513 && optab_handler (optab, vec_mode) != CODE_FOR_nothing
3514 && optab_handler (optab2, vec_mode) != CODE_FOR_nothing
3515 && insn_data[optab_handler (optab, vec_mode)].operand[0].mode
3516 == TYPE_MODE (wide_vectype)
3517 && insn_data[optab_handler (optab2,
3518 vec_mode)].operand[0].mode
3519 == TYPE_MODE (wide_vectype))
3521 for (i = 0; i < nunits_in; i++)
3522 sel[i] = !BYTES_BIG_ENDIAN + 2 * i;
3523 if (can_vec_perm_p (vec_mode, false, sel))
3524 icode = optab_handler (optab, vec_mode);
3528 if (icode == CODE_FOR_nothing)
3530 if (optab_for_tree_code (code, vectype, optab_default) == NULL)
3532 if (vect_print_dump_info (REPORT_DETAILS))
3533 fprintf (vect_dump, "no optab.");
3534 return false;
3536 wide_vectype = NULL_TREE;
3537 optab2 = NULL;
3541 if (icode == CODE_FOR_nothing)
3543 if (vect_print_dump_info (REPORT_DETAILS))
3544 fprintf (vect_dump, "op not supported by target.");
3545 /* Check only during analysis. */
3546 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3547 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
3548 return false;
3549 if (vect_print_dump_info (REPORT_DETAILS))
3550 fprintf (vect_dump, "proceeding using word mode.");
3553 /* Worthwhile without SIMD support? Check only during analysis. */
3554 if (!VECTOR_MODE_P (vec_mode)
3555 && !vec_stmt
3556 && vf < vect_min_worthwhile_factor (code))
3558 if (vect_print_dump_info (REPORT_DETAILS))
3559 fprintf (vect_dump, "not worthwhile without SIMD support.");
3560 return false;
3563 if (!vec_stmt) /* transformation not required. */
3565 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3566 if (vect_print_dump_info (REPORT_DETAILS))
3567 fprintf (vect_dump, "=== vectorizable_operation ===");
3568 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3569 return true;
3572 /** Transform. **/
3574 if (vect_print_dump_info (REPORT_DETAILS))
3575 fprintf (vect_dump, "transform binary/unary operation.");
3577 /* Handle def. */
3578 if (wide_vectype)
3580 vec_dest = vect_create_destination_var (scalar_dest, wide_vectype);
3581 vec_dest2 = vect_create_destination_var (scalar_dest, wide_vectype);
3582 vec_dest3 = vect_create_destination_var (scalar_dest, vectype);
3583 vec_dest4 = vect_create_destination_var (scalar_dest, vectype);
3584 perm_mask = vect_gen_perm_mask (vectype, sel);
3586 else
3587 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3589 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3590 created in the previous stages of the recursion, so no allocation is
3591 needed. In case of loop-based vectorization we allocate VECs of size 1:
3592 VEC_OPRNDS1 only for a binary or ternary operation, and VEC_OPRNDS2
3593 only for a ternary one. */
3596 if (!slp_node)
3598 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3599 if (op_type == binary_op || op_type == ternary_op)
3600 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3601 if (op_type == ternary_op)
3602 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3605 /* In case the vectorization factor (VF) is bigger than the number
3606 of elements that we can fit in a vectype (nunits), we have to generate
3607 more than one vector stmt, i.e., we need to "unroll" the
3608 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3609 from one copy of the vector stmt to the next, in the field
3610 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3611 stages to find the correct vector defs to be used when vectorizing
3612 stmts that use the defs of the current stmt. The example below
3613 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3614 we need to create 4 vectorized stmts):
3616 before vectorization:
3617 RELATED_STMT VEC_STMT
3618 S1: x = memref - -
3619 S2: z = x + 1 - -
3621 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3622 there):
3623 RELATED_STMT VEC_STMT
3624 VS1_0: vx0 = memref0 VS1_1 -
3625 VS1_1: vx1 = memref1 VS1_2 -
3626 VS1_2: vx2 = memref2 VS1_3 -
3627 VS1_3: vx3 = memref3 - -
3628 S1: x = load - VS1_0
3629 S2: z = x + 1 - -
3631 step2: vectorize stmt S2 (done here):
3632 To vectorize stmt S2 we first need to find the relevant vector
3633 def for the first operand 'x'. This is, as usual, obtained from
3634 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3635 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3636 relevant vector def 'vx0'. Having found 'vx0' we can generate
3637 the vector stmt VS2_0, and as usual, record it in the
3638 STMT_VINFO_VEC_STMT of stmt S2.
3639 When creating the second copy (VS2_1), we obtain the relevant vector
3640 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3641 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3642 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3643 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3644 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3645 chain of stmts and pointers:
3646 RELATED_STMT VEC_STMT
3647 VS1_0: vx0 = memref0 VS1_1 -
3648 VS1_1: vx1 = memref1 VS1_2 -
3649 VS1_2: vx2 = memref2 VS1_3 -
3650 VS1_3: vx3 = memref3 - -
3651 S1: x = load - VS1_0
3652 VS2_0: vz0 = vx0 + v1 VS2_1 -
3653 VS2_1: vz1 = vx1 + v1 VS2_2 -
3654 VS2_2: vz2 = vx2 + v1 VS2_3 -
3655 VS2_3: vz3 = vx3 + v1 - -
3656 S2: z = x + 1 - VS2_0 */
3658 prev_stmt_info = NULL;
3659 for (j = 0; j < ncopies; j++)
3661 /* Handle uses. */
3662 if (j == 0)
3664 if (op_type == binary_op || op_type == ternary_op)
3665 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3666 slp_node, -1);
3667 else
3668 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3669 slp_node, -1);
3670 if (op_type == ternary_op)
3672 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3673 VEC_quick_push (tree, vec_oprnds2,
3674 vect_get_vec_def_for_operand (op2, stmt, NULL));
3677 else
3679 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3680 if (op_type == ternary_op)
3682 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
3683 VEC_quick_push (tree, vec_oprnds2,
3684 vect_get_vec_def_for_stmt_copy (dt[2],
3685 vec_oprnd));
3689 /* Arguments are ready. Create the new vector stmt. */
3690 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3692 vop1 = ((op_type == binary_op || op_type == ternary_op)
3693 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
3694 vop2 = ((op_type == ternary_op)
3695 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
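 /* When WIDE_VECTYPE is set there is no direct support for
    MULT_HIGHPART_EXPR, so the product is computed in two widened halves,
    either through the target builtins DECL1/DECL2 or through
    VEC_WIDEN_MULT_HI/LO_EXPR; each wide half is view-converted back to
    VECTYPE and PERM_MASK then gathers the high-order parts of the
    widened products into the result.  */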
3696 if (wide_vectype)
3698 tree new_temp2, vce;
3700 gcc_assert (code == MULT_HIGHPART_EXPR);
3701 if (decl1 != NULL_TREE)
3703 new_stmt = gimple_build_call (decl1, 2, vop0, vop1);
3704 new_temp = make_ssa_name (vec_dest, new_stmt);
3705 gimple_call_set_lhs (new_stmt, new_temp);
3706 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3708 new_stmt = gimple_build_call (decl2, 2, vop0, vop1);
3709 new_temp2 = make_ssa_name (vec_dest2, new_stmt);
3710 gimple_call_set_lhs (new_stmt, new_temp2);
3711 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3713 else
3715 new_temp = make_ssa_name (vec_dest, NULL);
3716 new_stmt
3717 = gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
3718 ? VEC_WIDEN_MULT_HI_EXPR
3719 : VEC_WIDEN_MULT_LO_EXPR,
3720 new_temp, vop0, vop1);
3721 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3723 new_temp2 = make_ssa_name (vec_dest2, NULL);
3724 new_stmt
3725 = gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
3726 ? VEC_WIDEN_MULT_LO_EXPR
3727 : VEC_WIDEN_MULT_HI_EXPR,
3728 new_temp2, vop0, vop1);
3729 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3732 vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
3733 new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
3734 vec_dest3, vce,
3735 NULL_TREE);
3736 new_temp = make_ssa_name (vec_dest3, new_stmt);
3737 gimple_assign_set_lhs (new_stmt, new_temp);
3738 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3740 vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp2);
3741 new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
3742 vec_dest4, vce,
3743 NULL_TREE);
3744 new_temp2 = make_ssa_name (vec_dest4, new_stmt);
3745 gimple_assign_set_lhs (new_stmt, new_temp2);
3746 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3748 new_temp = permute_vec_elements (new_temp, new_temp2,
3749 perm_mask, stmt, gsi);
3750 new_stmt = SSA_NAME_DEF_STMT (new_temp);
3751 if (slp_node)
3752 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
3753 new_stmt);
3754 continue;
3756 new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
3757 vop0, vop1, vop2);
3758 new_temp = make_ssa_name (vec_dest, new_stmt);
3759 gimple_assign_set_lhs (new_stmt, new_temp);
3760 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3761 if (slp_node)
3762 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3765 if (slp_node)
3766 continue;
3768 if (j == 0)
3769 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3770 else
3771 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3772 prev_stmt_info = vinfo_for_stmt (new_stmt);
3775 VEC_free (tree, heap, vec_oprnds0);
3776 if (vec_oprnds1)
3777 VEC_free (tree, heap, vec_oprnds1);
3778 if (vec_oprnds2)
3779 VEC_free (tree, heap, vec_oprnds2);
3781 return true;
3785 /* Function vectorizable_store.
3787 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3788 can be vectorized.
3789 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3790 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3791 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3793 static bool
3794 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3795 slp_tree slp_node)
3797 tree scalar_dest;
3798 tree data_ref;
3799 tree op;
3800 tree vec_oprnd = NULL_TREE;
3801 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3802 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3803 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3804 tree elem_type;
3805 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3806 struct loop *loop = NULL;
3807 enum machine_mode vec_mode;
3808 tree dummy;
3809 enum dr_alignment_support alignment_support_scheme;
3810 tree def;
3811 gimple def_stmt;
3812 enum vect_def_type dt;
3813 stmt_vec_info prev_stmt_info = NULL;
3814 tree dataref_ptr = NULL_TREE;
3815 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3816 int ncopies;
3817 int j;
3818 gimple next_stmt, first_stmt = NULL;
3819 bool grouped_store = false;
3820 bool store_lanes_p = false;
3821 unsigned int group_size, i;
3822 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3823 bool inv_p;
3824 VEC(tree,heap) *vec_oprnds = NULL;
3825 bool slp = (slp_node != NULL);
3826 unsigned int vec_num;
3827 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3828 tree aggr_type;
3830 if (loop_vinfo)
3831 loop = LOOP_VINFO_LOOP (loop_vinfo);
3833 /* Multiple types in SLP are handled by creating the appropriate number of
3834 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3835 case of SLP. */
3836 if (slp || PURE_SLP_STMT (stmt_info))
3837 ncopies = 1;
3838 else
3839 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3841 gcc_assert (ncopies >= 1);
3843 /* FORNOW. This restriction should be relaxed. */
3844 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3846 if (vect_print_dump_info (REPORT_DETAILS))
3847 fprintf (vect_dump, "multiple types in nested loop.");
3848 return false;
3851 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3852 return false;
3854 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3855 return false;
3857 /* Is vectorizable store? */
3859 if (!is_gimple_assign (stmt))
3860 return false;
3862 scalar_dest = gimple_assign_lhs (stmt);
3863 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3864 && is_pattern_stmt_p (stmt_info))
3865 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3866 if (TREE_CODE (scalar_dest) != ARRAY_REF
3867 && TREE_CODE (scalar_dest) != INDIRECT_REF
3868 && TREE_CODE (scalar_dest) != COMPONENT_REF
3869 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3870 && TREE_CODE (scalar_dest) != REALPART_EXPR
3871 && TREE_CODE (scalar_dest) != MEM_REF)
3872 return false;
3874 gcc_assert (gimple_assign_single_p (stmt));
3875 op = gimple_assign_rhs1 (stmt);
3876 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3877 &def, &dt))
3879 if (vect_print_dump_info (REPORT_DETAILS))
3880 fprintf (vect_dump, "use not simple.");
3881 return false;
3884 elem_type = TREE_TYPE (vectype);
3885 vec_mode = TYPE_MODE (vectype);
3887 /* FORNOW. In some cases can vectorize even if data-type not supported
3888 (e.g. - array initialization with 0). */
3889 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3890 return false;
3892 if (!STMT_VINFO_DATA_REF (stmt_info))
3893 return false;
3895 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3896 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3897 size_zero_node) < 0)
3899 if (vect_print_dump_info (REPORT_DETAILS))
3900 fprintf (vect_dump, "negative step for store.");
3901 return false;
3904 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
3906 grouped_store = true;
3907 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3908 if (!slp && !PURE_SLP_STMT (stmt_info))
3910 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3911 if (vect_store_lanes_supported (vectype, group_size))
3912 store_lanes_p = true;
3913 else if (!vect_grouped_store_supported (vectype, group_size))
3914 return false;
3917 if (first_stmt == stmt)
3919 /* STMT is the leader of the group. Check the operands of all the
3920 stmts of the group. */
3921 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3922 while (next_stmt)
3924 gcc_assert (gimple_assign_single_p (next_stmt));
3925 op = gimple_assign_rhs1 (next_stmt);
3926 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3927 &def_stmt, &def, &dt))
3929 if (vect_print_dump_info (REPORT_DETAILS))
3930 fprintf (vect_dump, "use not simple.");
3931 return false;
3933 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3938 if (!vec_stmt) /* transformation not required. */
3940 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3941 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt, NULL);
3942 return true;
3945 /** Transform. **/
3947 if (grouped_store)
3949 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3950 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3952 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3954 /* FORNOW */
3955 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3957 /* We vectorize all the stmts of the interleaving group when we
3958 reach the last stmt in the group. */
3959 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3960 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3961 && !slp)
3963 *vec_stmt = NULL;
3964 return true;
3967 if (slp)
3969 grouped_store = false;
3970 /* VEC_NUM is the number of vect stmts to be created for this
3971 group. */
3972 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3973 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3974 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3975 op = gimple_assign_rhs1 (first_stmt);
3977 else
3978 /* VEC_NUM is the number of vect stmts to be created for this
3979 group. */
3980 vec_num = group_size;
3982 else
3984 first_stmt = stmt;
3985 first_dr = dr;
3986 group_size = vec_num = 1;
3989 if (vect_print_dump_info (REPORT_DETAILS))
3990 fprintf (vect_dump, "transform store. ncopies = %d", ncopies);
3992 dr_chain = VEC_alloc (tree, heap, group_size);
3993 oprnds = VEC_alloc (tree, heap, group_size);
3995 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3996 gcc_assert (alignment_support_scheme);
3997 /* Targets with store-lane instructions must not require explicit
3998 realignment. */
3999 gcc_assert (!store_lanes_p
4000 || alignment_support_scheme == dr_aligned
4001 || alignment_support_scheme == dr_unaligned_supported);
4003 if (store_lanes_p)
4004 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4005 else
4006 aggr_type = vectype;
4008 /* In case the vectorization factor (VF) is bigger than the number
4009 of elements that we can fit in a vectype (nunits), we have to generate
4010 more than one vector stmt - i.e - we need to "unroll" the
4011 vector stmt by a factor VF/nunits. For more details see documentation in
4012 vect_get_vec_def_for_copy_stmt. */
4014 /* In case of interleaving (non-unit grouped access):
4016 S1: &base + 2 = x2
4017 S2: &base = x0
4018 S3: &base + 1 = x1
4019 S4: &base + 3 = x3
4021 We create vectorized stores starting from the base address (the access of
4022 the first stmt in the chain, S2 in the above example), when the last store
4023 stmt of the chain (S4) is reached:
4025 VS1: &base = vx2
4026 VS2: &base + vec_size*1 = vx0
4027 VS3: &base + vec_size*2 = vx1
4028 VS4: &base + vec_size*3 = vx3
4030 Then permutation statements are generated:
4032 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4033 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
4036 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4037 (the order of the data-refs in the output of vect_permute_store_chain
4038 corresponds to the order of scalar stmts in the interleaving chain - see
4039 the documentation of vect_permute_store_chain()).
4041 In case of both multiple types and interleaving, above vector stores and
4042 permutation stmts are created for every copy. The result vector stmts are
4043 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4044 STMT_VINFO_RELATED_STMT for the next copies.
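 As a hypothetical illustration of such a chain: a scalar loop such as

     for (i = 0; i < n; i++)
       {
         a[4*i + 2] = x2;
         a[4*i]     = x0;
         a[4*i + 1] = x1;
         a[4*i + 3] = x3;
       }

 forms one interleaving chain of GROUP_SIZE 4, and all of its stores are
 emitted only when the last scalar store of the chain (S4 above) is
 reached.  */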
4047 prev_stmt_info = NULL;
4048 for (j = 0; j < ncopies; j++)
4050 gimple new_stmt;
4051 gimple ptr_incr;
4053 if (j == 0)
4055 if (slp)
4057 /* Get vectorized arguments for SLP_NODE. */
4058 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4059 NULL, slp_node, -1);
4061 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
4063 else
4065 /* For interleaved stores we collect vectorized defs for all the
4066 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4067 used as an input to vect_permute_store_chain(), and OPRNDS as
4068 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4070 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4071 OPRNDS are of size 1. */
4072 next_stmt = first_stmt;
4073 for (i = 0; i < group_size; i++)
4075 /* Since gaps are not supported for interleaved stores,
4076 GROUP_SIZE is the exact number of stmts in the chain.
4077 Therefore, NEXT_STMT can't be NULL. In case that
4078 there is no interleaving, GROUP_SIZE is 1, and only one
4079 iteration of the loop will be executed. */
4080 gcc_assert (next_stmt
4081 && gimple_assign_single_p (next_stmt));
4082 op = gimple_assign_rhs1 (next_stmt);
4084 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4085 NULL);
4086 VEC_quick_push (tree, dr_chain, vec_oprnd);
4087 VEC_quick_push (tree, oprnds, vec_oprnd);
4088 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4092 /* We should have caught mismatched types earlier. */
4093 gcc_assert (useless_type_conversion_p (vectype,
4094 TREE_TYPE (vec_oprnd)));
4095 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
4096 NULL_TREE, &dummy, gsi,
4097 &ptr_incr, false, &inv_p);
4098 gcc_assert (bb_vinfo || !inv_p);
4100 else
4102 /* For interleaved stores we created vectorized defs for all the
4103 defs stored in OPRNDS in the previous iteration (previous copy).
4104 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4105 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4106 next copy.
4107 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4108 OPRNDS are of size 1. */
4109 for (i = 0; i < group_size; i++)
4111 op = VEC_index (tree, oprnds, i);
4112 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4113 &def, &dt);
4114 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4115 VEC_replace (tree, dr_chain, i, vec_oprnd);
4116 VEC_replace (tree, oprnds, i, vec_oprnd);
4118 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4119 TYPE_SIZE_UNIT (aggr_type));
4122 if (store_lanes_p)
4124 tree vec_array;
4126 /* Combine all the vectors into an array. */
4127 vec_array = create_vector_array (vectype, vec_num);
4128 for (i = 0; i < vec_num; i++)
4130 vec_oprnd = VEC_index (tree, dr_chain, i);
4131 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4134 /* Emit:
4135 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4136 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4137 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4138 gimple_call_set_lhs (new_stmt, data_ref);
4139 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4141 else
4143 new_stmt = NULL;
4144 if (grouped_store)
4146 result_chain = VEC_alloc (tree, heap, group_size);
4147 /* Permute. */
4148 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4149 &result_chain);
4152 next_stmt = first_stmt;
4153 for (i = 0; i < vec_num; i++)
4155 unsigned align, misalign;
4157 if (i > 0)
4158 /* Bump the vector pointer. */
4159 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4160 stmt, NULL_TREE);
4162 if (slp)
4163 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4164 else if (grouped_store)
4165 /* For grouped stores vectorized defs are interleaved in
4166 vect_permute_store_chain(). */
4167 vec_oprnd = VEC_index (tree, result_chain, i);
4169 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4170 build_int_cst (reference_alias_ptr_type
4171 (DR_REF (first_dr)), 0));
4172 align = TYPE_ALIGN_UNIT (vectype);
4173 if (aligned_access_p (first_dr))
4174 misalign = 0;
4175 else if (DR_MISALIGNMENT (first_dr) == -1)
4177 TREE_TYPE (data_ref)
4178 = build_aligned_type (TREE_TYPE (data_ref),
4179 TYPE_ALIGN (elem_type));
4180 align = TYPE_ALIGN_UNIT (elem_type);
4181 misalign = 0;
4183 else
4185 TREE_TYPE (data_ref)
4186 = build_aligned_type (TREE_TYPE (data_ref),
4187 TYPE_ALIGN (elem_type));
4188 misalign = DR_MISALIGNMENT (first_dr);
4190 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4191 misalign);
4193 /* Arguments are ready. Create the new vector stmt. */
4194 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4195 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4197 if (slp)
4198 continue;
4200 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4201 if (!next_stmt)
4202 break;
4205 if (!slp)
4207 if (j == 0)
4208 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4209 else
4210 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4211 prev_stmt_info = vinfo_for_stmt (new_stmt);
4215 VEC_free (tree, heap, dr_chain);
4216 VEC_free (tree, heap, oprnds);
4217 if (result_chain)
4218 VEC_free (tree, heap, result_chain);
4219 if (vec_oprnds)
4220 VEC_free (tree, heap, vec_oprnds);
4222 return true;
4225 /* Given a vector type VECTYPE and permutation SEL returns
4226 the VECTOR_CST mask that implements the permutation of the
4227 vector elements. If that is impossible to do, returns NULL. */
4229 tree
4230 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4232 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
4233 int i, nunits;
4235 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4237 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4238 return NULL;
4240 mask_elt_type = lang_hooks.types.type_for_mode
4241 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
4242 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4244 mask_elts = XALLOCAVEC (tree, nunits);
4245 for (i = nunits - 1; i >= 0; i--)
4246 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4247 mask_vec = build_vector (mask_type, mask_elts);
4249 return mask_vec;
4252 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4253 reversal of the vector elements. If that is impossible to do,
4254 returns NULL. */
4256 static tree
4257 perm_mask_for_reverse (tree vectype)
4259 int i, nunits;
4260 unsigned char *sel;
4262 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4263 sel = XALLOCAVEC (unsigned char, nunits);
4265 for (i = 0; i < nunits; ++i)
4266 sel[i] = nunits - 1 - i;
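 /* For illustration: with nunits == 4 the selector built above is
    {3, 2, 1, 0}, so the resulting mask makes VEC_PERM_EXPR reverse the
    element order.  */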
4268 return vect_gen_perm_mask (vectype, sel);
4271 /* Given vector variables X and Y that were generated for the scalar
4272 STMT, generate instructions to permute the vector elements of X and Y
4273 using permutation mask MASK_VEC, insert them at *GSI and return the
4274 permuted vector variable. */
4276 static tree
4277 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4278 gimple_stmt_iterator *gsi)
4280 tree vectype = TREE_TYPE (x);
4281 tree perm_dest, data_ref;
4282 gimple perm_stmt;
4284 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4285 data_ref = make_ssa_name (perm_dest, NULL);
4287 /* Generate the permute statement. */
4288 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
4289 x, y, mask_vec);
4290 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4292 return data_ref;
4295 /* vectorizable_load.
4297 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4298 can be vectorized.
4299 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4300 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4301 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4303 static bool
4304 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4305 slp_tree slp_node, slp_instance slp_node_instance)
4307 tree scalar_dest;
4308 tree vec_dest = NULL;
4309 tree data_ref = NULL;
4310 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4311 stmt_vec_info prev_stmt_info;
4312 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4313 struct loop *loop = NULL;
4314 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4315 bool nested_in_vect_loop = false;
4316 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4317 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4318 tree elem_type;
4319 tree new_temp;
4320 enum machine_mode mode;
4321 gimple new_stmt = NULL;
4322 tree dummy;
4323 enum dr_alignment_support alignment_support_scheme;
4324 tree dataref_ptr = NULL_TREE;
4325 gimple ptr_incr;
4326 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4327 int ncopies;
4328 int i, j, group_size;
4329 tree msq = NULL_TREE, lsq;
4330 tree offset = NULL_TREE;
4331 tree realignment_token = NULL_TREE;
4332 gimple phi = NULL;
4333 VEC(tree,heap) *dr_chain = NULL;
4334 bool grouped_load = false;
4335 bool load_lanes_p = false;
4336 gimple first_stmt;
4337 bool inv_p;
4338 bool negative = false;
4339 bool compute_in_loop = false;
4340 struct loop *at_loop;
4341 int vec_num;
4342 bool slp = (slp_node != NULL);
4343 bool slp_perm = false;
4344 enum tree_code code;
4345 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4346 int vf;
4347 tree aggr_type;
4348 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4349 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4350 tree stride_base, stride_step;
4351 int gather_scale = 1;
4352 enum vect_def_type gather_dt = vect_unknown_def_type;
4354 if (loop_vinfo)
4356 loop = LOOP_VINFO_LOOP (loop_vinfo);
4357 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4358 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4360 else
4361 vf = 1;
4363 /* Multiple types in SLP are handled by creating the appropriate number of
4364 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4365 case of SLP. */
4366 if (slp || PURE_SLP_STMT (stmt_info))
4367 ncopies = 1;
4368 else
4369 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4371 gcc_assert (ncopies >= 1);
4373 /* FORNOW. This restriction should be relaxed. */
4374 if (nested_in_vect_loop && ncopies > 1)
4376 if (vect_print_dump_info (REPORT_DETAILS))
4377 fprintf (vect_dump, "multiple types in nested loop.");
4378 return false;
4381 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4382 return false;
4384 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4385 return false;
4387 /* Is vectorizable load? */
4388 if (!is_gimple_assign (stmt))
4389 return false;
4391 scalar_dest = gimple_assign_lhs (stmt);
4392 if (TREE_CODE (scalar_dest) != SSA_NAME)
4393 return false;
4395 code = gimple_assign_rhs_code (stmt);
4396 if (code != ARRAY_REF
4397 && code != INDIRECT_REF
4398 && code != COMPONENT_REF
4399 && code != IMAGPART_EXPR
4400 && code != REALPART_EXPR
4401 && code != MEM_REF
4402 && TREE_CODE_CLASS (code) != tcc_declaration)
4403 return false;
4405 if (!STMT_VINFO_DATA_REF (stmt_info))
4406 return false;
4408 elem_type = TREE_TYPE (vectype);
4409 mode = TYPE_MODE (vectype);
4411 /* FORNOW. In some cases can vectorize even if data-type not supported
4412 (e.g. - data copies). */
4413 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4415 if (vect_print_dump_info (REPORT_DETAILS))
4416 fprintf (vect_dump, "Aligned load, but unsupported type.");
4417 return false;
4420 /* Check if the load is a part of an interleaving chain. */
4421 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
4423 grouped_load = true;
4424 /* FORNOW */
4425 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4427 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4428 if (!slp && !PURE_SLP_STMT (stmt_info))
4430 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4431 if (vect_load_lanes_supported (vectype, group_size))
4432 load_lanes_p = true;
4433 else if (!vect_grouped_load_supported (vectype, group_size))
4434 return false;
4439 if (STMT_VINFO_GATHER_P (stmt_info))
4441 gimple def_stmt;
4442 tree def;
4443 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4444 &gather_off, &gather_scale);
4445 gcc_assert (gather_decl);
4446 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4447 &def_stmt, &def, &gather_dt,
4448 &gather_off_vectype))
4450 if (vect_print_dump_info (REPORT_DETAILS))
4451 fprintf (vect_dump, "gather index use not simple.");
4452 return false;
4455 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4457 if (!vect_check_strided_load (stmt, loop_vinfo,
4458 &stride_base, &stride_step))
4459 return false;
4461 else
4463 negative = tree_int_cst_compare (nested_in_vect_loop
4464 ? STMT_VINFO_DR_STEP (stmt_info)
4465 : DR_STEP (dr),
4466 size_zero_node) < 0;
4467 if (negative && ncopies > 1)
4469 if (vect_print_dump_info (REPORT_DETAILS))
4470 fprintf (vect_dump, "multiple types with negative step.");
4471 return false;
4474 if (negative)
4476 gcc_assert (!grouped_load);
4477 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4478 if (alignment_support_scheme != dr_aligned
4479 && alignment_support_scheme != dr_unaligned_supported)
4481 if (vect_print_dump_info (REPORT_DETAILS))
4482 fprintf (vect_dump, "negative step but alignment required.");
4483 return false;
4485 if (!perm_mask_for_reverse (vectype))
4487 if (vect_print_dump_info (REPORT_DETAILS))
4488 fprintf (vect_dump, "negative step and reversing not supported.");
4489 return false;
4494 if (!vec_stmt) /* transformation not required. */
4496 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4497 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL);
4498 return true;
4501 if (vect_print_dump_info (REPORT_DETAILS))
4502 fprintf (vect_dump, "transform load. ncopies = %d", ncopies);
4504 /** Transform. **/
4506 if (STMT_VINFO_GATHER_P (stmt_info))
4508 tree vec_oprnd0 = NULL_TREE, op;
4509 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4510 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4511 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4512 edge pe = loop_preheader_edge (loop);
4513 gimple_seq seq;
4514 basic_block new_bb;
4515 enum { NARROW, NONE, WIDEN } modifier;
4516 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
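 /* The gather result vector and the vector of gather offsets may have
    different lengths. Below, WIDEN means the offset vector has twice as
    many elements as VECTYPE (odd copies reuse its upper half via
    PERM_MASK); NARROW means the result vector has twice as many elements
    as the offset vector (the valid halves of two consecutive gather
    results are combined with PERM_MASK, and NCOPIES is doubled).  */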
4518 if (nunits == gather_off_nunits)
4519 modifier = NONE;
4520 else if (nunits == gather_off_nunits / 2)
4522 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4523 modifier = WIDEN;
4525 for (i = 0; i < gather_off_nunits; ++i)
4526 sel[i] = i | nunits;
4528 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4529 gcc_assert (perm_mask != NULL_TREE);
4531 else if (nunits == gather_off_nunits * 2)
4533 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4534 modifier = NARROW;
4536 for (i = 0; i < nunits; ++i)
4537 sel[i] = i < gather_off_nunits
4538 ? i : i + nunits - gather_off_nunits;
4540 perm_mask = vect_gen_perm_mask (vectype, sel);
4541 gcc_assert (perm_mask != NULL_TREE);
4542 ncopies *= 2;
4544 else
4545 gcc_unreachable ();
4547 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4548 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4549 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4550 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4551 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4552 scaletype = TREE_VALUE (arglist);
4553 gcc_checking_assert (types_compatible_p (srctype, rettype)
4554 && types_compatible_p (srctype, masktype));
4556 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4558 ptr = fold_convert (ptrtype, gather_base);
4559 if (!is_gimple_min_invariant (ptr))
4561 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4562 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4563 gcc_assert (!new_bb);
4566 /* Currently we support only unconditional gather loads,
4567 so mask should be all ones. */
4568 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4569 mask = build_int_cst (TREE_TYPE (masktype), -1);
4570 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4572 REAL_VALUE_TYPE r;
4573 long tmp[6];
4574 for (j = 0; j < 6; ++j)
4575 tmp[j] = -1;
4576 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4577 mask = build_real (TREE_TYPE (masktype), r);
4579 else
4580 gcc_unreachable ();
4581 mask = build_vector_from_val (masktype, mask);
4582 mask = vect_init_vector (stmt, mask, masktype, NULL);
4584 scale = build_int_cst (scaletype, gather_scale);
4586 prev_stmt_info = NULL;
4587 for (j = 0; j < ncopies; ++j)
4589 if (modifier == WIDEN && (j & 1))
4590 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4591 perm_mask, stmt, gsi);
4592 else if (j == 0)
4593 op = vec_oprnd0
4594 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4595 else
4596 op = vec_oprnd0
4597 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4599 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4601 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4602 == TYPE_VECTOR_SUBPARTS (idxtype));
4603 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4604 add_referenced_var (var);
4605 var = make_ssa_name (var, NULL);
4606 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4607 new_stmt
4608 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4609 op, NULL_TREE);
4610 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4611 op = var;
4614 new_stmt
4615 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4617 if (!useless_type_conversion_p (vectype, rettype))
4619 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4620 == TYPE_VECTOR_SUBPARTS (rettype));
4621 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4622 add_referenced_var (var);
4623 op = make_ssa_name (var, new_stmt);
4624 gimple_call_set_lhs (new_stmt, op);
4625 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4626 var = make_ssa_name (vec_dest, NULL);
4627 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4628 new_stmt
4629 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4630 NULL_TREE);
4632 else
4634 var = make_ssa_name (vec_dest, new_stmt);
4635 gimple_call_set_lhs (new_stmt, var);
4638 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4640 if (modifier == NARROW)
4642 if ((j & 1) == 0)
4644 prev_res = var;
4645 continue;
4647 var = permute_vec_elements (prev_res, var,
4648 perm_mask, stmt, gsi);
4649 new_stmt = SSA_NAME_DEF_STMT (var);
4652 if (prev_stmt_info == NULL)
4653 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4654 else
4655 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4656 prev_stmt_info = vinfo_for_stmt (new_stmt);
4658 return true;
4660 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4662 gimple_stmt_iterator incr_gsi;
4663 bool insert_after;
4664 gimple incr;
4665 tree offvar;
4666 tree ref = DR_REF (dr);
4667 tree ivstep;
4668 tree running_off;
4669 VEC(constructor_elt, gc) *v = NULL;
4670 gimple_seq stmts = NULL;
4672 gcc_assert (stride_base && stride_step);
4674 /* For a load with a loop-invariant stride that is not a power of 2
4675 (i.e. not a grouped access) like so:
4677 for (i = 0; i < n; i += stride)
4678 ... = array[i];
4680 we generate a new induction variable and new accesses to
4681 form a new vector (or vectors, depending on ncopies):
4683 for (j = 0; ; j += VF*stride)
4684 tmp1 = array[j];
4685 tmp2 = array[j + stride];
4687 vectemp = {tmp1, tmp2, ...}
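 For instance (hypothetical values), with nunits == 4 each vector copy
 collects array[j], array[j + stride], array[j + 2*stride] and
 array[j + 3*stride] into one CONSTRUCTOR, while the induction variable
 created below advances by VF * stride per loop iteration.  */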
4690 ivstep = stride_step;
4691 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4692 build_int_cst (TREE_TYPE (ivstep), vf));
4694 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4696 create_iv (stride_base, ivstep, NULL,
4697 loop, &incr_gsi, insert_after,
4698 &offvar, NULL);
4699 incr = gsi_stmt (incr_gsi);
4700 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4702 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4703 if (stmts)
4704 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4706 prev_stmt_info = NULL;
4707 running_off = offvar;
4708 for (j = 0; j < ncopies; j++)
4710 tree vec_inv;
4712 v = VEC_alloc (constructor_elt, gc, nunits);
4713 for (i = 0; i < nunits; i++)
4715 tree newref, newoff;
4716 gimple incr;
4717 if (TREE_CODE (ref) == ARRAY_REF)
4718 newref = build4 (ARRAY_REF, TREE_TYPE (ref),
4719 unshare_expr (TREE_OPERAND (ref, 0)),
4720 running_off,
4721 NULL_TREE, NULL_TREE);
4722 else
4723 newref = build2 (MEM_REF, TREE_TYPE (ref),
4724 running_off,
4725 TREE_OPERAND (ref, 1));
4727 newref = force_gimple_operand_gsi (gsi, newref, true,
4728 NULL_TREE, true,
4729 GSI_SAME_STMT);
4730 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
4731 newoff = SSA_NAME_VAR (running_off);
4732 if (POINTER_TYPE_P (TREE_TYPE (newoff)))
4733 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4734 running_off, stride_step);
4735 else
4736 incr = gimple_build_assign_with_ops (PLUS_EXPR, newoff,
4737 running_off, stride_step);
4738 newoff = make_ssa_name (newoff, incr);
4739 gimple_assign_set_lhs (incr, newoff);
4740 vect_finish_stmt_generation (stmt, incr, gsi);
4742 running_off = newoff;
4745 vec_inv = build_constructor (vectype, v);
4746 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4747 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4749 if (j == 0)
4750 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4751 else
4752 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4753 prev_stmt_info = vinfo_for_stmt (new_stmt);
4755 return true;
4758 if (grouped_load)
4760 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4761 if (slp
4762 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4763 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4764 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4766 /* Check if the chain of loads is already vectorized. */
4767 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4769 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4770 return true;
4772 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4773 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4775 /* VEC_NUM is the number of vect stmts to be created for this group. */
4776 if (slp)
4778 grouped_load = false;
4779 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4780 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4781 slp_perm = true;
4783 else
4784 vec_num = group_size;
4786 else
4788 first_stmt = stmt;
4789 first_dr = dr;
4790 group_size = vec_num = 1;
4793 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4794 gcc_assert (alignment_support_scheme);
4795 /* Targets with load-lane instructions must not require explicit
4796 realignment. */
4797 gcc_assert (!load_lanes_p
4798 || alignment_support_scheme == dr_aligned
4799 || alignment_support_scheme == dr_unaligned_supported);
4801 /* In case the vectorization factor (VF) is bigger than the number
4802 of elements that we can fit in a vectype (nunits), we have to generate
4803 more than one vector stmt - i.e - we need to "unroll" the
4804 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4805 from one copy of the vector stmt to the next, in the field
4806 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4807 stages to find the correct vector defs to be used when vectorizing
4808 stmts that use the defs of the current stmt. The example below
4809 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4810 need to create 4 vectorized stmts):
4812 before vectorization:
4813 RELATED_STMT VEC_STMT
4814 S1: x = memref - -
4815 S2: z = x + 1 - -
4817 step 1: vectorize stmt S1:
4818 We first create the vector stmt VS1_0, and, as usual, record a
4819 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4820 Next, we create the vector stmt VS1_1, and record a pointer to
4821 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4822 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4823 stmts and pointers:
4824 RELATED_STMT VEC_STMT
4825 VS1_0: vx0 = memref0 VS1_1 -
4826 VS1_1: vx1 = memref1 VS1_2 -
4827 VS1_2: vx2 = memref2 VS1_3 -
4828 VS1_3: vx3 = memref3 - -
4829 S1: x = load - VS1_0
4830 S2: z = x + 1 - -
4832 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4833 information we recorded in RELATED_STMT field is used to vectorize
4834 stmt S2. */
4836 /* In case of interleaving (non-unit grouped access):
4838 S1: x2 = &base + 2
4839 S2: x0 = &base
4840 S3: x1 = &base + 1
4841 S4: x3 = &base + 3
4843 Vectorized loads are created in the order of memory accesses
4844 starting from the access of the first stmt of the chain:
4846 VS1: vx0 = &base
4847 VS2: vx1 = &base + vec_size*1
4848 VS3: vx3 = &base + vec_size*2
4849 VS4: vx4 = &base + vec_size*3
4851 Then permutation statements are generated:
4853 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4854 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4857 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4858 (the order of the data-refs in the output of vect_permute_load_chain
4859 corresponds to the order of scalar stmts in the interleaving chain - see
4860 the documentation of vect_permute_load_chain()).
4861 The generation of permutation stmts and recording them in
4862 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4864 In case of both multiple types and interleaving, the vector loads and
4865 permutation stmts above are created for every copy. The result vector
4866 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4867 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4869 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4870 on a target that supports unaligned accesses (dr_unaligned_supported)
4871 we generate the following code:
4872 p = initial_addr;
4873 indx = 0;
4874 loop {
4875 p = p + indx * vectype_size;
4876 vec_dest = *(p);
4877 indx = indx + 1;
4880 Otherwise, the data reference is potentially unaligned on a target that
4881 does not support unaligned accesses (dr_explicit_realign_optimized) -
4882 then generate the following code, in which the data in each iteration is
4883 obtained by two vector loads, one from the previous iteration, and one
4884 from the current iteration:
4885 p1 = initial_addr;
4886 msq_init = *(floor(p1))
4887 p2 = initial_addr + VS - 1;
4888 realignment_token = call target_builtin;
4889 indx = 0;
4890 loop {
4891 p2 = p2 + indx * vectype_size
4892 lsq = *(floor(p2))
4893 vec_dest = realign_load (msq, lsq, realignment_token)
4894 indx = indx + 1;
4895 msq = lsq;
4896 } */
4898 /* If the misalignment remains the same throughout the execution of the
4899 loop, we can create the init_addr and permutation mask at the loop
4900 preheader. Otherwise, they need to be created inside the loop.
4901 This can only occur when vectorizing memory accesses in the inner-loop
4902 nested within an outer-loop that is being vectorized. */
4904 if (nested_in_vect_loop
4905 && (TREE_INT_CST_LOW (DR_STEP (dr))
4906 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4908 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4909 compute_in_loop = true;
4912 if ((alignment_support_scheme == dr_explicit_realign_optimized
4913 || alignment_support_scheme == dr_explicit_realign)
4914 && !compute_in_loop)
4916 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4917 alignment_support_scheme, NULL_TREE,
4918 &at_loop);
4919 if (alignment_support_scheme == dr_explicit_realign_optimized)
4921 phi = SSA_NAME_DEF_STMT (msq);
4922 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4925 else
4926 at_loop = loop;
4928 if (negative)
4929 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4931 if (load_lanes_p)
4932 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4933 else
4934 aggr_type = vectype;
4936 prev_stmt_info = NULL;
4937 for (j = 0; j < ncopies; j++)
4939 /* 1. Create the vector or array pointer update chain. */
4940 if (j == 0)
4941 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4942 offset, &dummy, gsi,
4943 &ptr_incr, false, &inv_p);
4944 else
4945 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4946 TYPE_SIZE_UNIT (aggr_type));
4948 if (grouped_load || slp_perm)
4949 dr_chain = VEC_alloc (tree, heap, vec_num);
4951 if (load_lanes_p)
4953 tree vec_array;
4955 vec_array = create_vector_array (vectype, vec_num);
4957 /* Emit:
4958 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4959 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4960 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4961 gimple_call_set_lhs (new_stmt, vec_array);
4962 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4964 /* Extract each vector into an SSA_NAME. */
4965 for (i = 0; i < vec_num; i++)
4967 new_temp = read_vector_array (stmt, gsi, scalar_dest,
4968 vec_array, i);
4969 VEC_quick_push (tree, dr_chain, new_temp);
4972 /* Record the mapping between SSA_NAMEs and statements. */
4973 vect_record_grouped_load_vectors (stmt, dr_chain);
4975 else
4977 for (i = 0; i < vec_num; i++)
4979 if (i > 0)
4980 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4981 stmt, NULL_TREE);
4983 /* 2. Create the vector-load in the loop. */
4984 switch (alignment_support_scheme)
4986 case dr_aligned:
4987 case dr_unaligned_supported:
4989 unsigned int align, misalign;
4991 data_ref
4992 = build2 (MEM_REF, vectype, dataref_ptr,
4993 build_int_cst (reference_alias_ptr_type
4994 (DR_REF (first_dr)), 0));
4995 align = TYPE_ALIGN_UNIT (vectype);
4996 if (alignment_support_scheme == dr_aligned)
4998 gcc_assert (aligned_access_p (first_dr));
4999 misalign = 0;
5001 else if (DR_MISALIGNMENT (first_dr) == -1)
5003 TREE_TYPE (data_ref)
5004 = build_aligned_type (TREE_TYPE (data_ref),
5005 TYPE_ALIGN (elem_type));
5006 align = TYPE_ALIGN_UNIT (elem_type);
5007 misalign = 0;
5009 else
5011 TREE_TYPE (data_ref)
5012 = build_aligned_type (TREE_TYPE (data_ref),
5013 TYPE_ALIGN (elem_type));
5014 misalign = DR_MISALIGNMENT (first_dr);
5016 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5017 align, misalign);
5018 break;
5020 case dr_explicit_realign:
5022 tree ptr, bump;
5023 tree vs_minus_1;
5025 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5027 if (compute_in_loop)
5028 msq = vect_setup_realignment (first_stmt, gsi,
5029 &realignment_token,
5030 dr_explicit_realign,
5031 dataref_ptr, NULL);
5033 new_stmt = gimple_build_assign_with_ops
5034 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
5035 build_int_cst
5036 (TREE_TYPE (dataref_ptr),
5037 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5038 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
5039 gimple_assign_set_lhs (new_stmt, ptr);
5040 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5041 data_ref
5042 = build2 (MEM_REF, vectype, ptr,
5043 build_int_cst (reference_alias_ptr_type
5044 (DR_REF (first_dr)), 0));
5045 vec_dest = vect_create_destination_var (scalar_dest,
5046 vectype);
5047 new_stmt = gimple_build_assign (vec_dest, data_ref);
5048 new_temp = make_ssa_name (vec_dest, new_stmt);
5049 gimple_assign_set_lhs (new_stmt, new_temp);
5050 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5051 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5052 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5053 msq = new_temp;
5055 bump = size_binop (MULT_EXPR, vs_minus_1,
5056 TYPE_SIZE_UNIT (elem_type));
5057 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5058 new_stmt = gimple_build_assign_with_ops
5059 (BIT_AND_EXPR, NULL_TREE, ptr,
5060 build_int_cst
5061 (TREE_TYPE (ptr),
5062 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5063 ptr = make_ssa_name (SSA_NAME_VAR (dataref_ptr), new_stmt);
5064 gimple_assign_set_lhs (new_stmt, ptr);
5065 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5066 data_ref
5067 = build2 (MEM_REF, vectype, ptr,
5068 build_int_cst (reference_alias_ptr_type
5069 (DR_REF (first_dr)), 0));
5070 break;
5072 case dr_explicit_realign_optimized:
5073 new_stmt = gimple_build_assign_with_ops
5074 (BIT_AND_EXPR, NULL_TREE, dataref_ptr,
5075 build_int_cst
5076 (TREE_TYPE (dataref_ptr),
5077 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5078 new_temp = make_ssa_name (SSA_NAME_VAR (dataref_ptr),
5079 new_stmt);
5080 gimple_assign_set_lhs (new_stmt, new_temp);
5081 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5082 data_ref
5083 = build2 (MEM_REF, vectype, new_temp,
5084 build_int_cst (reference_alias_ptr_type
5085 (DR_REF (first_dr)), 0));
5086 break;
5087 default:
5088 gcc_unreachable ();
5090 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5091 new_stmt = gimple_build_assign (vec_dest, data_ref);
5092 new_temp = make_ssa_name (vec_dest, new_stmt);
5093 gimple_assign_set_lhs (new_stmt, new_temp);
5094 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5096 /* 3. Handle explicit realignment if necessary/supported.
5097 Create in loop:
5098 vec_dest = realign_load (msq, lsq, realignment_token) */
5099 if (alignment_support_scheme == dr_explicit_realign_optimized
5100 || alignment_support_scheme == dr_explicit_realign)
5102 lsq = gimple_assign_lhs (new_stmt);
5103 if (!realignment_token)
5104 realignment_token = dataref_ptr;
5105 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5106 new_stmt
5107 = gimple_build_assign_with_ops3 (REALIGN_LOAD_EXPR,
5108 vec_dest, msq, lsq,
5109 realignment_token);
5110 new_temp = make_ssa_name (vec_dest, new_stmt);
5111 gimple_assign_set_lhs (new_stmt, new_temp);
5112 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5114 if (alignment_support_scheme == dr_explicit_realign_optimized)
5116 gcc_assert (phi);
5117 if (i == vec_num - 1 && j == ncopies - 1)
5118 add_phi_arg (phi, lsq,
5119 loop_latch_edge (containing_loop),
5120 UNKNOWN_LOCATION);
5121 msq = lsq;
5125 /* 4. Handle invariant-load. */
5126 if (inv_p && !bb_vinfo)
5128 gimple_stmt_iterator gsi2 = *gsi;
5129 gcc_assert (!grouped_load);
5130 gsi_next (&gsi2);
5131 new_temp = vect_init_vector (stmt, scalar_dest,
5132 vectype, &gsi2);
5133 new_stmt = SSA_NAME_DEF_STMT (new_temp);
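 /* A negative-step load was read at the adjusted (negative) OFFSET
    computed earlier; reverse its elements with the mask built by
    perm_mask_for_reverse so they appear in original iteration order.  */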
5136 if (negative)
5138 tree perm_mask = perm_mask_for_reverse (vectype);
5139 new_temp = permute_vec_elements (new_temp, new_temp,
5140 perm_mask, stmt, gsi);
5141 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5144 /* Collect vector loads and later create their permutation in
5145 vect_transform_grouped_load (). */
5146 if (grouped_load || slp_perm)
5147 VEC_quick_push (tree, dr_chain, new_temp);
5149 /* Store vector loads in the corresponding SLP_NODE. */
5150 if (slp && !slp_perm)
5151 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
5152 new_stmt);
5156 if (slp && !slp_perm)
5157 continue;
5159 if (slp_perm)
5161 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
5162 slp_node_instance, false))
5164 VEC_free (tree, heap, dr_chain);
5165 return false;
5168 else
5170 if (grouped_load)
5172 if (!load_lanes_p)
5173 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
5174 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5176 else
5178 if (j == 0)
5179 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5180 else
5181 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5182 prev_stmt_info = vinfo_for_stmt (new_stmt);
5185 if (dr_chain)
5186 VEC_free (tree, heap, dr_chain);
5189 return true;
5192 /* Function vect_is_simple_cond.
5194 Input:
5195 LOOP - the loop that is being vectorized.
5196 COND - Condition that is checked for simple use.
5198 Output:
5199 *COMP_VECTYPE - the vector type for the comparison.
5201 Returns whether a COND can be vectorized. Checks whether
5202 condition operands are supportable using vect_is_simple_use. */
5204 static bool
5205 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5206 bb_vec_info bb_vinfo, tree *comp_vectype)
5208 tree lhs, rhs;
5209 tree def;
5210 enum vect_def_type dt;
5211 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5213 if (!COMPARISON_CLASS_P (cond))
5214 return false;
5216 lhs = TREE_OPERAND (cond, 0);
5217 rhs = TREE_OPERAND (cond, 1);
5219 if (TREE_CODE (lhs) == SSA_NAME)
5221 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5222 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5223 &lhs_def_stmt, &def, &dt, &vectype1))
5224 return false;
5226 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5227 && TREE_CODE (lhs) != FIXED_CST)
5228 return false;
5230 if (TREE_CODE (rhs) == SSA_NAME)
5232 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5233 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5234 &rhs_def_stmt, &def, &dt, &vectype2))
5235 return false;
5237 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5238 && TREE_CODE (rhs) != FIXED_CST)
5239 return false;
5241 *comp_vectype = vectype1 ? vectype1 : vectype2;
5242 return true;
5245 /* vectorizable_condition.
5247 Check if STMT is conditional modify expression that can be vectorized.
5248 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5249 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5250 at GSI.
5252 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
5253 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
5254 the else clause if it is 2).
5256 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
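 /* For illustration (a sketch with made-up names): a scalar statement

      z_1 = a_2 < b_3 ? c_4 : d_5

    becomes

      vz = VEC_COND_EXPR <va < vb, vc, vd>

    where va, vb, vc and vd are the vectorized defs of the four operands.  */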
5258 bool
5259 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5260 gimple *vec_stmt, tree reduc_def, int reduc_index,
5261 slp_tree slp_node)
5263 tree scalar_dest = NULL_TREE;
5264 tree vec_dest = NULL_TREE;
5265 tree cond_expr, then_clause, else_clause;
5266 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5267 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5268 tree comp_vectype = NULL_TREE;
5269 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5270 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5271 tree vec_compare, vec_cond_expr;
5272 tree new_temp;
5273 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5274 tree def;
5275 enum vect_def_type dt, dts[4];
5276 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5277 int ncopies;
5278 enum tree_code code;
5279 stmt_vec_info prev_stmt_info = NULL;
5280 int i, j;
5281 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5282 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
5283 VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
5285 if (slp_node || PURE_SLP_STMT (stmt_info))
5286 ncopies = 1;
5287 else
5288 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5290 gcc_assert (ncopies >= 1);
5291 if (reduc_index && ncopies > 1)
5292 return false; /* FORNOW */
5294 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5295 return false;
5297 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5298 return false;
5300 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5301 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5302 && reduc_def))
5303 return false;
5305 /* FORNOW: not yet supported. */
5306 if (STMT_VINFO_LIVE_P (stmt_info))
5308 if (vect_print_dump_info (REPORT_DETAILS))
5309 fprintf (vect_dump, "value used after loop.");
5310 return false;
5313 /* Is vectorizable conditional operation? */
5314 if (!is_gimple_assign (stmt))
5315 return false;
5317 code = gimple_assign_rhs_code (stmt);
5319 if (code != COND_EXPR)
5320 return false;
5322 cond_expr = gimple_assign_rhs1 (stmt);
5323 then_clause = gimple_assign_rhs2 (stmt);
5324 else_clause = gimple_assign_rhs3 (stmt);
5326 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5327 &comp_vectype)
5328 || !comp_vectype)
5329 return false;
5331 if (TREE_CODE (then_clause) == SSA_NAME)
5333 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5334 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5335 &then_def_stmt, &def, &dt))
5336 return false;
5338 else if (TREE_CODE (then_clause) != INTEGER_CST
5339 && TREE_CODE (then_clause) != REAL_CST
5340 && TREE_CODE (then_clause) != FIXED_CST)
5341 return false;
5343 if (TREE_CODE (else_clause) == SSA_NAME)
5345 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5346 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5347 &else_def_stmt, &def, &dt))
5348 return false;
5350 else if (TREE_CODE (else_clause) != INTEGER_CST
5351 && TREE_CODE (else_clause) != REAL_CST
5352 && TREE_CODE (else_clause) != FIXED_CST)
5353 return false;
5355 if (!vec_stmt)
5357 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5358 return expand_vec_cond_expr_p (vectype, comp_vectype);
5361 /* Transform. */
5363 if (!slp_node)
5365 vec_oprnds0 = VEC_alloc (tree, heap, 1);
5366 vec_oprnds1 = VEC_alloc (tree, heap, 1);
5367 vec_oprnds2 = VEC_alloc (tree, heap, 1);
5368 vec_oprnds3 = VEC_alloc (tree, heap, 1);
5371 /* Handle def. */
5372 scalar_dest = gimple_assign_lhs (stmt);
5373 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5375 /* Handle cond expr. */
5376 for (j = 0; j < ncopies; j++)
5378 gimple new_stmt = NULL;
5379 if (j == 0)
5381 if (slp_node)
5383 VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
5384 VEC (slp_void_p, heap) *vec_defs;
5386 vec_defs = VEC_alloc (slp_void_p, heap, 4);
5387 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
5388 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
5389 VEC_safe_push (tree, heap, ops, then_clause);
5390 VEC_safe_push (tree, heap, ops, else_clause);
5391 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5392 vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5393 vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5394 vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5395 vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5397 VEC_free (tree, heap, ops);
5398 VEC_free (slp_void_p, heap, vec_defs);
5400 else
5402 gimple gtemp;
5403 vec_cond_lhs =
5404 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5405 stmt, NULL);
5406 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5407 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5409 vec_cond_rhs =
5410 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5411 stmt, NULL);
5412 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5413 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5414 if (reduc_index == 1)
5415 vec_then_clause = reduc_def;
5416 else
5418 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5419 stmt, NULL);
5420 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5421 NULL, &gtemp, &def, &dts[2]);
5423 if (reduc_index == 2)
5424 vec_else_clause = reduc_def;
5425 else
5427 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5428 stmt, NULL);
5429 vect_is_simple_use (else_clause, stmt, loop_vinfo,
5430 NULL, &gtemp, &def, &dts[3]);
5434 else
5436 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5437 VEC_pop (tree, vec_oprnds0));
5438 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5439 VEC_pop (tree, vec_oprnds1));
5440 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5441 VEC_pop (tree, vec_oprnds2));
5442 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5443 VEC_pop (tree, vec_oprnds3));
5446 if (!slp_node)
5448 VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5449 VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5450 VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5451 VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
5454 /* Arguments are ready. Create the new vector stmt. */
5455 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5457 vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5458 vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5459 vec_else_clause = VEC_index (tree, vec_oprnds3, i);
5461 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5462 vec_cond_lhs, vec_cond_rhs);
5463 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5464 vec_compare, vec_then_clause, vec_else_clause);
5466 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5467 new_temp = make_ssa_name (vec_dest, new_stmt);
5468 gimple_assign_set_lhs (new_stmt, new_temp);
5469 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5470 if (slp_node)
5471 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5474 if (slp_node)
5475 continue;
5477 if (j == 0)
5478 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5479 else
5480 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5482 prev_stmt_info = vinfo_for_stmt (new_stmt);
5485 VEC_free (tree, heap, vec_oprnds0);
5486 VEC_free (tree, heap, vec_oprnds1);
5487 VEC_free (tree, heap, vec_oprnds2);
5488 VEC_free (tree, heap, vec_oprnds3);
5490 return true;
5494 /* Make sure the statement is vectorizable. */
5496 bool
5497 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5499 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5500 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5501 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5502 bool ok;
5503 tree scalar_type, vectype;
5504 gimple pattern_stmt;
5505 gimple_seq pattern_def_seq;
5507 if (vect_print_dump_info (REPORT_DETAILS))
5509 fprintf (vect_dump, "==> examining statement: ");
5510 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5513 if (gimple_has_volatile_ops (stmt))
5515 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5516 fprintf (vect_dump, "not vectorized: stmt has volatile operands");
5518 return false;
5521 /* Skip stmts that do not need to be vectorized. In loops this is expected
5522 to include:
5523 - the COND_EXPR which is the loop exit condition
5524 - any LABEL_EXPRs in the loop
5525 - computations that are used only for array indexing or loop control.
5526 In basic blocks we only analyze statements that are a part of some SLP
5527 instance; therefore, all the statements are relevant.
5529 A pattern statement needs to be analyzed instead of the original statement
5530 if the original statement is not relevant. Otherwise, we analyze both
5531 statements. In basic blocks we are called from some SLP instance
5532 traversal, so we don't analyze pattern stmts separately; the pattern stmts
5533 are already part of the SLP instance. */
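   /* Illustrative sketch only (the loop and names are assumed, not taken
      from a particular testcase): in

        for (i = 0; i < n; i++)      <-- "i < n" is the exit COND_EXPR
          a[i] = b[i] + c[i];        <-- the only relevant computation

      the induction of I and the exit test are used only for loop control and
      array indexing, so they are skipped here, while the addition (and the
      loads/store feeding it) is analyzed.  */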
5535 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5536 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5537 && !STMT_VINFO_LIVE_P (stmt_info))
5539 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5540 && pattern_stmt
5541 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5542 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5544 /* Analyze PATTERN_STMT instead of the original stmt. */
5545 stmt = pattern_stmt;
5546 stmt_info = vinfo_for_stmt (pattern_stmt);
5547 if (vect_print_dump_info (REPORT_DETAILS))
5549 fprintf (vect_dump, "==> examining pattern statement: ");
5550 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5553 else
5555 if (vect_print_dump_info (REPORT_DETAILS))
5556 fprintf (vect_dump, "irrelevant.");
5558 return true;
5561 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5562 && node == NULL
5563 && pattern_stmt
5564 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5565 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5567 /* Analyze PATTERN_STMT too. */
5568 if (vect_print_dump_info (REPORT_DETAILS))
5570 fprintf (vect_dump, "==> examining pattern statement: ");
5571 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5574 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5575 return false;
5578 if (is_pattern_stmt_p (stmt_info)
5579 && node == NULL
5580 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5582 gimple_stmt_iterator si;
5584 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5586 gimple pattern_def_stmt = gsi_stmt (si);
5587 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5588 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5590 /* Analyze def stmt of STMT if it's a pattern stmt. */
5591 if (vect_print_dump_info (REPORT_DETAILS))
5593 fprintf (vect_dump, "==> examining pattern def statement: ");
5594 print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
5597 if (!vect_analyze_stmt (pattern_def_stmt,
5598 need_to_vectorize, node))
5599 return false;
5604 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5606 case vect_internal_def:
5607 break;
5609 case vect_reduction_def:
5610 case vect_nested_cycle:
5611 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5612 || relevance == vect_used_in_outer_by_reduction
5613 || relevance == vect_unused_in_scope));
5614 break;
5616 case vect_induction_def:
5617 case vect_constant_def:
5618 case vect_external_def:
5619 case vect_unknown_def_type:
5620 default:
5621 gcc_unreachable ();
5624 if (bb_vinfo)
5626 gcc_assert (PURE_SLP_STMT (stmt_info));
5628 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5629 if (vect_print_dump_info (REPORT_DETAILS))
5631 fprintf (vect_dump, "get vectype for scalar type: ");
5632 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5635 vectype = get_vectype_for_scalar_type (scalar_type);
5636 if (!vectype)
5638 if (vect_print_dump_info (REPORT_DETAILS))
5640 fprintf (vect_dump, "not SLPed: unsupported data-type ");
5641 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
5643 return false;
5646 if (vect_print_dump_info (REPORT_DETAILS))
5648 fprintf (vect_dump, "vectype: ");
5649 print_generic_expr (vect_dump, vectype, TDF_SLIM);
5652 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5655 if (STMT_VINFO_RELEVANT_P (stmt_info))
5657 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5658 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5659 *need_to_vectorize = true;
5662 ok = true;
5663 if (!bb_vinfo
5664 && (STMT_VINFO_RELEVANT_P (stmt_info)
5665 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5666 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5667 || vectorizable_shift (stmt, NULL, NULL, NULL)
5668 || vectorizable_operation (stmt, NULL, NULL, NULL)
5669 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5670 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5671 || vectorizable_call (stmt, NULL, NULL, NULL)
5672 || vectorizable_store (stmt, NULL, NULL, NULL)
5673 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5674 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5675 else
5677 if (bb_vinfo)
5678 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5679 || vectorizable_shift (stmt, NULL, NULL, node)
5680 || vectorizable_operation (stmt, NULL, NULL, node)
5681 || vectorizable_assignment (stmt, NULL, NULL, node)
5682 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5683 || vectorizable_call (stmt, NULL, NULL, node)
5684 || vectorizable_store (stmt, NULL, NULL, node)
5685 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5688 if (!ok)
5690 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5692 fprintf (vect_dump, "not vectorized: relevant stmt not ");
5693 fprintf (vect_dump, "supported: ");
5694 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5697 return false;
5700 if (bb_vinfo)
5701 return true;
5703 /* Stmts that are (also) "live" (i.e., that are used outside of the loop)
5704 need extra handling, except for vectorizable reductions. */
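   /* Illustrative sketch only (the loop and names are assumed): in

        for (i = 0; i < n; i++)
          last = a[i];
        ... = last;                  <-- LAST is used after the loop

      the def of LAST is "live" outside the loop and needs the extra handling
      checked below.  */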
5705 if (STMT_VINFO_LIVE_P (stmt_info)
5706 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5707 ok = vectorizable_live_operation (stmt, NULL, NULL);
5709 if (!ok)
5711 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
5713 fprintf (vect_dump, "not vectorized: live stmt not ");
5714 fprintf (vect_dump, "supported: ");
5715 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
5718 return false;
5721 return true;
5725 /* Function vect_transform_stmt.
5727 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5729 bool
5730 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5731 bool *grouped_store, slp_tree slp_node,
5732 slp_instance slp_node_instance)
5734 bool is_store = false;
5735 gimple vec_stmt = NULL;
5736 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5737 bool done;
5739 switch (STMT_VINFO_TYPE (stmt_info))
5741 case type_demotion_vec_info_type:
5742 case type_promotion_vec_info_type:
5743 case type_conversion_vec_info_type:
5744 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5745 gcc_assert (done);
5746 break;
5748 case induc_vec_info_type:
5749 gcc_assert (!slp_node);
5750 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5751 gcc_assert (done);
5752 break;
5754 case shift_vec_info_type:
5755 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5756 gcc_assert (done);
5757 break;
5759 case op_vec_info_type:
5760 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5761 gcc_assert (done);
5762 break;
5764 case assignment_vec_info_type:
5765 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5766 gcc_assert (done);
5767 break;
5769 case load_vec_info_type:
5770 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5771 slp_node_instance);
5772 gcc_assert (done);
5773 break;
5775 case store_vec_info_type:
5776 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5777 gcc_assert (done);
5778 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5780 /* In case of interleaving, the whole chain is vectorized when the
5781 last store in the chain is reached. Store stmts before the last
5782 one are skipped, and their vec_stmt_info shouldn't be freed
5783 meanwhile. */
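         /* Illustrative sketch only (the stores and names are assumed): for an
            interleaved store group such as

              a[2*i]   = x;          <-- earlier store in the chain, skipped
              a[2*i+1] = y;          <-- last store, vectorizes the chain

            only the last store emits the vector stores; the earlier ones are
            merely recorded so that their stmt_vec_info stays available.  */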
5784 *grouped_store = true;
5785 if (STMT_VINFO_VEC_STMT (stmt_info))
5786 is_store = true;
5788 else
5789 is_store = true;
5790 break;
5792 case condition_vec_info_type:
5793 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5794 gcc_assert (done);
5795 break;
5797 case call_vec_info_type:
5798 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5799 stmt = gsi_stmt (*gsi);
5800 break;
5802 case reduc_vec_info_type:
5803 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5804 gcc_assert (done);
5805 break;
5807 default:
5808 if (!STMT_VINFO_LIVE_P (stmt_info))
5810 if (vect_print_dump_info (REPORT_DETAILS))
5811 fprintf (vect_dump, "stmt not supported.");
5812 gcc_unreachable ();
5816 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5817 is being vectorized, but outside the immediately enclosing loop. */
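   /* Illustrative sketch only (the loops and names are assumed): when the
      outer loop of

        for (i = 0; i < n; i++)
          {
            s = 0;
            for (j = 0; j < m; j++)  <-- inner loop, executed sequentially
              s = s + b[i][j];       <-- inner-loop DEF of S
            a[i] = s;                <-- use of S outside the inner loop
          }

      is vectorized, the vectorized def of S is recorded on the inner-loop
      exit PHI so that the outer-loop stmt "a[i] = s" can find it later.  */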
5818 if (vec_stmt
5819 && STMT_VINFO_LOOP_VINFO (stmt_info)
5820 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5821 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5822 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5823 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5824 || STMT_VINFO_RELEVANT (stmt_info) ==
5825 vect_used_in_outer_by_reduction))
5827 struct loop *innerloop = LOOP_VINFO_LOOP (
5828 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5829 imm_use_iterator imm_iter;
5830 use_operand_p use_p;
5831 tree scalar_dest;
5832 gimple exit_phi;
5834 if (vect_print_dump_info (REPORT_DETAILS))
5835 fprintf (vect_dump, "Record the vdef for outer-loop vectorization.");
5837 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5838 (to be used when vectorizing outer-loop stmts that use the DEF of
5839 STMT). */
5840 if (gimple_code (stmt) == GIMPLE_PHI)
5841 scalar_dest = PHI_RESULT (stmt);
5842 else
5843 scalar_dest = gimple_assign_lhs (stmt);
5845 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5847 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5849 exit_phi = USE_STMT (use_p);
5850 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5855 /* Handle stmts whose DEF is used outside the loop-nest that is
5856 being vectorized. */
5857 if (STMT_VINFO_LIVE_P (stmt_info)
5858 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5860 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5861 gcc_assert (done);
5864 if (vec_stmt)
5865 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5867 return is_store;
5871 /* Remove a group of stores (for SLP or interleaving) and free their
5872 stmt_vec_info. */
5874 void
5875 vect_remove_stores (gimple first_stmt)
5877 gimple next = first_stmt;
5878 gimple tmp;
5879 gimple_stmt_iterator next_si;
5881 while (next)
5883 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5885 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5886 if (is_pattern_stmt_p (stmt_info))
5887 next = STMT_VINFO_RELATED_STMT (stmt_info);
5888 /* Free the attached stmt_vec_info and remove the stmt. */
5889 next_si = gsi_for_stmt (next);
5890 unlink_stmt_vdef (next);
5891 gsi_remove (&next_si, true);
5892 release_defs (next);
5893 free_stmt_vec_info (next);
5894 next = tmp;
5899 /* Function new_stmt_vec_info.
5901 Create and initialize a new stmt_vec_info struct for STMT. */
5903 stmt_vec_info
5904 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5905 bb_vec_info bb_vinfo)
5907 stmt_vec_info res;
5908 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5910 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5911 STMT_VINFO_STMT (res) = stmt;
5912 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5913 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5914 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5915 STMT_VINFO_LIVE_P (res) = false;
5916 STMT_VINFO_VECTYPE (res) = NULL;
5917 STMT_VINFO_VEC_STMT (res) = NULL;
5918 STMT_VINFO_VECTORIZABLE (res) = true;
5919 STMT_VINFO_IN_PATTERN_P (res) = false;
5920 STMT_VINFO_RELATED_STMT (res) = NULL;
5921 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5922 STMT_VINFO_DATA_REF (res) = NULL;
5924 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5925 STMT_VINFO_DR_OFFSET (res) = NULL;
5926 STMT_VINFO_DR_INIT (res) = NULL;
5927 STMT_VINFO_DR_STEP (res) = NULL;
5928 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5930 if (gimple_code (stmt) == GIMPLE_PHI
5931 && is_loop_header_bb_p (gimple_bb (stmt)))
5932 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5933 else
5934 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5936 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
5937 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
5938 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
5939 STMT_SLP_TYPE (res) = loop_vect;
5940 GROUP_FIRST_ELEMENT (res) = NULL;
5941 GROUP_NEXT_ELEMENT (res) = NULL;
5942 GROUP_SIZE (res) = 0;
5943 GROUP_STORE_COUNT (res) = 0;
5944 GROUP_GAP (res) = 0;
5945 GROUP_SAME_DR_STMT (res) = NULL;
5946 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5948 return res;
5952 /* Create the vector that holds the stmt_vec_info structs. */
5954 void
5955 init_stmt_vec_info_vec (void)
5957 gcc_assert (!stmt_vec_info_vec);
5958 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
5962 /* Free the vector that holds the stmt_vec_info structs. */
5964 void
5965 free_stmt_vec_info_vec (void)
5967 gcc_assert (stmt_vec_info_vec);
5968 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
5972 /* Free stmt vectorization related info. */
5974 void
5975 free_stmt_vec_info (gimple stmt)
5977 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5979 if (!stmt_info)
5980 return;
5982 /* Check if this statement has a related "pattern stmt"
5983 (introduced by the vectorizer during the pattern recognition
5984 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
5985 too. */
5986 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5988 stmt_vec_info patt_info
5989 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
5990 if (patt_info)
5992 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
5993 if (seq)
5995 gimple_stmt_iterator si;
5996 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
5997 free_stmt_vec_info (gsi_stmt (si));
5999 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6003 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
6004 set_vinfo_for_stmt (stmt, NULL);
6005 free (stmt_info);
6009 /* Function get_vectype_for_scalar_type_and_size.
6011 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
6012 by the target. */
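/* Illustrative sketch only (modes and sizes are target dependent): called
   with SCALAR_TYPE "int" (4 bytes) and SIZE 16, the function would look for
   a vector mode of 16 / 4 == 4 units and, if the target provides one
   (e.g. V4SImode), return the corresponding 4-element vector type.  With
   SIZE == 0 the mode preferred by the target is used instead.  */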
6014 static tree
6015 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
6017 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
6018 enum machine_mode simd_mode;
6019 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
6020 int nunits;
6021 tree vectype;
6023 if (nbytes == 0)
6024 return NULL_TREE;
6026 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6027 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6028 return NULL_TREE;
6030 /* We can't build a vector type of elements with alignment bigger than
6031 their size. */
6032 if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
6033 return NULL_TREE;
6035 /* For vector types of elements whose mode precision doesn't
6036 match their type's precision we use an element type of mode
6037 precision. The vectorization routines will have to make sure
6038 they support the proper result truncation/extension.
6039 We also make sure to build vector types with INTEGER_TYPE
6040 component type only. */
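   /* Illustrative sketch only (the types are assumed): a boolean or a
      bit-field type with TYPE_PRECISION 1 but QImode would be replaced here
      by an 8-bit unsigned INTEGER_TYPE, so that the vector elements carry
      the full mode precision.  */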
6041 if (INTEGRAL_TYPE_P (scalar_type)
6042 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6043 || TREE_CODE (scalar_type) != INTEGER_TYPE))
6044 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6045 TYPE_UNSIGNED (scalar_type));
6047 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6048 When the component mode passes the above test simply use a type
6049 corresponding to that mode. The theory is that any use that
6050 would cause problems with this will disable vectorization anyway. */
6051 if (!SCALAR_FLOAT_TYPE_P (scalar_type)
6052 && !INTEGRAL_TYPE_P (scalar_type)
6053 && !POINTER_TYPE_P (scalar_type))
6054 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6056 /* If no size was supplied use the mode the target prefers. Otherwise
6057 lookup a vector mode of the specified size. */
6058 if (size == 0)
6059 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6060 else
6061 simd_mode = mode_for_vector (inner_mode, size / nbytes);
6062 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6063 if (nunits <= 1)
6064 return NULL_TREE;
6066 vectype = build_vector_type (scalar_type, nunits);
6067 if (vect_print_dump_info (REPORT_DETAILS))
6069 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
6070 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
6073 if (!vectype)
6074 return NULL_TREE;
6076 if (vect_print_dump_info (REPORT_DETAILS))
6078 fprintf (vect_dump, "vectype: ");
6079 print_generic_expr (vect_dump, vectype, TDF_SLIM);
6082 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6083 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6085 if (vect_print_dump_info (REPORT_DETAILS))
6086 fprintf (vect_dump, "mode not supported by target.");
6087 return NULL_TREE;
6090 return vectype;
6093 unsigned int current_vector_size;
6095 /* Function get_vectype_for_scalar_type.
6097 Returns the vector type corresponding to SCALAR_TYPE as supported
6098 by the target. */
6100 tree
6101 get_vectype_for_scalar_type (tree scalar_type)
6103 tree vectype;
6104 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6105 current_vector_size);
6106 if (vectype
6107 && current_vector_size == 0)
6108 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6109 return vectype;
6112 /* Function get_same_sized_vectype
6114 Returns a vector type corresponding to SCALAR_TYPE of size
6115 VECTOR_TYPE if supported by the target. */
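/* Illustrative sketch only (modes are target dependent): if VECTOR_TYPE is
   a 16-byte vector of "int" and SCALAR_TYPE is "short", the result would be
   a 16-byte vector of 8 shorts, provided the target supports such a mode.  */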
6117 tree
6118 get_same_sized_vectype (tree scalar_type, tree vector_type)
6120 return get_vectype_for_scalar_type_and_size
6121 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6124 /* Function vect_is_simple_use.
6126 Input:
6127 LOOP_VINFO - the vect info of the loop that is being vectorized.
6128 BB_VINFO - the vect info of the basic block that is being vectorized.
6129 OPERAND - operand of STMT in the loop or bb.
6130 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6132 Returns whether a stmt with OPERAND can be vectorized.
6133 For loops, supportable operands are constants, loop invariants, and operands
6134 that are defined by the current iteration of the loop. Unsupportable
6135 operands are those that are defined by a previous iteration of the loop (as
6136 is the case in reduction/induction computations).
6137 For basic blocks, supportable operands are constants and bb invariants.
6138 For now, operands defined outside the basic block are not supported. */
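/* Illustrative sketch only (the loop and names are assumed): in

     for (i = 0; i < n; i++)
       a[i] = b[i] * x + 3;

   the constant 3 would be classified as vect_constant_def, the loop
   invariant X as vect_external_def, and the value loaded from b[i] as
   vect_internal_def, since it is defined by a stmt of the current
   iteration.  */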
6140 bool
6141 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6142 bb_vec_info bb_vinfo, gimple *def_stmt,
6143 tree *def, enum vect_def_type *dt)
6145 basic_block bb;
6146 stmt_vec_info stmt_vinfo;
6147 struct loop *loop = NULL;
6149 if (loop_vinfo)
6150 loop = LOOP_VINFO_LOOP (loop_vinfo);
6152 *def_stmt = NULL;
6153 *def = NULL_TREE;
6155 if (vect_print_dump_info (REPORT_DETAILS))
6157 fprintf (vect_dump, "vect_is_simple_use: operand ");
6158 print_generic_expr (vect_dump, operand, TDF_SLIM);
6161 if (CONSTANT_CLASS_P (operand))
6163 *dt = vect_constant_def;
6164 return true;
6167 if (is_gimple_min_invariant (operand))
6169 *def = operand;
6170 *dt = vect_external_def;
6171 return true;
6174 if (TREE_CODE (operand) == PAREN_EXPR)
6176 if (vect_print_dump_info (REPORT_DETAILS))
6177 fprintf (vect_dump, "non-associatable copy.");
6178 operand = TREE_OPERAND (operand, 0);
6181 if (TREE_CODE (operand) != SSA_NAME)
6183 if (vect_print_dump_info (REPORT_DETAILS))
6184 fprintf (vect_dump, "not ssa-name.");
6185 return false;
6188 *def_stmt = SSA_NAME_DEF_STMT (operand);
6189 if (*def_stmt == NULL)
6191 if (vect_print_dump_info (REPORT_DETAILS))
6192 fprintf (vect_dump, "no def_stmt.");
6193 return false;
6196 if (vect_print_dump_info (REPORT_DETAILS))
6198 fprintf (vect_dump, "def_stmt: ");
6199 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
6202 /* An empty stmt is expected only in the case of a function argument
6203 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
6204 if (gimple_nop_p (*def_stmt))
6206 *def = operand;
6207 *dt = vect_external_def;
6208 return true;
6211 bb = gimple_bb (*def_stmt);
6213 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6214 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
6215 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
6216 *dt = vect_external_def;
6217 else
6219 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6220 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6223 if (*dt == vect_unknown_def_type
6224 || (stmt
6225 && *dt == vect_double_reduction_def
6226 && gimple_code (stmt) != GIMPLE_PHI))
6228 if (vect_print_dump_info (REPORT_DETAILS))
6229 fprintf (vect_dump, "Unsupported pattern.");
6230 return false;
6233 if (vect_print_dump_info (REPORT_DETAILS))
6234 fprintf (vect_dump, "type of def: %d.",*dt);
6236 switch (gimple_code (*def_stmt))
6238 case GIMPLE_PHI:
6239 *def = gimple_phi_result (*def_stmt);
6240 break;
6242 case GIMPLE_ASSIGN:
6243 *def = gimple_assign_lhs (*def_stmt);
6244 break;
6246 case GIMPLE_CALL:
6247 *def = gimple_call_lhs (*def_stmt);
6248 if (*def != NULL)
6249 break;
6250 /* FALLTHRU */
6251 default:
6252 if (vect_print_dump_info (REPORT_DETAILS))
6253 fprintf (vect_dump, "unsupported defining stmt: ");
6254 return false;
6257 return true;
6260 /* Function vect_is_simple_use_1.
6262 Same as vect_is_simple_use but also determines the vector operand
6263 type of OPERAND and stores it to *VECTYPE. If the definition of
6264 OPERAND is vect_uninitialized_def, vect_constant_def or
6265 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6266 is responsible to compute the best suited vector type for the
6267 scalar operand. */
6269 bool
6270 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6271 bb_vec_info bb_vinfo, gimple *def_stmt,
6272 tree *def, enum vect_def_type *dt, tree *vectype)
6274 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6275 def, dt))
6276 return false;
6278 /* Now get a vector type if the def is internal, otherwise supply
6279 NULL_TREE and leave it up to the caller to figure out a proper
6280 type for the use stmt. */
6281 if (*dt == vect_internal_def
6282 || *dt == vect_induction_def
6283 || *dt == vect_reduction_def
6284 || *dt == vect_double_reduction_def
6285 || *dt == vect_nested_cycle)
6287 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6289 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6290 && !STMT_VINFO_RELEVANT (stmt_info)
6291 && !STMT_VINFO_LIVE_P (stmt_info))
6292 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6294 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6295 gcc_assert (*vectype != NULL_TREE);
6297 else if (*dt == vect_uninitialized_def
6298 || *dt == vect_constant_def
6299 || *dt == vect_external_def)
6300 *vectype = NULL_TREE;
6301 else
6302 gcc_unreachable ();
6304 return true;
6308 /* Function supportable_widening_operation
6310 Check whether an operation represented by the code CODE is a
6311 widening operation that is supported by the target platform in
6312 vector form (i.e., when operating on arguments of type VECTYPE_IN
6313 producing a result of type VECTYPE_OUT).
6315 Widening operations we currently support are NOP (CONVERT), FLOAT,
6316 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations are supported
6317 by the target platform either directly (via vector tree-codes), or via
6318 target builtins.
6320 Output:
6321 - CODE1 and CODE2 are codes of vector operations to be used when
6322 vectorizing the operation, if available.
6323 - DECL1 and DECL2 are decls of target builtin functions to be used
6324 when vectorizing the operation, if available. In this case,
6325 CODE1 and CODE2 are CALL_EXPR.
6326 - MULTI_STEP_CVT determines the number of required intermediate steps in
6327 case of multi-step conversion (like char->short->int - in that case
6328 MULTI_STEP_CVT will be 1).
6329 - INTERM_TYPES contains the intermediate type required to perform the
6330 widening operation (short in the above example). */
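/* Illustrative sketch only (modes are target dependent): widening chars to
   ints on a target with 128-bit vectors and the required vec_unpack optabs
   would be done in two steps,

     V16QI --(VEC_UNPACK_LO/HI_EXPR)--> 2 x V8HI
           --(VEC_UNPACK_LO/HI_EXPR)--> 4 x V4SI

   in which case MULTI_STEP_CVT is 1 and INTERM_TYPES holds the V8HI vector
   type.  */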
6332 bool
6333 supportable_widening_operation (enum tree_code code, gimple stmt,
6334 tree vectype_out, tree vectype_in,
6335 tree *decl1, tree *decl2,
6336 enum tree_code *code1, enum tree_code *code2,
6337 int *multi_step_cvt,
6338 VEC (tree, heap) **interm_types)
6340 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6341 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6342 struct loop *vect_loop = NULL;
6343 bool ordered_p;
6344 enum machine_mode vec_mode;
6345 enum insn_code icode1, icode2;
6346 optab optab1, optab2;
6347 tree vectype = vectype_in;
6348 tree wide_vectype = vectype_out;
6349 enum tree_code c1, c2;
6350 int i;
6351 tree prev_type, intermediate_type;
6352 enum machine_mode intermediate_mode, prev_mode;
6353 optab optab3, optab4;
6355 *multi_step_cvt = 0;
6356 if (loop_info)
6357 vect_loop = LOOP_VINFO_LOOP (loop_info);
6359 /* The result of a vectorized widening operation usually requires two vectors
6360 (because the widened results do not fit into one vector). The generated
6361 vector results would normally be expected to be generated in the same
6362 order as in the original scalar computation, i.e. if 8 results are
6363 generated in each vector iteration, they are to be organized as follows:
6364 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
6366 However, in the special case that the result of the widening operation is
6367 used in a reduction computation only, the order doesn't matter (because
6368 when vectorizing a reduction we change the order of the computation).
6369 Some targets can take advantage of this and generate more efficient code.
6370 For example, targets like Altivec, that support widen_mult using a sequence
6371 of {mult_even,mult_odd} generate the following vectors:
6372 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
6374 When vectorizing outer-loops, we execute the inner-loop sequentially
6375 (each vectorized inner-loop iteration contributes to VF outer-loop
6376 iterations in parallel). We therefore don't allow changing the order
6377 of the computation in the inner-loop during outer-loop vectorization. */
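   /* Illustrative sketch only (the loop and names are assumed): in

        for (i = 0; i < n; i++)
          sum += (int) a[i] * (int) b[i];   <-- widen-mult feeding a reduction

      the final value of SUM does not depend on the order of the partial
      products, so the {mult_even,mult_odd} scheme described above may be
      used.  */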
6379 if (vect_loop
6380 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6381 && !nested_in_vect_loop_p (vect_loop, stmt))
6382 ordered_p = false;
6383 else
6384 ordered_p = true;
6386 if (!ordered_p
6387 && code == WIDEN_MULT_EXPR
6388 && targetm.vectorize.builtin_mul_widen_even
6389 && targetm.vectorize.builtin_mul_widen_even (vectype)
6390 && targetm.vectorize.builtin_mul_widen_odd
6391 && targetm.vectorize.builtin_mul_widen_odd (vectype))
6393 if (vect_print_dump_info (REPORT_DETAILS))
6394 fprintf (vect_dump, "Unordered widening operation detected.");
6396 *code1 = *code2 = CALL_EXPR;
6397 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
6398 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
6399 return true;
6402 switch (code)
6404 case WIDEN_MULT_EXPR:
6405 c1 = VEC_WIDEN_MULT_LO_EXPR;
6406 c2 = VEC_WIDEN_MULT_HI_EXPR;
6407 break;
6409 case WIDEN_LSHIFT_EXPR:
6410 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6411 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6412 break;
6414 CASE_CONVERT:
6415 c1 = VEC_UNPACK_LO_EXPR;
6416 c2 = VEC_UNPACK_HI_EXPR;
6417 break;
6419 case FLOAT_EXPR:
6420 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6421 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6422 break;
6424 case FIX_TRUNC_EXPR:
6425 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6426 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6427 computing the operation. */
6428 return false;
6430 default:
6431 gcc_unreachable ();
6434 if (BYTES_BIG_ENDIAN)
6436 enum tree_code ctmp = c1;
6437 c1 = c2;
6438 c2 = ctmp;
6441 if (code == FIX_TRUNC_EXPR)
6443 /* The signedness is determined from output operand. */
6444 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6445 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6447 else
6449 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6450 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6453 if (!optab1 || !optab2)
6454 return false;
6456 vec_mode = TYPE_MODE (vectype);
6457 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6458 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6459 return false;
6461 *code1 = c1;
6462 *code2 = c2;
6464 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6465 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6466 return true;
6468 /* Check if it's a multi-step conversion that can be done using intermediate
6469 types. */
6471 prev_type = vectype;
6472 prev_mode = vec_mode;
6474 if (!CONVERT_EXPR_CODE_P (code))
6475 return false;
6477 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6478 intermediate steps in the promotion sequence. We try
6479 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6480 not. */
6481 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6482 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6484 intermediate_mode = insn_data[icode1].operand[0].mode;
6485 intermediate_type
6486 = lang_hooks.types.type_for_mode (intermediate_mode,
6487 TYPE_UNSIGNED (prev_type));
6488 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6489 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6491 if (!optab3 || !optab4
6492 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6493 || insn_data[icode1].operand[0].mode != intermediate_mode
6494 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6495 || insn_data[icode2].operand[0].mode != intermediate_mode
6496 || ((icode1 = optab_handler (optab3, intermediate_mode))
6497 == CODE_FOR_nothing)
6498 || ((icode2 = optab_handler (optab4, intermediate_mode))
6499 == CODE_FOR_nothing))
6500 break;
6502 VEC_quick_push (tree, *interm_types, intermediate_type);
6503 (*multi_step_cvt)++;
6505 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6506 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6507 return true;
6509 prev_type = intermediate_type;
6510 prev_mode = intermediate_mode;
6513 VEC_free (tree, heap, *interm_types);
6514 return false;
6518 /* Function supportable_narrowing_operation
6520 Check whether an operation represented by the code CODE is a
6521 narrowing operation that is supported by the target platform in
6522 vector form (i.e., when operating on arguments of type VECTYPE_IN
6523 and producing a result of type VECTYPE_OUT).
6525 Narrowing operations we currently support are NOP (CONVERT) and
6526 FIX_TRUNC. This function checks if these operations are supported by
6527 the target platform directly via vector tree-codes.
6529 Output:
6530 - CODE1 is the code of a vector operation to be used when
6531 vectorizing the operation, if available.
6532 - MULTI_STEP_CVT determines the number of required intermediate steps in
6533 case of multi-step conversion (like int->short->char - in that case
6534 MULTI_STEP_CVT will be 1).
6535 - INTERM_TYPES contains the intermediate type required to perform the
6536 narrowing operation (short in the above example). */
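/* Illustrative sketch only (modes are target dependent): narrowing ints to
   chars on a target with 128-bit vectors and the required vec_pack_trunc
   optabs would be done in two steps,

     2 x V4SI --(VEC_PACK_TRUNC_EXPR)--> V8HI
     2 x V8HI --(VEC_PACK_TRUNC_EXPR)--> V16QI

   in which case MULTI_STEP_CVT is 1 and INTERM_TYPES holds the V8HI vector
   type.  */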
6538 bool
6539 supportable_narrowing_operation (enum tree_code code,
6540 tree vectype_out, tree vectype_in,
6541 enum tree_code *code1, int *multi_step_cvt,
6542 VEC (tree, heap) **interm_types)
6544 enum machine_mode vec_mode;
6545 enum insn_code icode1;
6546 optab optab1, interm_optab;
6547 tree vectype = vectype_in;
6548 tree narrow_vectype = vectype_out;
6549 enum tree_code c1;
6550 tree intermediate_type;
6551 enum machine_mode intermediate_mode, prev_mode;
6552 int i;
6553 bool uns;
6555 *multi_step_cvt = 0;
6556 switch (code)
6558 CASE_CONVERT:
6559 c1 = VEC_PACK_TRUNC_EXPR;
6560 break;
6562 case FIX_TRUNC_EXPR:
6563 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6564 break;
6566 case FLOAT_EXPR:
6567 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6568 tree code and optabs used for computing the operation. */
6569 return false;
6571 default:
6572 gcc_unreachable ();
6575 if (code == FIX_TRUNC_EXPR)
6576 /* The signedness is determined from output operand. */
6577 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6578 else
6579 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6581 if (!optab1)
6582 return false;
6584 vec_mode = TYPE_MODE (vectype);
6585 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6586 return false;
6588 *code1 = c1;
6590 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6591 return true;
6593 /* Check if it's a multi-step conversion that can be done using intermediate
6594 types. */
6595 prev_mode = vec_mode;
6596 if (code == FIX_TRUNC_EXPR)
6597 uns = TYPE_UNSIGNED (vectype_out);
6598 else
6599 uns = TYPE_UNSIGNED (vectype);
6601 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6602 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6603 costly than signed. */
6604 if (code == FIX_TRUNC_EXPR && uns)
6606 enum insn_code icode2;
6608 intermediate_type
6609 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6610 interm_optab
6611 = optab_for_tree_code (c1, intermediate_type, optab_default);
6612 if (interm_optab != NULL
6613 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6614 && insn_data[icode1].operand[0].mode
6615 == insn_data[icode2].operand[0].mode)
6617 uns = false;
6618 optab1 = interm_optab;
6619 icode1 = icode2;
6623 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6624 intermediate steps in the narrowing sequence. We try
6625 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6626 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6627 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6629 intermediate_mode = insn_data[icode1].operand[0].mode;
6630 intermediate_type
6631 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6632 interm_optab
6633 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6634 optab_default);
6635 if (!interm_optab
6636 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6637 || insn_data[icode1].operand[0].mode != intermediate_mode
6638 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6639 == CODE_FOR_nothing))
6640 break;
6642 VEC_quick_push (tree, *interm_types, intermediate_type);
6643 (*multi_step_cvt)++;
6645 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6646 return true;
6648 prev_mode = intermediate_mode;
6649 optab1 = interm_optab;
6652 VEC_free (tree, heap, *interm_types);
6653 return false;