gcc/tree-vect-stmts.c (official-gcc.git, blob c1ba3c7eedb2e6bb3dbff34535bb8c4715f256d9)
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "gimple.h"
33 #include "gimple-ssa.h"
34 #include "tree-cfg.h"
35 #include "tree-phinodes.h"
36 #include "ssa-iterators.h"
37 #include "tree-ssanames.h"
38 #include "tree-ssa-loop-manip.h"
39 #include "cfgloop.h"
40 #include "expr.h"
41 #include "recog.h" /* FIXME: for insn_data */
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "tree-vectorizer.h"
45 #include "dumpfile.h"
47 /* For lang_hooks.types.type_for_mode. */
48 #include "langhooks.h"
50 /* Return the vectorized type for the given statement. */
52 tree
53 stmt_vectype (struct _stmt_vec_info *stmt_info)
55 return STMT_VINFO_VECTYPE (stmt_info);
58 /* Return TRUE iff the given statement is in an inner loop relative to
59 the loop being vectorized. */
60 bool
61 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
63 gimple stmt = STMT_VINFO_STMT (stmt_info);
64 basic_block bb = gimple_bb (stmt);
65 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
66 struct loop* loop;
68 if (!loop_vinfo)
69 return false;
71 loop = LOOP_VINFO_LOOP (loop_vinfo);
73 return (bb->loop_father == loop->inner);
76 /* Record the cost of a statement, either by directly informing the
77 target model or by saving it in a vector for later processing.
78 Return a preliminary estimate of the statement's cost. */
80 unsigned
81 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
82 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
83 int misalign, enum vect_cost_model_location where)
85 if (body_cost_vec)
87 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
88 add_stmt_info_to_vec (body_cost_vec, count, kind,
89 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
90 misalign);
91 return (unsigned)
92 (builtin_vectorization_cost (kind, vectype, misalign) * count);
95 else
97 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
98 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
99 void *target_cost_data;
101 if (loop_vinfo)
102 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
103 else
104 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
106 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
107 misalign, where);
111 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113 static tree
114 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
117 "vect_array");
120 /* ARRAY is an array of vectors created by create_vector_array.
121 Return an SSA_NAME for the vector in index N. The reference
122 is part of the vectorization of STMT and the vector is associated
123 with scalar destination SCALAR_DEST. */
125 static tree
126 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
127 tree array, unsigned HOST_WIDE_INT n)
129 tree vect_type, vect, vect_name, array_ref;
130 gimple new_stmt;
132 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
133 vect_type = TREE_TYPE (TREE_TYPE (array));
134 vect = vect_create_destination_var (scalar_dest, vect_type);
135 array_ref = build4 (ARRAY_REF, vect_type, array,
136 build_int_cst (size_type_node, n),
137 NULL_TREE, NULL_TREE);
139 new_stmt = gimple_build_assign (vect, array_ref);
140 vect_name = make_ssa_name (vect, new_stmt);
141 gimple_assign_set_lhs (new_stmt, vect_name);
142 vect_finish_stmt_generation (stmt, new_stmt, gsi);
144 return vect_name;
147 /* ARRAY is an array of vectors created by create_vector_array.
148 Emit code to store SSA_NAME VECT in index N of the array.
149 The store is part of the vectorization of STMT. */
151 static void
152 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
153 tree array, unsigned HOST_WIDE_INT n)
155 tree array_ref;
156 gimple new_stmt;
158 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
159 build_int_cst (size_type_node, n),
160 NULL_TREE, NULL_TREE);
162 new_stmt = gimple_build_assign (array_ref, vect);
163 vect_finish_stmt_generation (stmt, new_stmt, gsi);
166 /* PTR is a pointer to an array of type TYPE. Return a representation
167 of *PTR. The memory reference replaces those in FIRST_DR
168 (and its group). */
170 static tree
171 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
173 tree mem_ref, alias_ptr_type;
175 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
176 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
177 /* Arrays have the same alignment as their type. */
178 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
179 return mem_ref;
182 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
184 /* Function vect_mark_relevant.
186 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
188 static void
189 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
190 enum vect_relevant relevant, bool live_p,
191 bool used_in_pattern)
193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
194 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
195 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
196 gimple pattern_stmt;
198 if (dump_enabled_p ())
199 dump_printf_loc (MSG_NOTE, vect_location,
200 "mark relevant %d, live %d.\n", relevant, live_p);
202 /* If this stmt is an original stmt in a pattern, we might need to mark its
203 related pattern stmt instead of the original stmt. However, such stmts
204 may have their own uses that are not in any pattern; in such cases the
205 stmt itself should be marked. */
206 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
208 bool found = false;
209 if (!used_in_pattern)
211 imm_use_iterator imm_iter;
212 use_operand_p use_p;
213 gimple use_stmt;
214 tree lhs;
215 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
216 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
218 if (is_gimple_assign (stmt))
219 lhs = gimple_assign_lhs (stmt);
220 else
221 lhs = gimple_call_lhs (stmt);
223 /* This is an out-of-pattern use; if the LHS has other uses that are
224 pattern uses, we should mark the stmt itself, and not the pattern
225 stmt. */
226 if (TREE_CODE (lhs) == SSA_NAME)
227 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
229 if (is_gimple_debug (USE_STMT (use_p)))
230 continue;
231 use_stmt = USE_STMT (use_p);
233 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
234 continue;
236 if (vinfo_for_stmt (use_stmt)
237 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
239 found = true;
240 break;
245 if (!found)
247 /* This is the last stmt in a sequence that was detected as a
248 pattern that can potentially be vectorized. Don't mark the stmt
249 as relevant/live because it's not going to be vectorized.
250 Instead mark the pattern-stmt that replaces it. */
252 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
254 if (dump_enabled_p ())
255 dump_printf_loc (MSG_NOTE, vect_location,
256 "last stmt in pattern. don't mark"
257 " relevant/live.\n");
258 stmt_info = vinfo_for_stmt (pattern_stmt);
259 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
260 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
261 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
262 stmt = pattern_stmt;
266 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
267 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
268 STMT_VINFO_RELEVANT (stmt_info) = relevant;
270 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
271 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE, vect_location,
275 "already marked relevant/live.\n");
276 return;
279 worklist->safe_push (stmt);
283 /* Function vect_stmt_relevant_p.
285 Return true if STMT in loop that is represented by LOOP_VINFO is
286 "relevant for vectorization".
288 A stmt is considered "relevant for vectorization" if:
289 - it has uses outside the loop.
290 - it has vdefs (it alters memory).
291 - it is a control stmt in the loop (except for the exit condition).
293 CHECKME: what other side effects would the vectorizer allow? */
295 static bool
296 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
297 enum vect_relevant *relevant, bool *live_p)
299 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
300 ssa_op_iter op_iter;
301 imm_use_iterator imm_iter;
302 use_operand_p use_p;
303 def_operand_p def_p;
305 *relevant = vect_unused_in_scope;
306 *live_p = false;
308 /* cond stmt other than loop exit cond. */
309 if (is_ctrl_stmt (stmt)
310 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
311 != loop_exit_ctrl_vec_info_type)
312 *relevant = vect_used_in_scope;
314 /* changing memory. */
315 if (gimple_code (stmt) != GIMPLE_PHI)
316 if (gimple_vdef (stmt))
318 if (dump_enabled_p ())
319 dump_printf_loc (MSG_NOTE, vect_location,
320 "vec_stmt_relevant_p: stmt has vdefs.\n");
321 *relevant = vect_used_in_scope;
324 /* uses outside the loop. */
325 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
327 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
329 basic_block bb = gimple_bb (USE_STMT (use_p));
330 if (!flow_bb_inside_loop_p (loop, bb))
332 if (dump_enabled_p ())
333 dump_printf_loc (MSG_NOTE, vect_location,
334 "vec_stmt_relevant_p: used out of loop.\n");
336 if (is_gimple_debug (USE_STMT (use_p)))
337 continue;
339 /* We expect all such uses to be in the loop exit phis
340 (because of loop-closed SSA form). */
341 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
342 gcc_assert (bb == single_exit (loop)->dest);
344 *live_p = true;
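/* STMT is relevant if one of its defs is used outside the loop (making
it live) or if one of the checks above marked it vect_used_in_scope. */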
349 return (*live_p || *relevant);
353 /* Function exist_non_indexing_operands_for_use_p
355 USE is one of the uses attached to STMT. Check if USE is
356 used in STMT for anything other than indexing an array. */
358 static bool
359 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
361 tree operand;
362 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
364 /* USE corresponds to some operand in STMT. If there is no data
365 reference in STMT, then any operand that corresponds to USE
366 is not indexing an array. */
367 if (!STMT_VINFO_DATA_REF (stmt_info))
368 return true;
370 /* STMT has a data_ref. FORNOW this means that it is of one of
371 the following forms:
372 -1- ARRAY_REF = var
373 -2- var = ARRAY_REF
374 (This should have been verified in analyze_data_refs).
376 'var' in the second case corresponds to a def, not a use,
377 so USE cannot correspond to any operands that are not used
378 for array indexing.
380 Therefore, all we need to check is if STMT falls into the
381 first case, and whether var corresponds to USE. */
383 if (!gimple_assign_copy_p (stmt))
384 return false;
385 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
386 return false;
387 operand = gimple_assign_rhs1 (stmt);
388 if (TREE_CODE (operand) != SSA_NAME)
389 return false;
391 if (operand == use)
392 return true;
394 return false;
399 Function process_use.
401 Inputs:
402 - a USE in STMT in a loop represented by LOOP_VINFO
403 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
404 that defined USE. This is done by calling mark_relevant and passing it
405 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
406 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
407 be performed.
409 Outputs:
410 Generally, LIVE_P and RELEVANT are used to define the liveness and
411 relevance info of the DEF_STMT of this USE:
412 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
413 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
414 Exceptions:
415 - case 1: If USE is used only for address computations (e.g. array indexing),
416 which does not need to be directly vectorized, then the liveness/relevance
417 of the respective DEF_STMT is left unchanged.
418 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
419 skip DEF_STMT because it has already been processed.
420 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
421 be modified accordingly.
423 Return true if everything is as expected. Return false otherwise. */
425 static bool
426 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
427 enum vect_relevant relevant, vec<gimple> *worklist,
428 bool force)
430 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
431 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
432 stmt_vec_info dstmt_vinfo;
433 basic_block bb, def_bb;
434 tree def;
435 gimple def_stmt;
436 enum vect_def_type dt;
438 /* case 1: we are only interested in uses that need to be vectorized. Uses
439 that are used for address computation are not considered relevant. */
440 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
441 return true;
443 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
445 if (dump_enabled_p ())
446 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
447 "not vectorized: unsupported use in stmt.\n");
448 return false;
451 if (!def_stmt || gimple_nop_p (def_stmt))
452 return true;
454 def_bb = gimple_bb (def_stmt);
455 if (!flow_bb_inside_loop_p (loop, def_bb))
457 if (dump_enabled_p ())
458 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
459 return true;
462 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
463 DEF_STMT must have already been processed, because this should be the
464 only way that STMT, which is a reduction-phi, was put in the worklist,
465 as there should be no other uses for DEF_STMT in the loop. So we just
466 check that everything is as expected, and we are done. */
467 dstmt_vinfo = vinfo_for_stmt (def_stmt);
468 bb = gimple_bb (stmt);
469 if (gimple_code (stmt) == GIMPLE_PHI
470 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
471 && gimple_code (def_stmt) != GIMPLE_PHI
472 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
473 && bb->loop_father == def_bb->loop_father)
475 if (dump_enabled_p ())
476 dump_printf_loc (MSG_NOTE, vect_location,
477 "reduc-stmt defining reduc-phi in the same nest.\n");
478 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
479 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
480 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
481 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
482 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
483 return true;
486 /* case 3a: outer-loop stmt defining an inner-loop stmt:
487 outer-loop-header-bb:
488 d = def_stmt
489 inner-loop:
490 stmt # use (d)
491 outer-loop-tail-bb:
492 ... */
493 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
495 if (dump_enabled_p ())
496 dump_printf_loc (MSG_NOTE, vect_location,
497 "outer-loop def-stmt defining inner-loop stmt.\n");
499 switch (relevant)
501 case vect_unused_in_scope:
502 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
503 vect_used_in_scope : vect_unused_in_scope;
504 break;
506 case vect_used_in_outer_by_reduction:
507 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
508 relevant = vect_used_by_reduction;
509 break;
511 case vect_used_in_outer:
512 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
513 relevant = vect_used_in_scope;
514 break;
516 case vect_used_in_scope:
517 break;
519 default:
520 gcc_unreachable ();
524 /* case 3b: inner-loop stmt defining an outer-loop stmt:
525 outer-loop-header-bb:
527 inner-loop:
528 d = def_stmt
529 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
530 stmt # use (d) */
531 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
533 if (dump_enabled_p ())
534 dump_printf_loc (MSG_NOTE, vect_location,
535 "inner-loop def-stmt defining outer-loop stmt.\n");
537 switch (relevant)
539 case vect_unused_in_scope:
540 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
541 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
542 vect_used_in_outer_by_reduction : vect_unused_in_scope;
543 break;
545 case vect_used_by_reduction:
546 relevant = vect_used_in_outer_by_reduction;
547 break;
549 case vect_used_in_scope:
550 relevant = vect_used_in_outer;
551 break;
553 default:
554 gcc_unreachable ();
558 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
559 is_pattern_stmt_p (stmt_vinfo));
560 return true;
564 /* Function vect_mark_stmts_to_be_vectorized.
566 Not all stmts in the loop need to be vectorized. For example:
568 for i...
569 for j...
570 1. T0 = i + j
571 2. T1 = a[T0]
573 3. j = j + 1
575 Stmts 1 and 3 do not need to be vectorized, because loop control and
576 addressing of vectorized data-refs are handled differently.
578 This pass detects such stmts. */
580 bool
581 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
583 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
584 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
585 unsigned int nbbs = loop->num_nodes;
586 gimple_stmt_iterator si;
587 gimple stmt;
588 unsigned int i;
589 stmt_vec_info stmt_vinfo;
590 basic_block bb;
591 gimple phi;
592 bool live_p;
593 enum vect_relevant relevant, tmp_relevant;
594 enum vect_def_type def_type;
596 if (dump_enabled_p ())
597 dump_printf_loc (MSG_NOTE, vect_location,
598 "=== vect_mark_stmts_to_be_vectorized ===\n");
600 stack_vec<gimple, 64> worklist;
602 /* 1. Init worklist. */
603 for (i = 0; i < nbbs; i++)
605 bb = bbs[i];
606 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
608 phi = gsi_stmt (si);
609 if (dump_enabled_p ())
611 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
612 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
613 dump_printf (MSG_NOTE, "\n");
616 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
617 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
619 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
621 stmt = gsi_stmt (si);
622 if (dump_enabled_p ())
624 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
625 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
626 dump_printf (MSG_NOTE, "\n");
629 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
630 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
634 /* 2. Process_worklist */
635 while (worklist.length () > 0)
637 use_operand_p use_p;
638 ssa_op_iter iter;
640 stmt = worklist.pop ();
641 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
644 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
645 dump_printf (MSG_NOTE, "\n");
648 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
649 (DEF_STMT) as relevant/irrelevant and live/dead according to the
650 liveness and relevance properties of STMT. */
651 stmt_vinfo = vinfo_for_stmt (stmt);
652 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
653 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
655 /* Generally, the liveness and relevance properties of STMT are
656 propagated as is to the DEF_STMTs of its USEs:
657 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
658 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
660 One exception is when STMT has been identified as defining a reduction
661 variable; in this case we set the liveness/relevance as follows:
662 live_p = false
663 relevant = vect_used_by_reduction
664 This is because we distinguish between two kinds of relevant stmts -
665 those that are used by a reduction computation, and those that are
666 (also) used by a regular computation. This allows us later on to
667 identify stmts that are used solely by a reduction, and therefore the
668 order of the results that they produce does not have to be kept. */
670 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
671 tmp_relevant = relevant;
672 switch (def_type)
674 case vect_reduction_def:
675 switch (tmp_relevant)
677 case vect_unused_in_scope:
678 relevant = vect_used_by_reduction;
679 break;
681 case vect_used_by_reduction:
682 if (gimple_code (stmt) == GIMPLE_PHI)
683 break;
684 /* fall through */
686 default:
687 if (dump_enabled_p ())
688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
689 "unsupported use of reduction.\n");
690 return false;
693 live_p = false;
694 break;
696 case vect_nested_cycle:
697 if (tmp_relevant != vect_unused_in_scope
698 && tmp_relevant != vect_used_in_outer_by_reduction
699 && tmp_relevant != vect_used_in_outer)
701 if (dump_enabled_p ())
702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
703 "unsupported use of nested cycle.\n");
705 return false;
708 live_p = false;
709 break;
711 case vect_double_reduction_def:
712 if (tmp_relevant != vect_unused_in_scope
713 && tmp_relevant != vect_used_by_reduction)
715 if (dump_enabled_p ())
716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
717 "unsupported use of double reduction.\n");
719 return false;
722 live_p = false;
723 break;
725 default:
726 break;
729 if (is_pattern_stmt_p (stmt_vinfo))
731 /* Pattern statements are not inserted into the code, so
732 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
733 have to scan the RHS or function arguments instead. */
734 if (is_gimple_assign (stmt))
736 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
737 tree op = gimple_assign_rhs1 (stmt);
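/* Operand 0 of the assignment is the lhs, so the rhs operands start at
index 1. For a COND_EXPR whose condition is a comparison, handle the two
comparison operands here and start the loop below at index 2. */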
739 i = 1;
740 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
742 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
743 live_p, relevant, &worklist, false)
744 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
745 live_p, relevant, &worklist, false))
746 return false;
747 i = 2;
749 for (; i < gimple_num_ops (stmt); i++)
751 op = gimple_op (stmt, i);
752 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
753 &worklist, false))
754 return false;
757 else if (is_gimple_call (stmt))
759 for (i = 0; i < gimple_call_num_args (stmt); i++)
761 tree arg = gimple_call_arg (stmt, i);
762 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
763 &worklist, false))
764 return false;
768 else
769 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
771 tree op = USE_FROM_PTR (use_p);
772 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
773 &worklist, false))
774 return false;
777 if (STMT_VINFO_GATHER_P (stmt_vinfo))
779 tree off;
780 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
781 gcc_assert (decl);
782 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
783 &worklist, true))
784 return false;
786 } /* while worklist */
788 return true;
792 /* Function vect_model_simple_cost.
794 Models cost for simple operations, i.e. those that only emit ncopies of a
795 single op. Right now, this does not account for multiple insns that could
796 be generated for the single vector op. We will handle that shortly. */
798 void
799 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
800 enum vect_def_type *dt,
801 stmt_vector_for_cost *prologue_cost_vec,
802 stmt_vector_for_cost *body_cost_vec)
804 int i;
805 int inside_cost = 0, prologue_cost = 0;
807 /* The SLP costs were already calculated during SLP tree build. */
808 if (PURE_SLP_STMT (stmt_info))
809 return;
811 /* FORNOW: Assuming maximum 2 args per stmts. */
812 for (i = 0; i < 2; i++)
813 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
814 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
815 stmt_info, 0, vect_prologue);
817 /* Pass the inside-of-loop statements to the target-specific cost model. */
818 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
819 stmt_info, 0, vect_body);
821 if (dump_enabled_p ())
822 dump_printf_loc (MSG_NOTE, vect_location,
823 "vect_model_simple_cost: inside_cost = %d, "
824 "prologue_cost = %d .\n", inside_cost, prologue_cost);
828 /* Model cost for type demotion and promotion operations. PWR is normally
829 zero for single-step promotions and demotions. It will be one if
830 two-step promotion/demotion is required, and so on. Each additional
831 step doubles the number of instructions required. */
833 static void
834 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
835 enum vect_def_type *dt, int pwr)
837 int i, tmp;
838 int inside_cost = 0, prologue_cost = 0;
839 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
840 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
841 void *target_cost_data;
843 /* The SLP costs were already calculated during SLP tree build. */
844 if (PURE_SLP_STMT (stmt_info))
845 return;
847 if (loop_vinfo)
848 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
849 else
850 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
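/* Each promotion/demotion step emits vect_pow2 (tmp) vector stmts; a
promotion at step I needs twice as many stmts as a demotion at the same
step, hence the (i + 1). */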
852 for (i = 0; i < pwr + 1; i++)
854 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
855 (i + 1) : i;
856 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
857 vec_promote_demote, stmt_info, 0,
858 vect_body);
861 /* FORNOW: Assuming maximum 2 args per stmts. */
862 for (i = 0; i < 2; i++)
863 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
864 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
865 stmt_info, 0, vect_prologue);
867 if (dump_enabled_p ())
868 dump_printf_loc (MSG_NOTE, vect_location,
869 "vect_model_promotion_demotion_cost: inside_cost = %d, "
870 "prologue_cost = %d .\n", inside_cost, prologue_cost);
873 /* Function vect_cost_group_size
875 For grouped load or store, return the group_size only if it is the first
876 load or store of a group, else return 1. This ensures that group size is
877 only returned once per group. */
879 static int
880 vect_cost_group_size (stmt_vec_info stmt_info)
882 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
884 if (first_stmt == STMT_VINFO_STMT (stmt_info))
885 return GROUP_SIZE (stmt_info);
887 return 1;
891 /* Function vect_model_store_cost
893 Models cost for stores. In the case of grouped accesses, one access
894 has the overhead of the grouped access attributed to it. */
896 void
897 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
898 bool store_lanes_p, enum vect_def_type dt,
899 slp_tree slp_node,
900 stmt_vector_for_cost *prologue_cost_vec,
901 stmt_vector_for_cost *body_cost_vec)
903 int group_size;
904 unsigned int inside_cost = 0, prologue_cost = 0;
905 struct data_reference *first_dr;
906 gimple first_stmt;
908 /* The SLP costs were already calculated during SLP tree build. */
909 if (PURE_SLP_STMT (stmt_info))
910 return;
912 if (dt == vect_constant_def || dt == vect_external_def)
913 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
914 stmt_info, 0, vect_prologue);
916 /* Grouped access? */
917 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
919 if (slp_node)
921 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
922 group_size = 1;
924 else
926 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
927 group_size = vect_cost_group_size (stmt_info);
930 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
932 /* Not a grouped access. */
933 else
935 group_size = 1;
936 first_dr = STMT_VINFO_DATA_REF (stmt_info);
939 /* We assume that the cost of a single store-lanes instruction is
940 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
941 access is instead being provided by a permute-and-store operation,
942 include the cost of the permutes. */
943 if (!store_lanes_p && group_size > 1)
945 /* Uses a high and low interleave operation for each needed permute. */
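/* Interleaving GROUP_SIZE vectors takes exact_log2 (GROUP_SIZE) steps,
each of which uses GROUP_SIZE interleave stmts, repeated for every vector
copy. */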
947 int nstmts = ncopies * exact_log2 (group_size) * group_size;
948 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
949 stmt_info, 0, vect_body);
951 if (dump_enabled_p ())
952 dump_printf_loc (MSG_NOTE, vect_location,
953 "vect_model_store_cost: strided group_size = %d .\n",
954 group_size);
957 /* Costs of the stores. */
958 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
960 if (dump_enabled_p ())
961 dump_printf_loc (MSG_NOTE, vect_location,
962 "vect_model_store_cost: inside_cost = %d, "
963 "prologue_cost = %d .\n", inside_cost, prologue_cost);
967 /* Calculate cost of DR's memory access. */
968 void
969 vect_get_store_cost (struct data_reference *dr, int ncopies,
970 unsigned int *inside_cost,
971 stmt_vector_for_cost *body_cost_vec)
973 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
974 gimple stmt = DR_STMT (dr);
975 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
977 switch (alignment_support_scheme)
979 case dr_aligned:
981 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
982 vector_store, stmt_info, 0,
983 vect_body);
985 if (dump_enabled_p ())
986 dump_printf_loc (MSG_NOTE, vect_location,
987 "vect_model_store_cost: aligned.\n");
988 break;
991 case dr_unaligned_supported:
993 /* Here, we assign an additional cost for the unaligned store. */
994 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
995 unaligned_store, stmt_info,
996 DR_MISALIGNMENT (dr), vect_body);
997 if (dump_enabled_p ())
998 dump_printf_loc (MSG_NOTE, vect_location,
999 "vect_model_store_cost: unaligned supported by "
1000 "hardware.\n");
1001 break;
1004 case dr_unaligned_unsupported:
1006 *inside_cost = VECT_MAX_COST;
1008 if (dump_enabled_p ())
1009 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1010 "vect_model_store_cost: unsupported access.\n");
1011 break;
1014 default:
1015 gcc_unreachable ();
1020 /* Function vect_model_load_cost
1022 Models cost for loads. In the case of grouped accesses, the last access
1023 has the overhead of the grouped access attributed to it. Since unaligned
1024 accesses are supported for loads, we also account for the costs of the
1025 access scheme chosen. */
1027 void
1028 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1029 bool load_lanes_p, slp_tree slp_node,
1030 stmt_vector_for_cost *prologue_cost_vec,
1031 stmt_vector_for_cost *body_cost_vec)
1033 int group_size;
1034 gimple first_stmt;
1035 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1036 unsigned int inside_cost = 0, prologue_cost = 0;
1038 /* The SLP costs were already calculated during SLP tree build. */
1039 if (PURE_SLP_STMT (stmt_info))
1040 return;
1042 /* Grouped accesses? */
1043 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1044 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1046 group_size = vect_cost_group_size (stmt_info);
1047 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1049 /* Not a grouped access. */
1050 else
1052 group_size = 1;
1053 first_dr = dr;
1056 /* We assume that the cost of a single load-lanes instruction is
1057 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1058 access is instead being provided by a load-and-permute operation,
1059 include the cost of the permutes. */
1060 if (!load_lanes_p && group_size > 1)
1062 /* Uses even and odd extract operations for each needed permute. */
1063 int nstmts = ncopies * exact_log2 (group_size) * group_size;
1064 inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1065 stmt_info, 0, vect_body);
1067 if (dump_enabled_p ())
1068 dump_printf_loc (MSG_NOTE, vect_location,
1069 "vect_model_load_cost: strided group_size = %d .\n",
1070 group_size);
1073 /* The loads themselves. */
1074 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1076 /* N scalar loads plus gathering them into a vector. */
1077 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1078 inside_cost += record_stmt_cost (body_cost_vec,
1079 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1080 scalar_load, stmt_info, 0, vect_body);
1081 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1082 stmt_info, 0, vect_body);
1084 else
1085 vect_get_load_cost (first_dr, ncopies,
1086 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1087 || group_size > 1 || slp_node),
1088 &inside_cost, &prologue_cost,
1089 prologue_cost_vec, body_cost_vec, true);
1091 if (dump_enabled_p ())
1092 dump_printf_loc (MSG_NOTE, vect_location,
1093 "vect_model_load_cost: inside_cost = %d, "
1094 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1098 /* Calculate cost of DR's memory access. */
1099 void
1100 vect_get_load_cost (struct data_reference *dr, int ncopies,
1101 bool add_realign_cost, unsigned int *inside_cost,
1102 unsigned int *prologue_cost,
1103 stmt_vector_for_cost *prologue_cost_vec,
1104 stmt_vector_for_cost *body_cost_vec,
1105 bool record_prologue_costs)
1107 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1108 gimple stmt = DR_STMT (dr);
1109 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1111 switch (alignment_support_scheme)
1113 case dr_aligned:
1115 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1116 stmt_info, 0, vect_body);
1118 if (dump_enabled_p ())
1119 dump_printf_loc (MSG_NOTE, vect_location,
1120 "vect_model_load_cost: aligned.\n");
1122 break;
1124 case dr_unaligned_supported:
1126 /* Here, we assign an additional cost for the unaligned load. */
1127 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1128 unaligned_load, stmt_info,
1129 DR_MISALIGNMENT (dr), vect_body);
1131 if (dump_enabled_p ())
1132 dump_printf_loc (MSG_NOTE, vect_location,
1133 "vect_model_load_cost: unaligned supported by "
1134 "hardware.\n");
1136 break;
1138 case dr_explicit_realign:
1140 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1141 vector_load, stmt_info, 0, vect_body);
1142 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1143 vec_perm, stmt_info, 0, vect_body);
1145 /* FIXME: If the misalignment remains fixed across the iterations of
1146 the containing loop, the following cost should be added to the
1147 prologue costs. */
1148 if (targetm.vectorize.builtin_mask_for_load)
1149 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1150 stmt_info, 0, vect_body);
1152 if (dump_enabled_p ())
1153 dump_printf_loc (MSG_NOTE, vect_location,
1154 "vect_model_load_cost: explicit realign\n");
1156 break;
1158 case dr_explicit_realign_optimized:
1160 if (dump_enabled_p ())
1161 dump_printf_loc (MSG_NOTE, vect_location,
1162 "vect_model_load_cost: unaligned software "
1163 "pipelined.\n");
1165 /* An unaligned software pipeline has a load of an address, an initial
1166 load, and possibly a mask operation to "prime" the loop. However,
1167 if this is an access in a group of loads, which provide grouped
1168 access, then the above cost should only be considered for one
1169 access in the group. Inside the loop, there is a load op
1170 and a realignment op. */
1172 if (add_realign_cost && record_prologue_costs)
1174 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1175 vector_stmt, stmt_info,
1176 0, vect_prologue);
1177 if (targetm.vectorize.builtin_mask_for_load)
1178 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1179 vector_stmt, stmt_info,
1180 0, vect_prologue);
1183 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1184 stmt_info, 0, vect_body);
1185 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1186 stmt_info, 0, vect_body);
1188 if (dump_enabled_p ())
1189 dump_printf_loc (MSG_NOTE, vect_location,
1190 "vect_model_load_cost: explicit realign optimized"
1191 "\n");
1193 break;
1196 case dr_unaligned_unsupported:
1198 *inside_cost = VECT_MAX_COST;
1200 if (dump_enabled_p ())
1201 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1202 "vect_model_load_cost: unsupported access.\n");
1203 break;
1206 default:
1207 gcc_unreachable ();
1211 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1212 the loop preheader for the vectorized stmt STMT. */
1214 static void
1215 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1217 if (gsi)
1218 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1219 else
1221 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1222 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1224 if (loop_vinfo)
1226 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1227 basic_block new_bb;
1228 edge pe;
1230 if (nested_in_vect_loop_p (loop, stmt))
1231 loop = loop->inner;
1233 pe = loop_preheader_edge (loop);
1234 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1235 gcc_assert (!new_bb);
1237 else
1239 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1240 basic_block bb;
1241 gimple_stmt_iterator gsi_bb_start;
1243 gcc_assert (bb_vinfo);
1244 bb = BB_VINFO_BB (bb_vinfo);
1245 gsi_bb_start = gsi_after_labels (bb);
1246 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1250 if (dump_enabled_p ())
1252 dump_printf_loc (MSG_NOTE, vect_location,
1253 "created new init_stmt: ");
1254 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1255 dump_printf (MSG_NOTE, "\n");
1259 /* Function vect_init_vector.
1261 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1262 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1263 a vector type, a vector with all elements equal to VAL is created first.
1264 Place the initialization at BSI if it is not NULL. Otherwise, place the
1265 initialization at the loop preheader.
1266 Return the DEF of INIT_STMT.
1267 It will be used in the vectorization of STMT. */
1269 tree
1270 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1272 tree new_var;
1273 gimple init_stmt;
1274 tree vec_oprnd;
1275 tree new_temp;
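/* If VAL is a scalar and TYPE a vector type, convert VAL to the vector
element type if necessary and then splat it into a vector. */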
1277 if (TREE_CODE (type) == VECTOR_TYPE
1278 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1280 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1282 if (CONSTANT_CLASS_P (val))
1283 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1284 else
1286 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1287 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1288 new_temp, val,
1289 NULL_TREE);
1290 vect_init_vector_1 (stmt, init_stmt, gsi);
1291 val = new_temp;
1294 val = build_vector_from_val (type, val);
1297 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1298 init_stmt = gimple_build_assign (new_var, val);
1299 new_temp = make_ssa_name (new_var, init_stmt);
1300 gimple_assign_set_lhs (init_stmt, new_temp);
1301 vect_init_vector_1 (stmt, init_stmt, gsi);
1302 vec_oprnd = gimple_assign_lhs (init_stmt);
1303 return vec_oprnd;
1307 /* Function vect_get_vec_def_for_operand.
1309 OP is an operand in STMT. This function returns a (vector) def that will be
1310 used in the vectorized stmt for STMT.
1312 In the case that OP is an SSA_NAME which is defined in the loop, then
1313 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1315 In case OP is an invariant or constant, a new stmt that creates a vector def
1316 needs to be introduced. */
1318 tree
1319 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1321 tree vec_oprnd;
1322 gimple vec_stmt;
1323 gimple def_stmt;
1324 stmt_vec_info def_stmt_info = NULL;
1325 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1326 unsigned int nunits;
1327 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1328 tree def;
1329 enum vect_def_type dt;
1330 bool is_simple_use;
1331 tree vector_type;
1333 if (dump_enabled_p ())
1335 dump_printf_loc (MSG_NOTE, vect_location,
1336 "vect_get_vec_def_for_operand: ");
1337 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1338 dump_printf (MSG_NOTE, "\n");
1341 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1342 &def_stmt, &def, &dt);
1343 gcc_assert (is_simple_use);
1344 if (dump_enabled_p ())
1346 int loc_printed = 0;
1347 if (def)
1349 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1350 loc_printed = 1;
1351 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1352 dump_printf (MSG_NOTE, "\n");
1354 if (def_stmt)
1356 if (loc_printed)
1357 dump_printf (MSG_NOTE, " def_stmt = ");
1358 else
1359 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1360 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1361 dump_printf (MSG_NOTE, "\n");
1365 switch (dt)
1367 /* Case 1: operand is a constant. */
1368 case vect_constant_def:
1370 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1371 gcc_assert (vector_type);
1372 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1374 if (scalar_def)
1375 *scalar_def = op;
1377 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1378 if (dump_enabled_p ())
1379 dump_printf_loc (MSG_NOTE, vect_location,
1380 "Create vector_cst. nunits = %d\n", nunits);
1382 return vect_init_vector (stmt, op, vector_type, NULL);
1385 /* Case 2: operand is defined outside the loop - loop invariant. */
1386 case vect_external_def:
1388 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1389 gcc_assert (vector_type);
1391 if (scalar_def)
1392 *scalar_def = def;
1394 /* Create 'vec_inv = {inv,inv,..,inv}' */
1395 if (dump_enabled_p ())
1396 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1398 return vect_init_vector (stmt, def, vector_type, NULL);
1401 /* Case 3: operand is defined inside the loop. */
1402 case vect_internal_def:
1404 if (scalar_def)
1405 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1407 /* Get the def from the vectorized stmt. */
1408 def_stmt_info = vinfo_for_stmt (def_stmt);
1410 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1411 /* Get vectorized pattern statement. */
1412 if (!vec_stmt
1413 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1414 && !STMT_VINFO_RELEVANT (def_stmt_info))
1415 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1416 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1417 gcc_assert (vec_stmt);
1418 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1419 vec_oprnd = PHI_RESULT (vec_stmt);
1420 else if (is_gimple_call (vec_stmt))
1421 vec_oprnd = gimple_call_lhs (vec_stmt);
1422 else
1423 vec_oprnd = gimple_assign_lhs (vec_stmt);
1424 return vec_oprnd;
1427 /* Case 4: operand is defined by a loop header phi - reduction */
1428 case vect_reduction_def:
1429 case vect_double_reduction_def:
1430 case vect_nested_cycle:
1432 struct loop *loop;
1434 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1435 loop = (gimple_bb (def_stmt))->loop_father;
1437 /* Get the def before the loop */
1438 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1439 return get_initial_def_for_reduction (stmt, op, scalar_def);
1442 /* Case 5: operand is defined by loop-header phi - induction. */
1443 case vect_induction_def:
1445 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1447 /* Get the def from the vectorized stmt. */
1448 def_stmt_info = vinfo_for_stmt (def_stmt);
1449 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1450 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1451 vec_oprnd = PHI_RESULT (vec_stmt);
1452 else
1453 vec_oprnd = gimple_get_lhs (vec_stmt);
1454 return vec_oprnd;
1457 default:
1458 gcc_unreachable ();
1463 /* Function vect_get_vec_def_for_stmt_copy
1465 Return a vector-def for an operand. This function is used when the
1466 vectorized stmt to be created (by the caller to this function) is a "copy"
1467 created in case the vectorized result cannot fit in one vector, and several
1468 copies of the vector-stmt are required. In this case the vector-def is
1469 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1470 of the stmt that defines VEC_OPRND.
1471 DT is the type of the vector def VEC_OPRND.
1473 Context:
1474 In case the vectorization factor (VF) is bigger than the number
1475 of elements that can fit in a vectype (nunits), we have to generate
1476 more than one vector stmt to vectorize the scalar stmt. This situation
1477 arises when there are multiple data-types operated upon in the loop; the
1478 smallest data-type determines the VF, and as a result, when vectorizing
1479 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1480 vector stmt (each computing a vector of 'nunits' results, and together
1481 computing 'VF' results in each iteration). This function is called when
1482 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1483 which VF=16 and nunits=4, so the number of copies required is 4):
1485 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1487 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1488 VS1.1: vx.1 = memref1 VS1.2
1489 VS1.2: vx.2 = memref2 VS1.3
1490 VS1.3: vx.3 = memref3
1492 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1493 VSnew.1: vz1 = vx.1 + ... VSnew.2
1494 VSnew.2: vz2 = vx.2 + ... VSnew.3
1495 VSnew.3: vz3 = vx.3 + ...
1497 The vectorization of S1 is explained in vectorizable_load.
1498 The vectorization of S2:
1499 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1500 the function 'vect_get_vec_def_for_operand' is called to
1501 get the relevant vector-def for each operand of S2. For operand x it
1502 returns the vector-def 'vx.0'.
1504 To create the remaining copies of the vector-stmt (VSnew.j), this
1505 function is called to get the relevant vector-def for each operand. It is
1506 obtained from the respective VS1.j stmt, which is recorded in the
1507 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1509 For example, to obtain the vector-def 'vx.1' in order to create the
1510 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1511 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1512 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1513 and return its def ('vx.1').
1514 Overall, to create the above sequence this function will be called 3 times:
1515 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1516 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1517 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1519 tree
1520 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1522 gimple vec_stmt_for_operand;
1523 stmt_vec_info def_stmt_info;
1525 /* Do nothing; can reuse same def. */
1526 if (dt == vect_external_def || dt == vect_constant_def )
1527 return vec_oprnd;
1529 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1530 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1531 gcc_assert (def_stmt_info);
1532 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1533 gcc_assert (vec_stmt_for_operand);
1534 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1535 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1536 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1537 else
1538 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1539 return vec_oprnd;
1543 /* Get vectorized definitions for the operands to create a copy of an original
1544 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1546 static void
1547 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1548 vec<tree> *vec_oprnds0,
1549 vec<tree> *vec_oprnds1)
1551 tree vec_oprnd = vec_oprnds0->pop ();
1553 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1554 vec_oprnds0->quick_push (vec_oprnd);
1556 if (vec_oprnds1 && vec_oprnds1->length ())
1558 vec_oprnd = vec_oprnds1->pop ();
1559 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1560 vec_oprnds1->quick_push (vec_oprnd);
1565 /* Get vectorized definitions for OP0 and OP1.
1566 REDUC_INDEX is the index of reduction operand in case of reduction,
1567 and -1 otherwise. */
1569 void
1570 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1571 vec<tree> *vec_oprnds0,
1572 vec<tree> *vec_oprnds1,
1573 slp_tree slp_node, int reduc_index)
1575 if (slp_node)
1577 int nops = (op1 == NULL_TREE) ? 1 : 2;
1578 vec<tree> ops;
1579 ops.create (nops);
1580 vec<vec<tree> > vec_defs;
1581 vec_defs.create (nops);
1583 ops.quick_push (op0);
1584 if (op1)
1585 ops.quick_push (op1);
1587 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1589 *vec_oprnds0 = vec_defs[0];
1590 if (op1)
1591 *vec_oprnds1 = vec_defs[1];
1593 ops.release ();
1594 vec_defs.release ();
1596 else
1598 tree vec_oprnd;
1600 vec_oprnds0->create (1);
1601 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1602 vec_oprnds0->quick_push (vec_oprnd);
1604 if (op1)
1606 vec_oprnds1->create (1);
1607 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1608 vec_oprnds1->quick_push (vec_oprnd);
1614 /* Function vect_finish_stmt_generation.
1616 Insert a new stmt. */
1618 void
1619 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1620 gimple_stmt_iterator *gsi)
1622 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1623 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1624 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1626 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1628 if (!gsi_end_p (*gsi)
1629 && gimple_has_mem_ops (vec_stmt))
1631 gimple at_stmt = gsi_stmt (*gsi);
1632 tree vuse = gimple_vuse (at_stmt);
1633 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1635 tree vdef = gimple_vdef (at_stmt);
1636 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1637 /* If we have an SSA vuse and insert a store, update virtual
1638 SSA form to avoid triggering the renamer. Do so only
1639 if we can easily see all uses - which is what almost always
1640 happens with the way vectorized stmts are inserted. */
1641 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1642 && ((is_gimple_assign (vec_stmt)
1643 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1644 || (is_gimple_call (vec_stmt)
1645 && !(gimple_call_flags (vec_stmt)
1646 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1648 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1649 gimple_set_vdef (vec_stmt, new_vdef);
1650 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1654 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1656 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1657 bb_vinfo));
1659 if (dump_enabled_p ())
1661 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1662 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1663 dump_printf (MSG_NOTE, "\n");
1666 gimple_set_location (vec_stmt, gimple_location (stmt));
1669 /* Checks if CALL can be vectorized with vector types VECTYPE_OUT and VECTYPE_IN. Returns
1670 a function declaration if the target has a vectorized version
1671 of the function, or NULL_TREE if the function cannot be vectorized. */
1673 tree
1674 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1676 tree fndecl = gimple_call_fndecl (call);
1678 /* We only handle functions that do not read or clobber memory -- i.e.
1679 const or novops ones. */
1680 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1681 return NULL_TREE;
1683 if (!fndecl
1684 || TREE_CODE (fndecl) != FUNCTION_DECL
1685 || !DECL_BUILT_IN (fndecl))
1686 return NULL_TREE;
1688 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1689 vectype_in);
1692 /* Function vectorizable_call.
1694 Check if STMT performs a function call that can be vectorized.
1695 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1696 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1697 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1699 static bool
1700 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1701 slp_tree slp_node)
1703 tree vec_dest;
1704 tree scalar_dest;
1705 tree op, type;
1706 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1707 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1708 tree vectype_out, vectype_in;
1709 int nunits_in;
1710 int nunits_out;
1711 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1712 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1713 tree fndecl, new_temp, def, rhs_type;
1714 gimple def_stmt;
1715 enum vect_def_type dt[3]
1716 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1717 gimple new_stmt = NULL;
1718 int ncopies, j;
1719 vec<tree> vargs = vNULL;
1720 enum { NARROW, NONE, WIDEN } modifier;
1721 size_t i, nargs;
1722 tree lhs;
1724 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1725 return false;
1727 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1728 return false;
1730 /* Is STMT a vectorizable call? */
1731 if (!is_gimple_call (stmt))
1732 return false;
1734 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1735 return false;
1737 if (stmt_can_throw_internal (stmt))
1738 return false;
1740 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1742 /* Process function arguments. */
1743 rhs_type = NULL_TREE;
1744 vectype_in = NULL_TREE;
1745 nargs = gimple_call_num_args (stmt);
1747 /* Bail out if the function has more than three arguments; we do not have
1748 interesting builtin functions to vectorize with more than two arguments
1749 except for fma. A call with no arguments is not handled either. */
1750 if (nargs == 0 || nargs > 3)
1751 return false;
1753 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
1754 if (gimple_call_internal_p (stmt)
1755 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
1757 nargs = 0;
1758 rhs_type = unsigned_type_node;
1761 for (i = 0; i < nargs; i++)
1763 tree opvectype;
1765 op = gimple_call_arg (stmt, i);
1767 /* We can only handle calls with arguments of the same type. */
1768 if (rhs_type
1769 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1771 if (dump_enabled_p ())
1772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1773 "argument types differ.\n");
1774 return false;
1776 if (!rhs_type)
1777 rhs_type = TREE_TYPE (op);
1779 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1780 &def_stmt, &def, &dt[i], &opvectype))
1782 if (dump_enabled_p ())
1783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1784 "use not simple.\n");
1785 return false;
1788 if (!vectype_in)
1789 vectype_in = opvectype;
1790 else if (opvectype
1791 && opvectype != vectype_in)
1793 if (dump_enabled_p ())
1794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1795 "argument vector types differ.\n");
1796 return false;
1799 /* If all arguments are external or constant defs use a vector type with
1800 the same size as the output vector type. */
1801 if (!vectype_in)
1802 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1803 if (vec_stmt)
1804 gcc_assert (vectype_in);
1805 if (!vectype_in)
1807 if (dump_enabled_p ())
1809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1810 "no vectype for scalar type ");
1811 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
1812 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1815 return false;
1818 /* FORNOW */
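/* Classify the call by the ratio between the input and output vector
element counts: NONE when they match, NARROW when each result vector holds
twice as many (narrower) elements as an input vector, and WIDEN when it
holds half as many. Other ratios are not handled. */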
1819 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1820 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1821 if (nunits_in == nunits_out / 2)
1822 modifier = NARROW;
1823 else if (nunits_out == nunits_in)
1824 modifier = NONE;
1825 else if (nunits_out == nunits_in / 2)
1826 modifier = WIDEN;
1827 else
1828 return false;
1830 /* For now, we only vectorize functions if a target specific builtin
1831 is available. TODO -- in some cases, it might be profitable to
1832 insert the calls for pieces of the vector, in order to be able
1833 to vectorize other operations in the loop. */
1834 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1835 if (fndecl == NULL_TREE)
1837 if (gimple_call_internal_p (stmt)
1838 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
1839 && !slp_node
1840 && loop_vinfo
1841 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
1842 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
1843 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
1844 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
1846 /* We can handle IFN_GOMP_SIMD_LANE by returning a
1847 { 0, 1, 2, ... vf - 1 } vector. */
1848 gcc_assert (nargs == 0);
1850 else
1852 if (dump_enabled_p ())
1853 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1854 "function is not vectorizable.\n");
1855 return false;
1859 gcc_assert (!gimple_vuse (stmt));
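/* Number of vector stmts needed to cover the vectorization factor:
one for SLP, otherwise VF divided by the number of elements per vector
(counted on the result side for narrowing calls). */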
1861 if (slp_node || PURE_SLP_STMT (stmt_info))
1862 ncopies = 1;
1863 else if (modifier == NARROW)
1864 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1865 else
1866 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1868 /* Sanity check: make sure that at least one copy of the vectorized stmt
1869 needs to be generated. */
1870 gcc_assert (ncopies >= 1);
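  /* E.g. with a vectorization factor of 16 and V4SI vectors, four copies of
     the vector call are generated; for a NARROW call the count is based on
     nunits_out instead, since each such call consumes two input vectors per
     argument.  */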
1872 if (!vec_stmt) /* transformation not required. */
1874 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1875 if (dump_enabled_p ())
1876 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
1877 "\n");
1878 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
1879 return true;
1882 /** Transform. **/
1884 if (dump_enabled_p ())
1885 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
1887 /* Handle def. */
1888 scalar_dest = gimple_call_lhs (stmt);
1889 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1891 prev_stmt_info = NULL;
1892 switch (modifier)
1894 case NONE:
1895 for (j = 0; j < ncopies; ++j)
1897 /* Build argument list for the vectorized call. */
1898 if (j == 0)
1899 vargs.create (nargs);
1900 else
1901 vargs.truncate (0);
1903 if (slp_node)
1905 vec<vec<tree> > vec_defs;
1906 vec_defs.create (nargs);
1907 vec<tree> vec_oprnds0;
1909 for (i = 0; i < nargs; i++)
1910 vargs.quick_push (gimple_call_arg (stmt, i));
1911 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1912 vec_oprnds0 = vec_defs[0];
1914 /* Arguments are ready. Create the new vector stmt. */
1915 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
1917 size_t k;
1918 for (k = 0; k < nargs; k++)
1920 vec<tree> vec_oprndsk = vec_defs[k];
1921 vargs[k] = vec_oprndsk[i];
1923 new_stmt = gimple_build_call_vec (fndecl, vargs);
1924 new_temp = make_ssa_name (vec_dest, new_stmt);
1925 gimple_call_set_lhs (new_stmt, new_temp);
1926 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1927 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
1930 for (i = 0; i < nargs; i++)
1932 vec<tree> vec_oprndsi = vec_defs[i];
1933 vec_oprndsi.release ();
1935 vec_defs.release ();
1936 continue;
1939 for (i = 0; i < nargs; i++)
1941 op = gimple_call_arg (stmt, i);
1942 if (j == 0)
1943 vec_oprnd0
1944 = vect_get_vec_def_for_operand (op, stmt, NULL);
1945 else
1947 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1948 vec_oprnd0
1949 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1952 vargs.quick_push (vec_oprnd0);
1955 if (gimple_call_internal_p (stmt)
1956 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
1958 tree *v = XALLOCAVEC (tree, nunits_out);
1959 int k;
1960 for (k = 0; k < nunits_out; ++k)
1961 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
1962 tree cst = build_vector (vectype_out, v);
1963 tree new_var
1964 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
1965 gimple init_stmt = gimple_build_assign (new_var, cst);
1966 new_temp = make_ssa_name (new_var, init_stmt);
1967 gimple_assign_set_lhs (init_stmt, new_temp);
1968 vect_init_vector_1 (stmt, init_stmt, NULL);
1969 new_temp = make_ssa_name (vec_dest, NULL);
1970 new_stmt = gimple_build_assign (new_temp,
1971 gimple_assign_lhs (init_stmt));
1973 else
1975 new_stmt = gimple_build_call_vec (fndecl, vargs);
1976 new_temp = make_ssa_name (vec_dest, new_stmt);
1977 gimple_call_set_lhs (new_stmt, new_temp);
1979 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1981 if (j == 0)
1982 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1983 else
1984 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1986 prev_stmt_info = vinfo_for_stmt (new_stmt);
1989 break;
1991 case NARROW:
1992 for (j = 0; j < ncopies; ++j)
1994 /* Build argument list for the vectorized call. */
1995 if (j == 0)
1996 vargs.create (nargs * 2);
1997 else
1998 vargs.truncate (0);
2000 if (slp_node)
2002 vec<vec<tree> > vec_defs;
2003 vec_defs.create (nargs);
2004 vec<tree> vec_oprnds0;
2006 for (i = 0; i < nargs; i++)
2007 vargs.quick_push (gimple_call_arg (stmt, i));
2008 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2009 vec_oprnds0 = vec_defs[0];
2011 /* Arguments are ready. Create the new vector stmt. */
2012 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2014 size_t k;
2015 vargs.truncate (0);
2016 for (k = 0; k < nargs; k++)
2018 vec<tree> vec_oprndsk = vec_defs[k];
2019 vargs.quick_push (vec_oprndsk[i]);
2020 vargs.quick_push (vec_oprndsk[i + 1]);
2022 new_stmt = gimple_build_call_vec (fndecl, vargs);
2023 new_temp = make_ssa_name (vec_dest, new_stmt);
2024 gimple_call_set_lhs (new_stmt, new_temp);
2025 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2026 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2029 for (i = 0; i < nargs; i++)
2031 vec<tree> vec_oprndsi = vec_defs[i];
2032 vec_oprndsi.release ();
2034 vec_defs.release ();
2035 continue;
2038 for (i = 0; i < nargs; i++)
2040 op = gimple_call_arg (stmt, i);
2041 if (j == 0)
2043 vec_oprnd0
2044 = vect_get_vec_def_for_operand (op, stmt, NULL);
2045 vec_oprnd1
2046 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2048 else
2050 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2051 vec_oprnd0
2052 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2053 vec_oprnd1
2054 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2057 vargs.quick_push (vec_oprnd0);
2058 vargs.quick_push (vec_oprnd1);
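	      /* Each vectorized narrowing call consumes two input vectors per
		 scalar argument and produces one output vector, which is why
		 the def chain above advances by two defs between successive
		 copies.  */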
2061 new_stmt = gimple_build_call_vec (fndecl, vargs);
2062 new_temp = make_ssa_name (vec_dest, new_stmt);
2063 gimple_call_set_lhs (new_stmt, new_temp);
2064 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2066 if (j == 0)
2067 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2068 else
2069 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2071 prev_stmt_info = vinfo_for_stmt (new_stmt);
2074 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2076 break;
2078 case WIDEN:
2079 /* No current target implements this case. */
2080 return false;
2083 vargs.release ();
2085 /* Update the exception handling table with the vector stmt if necessary. */
2086 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2087 gimple_purge_dead_eh_edges (gimple_bb (stmt));
2089   /* The call in STMT might prevent it from being removed in dce.
2090      We cannot, however, remove it here, due to the way the ssa name
2091      it defines is mapped to the new definition.  So just replace the
2092      rhs of the statement with something harmless.  */
2094 if (slp_node)
2095 return true;
2097 type = TREE_TYPE (scalar_dest);
2098 if (is_pattern_stmt_p (stmt_info))
2099 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2100 else
2101 lhs = gimple_call_lhs (stmt);
2102 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2103 set_vinfo_for_stmt (new_stmt, stmt_info);
2104 set_vinfo_for_stmt (stmt, NULL);
2105 STMT_VINFO_STMT (stmt_info) = new_stmt;
2106 gsi_replace (gsi, new_stmt, false);
2107 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2109 return true;
2113 /* Function vect_gen_widened_results_half
2115    Create a vector stmt whose code, number of arguments, and result
2116    variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
2117    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
2118 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2119 needs to be created (DECL is a function-decl of a target-builtin).
2120 STMT is the original scalar stmt that we are vectorizing. */
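/* For illustration, assuming a unary widening with CODE == VEC_UNPACK_LO_EXPR:
   the stmt generated below has the form

       new_temp = VEC_UNPACK_LO_EXPR <vec_oprnd0>;

   and the caller invokes this function a second time with the corresponding
   _HI code to obtain the other half of the widened result.  */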
2122 static gimple
2123 vect_gen_widened_results_half (enum tree_code code,
2124 tree decl,
2125 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2126 tree vec_dest, gimple_stmt_iterator *gsi,
2127 gimple stmt)
2129 gimple new_stmt;
2130 tree new_temp;
2132 /* Generate half of the widened result: */
2133 if (code == CALL_EXPR)
2135 /* Target specific support */
2136 if (op_type == binary_op)
2137 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2138 else
2139 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2140 new_temp = make_ssa_name (vec_dest, new_stmt);
2141 gimple_call_set_lhs (new_stmt, new_temp);
2143 else
2145 /* Generic support */
2146 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2147 if (op_type != binary_op)
2148 vec_oprnd1 = NULL;
2149 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2150 vec_oprnd1);
2151 new_temp = make_ssa_name (vec_dest, new_stmt);
2152 gimple_assign_set_lhs (new_stmt, new_temp);
2154 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2156 return new_stmt;
2160 /* Get vectorized definitions for loop-based vectorization. For the first
2161 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2162 scalar operand), and for the rest we get a copy with
2163 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2164 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2165 The vectors are collected into VEC_OPRNDS. */
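/* For instance, when called with MULTI_STEP_CVT == 1 this collects four defs
   in VEC_OPRNDS: the def for the scalar OPRND, one stmt copy of it, and two
   more copies from the recursive call; in general 2 * (MULTI_STEP_CVT + 1)
   defs are gathered, which is the number of inputs a multi-step narrowing
   sequence needs.  */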
2167 static void
2168 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2169 vec<tree> *vec_oprnds, int multi_step_cvt)
2171 tree vec_oprnd;
2173 /* Get first vector operand. */
2174 /* All the vector operands except the very first one (that is scalar oprnd)
2175 are stmt copies. */
2176 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2177 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2178 else
2179 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2181 vec_oprnds->quick_push (vec_oprnd);
2183 /* Get second vector operand. */
2184 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2185 vec_oprnds->quick_push (vec_oprnd);
2187 *oprnd = vec_oprnd;
2189 /* For conversion in multiple steps, continue to get operands
2190 recursively. */
2191 if (multi_step_cvt)
2192 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2196 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2197 For multi-step conversions store the resulting vectors and call the function
2198 recursively. */
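/* As a sketch, demoting four V4SI vectors to a single V16QI vector (int to
   signed char) takes two steps: the four inputs are first packed pairwise
   into two V8HI vectors, and those are then packed into the final V16QI
   vector, each step using a stmt of the form

       vec_dest = VEC_PACK_TRUNC_EXPR <vop0, vop1>;  */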
2200 static void
2201 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
2202 int multi_step_cvt, gimple stmt,
2203 vec<tree> vec_dsts,
2204 gimple_stmt_iterator *gsi,
2205 slp_tree slp_node, enum tree_code code,
2206 stmt_vec_info *prev_stmt_info)
2208 unsigned int i;
2209 tree vop0, vop1, new_tmp, vec_dest;
2210 gimple new_stmt;
2211 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2213 vec_dest = vec_dsts.pop ();
2215 for (i = 0; i < vec_oprnds->length (); i += 2)
2217 /* Create demotion operation. */
2218 vop0 = (*vec_oprnds)[i];
2219 vop1 = (*vec_oprnds)[i + 1];
2220 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2221 new_tmp = make_ssa_name (vec_dest, new_stmt);
2222 gimple_assign_set_lhs (new_stmt, new_tmp);
2223 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2225 if (multi_step_cvt)
2226 /* Store the resulting vector for next recursive call. */
2227 (*vec_oprnds)[i/2] = new_tmp;
2228 else
2230 /* This is the last step of the conversion sequence. Store the
2231 vectors in SLP_NODE or in vector info of the scalar statement
2232 (or in STMT_VINFO_RELATED_STMT chain). */
2233 if (slp_node)
2234 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2235 else
2237 if (!*prev_stmt_info)
2238 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2239 else
2240 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2242 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2247 /* For multi-step demotion operations we first generate demotion operations
2248 from the source type to the intermediate types, and then combine the
2249 results (stored in VEC_OPRNDS) in demotion operation to the destination
2250 type. */
2251 if (multi_step_cvt)
2253 /* At each level of recursion we have half of the operands we had at the
2254 previous level. */
2255 vec_oprnds->truncate ((i+1)/2);
2256 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2257 stmt, vec_dsts, gsi, slp_node,
2258 VEC_PACK_TRUNC_EXPR,
2259 prev_stmt_info);
2262 vec_dsts.quick_push (vec_dest);
2266 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2267 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2268 the resulting vectors and call the function recursively. */
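/* For example, promoting two V8HI vectors to V4SI yields four V4SI vectors:
   each input produces a lo half and a hi half via
   vect_gen_widened_results_half, so on return *VEC_OPRNDS0 holds twice as
   many (wider) defs as it did on entry.  */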
2270 static void
2271 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
2272 vec<tree> *vec_oprnds1,
2273 gimple stmt, tree vec_dest,
2274 gimple_stmt_iterator *gsi,
2275 enum tree_code code1,
2276 enum tree_code code2, tree decl1,
2277 tree decl2, int op_type)
2279 int i;
2280 tree vop0, vop1, new_tmp1, new_tmp2;
2281 gimple new_stmt1, new_stmt2;
2282 vec<tree> vec_tmp = vNULL;
2284 vec_tmp.create (vec_oprnds0->length () * 2);
2285 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
2287 if (op_type == binary_op)
2288 vop1 = (*vec_oprnds1)[i];
2289 else
2290 vop1 = NULL_TREE;
2292 /* Generate the two halves of promotion operation. */
2293 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2294 op_type, vec_dest, gsi, stmt);
2295 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2296 op_type, vec_dest, gsi, stmt);
2297 if (is_gimple_call (new_stmt1))
2299 new_tmp1 = gimple_call_lhs (new_stmt1);
2300 new_tmp2 = gimple_call_lhs (new_stmt2);
2302 else
2304 new_tmp1 = gimple_assign_lhs (new_stmt1);
2305 new_tmp2 = gimple_assign_lhs (new_stmt2);
2308 /* Store the results for the next step. */
2309 vec_tmp.quick_push (new_tmp1);
2310 vec_tmp.quick_push (new_tmp2);
2313 vec_oprnds0->release ();
2314 *vec_oprnds0 = vec_tmp;
2318 /* Check if STMT performs a conversion operation that can be vectorized.
2319 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2320 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2321 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2323 static bool
2324 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2325 gimple *vec_stmt, slp_tree slp_node)
2327 tree vec_dest;
2328 tree scalar_dest;
2329 tree op0, op1 = NULL_TREE;
2330 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2331 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2332 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2333 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2334 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2335 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2336 tree new_temp;
2337 tree def;
2338 gimple def_stmt;
2339 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2340 gimple new_stmt = NULL;
2341 stmt_vec_info prev_stmt_info;
2342 int nunits_in;
2343 int nunits_out;
2344 tree vectype_out, vectype_in;
2345 int ncopies, i, j;
2346 tree lhs_type, rhs_type;
2347 enum { NARROW, NONE, WIDEN } modifier;
2348 vec<tree> vec_oprnds0 = vNULL;
2349 vec<tree> vec_oprnds1 = vNULL;
2350 tree vop0;
2351 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2352 int multi_step_cvt = 0;
2353 vec<tree> vec_dsts = vNULL;
2354 vec<tree> interm_types = vNULL;
2355 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2356 int op_type;
2357 enum machine_mode rhs_mode;
2358 unsigned short fltsz;
2360 /* Is STMT a vectorizable conversion? */
2362 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2363 return false;
2365 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2366 return false;
2368 if (!is_gimple_assign (stmt))
2369 return false;
2371 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2372 return false;
2374 code = gimple_assign_rhs_code (stmt);
2375 if (!CONVERT_EXPR_CODE_P (code)
2376 && code != FIX_TRUNC_EXPR
2377 && code != FLOAT_EXPR
2378 && code != WIDEN_MULT_EXPR
2379 && code != WIDEN_LSHIFT_EXPR)
2380 return false;
2382 op_type = TREE_CODE_LENGTH (code);
2384 /* Check types of lhs and rhs. */
2385 scalar_dest = gimple_assign_lhs (stmt);
2386 lhs_type = TREE_TYPE (scalar_dest);
2387 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2389 op0 = gimple_assign_rhs1 (stmt);
2390 rhs_type = TREE_TYPE (op0);
2392 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2393 && !((INTEGRAL_TYPE_P (lhs_type)
2394 && INTEGRAL_TYPE_P (rhs_type))
2395 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2396 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2397 return false;
2399 if ((INTEGRAL_TYPE_P (lhs_type)
2400 && (TYPE_PRECISION (lhs_type)
2401 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2402 || (INTEGRAL_TYPE_P (rhs_type)
2403 && (TYPE_PRECISION (rhs_type)
2404 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2406 if (dump_enabled_p ())
2407 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2408 "type conversion to/from bit-precision unsupported."
2409 "\n");
2410 return false;
2413 /* Check the operands of the operation. */
2414 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2415 &def_stmt, &def, &dt[0], &vectype_in))
2417 if (dump_enabled_p ())
2418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2419 "use not simple.\n");
2420 return false;
2422 if (op_type == binary_op)
2424 bool ok;
2426 op1 = gimple_assign_rhs2 (stmt);
2427 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2428 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2429 OP1. */
2430 if (CONSTANT_CLASS_P (op0))
2431 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
2432 &def_stmt, &def, &dt[1], &vectype_in);
2433 else
2434 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
2435 &def, &dt[1]);
2437 if (!ok)
2439 if (dump_enabled_p ())
2440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2441 "use not simple.\n");
2442 return false;
2446   /* If op0 is an external or constant def, use a vector type of
2447      the same size as the output vector type.  */
2448 if (!vectype_in)
2449 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2450 if (vec_stmt)
2451 gcc_assert (vectype_in);
2452 if (!vectype_in)
2454 if (dump_enabled_p ())
2456 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2457 "no vectype for scalar type ");
2458 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2459 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2462 return false;
2465 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2466 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2467 if (nunits_in < nunits_out)
2468 modifier = NARROW;
2469 else if (nunits_out == nunits_in)
2470 modifier = NONE;
2471 else
2472 modifier = WIDEN;
2474 /* Multiple types in SLP are handled by creating the appropriate number of
2475 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2476 case of SLP. */
2477 if (slp_node || PURE_SLP_STMT (stmt_info))
2478 ncopies = 1;
2479 else if (modifier == NARROW)
2480 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2481 else
2482 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2484 /* Sanity check: make sure that at least one copy of the vectorized stmt
2485 needs to be generated. */
2486 gcc_assert (ncopies >= 1);
2488 /* Supportable by target? */
2489 switch (modifier)
2491 case NONE:
2492 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2493 return false;
2494 if (supportable_convert_operation (code, vectype_out, vectype_in,
2495 &decl1, &code1))
2496 break;
2497 /* FALLTHRU */
2498 unsupported:
2499 if (dump_enabled_p ())
2500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2501 "conversion not supported by target.\n");
2502 return false;
2504 case WIDEN:
2505 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2506 &code1, &code2, &multi_step_cvt,
2507 &interm_types))
2509 /* Binary widening operation can only be supported directly by the
2510 architecture. */
2511 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2512 break;
2515 if (code != FLOAT_EXPR
2516 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2517 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2518 goto unsupported;
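      /* The widening conversion is not directly supported.  For e.g. a
	 short -> double conversion we can still try to widen the integer
	 source through intermediate integer modes (int, then long if the
	 target needs it) and perform the integer -> float step last; the
	 loop below searches for such an intermediate mode.  */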
2520 rhs_mode = TYPE_MODE (rhs_type);
2521 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2522 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2523 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2524 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2526 cvt_type
2527 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2528 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2529 if (cvt_type == NULL_TREE)
2530 goto unsupported;
2532 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2534 if (!supportable_convert_operation (code, vectype_out,
2535 cvt_type, &decl1, &codecvt1))
2536 goto unsupported;
2538 else if (!supportable_widening_operation (code, stmt, vectype_out,
2539 cvt_type, &codecvt1,
2540 &codecvt2, &multi_step_cvt,
2541 &interm_types))
2542 continue;
2543 else
2544 gcc_assert (multi_step_cvt == 0);
2546 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2547 vectype_in, &code1, &code2,
2548 &multi_step_cvt, &interm_types))
2549 break;
2552 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2553 goto unsupported;
2555 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2556 codecvt2 = ERROR_MARK;
2557 else
2559 multi_step_cvt++;
2560 interm_types.safe_push (cvt_type);
2561 cvt_type = NULL_TREE;
2563 break;
2565 case NARROW:
2566 gcc_assert (op_type == unary_op);
2567 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2568 &code1, &multi_step_cvt,
2569 &interm_types))
2570 break;
2572 if (code != FIX_TRUNC_EXPR
2573 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2574 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2575 goto unsupported;
2577 rhs_mode = TYPE_MODE (rhs_type);
2578 cvt_type
2579 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2580 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2581 if (cvt_type == NULL_TREE)
2582 goto unsupported;
2583 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2584 &decl1, &codecvt1))
2585 goto unsupported;
2586 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2587 &code1, &multi_step_cvt,
2588 &interm_types))
2589 break;
2590 goto unsupported;
2592 default:
2593 gcc_unreachable ();
2596 if (!vec_stmt) /* transformation not required. */
2598 if (dump_enabled_p ())
2599 dump_printf_loc (MSG_NOTE, vect_location,
2600 "=== vectorizable_conversion ===\n");
2601 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2603 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2604 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2606 else if (modifier == NARROW)
2608 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2609 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2611 else
2613 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2614 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2616 interm_types.release ();
2617 return true;
2620 /** Transform. **/
2621 if (dump_enabled_p ())
2622 dump_printf_loc (MSG_NOTE, vect_location,
2623 "transform conversion. ncopies = %d.\n", ncopies);
2625 if (op_type == binary_op)
2627 if (CONSTANT_CLASS_P (op0))
2628 op0 = fold_convert (TREE_TYPE (op1), op0);
2629 else if (CONSTANT_CLASS_P (op1))
2630 op1 = fold_convert (TREE_TYPE (op0), op1);
2633   /* In case of multi-step conversion, we first generate conversion operations
2634      to the intermediate types, and then from those types to the final one.
2635      We create vector destinations for the intermediate types (TYPES) received
2636      from supportable_*_operation, and store them in the correct order
2637      for future use in vect_create_vectorized_*_stmts ().  */
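  /* E.g. for a two-step char -> int promotion the pushes below leave
     VEC_DSTS as { V4SI dest, V8HI dest }, so the transform code consumes
     the intermediate destination first and the final destination last.  */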
2638 vec_dsts.create (multi_step_cvt + 1);
2639 vec_dest = vect_create_destination_var (scalar_dest,
2640 (cvt_type && modifier == WIDEN)
2641 ? cvt_type : vectype_out);
2642 vec_dsts.quick_push (vec_dest);
2644 if (multi_step_cvt)
2646 for (i = interm_types.length () - 1;
2647 interm_types.iterate (i, &intermediate_type); i--)
2649 vec_dest = vect_create_destination_var (scalar_dest,
2650 intermediate_type);
2651 vec_dsts.quick_push (vec_dest);
2655 if (cvt_type)
2656 vec_dest = vect_create_destination_var (scalar_dest,
2657 modifier == WIDEN
2658 ? vectype_out : cvt_type);
2660 if (!slp_node)
2662 if (modifier == WIDEN)
2664 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
2665 if (op_type == binary_op)
2666 vec_oprnds1.create (1);
2668 else if (modifier == NARROW)
2669 vec_oprnds0.create (
2670 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2672 else if (code == WIDEN_LSHIFT_EXPR)
2673 vec_oprnds1.create (slp_node->vec_stmts_size);
2675 last_oprnd = op0;
2676 prev_stmt_info = NULL;
2677 switch (modifier)
2679 case NONE:
2680 for (j = 0; j < ncopies; j++)
2682 if (j == 0)
2683 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2684 -1);
2685 else
2686 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2688 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2690	     /* Arguments are ready.  Create the new vector stmt.  */
2691 if (code1 == CALL_EXPR)
2693 new_stmt = gimple_build_call (decl1, 1, vop0);
2694 new_temp = make_ssa_name (vec_dest, new_stmt);
2695 gimple_call_set_lhs (new_stmt, new_temp);
2697 else
2699 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2700 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2701 vop0, NULL);
2702 new_temp = make_ssa_name (vec_dest, new_stmt);
2703 gimple_assign_set_lhs (new_stmt, new_temp);
2706 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2707 if (slp_node)
2708 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2711 if (j == 0)
2712 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2713 else
2714 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2715 prev_stmt_info = vinfo_for_stmt (new_stmt);
2717 break;
2719 case WIDEN:
2720 /* In case the vectorization factor (VF) is bigger than the number
2721 of elements that we can fit in a vectype (nunits), we have to
2722 generate more than one vector stmt - i.e - we need to "unroll"
2723 the vector stmt by a factor VF/nunits. */
2724 for (j = 0; j < ncopies; j++)
2726 /* Handle uses. */
2727 if (j == 0)
2729 if (slp_node)
2731 if (code == WIDEN_LSHIFT_EXPR)
2733 unsigned int k;
2735 vec_oprnd1 = op1;
2736 /* Store vec_oprnd1 for every vector stmt to be created
2737 for SLP_NODE. We check during the analysis that all
2738 the shift arguments are the same. */
2739 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2740 vec_oprnds1.quick_push (vec_oprnd1);
2742 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2743 slp_node, -1);
2745 else
2746 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2747 &vec_oprnds1, slp_node, -1);
2749 else
2751 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2752 vec_oprnds0.quick_push (vec_oprnd0);
2753 if (op_type == binary_op)
2755 if (code == WIDEN_LSHIFT_EXPR)
2756 vec_oprnd1 = op1;
2757 else
2758 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2759 NULL);
2760 vec_oprnds1.quick_push (vec_oprnd1);
2764 else
2766 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2767 vec_oprnds0.truncate (0);
2768 vec_oprnds0.quick_push (vec_oprnd0);
2769 if (op_type == binary_op)
2771 if (code == WIDEN_LSHIFT_EXPR)
2772 vec_oprnd1 = op1;
2773 else
2774 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2775 vec_oprnd1);
2776 vec_oprnds1.truncate (0);
2777 vec_oprnds1.quick_push (vec_oprnd1);
2781 /* Arguments are ready. Create the new vector stmts. */
2782 for (i = multi_step_cvt; i >= 0; i--)
2784 tree this_dest = vec_dsts[i];
2785 enum tree_code c1 = code1, c2 = code2;
2786 if (i == 0 && codecvt2 != ERROR_MARK)
2788 c1 = codecvt1;
2789 c2 = codecvt2;
2791 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2792 &vec_oprnds1,
2793 stmt, this_dest, gsi,
2794 c1, c2, decl1, decl2,
2795 op_type);
2798 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2800 if (cvt_type)
2802 if (codecvt1 == CALL_EXPR)
2804 new_stmt = gimple_build_call (decl1, 1, vop0);
2805 new_temp = make_ssa_name (vec_dest, new_stmt);
2806 gimple_call_set_lhs (new_stmt, new_temp);
2808 else
2810 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2811 new_temp = make_ssa_name (vec_dest, NULL);
2812 new_stmt = gimple_build_assign_with_ops (codecvt1,
2813 new_temp,
2814 vop0, NULL);
2817 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2819 else
2820 new_stmt = SSA_NAME_DEF_STMT (vop0);
2822 if (slp_node)
2823 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2824 else
2826 if (!prev_stmt_info)
2827 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2828 else
2829 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2830 prev_stmt_info = vinfo_for_stmt (new_stmt);
2835 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2836 break;
2838 case NARROW:
2839 /* In case the vectorization factor (VF) is bigger than the number
2840 of elements that we can fit in a vectype (nunits), we have to
2841 generate more than one vector stmt - i.e - we need to "unroll"
2842 the vector stmt by a factor VF/nunits. */
2843 for (j = 0; j < ncopies; j++)
2845 /* Handle uses. */
2846 if (slp_node)
2847 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2848 slp_node, -1);
2849 else
2851 vec_oprnds0.truncate (0);
2852 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2853 vect_pow2 (multi_step_cvt) - 1);
2856 /* Arguments are ready. Create the new vector stmts. */
2857 if (cvt_type)
2858 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2860 if (codecvt1 == CALL_EXPR)
2862 new_stmt = gimple_build_call (decl1, 1, vop0);
2863 new_temp = make_ssa_name (vec_dest, new_stmt);
2864 gimple_call_set_lhs (new_stmt, new_temp);
2866 else
2868 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2869 new_temp = make_ssa_name (vec_dest, NULL);
2870 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2871 vop0, NULL);
2874 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2875 vec_oprnds0[i] = new_temp;
2878 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2879 stmt, vec_dsts, gsi,
2880 slp_node, code1,
2881 &prev_stmt_info);
2884 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2885 break;
2888 vec_oprnds0.release ();
2889 vec_oprnds1.release ();
2890 vec_dsts.release ();
2891 interm_types.release ();
2893 return true;
2897 /* Function vectorizable_assignment.
2899 Check if STMT performs an assignment (copy) that can be vectorized.
2900 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2901    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2902 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2904 static bool
2905 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2906 gimple *vec_stmt, slp_tree slp_node)
2908 tree vec_dest;
2909 tree scalar_dest;
2910 tree op;
2911 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2912 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2913 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2914 tree new_temp;
2915 tree def;
2916 gimple def_stmt;
2917 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2918 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2919 int ncopies;
2920 int i, j;
2921 vec<tree> vec_oprnds = vNULL;
2922 tree vop;
2923 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2924 gimple new_stmt = NULL;
2925 stmt_vec_info prev_stmt_info = NULL;
2926 enum tree_code code;
2927 tree vectype_in;
2929 /* Multiple types in SLP are handled by creating the appropriate number of
2930 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2931 case of SLP. */
2932 if (slp_node || PURE_SLP_STMT (stmt_info))
2933 ncopies = 1;
2934 else
2935 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2937 gcc_assert (ncopies >= 1);
2939 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2940 return false;
2942 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2943 return false;
2945 /* Is vectorizable assignment? */
2946 if (!is_gimple_assign (stmt))
2947 return false;
2949 scalar_dest = gimple_assign_lhs (stmt);
2950 if (TREE_CODE (scalar_dest) != SSA_NAME)
2951 return false;
2953 code = gimple_assign_rhs_code (stmt);
2954 if (gimple_assign_single_p (stmt)
2955 || code == PAREN_EXPR
2956 || CONVERT_EXPR_CODE_P (code))
2957 op = gimple_assign_rhs1 (stmt);
2958 else
2959 return false;
2961 if (code == VIEW_CONVERT_EXPR)
2962 op = TREE_OPERAND (op, 0);
2964 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2965 &def_stmt, &def, &dt[0], &vectype_in))
2967 if (dump_enabled_p ())
2968 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2969 "use not simple.\n");
2970 return false;
2973 /* We can handle NOP_EXPR conversions that do not change the number
2974 of elements or the vector size. */
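  /* E.g. a vector copy from "int" to "unsigned int" qualifies, whereas an
     int -> short conversion changes the number of vector elements and is
     left to vectorizable_conversion instead.  */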
2975 if ((CONVERT_EXPR_CODE_P (code)
2976 || code == VIEW_CONVERT_EXPR)
2977 && (!vectype_in
2978 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2979 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2980 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2981 return false;
2983 /* We do not handle bit-precision changes. */
2984 if ((CONVERT_EXPR_CODE_P (code)
2985 || code == VIEW_CONVERT_EXPR)
2986 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2987 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2988 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2989 || ((TYPE_PRECISION (TREE_TYPE (op))
2990 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2991 /* But a conversion that does not change the bit-pattern is ok. */
2992 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2993 > TYPE_PRECISION (TREE_TYPE (op)))
2994 && TYPE_UNSIGNED (TREE_TYPE (op))))
2996 if (dump_enabled_p ())
2997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2998 "type conversion to/from bit-precision "
2999 "unsupported.\n");
3000 return false;
3003 if (!vec_stmt) /* transformation not required. */
3005 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
3006 if (dump_enabled_p ())
3007 dump_printf_loc (MSG_NOTE, vect_location,
3008 "=== vectorizable_assignment ===\n");
3009 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3010 return true;
3013 /** Transform. **/
3014 if (dump_enabled_p ())
3015 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
3017 /* Handle def. */
3018 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3020 /* Handle use. */
3021 for (j = 0; j < ncopies; j++)
3023 /* Handle uses. */
3024 if (j == 0)
3025 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
3026 else
3027 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
3029       /* Arguments are ready.  Create the new vector stmt.  */
3030 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3032 if (CONVERT_EXPR_CODE_P (code)
3033 || code == VIEW_CONVERT_EXPR)
3034 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
3035 new_stmt = gimple_build_assign (vec_dest, vop);
3036 new_temp = make_ssa_name (vec_dest, new_stmt);
3037 gimple_assign_set_lhs (new_stmt, new_temp);
3038 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3039 if (slp_node)
3040 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3043 if (slp_node)
3044 continue;
3046 if (j == 0)
3047 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3048 else
3049 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3051 prev_stmt_info = vinfo_for_stmt (new_stmt);
3054 vec_oprnds.release ();
3055 return true;
3059 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
3060 either as shift by a scalar or by a vector. */
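/* A typical use, e.g. from pattern recognition, is to ask whether a
   RSHIFT_EXPR on "int" can be vectorized at all before synthesizing such a
   shift, without caring whether the target provides the vector-by-scalar or
   only the vector-by-vector form.  */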
3062 bool
3063 vect_supportable_shift (enum tree_code code, tree scalar_type)
3066 enum machine_mode vec_mode;
3067 optab optab;
3068 int icode;
3069 tree vectype;
3071 vectype = get_vectype_for_scalar_type (scalar_type);
3072 if (!vectype)
3073 return false;
3075 optab = optab_for_tree_code (code, vectype, optab_scalar);
3076 if (!optab
3077 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
3079 optab = optab_for_tree_code (code, vectype, optab_vector);
3080 if (!optab
3081 || (optab_handler (optab, TYPE_MODE (vectype))
3082 == CODE_FOR_nothing))
3083 return false;
3086 vec_mode = TYPE_MODE (vectype);
3087 icode = (int) optab_handler (optab, vec_mode);
3088 if (icode == CODE_FOR_nothing)
3089 return false;
3091 return true;
3095 /* Function vectorizable_shift.
3097 Check if STMT performs a shift operation that can be vectorized.
3098 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3099    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3100 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3102 static bool
3103 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3104 gimple *vec_stmt, slp_tree slp_node)
3106 tree vec_dest;
3107 tree scalar_dest;
3108 tree op0, op1 = NULL;
3109 tree vec_oprnd1 = NULL_TREE;
3110 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3111 tree vectype;
3112 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3113 enum tree_code code;
3114 enum machine_mode vec_mode;
3115 tree new_temp;
3116 optab optab;
3117 int icode;
3118 enum machine_mode optab_op2_mode;
3119 tree def;
3120 gimple def_stmt;
3121 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3122 gimple new_stmt = NULL;
3123 stmt_vec_info prev_stmt_info;
3124 int nunits_in;
3125 int nunits_out;
3126 tree vectype_out;
3127 tree op1_vectype;
3128 int ncopies;
3129 int j, i;
3130 vec<tree> vec_oprnds0 = vNULL;
3131 vec<tree> vec_oprnds1 = vNULL;
3132 tree vop0, vop1;
3133 unsigned int k;
3134 bool scalar_shift_arg = true;
3135 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3136 int vf;
3138 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3139 return false;
3141 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3142 return false;
3144 /* Is STMT a vectorizable binary/unary operation? */
3145 if (!is_gimple_assign (stmt))
3146 return false;
3148 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3149 return false;
3151 code = gimple_assign_rhs_code (stmt);
3153 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3154 || code == RROTATE_EXPR))
3155 return false;
3157 scalar_dest = gimple_assign_lhs (stmt);
3158 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3159 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3160 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3162 if (dump_enabled_p ())
3163 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3164 "bit-precision shifts not supported.\n");
3165 return false;
3168 op0 = gimple_assign_rhs1 (stmt);
3169 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3170 &def_stmt, &def, &dt[0], &vectype))
3172 if (dump_enabled_p ())
3173 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3174 "use not simple.\n");
3175 return false;
3177 /* If op0 is an external or constant def use a vector type with
3178 the same size as the output vector type. */
3179 if (!vectype)
3180 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3181 if (vec_stmt)
3182 gcc_assert (vectype);
3183 if (!vectype)
3185 if (dump_enabled_p ())
3186 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3187 "no vectype for scalar type\n");
3188 return false;
3191 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3192 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3193 if (nunits_out != nunits_in)
3194 return false;
3196 op1 = gimple_assign_rhs2 (stmt);
3197 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3198 &def, &dt[1], &op1_vectype))
3200 if (dump_enabled_p ())
3201 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3202 "use not simple.\n");
3203 return false;
3206 if (loop_vinfo)
3207 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3208 else
3209 vf = 1;
3211 /* Multiple types in SLP are handled by creating the appropriate number of
3212 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3213 case of SLP. */
3214 if (slp_node || PURE_SLP_STMT (stmt_info))
3215 ncopies = 1;
3216 else
3217 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3219 gcc_assert (ncopies >= 1);
3221   /* Determine whether the shift amount is a vector or a scalar.  If the
3222      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
3224 if (dt[1] == vect_internal_def && !slp_node)
3225 scalar_shift_arg = false;
3226 else if (dt[1] == vect_constant_def
3227 || dt[1] == vect_external_def
3228 || dt[1] == vect_internal_def)
3230       /* In SLP, we need to check whether the shift count is the same
3231          for all statements; in loops, if it is a constant or invariant,
3232          it is always a scalar shift.  */
3233 if (slp_node)
3235 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3236 gimple slpstmt;
3238 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
3239 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3240 scalar_shift_arg = false;
3243 else
3245 if (dump_enabled_p ())
3246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3247 "operand mode requires invariant argument.\n");
3248 return false;
3251 /* Vector shifted by vector. */
3252 if (!scalar_shift_arg)
3254 optab = optab_for_tree_code (code, vectype, optab_vector);
3255 if (dump_enabled_p ())
3256 dump_printf_loc (MSG_NOTE, vect_location,
3257 "vector/vector shift/rotate found.\n");
3259 if (!op1_vectype)
3260 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3261 if (op1_vectype == NULL_TREE
3262 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3264 if (dump_enabled_p ())
3265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3266 "unusable type for last operand in"
3267 " vector/vector shift/rotate.\n");
3268 return false;
3271 /* See if the machine has a vector shifted by scalar insn and if not
3272 then see if it has a vector shifted by vector insn. */
3273 else
3275 optab = optab_for_tree_code (code, vectype, optab_scalar);
3276 if (optab
3277 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3279 if (dump_enabled_p ())
3280 dump_printf_loc (MSG_NOTE, vect_location,
3281 "vector/scalar shift/rotate found.\n");
3283 else
3285 optab = optab_for_tree_code (code, vectype, optab_vector);
3286 if (optab
3287 && (optab_handler (optab, TYPE_MODE (vectype))
3288 != CODE_FOR_nothing))
3290 scalar_shift_arg = false;
3292 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_NOTE, vect_location,
3294 "vector/vector shift/rotate found.\n");
3296 /* Unlike the other binary operators, shifts/rotates have
3297 the rhs being int, instead of the same type as the lhs,
3298 so make sure the scalar is the right type if we are
3299 dealing with vectors of long long/long/short/char. */
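	      /* E.g. for "v2di << 3" the literal 3 has type int and must be
		 converted to the vector element type (long long) before the
		 invariant shift-amount vector can be built from it.  */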
3300 if (dt[1] == vect_constant_def)
3301 op1 = fold_convert (TREE_TYPE (vectype), op1);
3302 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3303 TREE_TYPE (op1)))
3305 if (slp_node
3306 && TYPE_MODE (TREE_TYPE (vectype))
3307 != TYPE_MODE (TREE_TYPE (op1)))
3309 if (dump_enabled_p ())
3310 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3311 "unusable type for last operand in"
3312 " vector/vector shift/rotate.\n");
3313 return false;
3315 if (vec_stmt && !slp_node)
3317 op1 = fold_convert (TREE_TYPE (vectype), op1);
3318 op1 = vect_init_vector (stmt, op1,
3319 TREE_TYPE (vectype), NULL);
3326 /* Supportable by target? */
3327 if (!optab)
3329 if (dump_enabled_p ())
3330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3331 "no optab.\n");
3332 return false;
3334 vec_mode = TYPE_MODE (vectype);
3335 icode = (int) optab_handler (optab, vec_mode);
3336 if (icode == CODE_FOR_nothing)
3338 if (dump_enabled_p ())
3339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3340 "op not supported by target.\n");
3341 /* Check only during analysis. */
3342 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3343 || (vf < vect_min_worthwhile_factor (code)
3344 && !vec_stmt))
3345 return false;
3346 if (dump_enabled_p ())
3347 dump_printf_loc (MSG_NOTE, vect_location,
3348 "proceeding using word mode.\n");
3351 /* Worthwhile without SIMD support? Check only during analysis. */
3352 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3353 && vf < vect_min_worthwhile_factor (code)
3354 && !vec_stmt)
3356 if (dump_enabled_p ())
3357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3358 "not worthwhile without SIMD support.\n");
3359 return false;
3362 if (!vec_stmt) /* transformation not required. */
3364 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3365 if (dump_enabled_p ())
3366 dump_printf_loc (MSG_NOTE, vect_location,
3367 "=== vectorizable_shift ===\n");
3368 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3369 return true;
3372 /** Transform. **/
3374 if (dump_enabled_p ())
3375 dump_printf_loc (MSG_NOTE, vect_location,
3376 "transform binary/unary operation.\n");
3378 /* Handle def. */
3379 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3381 prev_stmt_info = NULL;
3382 for (j = 0; j < ncopies; j++)
3384 /* Handle uses. */
3385 if (j == 0)
3387 if (scalar_shift_arg)
3389 /* Vector shl and shr insn patterns can be defined with scalar
3390 operand 2 (shift operand). In this case, use constant or loop
3391 invariant op1 directly, without extending it to vector mode
3392 first. */
3393 optab_op2_mode = insn_data[icode].operand[2].mode;
3394 if (!VECTOR_MODE_P (optab_op2_mode))
3396 if (dump_enabled_p ())
3397 dump_printf_loc (MSG_NOTE, vect_location,
3398 "operand 1 using scalar mode.\n");
3399 vec_oprnd1 = op1;
3400 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
3401 vec_oprnds1.quick_push (vec_oprnd1);
3402 if (slp_node)
3404 /* Store vec_oprnd1 for every vector stmt to be created
3405 for SLP_NODE. We check during the analysis that all
3406 the shift arguments are the same.
3407 TODO: Allow different constants for different vector
3408 stmts generated for an SLP instance. */
3409 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3410 vec_oprnds1.quick_push (vec_oprnd1);
3415          /* vec_oprnd1 is available if operand 1 should be of a scalar type
3416             (a special case for certain kinds of vector shifts); otherwise,
3417             operand 1 should be of a vector type (the usual case).  */
3418 if (vec_oprnd1)
3419 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3420 slp_node, -1);
3421 else
3422 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3423 slp_node, -1);
3425 else
3426 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3428 /* Arguments are ready. Create the new vector stmt. */
3429 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3431 vop1 = vec_oprnds1[i];
3432 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3433 new_temp = make_ssa_name (vec_dest, new_stmt);
3434 gimple_assign_set_lhs (new_stmt, new_temp);
3435 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3436 if (slp_node)
3437 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3440 if (slp_node)
3441 continue;
3443 if (j == 0)
3444 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3445 else
3446 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3447 prev_stmt_info = vinfo_for_stmt (new_stmt);
3450 vec_oprnds0.release ();
3451 vec_oprnds1.release ();
3453 return true;
3457 static tree permute_vec_elements (tree, tree, tree, gimple,
3458 gimple_stmt_iterator *);
3461 /* Function vectorizable_operation.
3463 Check if STMT performs a binary, unary or ternary operation that can
3464 be vectorized.
3465 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3466    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3467 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3469 static bool
3470 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3471 gimple *vec_stmt, slp_tree slp_node)
3473 tree vec_dest;
3474 tree scalar_dest;
3475 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3476 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3477 tree vectype;
3478 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3479 enum tree_code code;
3480 enum machine_mode vec_mode;
3481 tree new_temp;
3482 int op_type;
3483 optab optab;
3484 int icode;
3485 tree def;
3486 gimple def_stmt;
3487 enum vect_def_type dt[3]
3488 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3489 gimple new_stmt = NULL;
3490 stmt_vec_info prev_stmt_info;
3491 int nunits_in;
3492 int nunits_out;
3493 tree vectype_out;
3494 int ncopies;
3495 int j, i;
3496 vec<tree> vec_oprnds0 = vNULL;
3497 vec<tree> vec_oprnds1 = vNULL;
3498 vec<tree> vec_oprnds2 = vNULL;
3499 tree vop0, vop1, vop2;
3500 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3501 int vf;
3503 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3504 return false;
3506 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3507 return false;
3509 /* Is STMT a vectorizable binary/unary operation? */
3510 if (!is_gimple_assign (stmt))
3511 return false;
3513 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3514 return false;
3516 code = gimple_assign_rhs_code (stmt);
3518 /* For pointer addition, we should use the normal plus for
3519 the vector addition. */
3520 if (code == POINTER_PLUS_EXPR)
3521 code = PLUS_EXPR;
3523   /* Support only unary, binary or ternary operations.  */
3524 op_type = TREE_CODE_LENGTH (code);
3525 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3527 if (dump_enabled_p ())
3528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3529 "num. args = %d (not unary/binary/ternary op).\n",
3530 op_type);
3531 return false;
3534 scalar_dest = gimple_assign_lhs (stmt);
3535 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3537 /* Most operations cannot handle bit-precision types without extra
3538 truncations. */
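  /* E.g. arithmetic on a 3-bit bit-field value is carried out in a QImode
     lane, so each vector stmt would need an extra truncation back to 3 bits;
     the bitwise operations excepted below preserve the precision as-is.  */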
3539 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3540 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3541       /* Exceptions are bitwise binary operations.  */
3542 && code != BIT_IOR_EXPR
3543 && code != BIT_XOR_EXPR
3544 && code != BIT_AND_EXPR)
3546 if (dump_enabled_p ())
3547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3548 "bit-precision arithmetic not supported.\n");
3549 return false;
3552 op0 = gimple_assign_rhs1 (stmt);
3553 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3554 &def_stmt, &def, &dt[0], &vectype))
3556 if (dump_enabled_p ())
3557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3558 "use not simple.\n");
3559 return false;
3561 /* If op0 is an external or constant def use a vector type with
3562 the same size as the output vector type. */
3563 if (!vectype)
3564 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3565 if (vec_stmt)
3566 gcc_assert (vectype);
3567 if (!vectype)
3569 if (dump_enabled_p ())
3571 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3572 "no vectype for scalar type ");
3573 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3574 TREE_TYPE (op0));
3575 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3578 return false;
3581 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3582 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3583 if (nunits_out != nunits_in)
3584 return false;
3586 if (op_type == binary_op || op_type == ternary_op)
3588 op1 = gimple_assign_rhs2 (stmt);
3589 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3590 &def, &dt[1]))
3592 if (dump_enabled_p ())
3593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3594 "use not simple.\n");
3595 return false;
3598 if (op_type == ternary_op)
3600 op2 = gimple_assign_rhs3 (stmt);
3601 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3602 &def, &dt[2]))
3604 if (dump_enabled_p ())
3605 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3606 "use not simple.\n");
3607 return false;
3611 if (loop_vinfo)
3612 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3613 else
3614 vf = 1;
3616 /* Multiple types in SLP are handled by creating the appropriate number of
3617 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3618 case of SLP. */
3619 if (slp_node || PURE_SLP_STMT (stmt_info))
3620 ncopies = 1;
3621 else
3622 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3624 gcc_assert (ncopies >= 1);
3626 /* Shifts are handled in vectorizable_shift (). */
3627 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3628 || code == RROTATE_EXPR)
3629 return false;
3631 /* Supportable by target? */
3633 vec_mode = TYPE_MODE (vectype);
3634 if (code == MULT_HIGHPART_EXPR)
3636 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
3637 icode = LAST_INSN_CODE;
3638 else
3639 icode = CODE_FOR_nothing;
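      /* There is no single optab to query here: the target may provide a
	 highpart multiply directly or only as a widening multiply followed
	 by a permute, and can_mult_highpart_p covers both cases, so
	 LAST_INSN_CODE is merely used as a "supported" marker.  */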
3641 else
3643 optab = optab_for_tree_code (code, vectype, optab_default);
3644 if (!optab)
3646 if (dump_enabled_p ())
3647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3648 "no optab.\n");
3649 return false;
3651 icode = (int) optab_handler (optab, vec_mode);
3654 if (icode == CODE_FOR_nothing)
3656 if (dump_enabled_p ())
3657 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3658 "op not supported by target.\n");
3659 /* Check only during analysis. */
3660 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3661 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
3662 return false;
3663 if (dump_enabled_p ())
3664 dump_printf_loc (MSG_NOTE, vect_location,
3665 "proceeding using word mode.\n");
3668 /* Worthwhile without SIMD support? Check only during analysis. */
3669 if (!VECTOR_MODE_P (vec_mode)
3670 && !vec_stmt
3671 && vf < vect_min_worthwhile_factor (code))
3673 if (dump_enabled_p ())
3674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3675 "not worthwhile without SIMD support.\n");
3676 return false;
3679 if (!vec_stmt) /* transformation not required. */
3681 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3682 if (dump_enabled_p ())
3683 dump_printf_loc (MSG_NOTE, vect_location,
3684 "=== vectorizable_operation ===\n");
3685 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3686 return true;
3689 /** Transform. **/
3691 if (dump_enabled_p ())
3692 dump_printf_loc (MSG_NOTE, vect_location,
3693 "transform binary/unary operation.\n");
3695 /* Handle def. */
3696 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3698 /* In case the vectorization factor (VF) is bigger than the number
3699 of elements that we can fit in a vectype (nunits), we have to generate
3700 more than one vector stmt - i.e - we need to "unroll" the
3701 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3702 from one copy of the vector stmt to the next, in the field
3703 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3704 stages to find the correct vector defs to be used when vectorizing
3705 stmts that use the defs of the current stmt. The example below
3706 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3707 we need to create 4 vectorized stmts):
3709 before vectorization:
3710 RELATED_STMT VEC_STMT
3711 S1: x = memref - -
3712 S2: z = x + 1 - -
3714 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3715 there):
3716 RELATED_STMT VEC_STMT
3717 VS1_0: vx0 = memref0 VS1_1 -
3718 VS1_1: vx1 = memref1 VS1_2 -
3719 VS1_2: vx2 = memref2 VS1_3 -
3720 VS1_3: vx3 = memref3 - -
3721 S1: x = load - VS1_0
3722 S2: z = x + 1 - -
3724 step2: vectorize stmt S2 (done here):
3725 To vectorize stmt S2 we first need to find the relevant vector
3726 def for the first operand 'x'. This is, as usual, obtained from
3727 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3728 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3729 relevant vector def 'vx0'. Having found 'vx0' we can generate
3730 the vector stmt VS2_0, and as usual, record it in the
3731 STMT_VINFO_VEC_STMT of stmt S2.
3732 When creating the second copy (VS2_1), we obtain the relevant vector
3733 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3734 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3735 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3736 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3737 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3738 chain of stmts and pointers:
3739 RELATED_STMT VEC_STMT
3740 VS1_0: vx0 = memref0 VS1_1 -
3741 VS1_1: vx1 = memref1 VS1_2 -
3742 VS1_2: vx2 = memref2 VS1_3 -
3743 VS1_3: vx3 = memref3 - -
3744 S1: x = load - VS1_0
3745 VS2_0: vz0 = vx0 + v1 VS2_1 -
3746 VS2_1: vz1 = vx1 + v1 VS2_2 -
3747 VS2_2: vz2 = vx2 + v1 VS2_3 -
3748 VS2_3: vz3 = vx3 + v1 - -
3749 S2: z = x + 1 - VS2_0 */
3751 prev_stmt_info = NULL;
3752 for (j = 0; j < ncopies; j++)
3754 /* Handle uses. */
3755 if (j == 0)
3757 if (op_type == binary_op || op_type == ternary_op)
3758 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3759 slp_node, -1);
3760 else
3761 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3762 slp_node, -1);
3763 if (op_type == ternary_op)
3765 vec_oprnds2.create (1);
3766 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
3767 stmt,
3768 NULL));
3771 else
3773 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3774 if (op_type == ternary_op)
3776 tree vec_oprnd = vec_oprnds2.pop ();
3777 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
3778 vec_oprnd));
3782 /* Arguments are ready. Create the new vector stmt. */
3783 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3785 vop1 = ((op_type == binary_op || op_type == ternary_op)
3786 ? vec_oprnds1[i] : NULL_TREE);
3787 vop2 = ((op_type == ternary_op)
3788 ? vec_oprnds2[i] : NULL_TREE);
3789 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
3790 vop0, vop1, vop2);
3791 new_temp = make_ssa_name (vec_dest, new_stmt);
3792 gimple_assign_set_lhs (new_stmt, new_temp);
3793 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3794 if (slp_node)
3795 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3798 if (slp_node)
3799 continue;
3801 if (j == 0)
3802 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3803 else
3804 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3805 prev_stmt_info = vinfo_for_stmt (new_stmt);
3808 vec_oprnds0.release ();
3809 vec_oprnds1.release ();
3810 vec_oprnds2.release ();
3812 return true;
3815 /* A helper function to ensure data reference DR's base alignment
3816 for STMT_INFO. */
3818 static void
3819 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
3821 if (!dr->aux)
3822 return;
3824 if (((dataref_aux *)dr->aux)->base_misaligned)
3826 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3827 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
3829 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
3830 DECL_USER_ALIGN (base_decl) = 1;
3831 ((dataref_aux *)dr->aux)->base_misaligned = false;
3836 /* Function vectorizable_store.
3838 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3839 can be vectorized.
3840 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3841 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3842 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
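/* Illustrative examples of such statements are a[i_4] = x_3 and *p_6 = x_3:
   the LHS must be a memory reference (ARRAY_REF, MEM_REF, COMPONENT_REF,
   etc.), as checked below.  */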
3844 static bool
3845 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3846 slp_tree slp_node)
3848 tree scalar_dest;
3849 tree data_ref;
3850 tree op;
3851 tree vec_oprnd = NULL_TREE;
3852 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3853 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3854 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3855 tree elem_type;
3856 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3857 struct loop *loop = NULL;
3858 enum machine_mode vec_mode;
3859 tree dummy;
3860 enum dr_alignment_support alignment_support_scheme;
3861 tree def;
3862 gimple def_stmt;
3863 enum vect_def_type dt;
3864 stmt_vec_info prev_stmt_info = NULL;
3865 tree dataref_ptr = NULL_TREE;
3866 tree dataref_offset = NULL_TREE;
3867 gimple ptr_incr = NULL;
3868 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3869 int ncopies;
3870 int j;
3871 gimple next_stmt, first_stmt = NULL;
3872 bool grouped_store = false;
3873 bool store_lanes_p = false;
3874 unsigned int group_size, i;
3875 vec<tree> dr_chain = vNULL;
3876 vec<tree> oprnds = vNULL;
3877 vec<tree> result_chain = vNULL;
3878 bool inv_p;
3879 vec<tree> vec_oprnds = vNULL;
3880 bool slp = (slp_node != NULL);
3881 unsigned int vec_num;
3882 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3883 tree aggr_type;
3885 if (loop_vinfo)
3886 loop = LOOP_VINFO_LOOP (loop_vinfo);
3888 /* Multiple types in SLP are handled by creating the appropriate number of
3889 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3890 case of SLP. */
3891 if (slp || PURE_SLP_STMT (stmt_info))
3892 ncopies = 1;
3893 else
3894 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3896 gcc_assert (ncopies >= 1);
3898 /* FORNOW. This restriction should be relaxed. */
3899 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3901 if (dump_enabled_p ())
3902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3903 "multiple types in nested loop.\n");
3904 return false;
3907 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3908 return false;
3910 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3911 return false;
3913 /* Is vectorizable store? */
3915 if (!is_gimple_assign (stmt))
3916 return false;
3918 scalar_dest = gimple_assign_lhs (stmt);
3919 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3920 && is_pattern_stmt_p (stmt_info))
3921 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3922 if (TREE_CODE (scalar_dest) != ARRAY_REF
3923 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
3924 && TREE_CODE (scalar_dest) != INDIRECT_REF
3925 && TREE_CODE (scalar_dest) != COMPONENT_REF
3926 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3927 && TREE_CODE (scalar_dest) != REALPART_EXPR
3928 && TREE_CODE (scalar_dest) != MEM_REF)
3929 return false;
3931 gcc_assert (gimple_assign_single_p (stmt));
3932 op = gimple_assign_rhs1 (stmt);
3933 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3934 &def, &dt))
3936 if (dump_enabled_p ())
3937 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3938 "use not simple.\n");
3939 return false;
3942 elem_type = TREE_TYPE (vectype);
3943 vec_mode = TYPE_MODE (vectype);
3945 /* FORNOW. In some cases we can vectorize even if the data-type is not supported
3946 (e.g. - array initialization with 0). */
3947 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3948 return false;
3950 if (!STMT_VINFO_DATA_REF (stmt_info))
3951 return false;
3953 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3954 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3955 size_zero_node) < 0)
3957 if (dump_enabled_p ())
3958 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3959 "negative step for store.\n");
3960 return false;
3963 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
3965 grouped_store = true;
3966 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3967 if (!slp && !PURE_SLP_STMT (stmt_info))
3969 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3970 if (vect_store_lanes_supported (vectype, group_size))
3971 store_lanes_p = true;
3972 else if (!vect_grouped_store_supported (vectype, group_size))
3973 return false;
3976 if (first_stmt == stmt)
3978 /* STMT is the leader of the group. Check the operands of all the
3979 stmts of the group. */
3980 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3981 while (next_stmt)
3983 gcc_assert (gimple_assign_single_p (next_stmt));
3984 op = gimple_assign_rhs1 (next_stmt);
3985 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3986 &def_stmt, &def, &dt))
3988 if (dump_enabled_p ())
3989 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3990 "use not simple.\n");
3991 return false;
3993 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3998 if (!vec_stmt) /* transformation not required. */
4000 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
4001 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
4002 NULL, NULL, NULL);
4003 return true;
4006 /** Transform. **/
4008 ensure_base_align (stmt_info, dr);
4010 if (grouped_store)
4012 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4013 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4015 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
4017 /* FORNOW */
4018 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
4020 /* We vectorize all the stmts of the interleaving group when we
4021 reach the last stmt in the group. */
4022 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
4023 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
4024 && !slp)
4026 *vec_stmt = NULL;
4027 return true;
4030 if (slp)
4032 grouped_store = false;
4033 /* VEC_NUM is the number of vect stmts to be created for this
4034 group. */
4035 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4036 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4037 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4038 op = gimple_assign_rhs1 (first_stmt);
4040 else
4041 /* VEC_NUM is the number of vect stmts to be created for this
4042 group. */
4043 vec_num = group_size;
4045 else
4047 first_stmt = stmt;
4048 first_dr = dr;
4049 group_size = vec_num = 1;
4052 if (dump_enabled_p ())
4053 dump_printf_loc (MSG_NOTE, vect_location,
4054 "transform store. ncopies = %d\n", ncopies);
4056 dr_chain.create (group_size);
4057 oprnds.create (group_size);
4059 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4060 gcc_assert (alignment_support_scheme);
4061 /* Targets with store-lane instructions must not require explicit
4062 realignment. */
4063 gcc_assert (!store_lanes_p
4064 || alignment_support_scheme == dr_aligned
4065 || alignment_support_scheme == dr_unaligned_supported);
4067 if (store_lanes_p)
4068 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4069 else
4070 aggr_type = vectype;
4072 /* In case the vectorization factor (VF) is bigger than the number
4073 of elements that we can fit in a vectype (nunits), we have to generate
4074 more than one vector stmt - i.e., we need to "unroll" the
4075 vector stmt by a factor VF/nunits. For more details see documentation in
4076 vect_get_vec_def_for_stmt_copy. */
4078 /* In case of interleaving (non-unit grouped access):
4080 S1: &base + 2 = x2
4081 S2: &base = x0
4082 S3: &base + 1 = x1
4083 S4: &base + 3 = x3
4085 We create vectorized stores starting from base address (the access of the
4086 first stmt in the chain (S2 in the above example), when the last store stmt
4087 of the chain (S4) is reached:
4089 VS1: &base = vx2
4090 VS2: &base + vec_size*1 = vx0
4091 VS3: &base + vec_size*2 = vx1
4092 VS4: &base + vec_size*3 = vx3
4094 Then permutation statements are generated:
4096 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4097 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
4100 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4101 (the order of the data-refs in the output of vect_permute_store_chain
4102 corresponds to the order of scalar stmts in the interleaving chain - see
4103 the documentation of vect_permute_store_chain()).
4105 In case of both multiple types and interleaving, above vector stores and
4106 permutation stmts are created for every copy. The result vector stmts are
4107 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4108 STMT_VINFO_RELATED_STMT for the next copies. */
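/* A note on the VEC_PERM_EXPR selectors shown above (illustrative, assuming
   eight-element vectors): indices 0-7 select elements from the first operand
   and 8-15 from the second, so a mask such as {0, 8, 1, 9, 2, 10, 3, 11}
   interleaves corresponding elements of the two input vectors.  */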
4111 prev_stmt_info = NULL;
4112 for (j = 0; j < ncopies; j++)
4114 gimple new_stmt;
4116 if (j == 0)
4118 if (slp)
4120 /* Get vectorized arguments for SLP_NODE. */
4121 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4122 NULL, slp_node, -1);
4124 vec_oprnd = vec_oprnds[0];
4126 else
4128 /* For interleaved stores we collect vectorized defs for all the
4129 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4130 used as an input to vect_permute_store_chain(), and OPRNDS as
4131 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4133 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4134 OPRNDS are of size 1. */
4135 next_stmt = first_stmt;
4136 for (i = 0; i < group_size; i++)
4138 /* Since gaps are not supported for interleaved stores,
4139 GROUP_SIZE is the exact number of stmts in the chain.
4140 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4141 there is no interleaving, GROUP_SIZE is 1, and only one
4142 iteration of the loop will be executed. */
4143 gcc_assert (next_stmt
4144 && gimple_assign_single_p (next_stmt));
4145 op = gimple_assign_rhs1 (next_stmt);
4147 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4148 NULL);
4149 dr_chain.quick_push (vec_oprnd);
4150 oprnds.quick_push (vec_oprnd);
4151 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4155 /* We should have caught mismatched types earlier. */
4156 gcc_assert (useless_type_conversion_p (vectype,
4157 TREE_TYPE (vec_oprnd)));
4158 bool simd_lane_access_p
4159 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
4160 if (simd_lane_access_p
4161 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
4162 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
4163 && integer_zerop (DR_OFFSET (first_dr))
4164 && integer_zerop (DR_INIT (first_dr))
4165 && alias_sets_conflict_p (get_alias_set (aggr_type),
4166 get_alias_set (DR_REF (first_dr))))
4168 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
4169 dataref_offset = build_int_cst (reference_alias_ptr_type
4170 (DR_REF (first_dr)), 0);
4171 inv_p = false;
4173 else
4174 dataref_ptr
4175 = vect_create_data_ref_ptr (first_stmt, aggr_type,
4176 simd_lane_access_p ? loop : NULL,
4177 NULL_TREE, &dummy, gsi, &ptr_incr,
4178 simd_lane_access_p, &inv_p);
4179 gcc_assert (bb_vinfo || !inv_p);
4181 else
4183 /* For interleaved stores we created vectorized defs for all the
4184 defs stored in OPRNDS in the previous iteration (previous copy).
4185 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4186 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4187 next copy.
4188 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4189 OPRNDS are of size 1. */
4190 for (i = 0; i < group_size; i++)
4192 op = oprnds[i];
4193 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4194 &def, &dt);
4195 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4196 dr_chain[i] = vec_oprnd;
4197 oprnds[i] = vec_oprnd;
4199 if (dataref_offset)
4200 dataref_offset
4201 = int_const_binop (PLUS_EXPR, dataref_offset,
4202 TYPE_SIZE_UNIT (aggr_type));
4203 else
4204 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4205 TYPE_SIZE_UNIT (aggr_type));
4208 if (store_lanes_p)
4210 tree vec_array;
4212 /* Combine all the vectors into an array. */
4213 vec_array = create_vector_array (vectype, vec_num);
4214 for (i = 0; i < vec_num; i++)
4216 vec_oprnd = dr_chain[i];
4217 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4220 /* Emit:
4221 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4222 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4223 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4224 gimple_call_set_lhs (new_stmt, data_ref);
4225 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4227 else
4229 new_stmt = NULL;
4230 if (grouped_store)
4232 if (j == 0)
4233 result_chain.create (group_size);
4234 /* Permute. */
4235 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4236 &result_chain);
4239 next_stmt = first_stmt;
4240 for (i = 0; i < vec_num; i++)
4242 unsigned align, misalign;
4244 if (i > 0)
4245 /* Bump the vector pointer. */
4246 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4247 stmt, NULL_TREE);
4249 if (slp)
4250 vec_oprnd = vec_oprnds[i];
4251 else if (grouped_store)
4252 /* For grouped stores vectorized defs are interleaved in
4253 vect_permute_store_chain(). */
4254 vec_oprnd = result_chain[i];
4256 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4257 dataref_offset
4258 ? dataref_offset
4259 : build_int_cst (reference_alias_ptr_type
4260 (DR_REF (first_dr)), 0));
4261 align = TYPE_ALIGN_UNIT (vectype);
4262 if (aligned_access_p (first_dr))
4263 misalign = 0;
4264 else if (DR_MISALIGNMENT (first_dr) == -1)
4266 TREE_TYPE (data_ref)
4267 = build_aligned_type (TREE_TYPE (data_ref),
4268 TYPE_ALIGN (elem_type));
4269 align = TYPE_ALIGN_UNIT (elem_type);
4270 misalign = 0;
4272 else
4274 TREE_TYPE (data_ref)
4275 = build_aligned_type (TREE_TYPE (data_ref),
4276 TYPE_ALIGN (elem_type));
4277 misalign = DR_MISALIGNMENT (first_dr);
4279 if (dataref_offset == NULL_TREE)
4280 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4281 misalign);
4283 /* Arguments are ready. Create the new vector stmt. */
4284 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4285 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4287 if (slp)
4288 continue;
4290 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4291 if (!next_stmt)
4292 break;
4295 if (!slp)
4297 if (j == 0)
4298 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4299 else
4300 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4301 prev_stmt_info = vinfo_for_stmt (new_stmt);
4305 dr_chain.release ();
4306 oprnds.release ();
4307 result_chain.release ();
4308 vec_oprnds.release ();
4310 return true;
4313 /* Given a vector type VECTYPE and permutation SEL returns
4314 the VECTOR_CST mask that implements the permutation of the
4315 vector elements. If that is impossible to do, returns NULL. */
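/* Illustrative example: for a four-element vector type and SEL = {1, 0, 3, 2}
   the returned VECTOR_CST is {1, 0, 3, 2}, i.e. a mask that swaps adjacent
   pairs of elements - provided can_vec_perm_p accepts that selector for the
   vector mode; otherwise NULL is returned.  */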
4317 tree
4318 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4320 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
4321 int i, nunits;
4323 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4325 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4326 return NULL;
4328 mask_elt_type = lang_hooks.types.type_for_mode
4329 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
4330 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4332 mask_elts = XALLOCAVEC (tree, nunits);
4333 for (i = nunits - 1; i >= 0; i--)
4334 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4335 mask_vec = build_vector (mask_type, mask_elts);
4337 return mask_vec;
4340 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4341 reversal of the vector elements. If that is impossible to do,
4342 returns NULL. */
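/* E.g. for a four-element vector the selector built below is {3, 2, 1, 0},
   so the resulting mask (if the target supports it) reverses the element
   order.  */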
4344 static tree
4345 perm_mask_for_reverse (tree vectype)
4347 int i, nunits;
4348 unsigned char *sel;
4350 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4351 sel = XALLOCAVEC (unsigned char, nunits);
4353 for (i = 0; i < nunits; ++i)
4354 sel[i] = nunits - 1 - i;
4356 return vect_gen_perm_mask (vectype, sel);
4359 /* Given vector variables X and Y that were generated for the scalar
4360 STMT, generate instructions to permute the vector elements of X and Y
4361 using permutation mask MASK_VEC, insert them at *GSI and return the
4362 permuted vector variable. */
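/* The generated statement has the (illustrative) form

     perm_dest = VEC_PERM_EXPR <X, Y, MASK_VEC>;

   and the new SSA name holding the permuted vector is returned.  */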
4364 static tree
4365 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4366 gimple_stmt_iterator *gsi)
4368 tree vectype = TREE_TYPE (x);
4369 tree perm_dest, data_ref;
4370 gimple perm_stmt;
4372 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4373 data_ref = make_ssa_name (perm_dest, NULL);
4375 /* Generate the permute statement. */
4376 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4377 x, y, mask_vec);
4378 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4380 return data_ref;
4383 /* vectorizable_load.
4385 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4386 can be vectorized.
4387 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4388 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4389 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
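/* Illustrative examples of such statements are x_3 = a[i_4] and x_3 = *p_6:
   the RHS must be a memory reference (ARRAY_REF, MEM_REF, COMPONENT_REF,
   etc.), as checked below.  */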
4391 static bool
4392 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4393 slp_tree slp_node, slp_instance slp_node_instance)
4395 tree scalar_dest;
4396 tree vec_dest = NULL;
4397 tree data_ref = NULL;
4398 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4399 stmt_vec_info prev_stmt_info;
4400 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4401 struct loop *loop = NULL;
4402 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4403 bool nested_in_vect_loop = false;
4404 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4405 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4406 tree elem_type;
4407 tree new_temp;
4408 enum machine_mode mode;
4409 gimple new_stmt = NULL;
4410 tree dummy;
4411 enum dr_alignment_support alignment_support_scheme;
4412 tree dataref_ptr = NULL_TREE;
4413 tree dataref_offset = NULL_TREE;
4414 gimple ptr_incr = NULL;
4415 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4416 int ncopies;
4417 int i, j, group_size, group_gap;
4418 tree msq = NULL_TREE, lsq;
4419 tree offset = NULL_TREE;
4420 tree realignment_token = NULL_TREE;
4421 gimple phi = NULL;
4422 vec<tree> dr_chain = vNULL;
4423 bool grouped_load = false;
4424 bool load_lanes_p = false;
4425 gimple first_stmt;
4426 bool inv_p;
4427 bool negative = false;
4428 bool compute_in_loop = false;
4429 struct loop *at_loop;
4430 int vec_num;
4431 bool slp = (slp_node != NULL);
4432 bool slp_perm = false;
4433 enum tree_code code;
4434 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4435 int vf;
4436 tree aggr_type;
4437 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4438 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4439 int gather_scale = 1;
4440 enum vect_def_type gather_dt = vect_unknown_def_type;
4442 if (loop_vinfo)
4444 loop = LOOP_VINFO_LOOP (loop_vinfo);
4445 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4446 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4448 else
4449 vf = 1;
4451 /* Multiple types in SLP are handled by creating the appropriate number of
4452 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4453 case of SLP. */
4454 if (slp || PURE_SLP_STMT (stmt_info))
4455 ncopies = 1;
4456 else
4457 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4459 gcc_assert (ncopies >= 1);
4461 /* FORNOW. This restriction should be relaxed. */
4462 if (nested_in_vect_loop && ncopies > 1)
4464 if (dump_enabled_p ())
4465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4466 "multiple types in nested loop.\n");
4467 return false;
4470 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4471 return false;
4473 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4474 return false;
4476 /* Is vectorizable load? */
4477 if (!is_gimple_assign (stmt))
4478 return false;
4480 scalar_dest = gimple_assign_lhs (stmt);
4481 if (TREE_CODE (scalar_dest) != SSA_NAME)
4482 return false;
4484 code = gimple_assign_rhs_code (stmt);
4485 if (code != ARRAY_REF
4486 && code != BIT_FIELD_REF
4487 && code != INDIRECT_REF
4488 && code != COMPONENT_REF
4489 && code != IMAGPART_EXPR
4490 && code != REALPART_EXPR
4491 && code != MEM_REF
4492 && TREE_CODE_CLASS (code) != tcc_declaration)
4493 return false;
4495 if (!STMT_VINFO_DATA_REF (stmt_info))
4496 return false;
4498 elem_type = TREE_TYPE (vectype);
4499 mode = TYPE_MODE (vectype);
4501 /* FORNOW. In some cases we can vectorize even if the data-type is not supported
4502 (e.g. - data copies). */
4503 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4505 if (dump_enabled_p ())
4506 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4507 "Aligned load, but unsupported type.\n");
4508 return false;
4511 /* Check if the load is a part of an interleaving chain. */
4512 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
4514 grouped_load = true;
4515 /* FORNOW */
4516 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4518 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4519 if (!slp && !PURE_SLP_STMT (stmt_info))
4521 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4522 if (vect_load_lanes_supported (vectype, group_size))
4523 load_lanes_p = true;
4524 else if (!vect_grouped_load_supported (vectype, group_size))
4525 return false;
4530 if (STMT_VINFO_GATHER_P (stmt_info))
4532 gimple def_stmt;
4533 tree def;
4534 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4535 &gather_off, &gather_scale);
4536 gcc_assert (gather_decl);
4537 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4538 &def_stmt, &def, &gather_dt,
4539 &gather_off_vectype))
4541 if (dump_enabled_p ())
4542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4543 "gather index use not simple.\n");
4544 return false;
4547 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4549 else
4551 negative = tree_int_cst_compare (nested_in_vect_loop
4552 ? STMT_VINFO_DR_STEP (stmt_info)
4553 : DR_STEP (dr),
4554 size_zero_node) < 0;
4555 if (negative && ncopies > 1)
4557 if (dump_enabled_p ())
4558 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4559 "multiple types with negative step.\n");
4560 return false;
4563 if (negative)
4565 if (grouped_load)
4567 if (dump_enabled_p ())
4568 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4569 "negative step for group load not supported"
4570 "\n");
4571 return false;
4573 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4574 if (alignment_support_scheme != dr_aligned
4575 && alignment_support_scheme != dr_unaligned_supported)
4577 if (dump_enabled_p ())
4578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4579 "negative step but alignment required.\n");
4580 return false;
4582 if (!perm_mask_for_reverse (vectype))
4584 if (dump_enabled_p ())
4585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4586 "negative step and reversing not supported."
4587 "\n");
4588 return false;
4593 if (!vec_stmt) /* transformation not required. */
4595 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4596 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
4597 return true;
4600 if (dump_enabled_p ())
4601 dump_printf_loc (MSG_NOTE, vect_location,
4602 "transform load. ncopies = %d\n", ncopies);
4604 /** Transform. **/
4606 ensure_base_align (stmt_info, dr);
4608 if (STMT_VINFO_GATHER_P (stmt_info))
4610 tree vec_oprnd0 = NULL_TREE, op;
4611 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4612 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4613 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4614 edge pe = loop_preheader_edge (loop);
4615 gimple_seq seq;
4616 basic_block new_bb;
4617 enum { NARROW, NONE, WIDEN } modifier;
4618 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4620 if (nunits == gather_off_nunits)
4621 modifier = NONE;
4622 else if (nunits == gather_off_nunits / 2)
4624 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4625 modifier = WIDEN;
4627 for (i = 0; i < gather_off_nunits; ++i)
4628 sel[i] = i | nunits;
4630 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4631 gcc_assert (perm_mask != NULL_TREE);
4633 else if (nunits == gather_off_nunits * 2)
4635 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4636 modifier = NARROW;
4638 for (i = 0; i < nunits; ++i)
4639 sel[i] = i < gather_off_nunits
4640 ? i : i + nunits - gather_off_nunits;
4642 perm_mask = vect_gen_perm_mask (vectype, sel);
4643 gcc_assert (perm_mask != NULL_TREE);
4644 ncopies *= 2;
4646 else
4647 gcc_unreachable ();
4649 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4650 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4651 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4652 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4653 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4654 scaletype = TREE_VALUE (arglist);
4655 gcc_checking_assert (types_compatible_p (srctype, rettype)
4656 && types_compatible_p (srctype, masktype));
4658 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4660 ptr = fold_convert (ptrtype, gather_base);
4661 if (!is_gimple_min_invariant (ptr))
4663 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4664 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4665 gcc_assert (!new_bb);
4668 /* Currently we support only unconditional gather loads,
4669 so mask should be all ones. */
4670 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4671 mask = build_int_cst (TREE_TYPE (masktype), -1);
4672 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4674 REAL_VALUE_TYPE r;
4675 long tmp[6];
4676 for (j = 0; j < 6; ++j)
4677 tmp[j] = -1;
4678 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4679 mask = build_real (TREE_TYPE (masktype), r);
4681 else
4682 gcc_unreachable ();
4683 mask = build_vector_from_val (masktype, mask);
4684 mask = vect_init_vector (stmt, mask, masktype, NULL);
4686 scale = build_int_cst (scaletype, gather_scale);
4688 prev_stmt_info = NULL;
4689 for (j = 0; j < ncopies; ++j)
4691 if (modifier == WIDEN && (j & 1))
4692 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4693 perm_mask, stmt, gsi);
4694 else if (j == 0)
4695 op = vec_oprnd0
4696 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4697 else
4698 op = vec_oprnd0
4699 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4701 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4703 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4704 == TYPE_VECTOR_SUBPARTS (idxtype));
4705 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4706 var = make_ssa_name (var, NULL);
4707 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4708 new_stmt
4709 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4710 op, NULL_TREE);
4711 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4712 op = var;
4715 new_stmt
4716 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4718 if (!useless_type_conversion_p (vectype, rettype))
4720 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4721 == TYPE_VECTOR_SUBPARTS (rettype));
4722 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4723 op = make_ssa_name (var, new_stmt);
4724 gimple_call_set_lhs (new_stmt, op);
4725 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4726 var = make_ssa_name (vec_dest, NULL);
4727 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4728 new_stmt
4729 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4730 NULL_TREE);
4732 else
4734 var = make_ssa_name (vec_dest, new_stmt);
4735 gimple_call_set_lhs (new_stmt, var);
4738 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4740 if (modifier == NARROW)
4742 if ((j & 1) == 0)
4744 prev_res = var;
4745 continue;
4747 var = permute_vec_elements (prev_res, var,
4748 perm_mask, stmt, gsi);
4749 new_stmt = SSA_NAME_DEF_STMT (var);
4752 if (prev_stmt_info == NULL)
4753 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4754 else
4755 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4756 prev_stmt_info = vinfo_for_stmt (new_stmt);
4758 return true;
4760 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4762 gimple_stmt_iterator incr_gsi;
4763 bool insert_after;
4764 gimple incr;
4765 tree offvar;
4766 tree ivstep;
4767 tree running_off;
4768 vec<constructor_elt, va_gc> *v = NULL;
4769 gimple_seq stmts = NULL;
4770 tree stride_base, stride_step, alias_off;
4772 gcc_assert (!nested_in_vect_loop);
4774 stride_base
4775 = fold_build_pointer_plus
4776 (unshare_expr (DR_BASE_ADDRESS (dr)),
4777 size_binop (PLUS_EXPR,
4778 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
4779 convert_to_ptrofftype (DR_INIT (dr))));
4780 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
4782 /* For a load with loop-invariant (but other than power-of-2)
4783 stride (i.e. not a grouped access) like so:
4785 for (i = 0; i < n; i += stride)
4786 ... = array[i];
4788 we generate a new induction variable and new accesses to
4789 form a new vector (or vectors, depending on ncopies):
4791 for (j = 0; ; j += VF*stride)
4792 tmp1 = array[j];
4793 tmp2 = array[j + stride];
4795 vectemp = {tmp1, tmp2, ...}
4798 ivstep = stride_step;
4799 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4800 build_int_cst (TREE_TYPE (ivstep), vf));
4802 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4804 create_iv (stride_base, ivstep, NULL,
4805 loop, &incr_gsi, insert_after,
4806 &offvar, NULL);
4807 incr = gsi_stmt (incr_gsi);
4808 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4810 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4811 if (stmts)
4812 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4814 prev_stmt_info = NULL;
4815 running_off = offvar;
4816 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
4817 for (j = 0; j < ncopies; j++)
4819 tree vec_inv;
4821 vec_alloc (v, nunits);
4822 for (i = 0; i < nunits; i++)
4824 tree newref, newoff;
4825 gimple incr;
4826 newref = build2 (MEM_REF, TREE_TYPE (vectype),
4827 running_off, alias_off);
4829 newref = force_gimple_operand_gsi (gsi, newref, true,
4830 NULL_TREE, true,
4831 GSI_SAME_STMT);
4832 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
4833 newoff = copy_ssa_name (running_off, NULL);
4834 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4835 running_off, stride_step);
4836 vect_finish_stmt_generation (stmt, incr, gsi);
4838 running_off = newoff;
4841 vec_inv = build_constructor (vectype, v);
4842 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4843 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4845 if (j == 0)
4846 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4847 else
4848 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4849 prev_stmt_info = vinfo_for_stmt (new_stmt);
4851 return true;
4854 if (grouped_load)
4856 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4857 if (slp
4858 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
4859 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
4860 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4862 /* Check if the chain of loads is already vectorized. */
4863 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
4864 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
4865 ??? But we can only do so if there is exactly one
4866 as we have no way to get at the rest. Leave the CSE
4867 opportunity alone.
4868 ??? With the group load eventually participating
4869 in multiple different permutations (having multiple
4870 slp nodes which refer to the same group) the CSE
4871 would even produce wrong code. See PR56270. */
4872 && !slp)
4874 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4875 return true;
4877 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4878 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4880 /* VEC_NUM is the number of vect stmts to be created for this group. */
4881 if (slp)
4883 grouped_load = false;
4884 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4885 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4886 slp_perm = true;
4887 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
4889 else
4891 vec_num = group_size;
4892 group_gap = 0;
4895 else
4897 first_stmt = stmt;
4898 first_dr = dr;
4899 group_size = vec_num = 1;
4900 group_gap = 0;
4903 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4904 gcc_assert (alignment_support_scheme);
4905 /* Targets with load-lane instructions must not require explicit
4906 realignment. */
4907 gcc_assert (!load_lanes_p
4908 || alignment_support_scheme == dr_aligned
4909 || alignment_support_scheme == dr_unaligned_supported);
4911 /* In case the vectorization factor (VF) is bigger than the number
4912 of elements that we can fit in a vectype (nunits), we have to generate
4913 more than one vector stmt - i.e., we need to "unroll" the
4914 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4915 from one copy of the vector stmt to the next, in the field
4916 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4917 stages to find the correct vector defs to be used when vectorizing
4918 stmts that use the defs of the current stmt. The example below
4919 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4920 need to create 4 vectorized stmts):
4922 before vectorization:
4923 RELATED_STMT VEC_STMT
4924 S1: x = memref - -
4925 S2: z = x + 1 - -
4927 step 1: vectorize stmt S1:
4928 We first create the vector stmt VS1_0, and, as usual, record a
4929 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4930 Next, we create the vector stmt VS1_1, and record a pointer to
4931 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4932 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4933 stmts and pointers:
4934 RELATED_STMT VEC_STMT
4935 VS1_0: vx0 = memref0 VS1_1 -
4936 VS1_1: vx1 = memref1 VS1_2 -
4937 VS1_2: vx2 = memref2 VS1_3 -
4938 VS1_3: vx3 = memref3 - -
4939 S1: x = load - VS1_0
4940 S2: z = x + 1 - -
4942 See the documentation of vect_get_vec_def_for_stmt_copy for how the
4943 information recorded in the RELATED_STMT field is used to vectorize
4944 stmt S2. */
4946 /* In case of interleaving (non-unit grouped access):
4948 S1: x2 = &base + 2
4949 S2: x0 = &base
4950 S3: x1 = &base + 1
4951 S4: x3 = &base + 3
4953 Vectorized loads are created in the order of memory accesses
4954 starting from the access of the first stmt of the chain:
4956 VS1: vx0 = &base
4957 VS2: vx1 = &base + vec_size*1
4958 VS3: vx3 = &base + vec_size*2
4959 VS4: vx4 = &base + vec_size*3
4961 Then permutation statements are generated:
4963 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4964 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4967 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4968 (the order of the data-refs in the output of vect_permute_load_chain
4969 corresponds to the order of scalar stmts in the interleaving chain - see
4970 the documentation of vect_permute_load_chain()).
4971 The generation of permutation stmts and recording them in
4972 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4974 In case of both multiple types and interleaving, the vector loads and
4975 permutation stmts above are created for every copy. The result vector
4976 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4977 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4979 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4980 on a target that supports unaligned accesses (dr_unaligned_supported)
4981 we generate the following code:
4982 p = initial_addr;
4983 indx = 0;
4984 loop {
4985 p = p + indx * vectype_size;
4986 vec_dest = *(p);
4987 indx = indx + 1;
4990 Otherwise, the data reference is potentially unaligned on a target that
4991 does not support unaligned accesses (dr_explicit_realign_optimized) -
4992 then generate the following code, in which the data in each iteration is
4993 obtained by two vector loads, one from the previous iteration, and one
4994 from the current iteration:
4995 p1 = initial_addr;
4996 msq_init = *(floor(p1))
4997 p2 = initial_addr + VS - 1;
4998 realignment_token = call target_builtin;
4999 indx = 0;
5000 loop {
5001 p2 = p2 + indx * vectype_size
5002 lsq = *(floor(p2))
5003 vec_dest = realign_load (msq, lsq, realignment_token)
5004 indx = indx + 1;
5005 msq = lsq;
5006 } */
5008 /* If the misalignment remains the same throughout the execution of the
5009 loop, we can create the init_addr and permutation mask at the loop
5010 preheader. Otherwise, it needs to be created inside the loop.
5011 This can only occur when vectorizing memory accesses in the inner-loop
5012 nested within an outer-loop that is being vectorized. */
5014 if (nested_in_vect_loop
5015 && (TREE_INT_CST_LOW (DR_STEP (dr))
5016 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
5018 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
5019 compute_in_loop = true;
5022 if ((alignment_support_scheme == dr_explicit_realign_optimized
5023 || alignment_support_scheme == dr_explicit_realign)
5024 && !compute_in_loop)
5026 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
5027 alignment_support_scheme, NULL_TREE,
5028 &at_loop);
5029 if (alignment_support_scheme == dr_explicit_realign_optimized)
5031 phi = SSA_NAME_DEF_STMT (msq);
5032 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5035 else
5036 at_loop = loop;
5038 if (negative)
5039 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5041 if (load_lanes_p)
5042 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5043 else
5044 aggr_type = vectype;
5046 prev_stmt_info = NULL;
5047 for (j = 0; j < ncopies; j++)
5049 /* 1. Create the vector or array pointer update chain. */
5050 if (j == 0)
5052 bool simd_lane_access_p
5053 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5054 if (simd_lane_access_p
5055 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5056 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5057 && integer_zerop (DR_OFFSET (first_dr))
5058 && integer_zerop (DR_INIT (first_dr))
5059 && alias_sets_conflict_p (get_alias_set (aggr_type),
5060 get_alias_set (DR_REF (first_dr)))
5061 && (alignment_support_scheme == dr_aligned
5062 || alignment_support_scheme == dr_unaligned_supported))
5064 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5065 dataref_offset = build_int_cst (reference_alias_ptr_type
5066 (DR_REF (first_dr)), 0);
5067 inv_p = false;
5069 else
5070 dataref_ptr
5071 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
5072 offset, &dummy, gsi, &ptr_incr,
5073 simd_lane_access_p, &inv_p);
5075 else if (dataref_offset)
5076 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
5077 TYPE_SIZE_UNIT (aggr_type));
5078 else
5079 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5080 TYPE_SIZE_UNIT (aggr_type));
5082 if (grouped_load || slp_perm)
5083 dr_chain.create (vec_num);
5085 if (load_lanes_p)
5087 tree vec_array;
5089 vec_array = create_vector_array (vectype, vec_num);
5091 /* Emit:
5092 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
5093 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5094 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
5095 gimple_call_set_lhs (new_stmt, vec_array);
5096 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5098 /* Extract each vector into an SSA_NAME. */
5099 for (i = 0; i < vec_num; i++)
5101 new_temp = read_vector_array (stmt, gsi, scalar_dest,
5102 vec_array, i);
5103 dr_chain.quick_push (new_temp);
5106 /* Record the mapping between SSA_NAMEs and statements. */
5107 vect_record_grouped_load_vectors (stmt, dr_chain);
5109 else
5111 for (i = 0; i < vec_num; i++)
5113 if (i > 0)
5114 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5115 stmt, NULL_TREE);
5117 /* 2. Create the vector-load in the loop. */
5118 switch (alignment_support_scheme)
5120 case dr_aligned:
5121 case dr_unaligned_supported:
5123 unsigned int align, misalign;
5125 data_ref
5126 = build2 (MEM_REF, vectype, dataref_ptr,
5127 dataref_offset
5128 ? dataref_offset
5129 : build_int_cst (reference_alias_ptr_type
5130 (DR_REF (first_dr)), 0));
5131 align = TYPE_ALIGN_UNIT (vectype);
5132 if (alignment_support_scheme == dr_aligned)
5134 gcc_assert (aligned_access_p (first_dr));
5135 misalign = 0;
5137 else if (DR_MISALIGNMENT (first_dr) == -1)
5139 TREE_TYPE (data_ref)
5140 = build_aligned_type (TREE_TYPE (data_ref),
5141 TYPE_ALIGN (elem_type));
5142 align = TYPE_ALIGN_UNIT (elem_type);
5143 misalign = 0;
5145 else
5147 TREE_TYPE (data_ref)
5148 = build_aligned_type (TREE_TYPE (data_ref),
5149 TYPE_ALIGN (elem_type));
5150 misalign = DR_MISALIGNMENT (first_dr);
5152 if (dataref_offset == NULL_TREE)
5153 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5154 align, misalign);
5155 break;
5157 case dr_explicit_realign:
5159 tree ptr, bump;
5160 tree vs_minus_1;
5162 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5164 if (compute_in_loop)
5165 msq = vect_setup_realignment (first_stmt, gsi,
5166 &realignment_token,
5167 dr_explicit_realign,
5168 dataref_ptr, NULL);
5170 ptr = copy_ssa_name (dataref_ptr, NULL);
5171 new_stmt = gimple_build_assign_with_ops
5172 (BIT_AND_EXPR, ptr, dataref_ptr,
5173 build_int_cst
5174 (TREE_TYPE (dataref_ptr),
5175 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5176 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5177 data_ref
5178 = build2 (MEM_REF, vectype, ptr,
5179 build_int_cst (reference_alias_ptr_type
5180 (DR_REF (first_dr)), 0));
5181 vec_dest = vect_create_destination_var (scalar_dest,
5182 vectype);
5183 new_stmt = gimple_build_assign (vec_dest, data_ref);
5184 new_temp = make_ssa_name (vec_dest, new_stmt);
5185 gimple_assign_set_lhs (new_stmt, new_temp);
5186 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5187 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5188 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5189 msq = new_temp;
5191 bump = size_binop (MULT_EXPR, vs_minus_1,
5192 TYPE_SIZE_UNIT (elem_type));
5193 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5194 new_stmt = gimple_build_assign_with_ops
5195 (BIT_AND_EXPR, NULL_TREE, ptr,
5196 build_int_cst
5197 (TREE_TYPE (ptr),
5198 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5199 ptr = copy_ssa_name (dataref_ptr, new_stmt);
5200 gimple_assign_set_lhs (new_stmt, ptr);
5201 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5202 data_ref
5203 = build2 (MEM_REF, vectype, ptr,
5204 build_int_cst (reference_alias_ptr_type
5205 (DR_REF (first_dr)), 0));
5206 break;
5208 case dr_explicit_realign_optimized:
5209 new_temp = copy_ssa_name (dataref_ptr, NULL);
5210 new_stmt = gimple_build_assign_with_ops
5211 (BIT_AND_EXPR, new_temp, dataref_ptr,
5212 build_int_cst
5213 (TREE_TYPE (dataref_ptr),
5214 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5215 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5216 data_ref
5217 = build2 (MEM_REF, vectype, new_temp,
5218 build_int_cst (reference_alias_ptr_type
5219 (DR_REF (first_dr)), 0));
5220 break;
5221 default:
5222 gcc_unreachable ();
5224 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5225 new_stmt = gimple_build_assign (vec_dest, data_ref);
5226 new_temp = make_ssa_name (vec_dest, new_stmt);
5227 gimple_assign_set_lhs (new_stmt, new_temp);
5228 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5230 /* 3. Handle explicit realignment if necessary/supported.
5231 Create in loop:
5232 vec_dest = realign_load (msq, lsq, realignment_token) */
5233 if (alignment_support_scheme == dr_explicit_realign_optimized
5234 || alignment_support_scheme == dr_explicit_realign)
5236 lsq = gimple_assign_lhs (new_stmt);
5237 if (!realignment_token)
5238 realignment_token = dataref_ptr;
5239 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5240 new_stmt
5241 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5242 vec_dest, msq, lsq,
5243 realignment_token);
5244 new_temp = make_ssa_name (vec_dest, new_stmt);
5245 gimple_assign_set_lhs (new_stmt, new_temp);
5246 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5248 if (alignment_support_scheme == dr_explicit_realign_optimized)
5250 gcc_assert (phi);
5251 if (i == vec_num - 1 && j == ncopies - 1)
5252 add_phi_arg (phi, lsq,
5253 loop_latch_edge (containing_loop),
5254 UNKNOWN_LOCATION);
5255 msq = lsq;
5259 /* 4. Handle invariant-load. */
5260 if (inv_p && !bb_vinfo)
5262 gimple_stmt_iterator gsi2 = *gsi;
5263 gcc_assert (!grouped_load);
5264 gsi_next (&gsi2);
5265 new_temp = vect_init_vector (stmt, scalar_dest,
5266 vectype, &gsi2);
5267 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5270 if (negative)
5272 tree perm_mask = perm_mask_for_reverse (vectype);
5273 new_temp = permute_vec_elements (new_temp, new_temp,
5274 perm_mask, stmt, gsi);
5275 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5278 /* Collect vector loads and later create their permutation in
5279 vect_transform_grouped_load (). */
5280 if (grouped_load || slp_perm)
5281 dr_chain.quick_push (new_temp);
5283 /* Store vector loads in the corresponding SLP_NODE. */
5284 if (slp && !slp_perm)
5285 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5287 /* Bump the vector pointer to account for a gap. */
5288 if (slp && group_gap != 0)
5290 tree bump = size_binop (MULT_EXPR,
5291 TYPE_SIZE_UNIT (elem_type),
5292 size_int (group_gap));
5293 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5294 stmt, bump);
5298 if (slp && !slp_perm)
5299 continue;
5301 if (slp_perm)
5303 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
5304 slp_node_instance, false))
5306 dr_chain.release ();
5307 return false;
5310 else
5312 if (grouped_load)
5314 if (!load_lanes_p)
5315 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
5316 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5318 else
5320 if (j == 0)
5321 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5322 else
5323 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5324 prev_stmt_info = vinfo_for_stmt (new_stmt);
5327 dr_chain.release ();
5330 return true;
5333 /* Function vect_is_simple_cond.
5335 Input:
5336 LOOP - the loop that is being vectorized.
5337 COND - Condition that is checked for simple use.
5339 Output:
5340 *COMP_VECTYPE - the vector type for the comparison.
5342 Returns whether a COND can be vectorized. Checks whether
5343 condition operands are supportable using vect_is_simple_use. */
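/* For example, conditions such as a_1 < b_2 or x_3 != 0.0 are simple in this
   sense: each operand must either be an SSA_NAME whose definition
   vect_is_simple_use_1 accepts, or an INTEGER_CST, REAL_CST or FIXED_CST.  */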
5345 static bool
5346 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5347 bb_vec_info bb_vinfo, tree *comp_vectype)
5349 tree lhs, rhs;
5350 tree def;
5351 enum vect_def_type dt;
5352 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5354 if (!COMPARISON_CLASS_P (cond))
5355 return false;
5357 lhs = TREE_OPERAND (cond, 0);
5358 rhs = TREE_OPERAND (cond, 1);
5360 if (TREE_CODE (lhs) == SSA_NAME)
5362 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5363 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5364 &lhs_def_stmt, &def, &dt, &vectype1))
5365 return false;
5367 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5368 && TREE_CODE (lhs) != FIXED_CST)
5369 return false;
5371 if (TREE_CODE (rhs) == SSA_NAME)
5373 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5374 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5375 &rhs_def_stmt, &def, &dt, &vectype2))
5376 return false;
5378 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5379 && TREE_CODE (rhs) != FIXED_CST)
5380 return false;
5382 *comp_vectype = vectype1 ? vectype1 : vectype2;
5383 return true;
5386 /* vectorizable_condition.
5388 Check if STMT is a conditional modify expression that can be vectorized.
5389 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5390 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5391 at GSI.
5393 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
5394 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
5395 the else clause if it is 2).
5397 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
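/* Purely as an illustration, a scalar statement of the form

     z_5 = a_1 < b_2 ? c_3 : d_4;

   is transformed below into

     vz = VEC_COND_EXPR <va < vb, vc, vd>;

   where the comparison is built with a signed integer vector type of the
   same width as VECTYPE (VEC_CMP_TYPE below).  */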
5399 bool
5400 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5401 gimple *vec_stmt, tree reduc_def, int reduc_index,
5402 slp_tree slp_node)
5404 tree scalar_dest = NULL_TREE;
5405 tree vec_dest = NULL_TREE;
5406 tree cond_expr, then_clause, else_clause;
5407 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5408 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5409 tree comp_vectype = NULL_TREE;
5410 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5411 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5412 tree vec_compare, vec_cond_expr;
5413 tree new_temp;
5414 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5415 tree def;
5416 enum vect_def_type dt, dts[4];
5417 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5418 int ncopies;
5419 enum tree_code code;
5420 stmt_vec_info prev_stmt_info = NULL;
5421 int i, j;
5422 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5423 vec<tree> vec_oprnds0 = vNULL;
5424 vec<tree> vec_oprnds1 = vNULL;
5425 vec<tree> vec_oprnds2 = vNULL;
5426 vec<tree> vec_oprnds3 = vNULL;
5427 tree vec_cmp_type;
5429 if (slp_node || PURE_SLP_STMT (stmt_info))
5430 ncopies = 1;
5431 else
5432 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5434 gcc_assert (ncopies >= 1);
5435 if (reduc_index && ncopies > 1)
5436 return false; /* FORNOW */
5438 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5439 return false;
5441 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5442 return false;
5444 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5445 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5446 && reduc_def))
5447 return false;
5449 /* FORNOW: not yet supported. */
5450 if (STMT_VINFO_LIVE_P (stmt_info))
5452 if (dump_enabled_p ())
5453 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5454 "value used after loop.\n");
5455 return false;
5458 /* Is vectorizable conditional operation? */
5459 if (!is_gimple_assign (stmt))
5460 return false;
5462 code = gimple_assign_rhs_code (stmt);
5464 if (code != COND_EXPR)
5465 return false;
5467 cond_expr = gimple_assign_rhs1 (stmt);
5468 then_clause = gimple_assign_rhs2 (stmt);
5469 else_clause = gimple_assign_rhs3 (stmt);
5471 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5472 &comp_vectype)
5473 || !comp_vectype)
5474 return false;
5476 if (TREE_CODE (then_clause) == SSA_NAME)
5478 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5479 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5480 &then_def_stmt, &def, &dt))
5481 return false;
5483 else if (TREE_CODE (then_clause) != INTEGER_CST
5484 && TREE_CODE (then_clause) != REAL_CST
5485 && TREE_CODE (then_clause) != FIXED_CST)
5486 return false;
5488 if (TREE_CODE (else_clause) == SSA_NAME)
5490 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5491 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5492 &else_def_stmt, &def, &dt))
5493 return false;
5495 else if (TREE_CODE (else_clause) != INTEGER_CST
5496 && TREE_CODE (else_clause) != REAL_CST
5497 && TREE_CODE (else_clause) != FIXED_CST)
5498 return false;
5500 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
5501 /* The result of a vector comparison should be a signed integer type. */
5502 tree cmp_type = build_nonstandard_integer_type (prec, 0);
5503 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
5504 if (vec_cmp_type == NULL_TREE)
5505 return false;
5507 if (!vec_stmt)
5509 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5510 return expand_vec_cond_expr_p (vectype, comp_vectype);
5513 /* Transform. */
5515 if (!slp_node)
5517 vec_oprnds0.create (1);
5518 vec_oprnds1.create (1);
5519 vec_oprnds2.create (1);
5520 vec_oprnds3.create (1);
5523 /* Handle def. */
5524 scalar_dest = gimple_assign_lhs (stmt);
5525 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5527 /* Handle cond expr. */
5528 for (j = 0; j < ncopies; j++)
5530 gimple new_stmt = NULL;
5531 if (j == 0)
5533 if (slp_node)
5535 stack_vec<tree, 4> ops;
5536 stack_vec<vec<tree>, 4> vec_defs;
5538 ops.safe_push (TREE_OPERAND (cond_expr, 0));
5539 ops.safe_push (TREE_OPERAND (cond_expr, 1));
5540 ops.safe_push (then_clause);
5541 ops.safe_push (else_clause);
5542 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5543 vec_oprnds3 = vec_defs.pop ();
5544 vec_oprnds2 = vec_defs.pop ();
5545 vec_oprnds1 = vec_defs.pop ();
5546 vec_oprnds0 = vec_defs.pop ();
5548 ops.release ();
5549 vec_defs.release ();
5551 else
5553 gimple gtemp;
5554 vec_cond_lhs =
5555 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5556 stmt, NULL);
5557 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5558 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5560 vec_cond_rhs =
5561 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5562 stmt, NULL);
5563 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5564 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5565 if (reduc_index == 1)
5566 vec_then_clause = reduc_def;
5567 else
5569 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5570 stmt, NULL);
5571 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5572 NULL, &gtemp, &def, &dts[2]);
5574 if (reduc_index == 2)
5575 vec_else_clause = reduc_def;
5576 else
5578 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5579 stmt, NULL);
5580 vect_is_simple_use (else_clause, stmt, loop_vinfo,
5581 NULL, &gtemp, &def, &dts[3]);
5585 else
5587 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5588 vec_oprnds0.pop ());
5589 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5590 vec_oprnds1.pop ());
5591 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5592 vec_oprnds2.pop ());
5593 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5594 vec_oprnds3.pop ());
5597 if (!slp_node)
5599 vec_oprnds0.quick_push (vec_cond_lhs);
5600 vec_oprnds1.quick_push (vec_cond_rhs);
5601 vec_oprnds2.quick_push (vec_then_clause);
5602 vec_oprnds3.quick_push (vec_else_clause);
5605 /* Arguments are ready. Create the new vector stmt. */
5606 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
5608 vec_cond_rhs = vec_oprnds1[i];
5609 vec_then_clause = vec_oprnds2[i];
5610 vec_else_clause = vec_oprnds3[i];
5612 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
5613 vec_cond_lhs, vec_cond_rhs);
5614 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5615 vec_compare, vec_then_clause, vec_else_clause);
5617 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5618 new_temp = make_ssa_name (vec_dest, new_stmt);
5619 gimple_assign_set_lhs (new_stmt, new_temp);
5620 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5621 if (slp_node)
5622 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5625 if (slp_node)
5626 continue;
5628 if (j == 0)
5629 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5630 else
5631 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5633 prev_stmt_info = vinfo_for_stmt (new_stmt);
5636 vec_oprnds0.release ();
5637 vec_oprnds1.release ();
5638 vec_oprnds2.release ();
5639 vec_oprnds3.release ();
5641 return true;
5645 /* Make sure the statement is vectorizable. */
5647 bool
5648 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5650 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5651 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5652 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5653 bool ok;
5654 tree scalar_type, vectype;
5655 gimple pattern_stmt;
5656 gimple_seq pattern_def_seq;
5658 if (dump_enabled_p ())
5660 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5661 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5662 dump_printf (MSG_NOTE, "\n");
5665 if (gimple_has_volatile_ops (stmt))
5667 if (dump_enabled_p ())
5668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5669 "not vectorized: stmt has volatile operands\n");
5671 return false;
5674 /* Skip stmts that do not need to be vectorized. In loops this is expected
5675 to include:
5676 - the COND_EXPR which is the loop exit condition
5677 - any LABEL_EXPRs in the loop
5678 - computations that are used only for array indexing or loop control.
5679 In basic blocks we only analyze statements that are a part of some SLP
5680 instance, and therefore all the statements are relevant.
5682 The pattern statement needs to be analyzed instead of the original statement
5683 if the original statement is not relevant. Otherwise, we analyze both
5684 statements. In basic blocks we are called from some SLP instance
5685 traversal; in that case don't analyze pattern stmts instead, as the
5686 pattern stmts will already be part of the SLP instance. */
5688 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5689 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5690 && !STMT_VINFO_LIVE_P (stmt_info))
5692 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5693 && pattern_stmt
5694 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5695 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5697 /* Analyze PATTERN_STMT instead of the original stmt. */
5698 stmt = pattern_stmt;
5699 stmt_info = vinfo_for_stmt (pattern_stmt);
5700 if (dump_enabled_p ())
5702 dump_printf_loc (MSG_NOTE, vect_location,
5703 "==> examining pattern statement: ");
5704 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5705 dump_printf (MSG_NOTE, "\n");
5708 else
5710 if (dump_enabled_p ())
5711 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
5713 return true;
5716 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5717 && node == NULL
5718 && pattern_stmt
5719 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5720 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5722 /* Analyze PATTERN_STMT too. */
5723 if (dump_enabled_p ())
5725 dump_printf_loc (MSG_NOTE, vect_location,
5726 "==> examining pattern statement: ");
5727 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5728 dump_printf (MSG_NOTE, "\n");
5731 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5732 return false;
5735 if (is_pattern_stmt_p (stmt_info)
5736 && node == NULL
5737 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5739 gimple_stmt_iterator si;
5741 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5743 gimple pattern_def_stmt = gsi_stmt (si);
5744 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5745 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5747 /* Analyze def stmt of STMT if it's a pattern stmt. */
5748 if (dump_enabled_p ())
5750 dump_printf_loc (MSG_NOTE, vect_location,
5751 "==> examining pattern def statement: ");
5752 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
5753 dump_printf (MSG_NOTE, "\n");
5756 if (!vect_analyze_stmt (pattern_def_stmt,
5757 need_to_vectorize, node))
5758 return false;
5763 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5765 case vect_internal_def:
5766 break;
5768 case vect_reduction_def:
5769 case vect_nested_cycle:
5770 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5771 || relevance == vect_used_in_outer_by_reduction
5772 || relevance == vect_unused_in_scope));
5773 break;
5775 case vect_induction_def:
5776 case vect_constant_def:
5777 case vect_external_def:
5778 case vect_unknown_def_type:
5779 default:
5780 gcc_unreachable ();
5783 if (bb_vinfo)
5785 gcc_assert (PURE_SLP_STMT (stmt_info));
5787 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5788 if (dump_enabled_p ())
5790 dump_printf_loc (MSG_NOTE, vect_location,
5791 "get vectype for scalar type: ");
5792 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
5793 dump_printf (MSG_NOTE, "\n");
5796 vectype = get_vectype_for_scalar_type (scalar_type);
5797 if (!vectype)
5799 if (dump_enabled_p ())
5801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5802 "not SLPed: unsupported data-type ");
5803 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5804 scalar_type);
5805 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5807 return false;
5810 if (dump_enabled_p ())
5812 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
5813 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
5814 dump_printf (MSG_NOTE, "\n");
5817 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5820 if (STMT_VINFO_RELEVANT_P (stmt_info))
5822 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5823 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5824 *need_to_vectorize = true;
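/* Try each vectorizable_* analysis routine in turn (a NULL vec_stmt means
   analysis only); the routine that recognizes and supports STMT records its
   kind in STMT_VINFO_TYPE for use by the transform phase.  */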
5827 ok = true;
5828 if (!bb_vinfo
5829 && (STMT_VINFO_RELEVANT_P (stmt_info)
5830 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5831 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5832 || vectorizable_shift (stmt, NULL, NULL, NULL)
5833 || vectorizable_operation (stmt, NULL, NULL, NULL)
5834 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5835 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5836 || vectorizable_call (stmt, NULL, NULL, NULL)
5837 || vectorizable_store (stmt, NULL, NULL, NULL)
5838 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5839 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5840 else
5842 if (bb_vinfo)
5843 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5844 || vectorizable_shift (stmt, NULL, NULL, node)
5845 || vectorizable_operation (stmt, NULL, NULL, node)
5846 || vectorizable_assignment (stmt, NULL, NULL, node)
5847 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5848 || vectorizable_call (stmt, NULL, NULL, node)
5849 || vectorizable_store (stmt, NULL, NULL, node)
5850 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5853 if (!ok)
5855 if (dump_enabled_p ())
5857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5858 "not vectorized: relevant stmt not ");
5859 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5860 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5861 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5864 return false;
5867 if (bb_vinfo)
5868 return true;
5870 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5871 need extra handling, except for vectorizable reductions. */
5872 if (STMT_VINFO_LIVE_P (stmt_info)
5873 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5874 ok = vectorizable_live_operation (stmt, NULL, NULL);
5876 if (!ok)
5878 if (dump_enabled_p ())
5880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5881 "not vectorized: live stmt not ");
5882 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5883 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5884 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5887 return false;
5890 return true;
5894 /* Function vect_transform_stmt.
5896 Create a vectorized stmt to replace STMT, and insert it at BSI. */
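/* The STMT_VINFO_TYPE recorded during analysis selects which vectorizable_*
   routine performs the code generation here; the return value tells the
   caller whether STMT is a (grouped) store.  */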
5898 bool
5899 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5900 bool *grouped_store, slp_tree slp_node,
5901 slp_instance slp_node_instance)
5903 bool is_store = false;
5904 gimple vec_stmt = NULL;
5905 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5906 bool done;
5908 switch (STMT_VINFO_TYPE (stmt_info))
5910 case type_demotion_vec_info_type:
5911 case type_promotion_vec_info_type:
5912 case type_conversion_vec_info_type:
5913 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5914 gcc_assert (done);
5915 break;
5917 case induc_vec_info_type:
5918 gcc_assert (!slp_node);
5919 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5920 gcc_assert (done);
5921 break;
5923 case shift_vec_info_type:
5924 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5925 gcc_assert (done);
5926 break;
5928 case op_vec_info_type:
5929 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5930 gcc_assert (done);
5931 break;
5933 case assignment_vec_info_type:
5934 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5935 gcc_assert (done);
5936 break;
5938 case load_vec_info_type:
5939 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5940 slp_node_instance);
5941 gcc_assert (done);
5942 break;
5944 case store_vec_info_type:
5945 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5946 gcc_assert (done);
5947 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5949 /* In case of interleaving, the whole chain is vectorized when the
5950 last store in the chain is reached. Store stmts before the last
5951 one are skipped, and their vec_stmt_info shouldn't be freed
5952 meanwhile. */
5953 *grouped_store = true;
5954 if (STMT_VINFO_VEC_STMT (stmt_info))
5955 is_store = true;
5957 else
5958 is_store = true;
5959 break;
5961 case condition_vec_info_type:
5962 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5963 gcc_assert (done);
5964 break;
5966 case call_vec_info_type:
5967 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5968 stmt = gsi_stmt (*gsi);
5969 break;
5971 case reduc_vec_info_type:
5972 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5973 gcc_assert (done);
5974 break;
5976 default:
5977 if (!STMT_VINFO_LIVE_P (stmt_info))
5979 if (dump_enabled_p ())
5980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5981 "stmt not supported.\n");
5982 gcc_unreachable ();
5986 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5987 is being vectorized, but outside the immediately enclosing loop. */
5988 if (vec_stmt
5989 && STMT_VINFO_LOOP_VINFO (stmt_info)
5990 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5991 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5992 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5993 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5994 || STMT_VINFO_RELEVANT (stmt_info) ==
5995 vect_used_in_outer_by_reduction))
5997 struct loop *innerloop = LOOP_VINFO_LOOP (
5998 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5999 imm_use_iterator imm_iter;
6000 use_operand_p use_p;
6001 tree scalar_dest;
6002 gimple exit_phi;
6004 if (dump_enabled_p ())
6005 dump_printf_loc (MSG_NOTE, vect_location,
6006 "Record the vdef for outer-loop vectorization.\n");
6008 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
6009 (to be used when vectorizing outer-loop stmts that use the DEF of
6010 STMT). */
6011 if (gimple_code (stmt) == GIMPLE_PHI)
6012 scalar_dest = PHI_RESULT (stmt);
6013 else
6014 scalar_dest = gimple_assign_lhs (stmt);
6016 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
6018 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
6020 exit_phi = USE_STMT (use_p);
6021 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
6026 /* Handle stmts whose DEF is used outside the loop-nest that is
6027 being vectorized. */
6028 if (STMT_VINFO_LIVE_P (stmt_info)
6029 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
6031 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
6032 gcc_assert (done);
6035 if (vec_stmt)
6036 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
6038 return is_store;
6042 /* Remove a group of stores (for SLP or interleaving), free their
6043 stmt_vec_info. */
6045 void
6046 vect_remove_stores (gimple first_stmt)
6048 gimple next = first_stmt;
6049 gimple tmp;
6050 gimple_stmt_iterator next_si;
6052 while (next)
6054 stmt_vec_info stmt_info = vinfo_for_stmt (next);
6056 tmp = GROUP_NEXT_ELEMENT (stmt_info);
6057 if (is_pattern_stmt_p (stmt_info))
6058 next = STMT_VINFO_RELATED_STMT (stmt_info);
6059 /* Free the attached stmt_vec_info and remove the stmt. */
6060 next_si = gsi_for_stmt (next);
6061 unlink_stmt_vdef (next);
6062 gsi_remove (&next_si, true);
6063 release_defs (next);
6064 free_stmt_vec_info (next);
6065 next = tmp;
6070 /* Function new_stmt_vec_info.
6072 Create and initialize a new stmt_vec_info struct for STMT. */
6074 stmt_vec_info
6075 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
6076 bb_vec_info bb_vinfo)
6078 stmt_vec_info res;
6079 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
6081 STMT_VINFO_TYPE (res) = undef_vec_info_type;
6082 STMT_VINFO_STMT (res) = stmt;
6083 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
6084 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
6085 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
6086 STMT_VINFO_LIVE_P (res) = false;
6087 STMT_VINFO_VECTYPE (res) = NULL;
6088 STMT_VINFO_VEC_STMT (res) = NULL;
6089 STMT_VINFO_VECTORIZABLE (res) = true;
6090 STMT_VINFO_IN_PATTERN_P (res) = false;
6091 STMT_VINFO_RELATED_STMT (res) = NULL;
6092 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
6093 STMT_VINFO_DATA_REF (res) = NULL;
6095 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
6096 STMT_VINFO_DR_OFFSET (res) = NULL;
6097 STMT_VINFO_DR_INIT (res) = NULL;
6098 STMT_VINFO_DR_STEP (res) = NULL;
6099 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
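/* PHIs in a loop header may turn out to be inductions, reductions, etc.;
   their def type is refined later by the loop analysis, so start them out
   as unknown.  Everything else starts as an internal def.  */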
6101 if (gimple_code (stmt) == GIMPLE_PHI
6102 && is_loop_header_bb_p (gimple_bb (stmt)))
6103 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
6104 else
6105 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
6107 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
6108 STMT_SLP_TYPE (res) = loop_vect;
6109 GROUP_FIRST_ELEMENT (res) = NULL;
6110 GROUP_NEXT_ELEMENT (res) = NULL;
6111 GROUP_SIZE (res) = 0;
6112 GROUP_STORE_COUNT (res) = 0;
6113 GROUP_GAP (res) = 0;
6114 GROUP_SAME_DR_STMT (res) = NULL;
6116 return res;
6120 /* Create the vector holding stmt_vec_info structs. */
6122 void
6123 init_stmt_vec_info_vec (void)
6125 gcc_assert (!stmt_vec_info_vec.exists ());
6126 stmt_vec_info_vec.create (50);
6130 /* Free the vector holding stmt_vec_info structs. */
6132 void
6133 free_stmt_vec_info_vec (void)
6135 unsigned int i;
6136 vec_void_p info;
6137 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
6138 if (info != NULL)
6139 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
6140 gcc_assert (stmt_vec_info_vec.exists ());
6141 stmt_vec_info_vec.release ();
6145 /* Free stmt vectorization related info. */
6147 void
6148 free_stmt_vec_info (gimple stmt)
6150 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6152 if (!stmt_info)
6153 return;
6155 /* Check if this statement has a related "pattern stmt"
6156 (introduced by the vectorizer during the pattern recognition
6157 pass). Free the pattern's stmt_vec_info and the def stmts' stmt_vec_info
6158 too. */
6159 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6161 stmt_vec_info patt_info
6162 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6163 if (patt_info)
6165 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6166 if (seq)
6168 gimple_stmt_iterator si;
6169 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6170 free_stmt_vec_info (gsi_stmt (si));
6172 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6176 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6177 set_vinfo_for_stmt (stmt, NULL);
6178 free (stmt_info);
6182 /* Function get_vectype_for_scalar_type_and_size.
6184 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
6185 by the target. */
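/* Illustrative example: on a target whose vectors are 128 bits wide, a
   4-byte 'int' with SIZE == 16 would typically yield a 4-element integer
   vector type, while SIZE == 0 lets the target's preferred SIMD mode
   determine the width.  */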
6187 static tree
6188 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
6190 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
6191 enum machine_mode simd_mode;
6192 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
6193 int nunits;
6194 tree vectype;
6196 if (nbytes == 0)
6197 return NULL_TREE;
6199 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6200 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6201 return NULL_TREE;
6203 /* For vector types of elements whose mode precision doesn't
6204 match their type's precision we use an element type of mode
6205 precision. The vectorization routines will have to make sure
6206 they support the proper result truncation/extension.
6207 We also make sure to build vector types with INTEGER_TYPE
6208 component type only. */
6209 if (INTEGRAL_TYPE_P (scalar_type)
6210 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6211 || TREE_CODE (scalar_type) != INTEGER_TYPE))
6212 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6213 TYPE_UNSIGNED (scalar_type));
6215 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6216 When the component mode passes the above test simply use a type
6217 corresponding to that mode. The theory is that any use that
6218 would cause problems with this will disable vectorization anyway. */
6219 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
6220 && !INTEGRAL_TYPE_P (scalar_type))
6221 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6223 /* We can't build a vector type of elements with alignment bigger than
6224 their size. */
6225 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
6226 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
6227 TYPE_UNSIGNED (scalar_type));
6229 /* If we fell back to using the mode, fail if there was
6230 no scalar type for it. */
6231 if (scalar_type == NULL_TREE)
6232 return NULL_TREE;
6234 /* If no size was supplied use the mode the target prefers. Otherwise
6235 lookup a vector mode of the specified size. */
6236 if (size == 0)
6237 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6238 else
6239 simd_mode = mode_for_vector (inner_mode, size / nbytes);
6240 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6241 if (nunits <= 1)
6242 return NULL_TREE;
6244 vectype = build_vector_type (scalar_type, nunits);
6246 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6247 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6248 return NULL_TREE;
6250 return vectype;
6253 unsigned int current_vector_size;
6255 /* Function get_vectype_for_scalar_type.
6257 Returns the vector type corresponding to SCALAR_TYPE as supported
6258 by the target. */
6260 tree
6261 get_vectype_for_scalar_type (tree scalar_type)
6263 tree vectype;
6264 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6265 current_vector_size);
6266 if (vectype
6267 && current_vector_size == 0)
6268 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6269 return vectype;
6272 /* Function get_same_sized_vectype
6274 Returns a vector type corresponding to SCALAR_TYPE of size
6275 VECTOR_TYPE if supported by the target. */
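/* Illustrative example: asking for a 'short' vector of the same size as a
   16-byte V4SI vector yields an 8-element short vector, provided the
   target supports it.  */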
6277 tree
6278 get_same_sized_vectype (tree scalar_type, tree vector_type)
6280 return get_vectype_for_scalar_type_and_size
6281 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6284 /* Function vect_is_simple_use.
6286 Input:
6287 LOOP_VINFO - the vect info of the loop that is being vectorized.
6288 BB_VINFO - the vect info of the basic block that is being vectorized.
6289 OPERAND - operand of STMT in the loop or bb.
6290 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6292 Returns whether a stmt with OPERAND can be vectorized.
6293 For loops, supportable operands are constants, loop invariants, and operands
6294 that are defined by the current iteration of the loop. Unsupportable
6295 operands are those that are defined by a previous iteration of the loop (as
6296 is the case in reduction/induction computations).
6297 For basic blocks, supportable operands are constants and bb invariants.
6298 For now, operands defined outside the basic block are not supported. */
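/* Roughly: constants classify as vect_constant_def, invariants and defs
   from outside the vectorized region as vect_external_def, and defs inside
   the region take the def type recorded in their stmt_vec_info.  */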
6300 bool
6301 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6302 bb_vec_info bb_vinfo, gimple *def_stmt,
6303 tree *def, enum vect_def_type *dt)
6305 basic_block bb;
6306 stmt_vec_info stmt_vinfo;
6307 struct loop *loop = NULL;
6309 if (loop_vinfo)
6310 loop = LOOP_VINFO_LOOP (loop_vinfo);
6312 *def_stmt = NULL;
6313 *def = NULL_TREE;
6315 if (dump_enabled_p ())
6317 dump_printf_loc (MSG_NOTE, vect_location,
6318 "vect_is_simple_use: operand ");
6319 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
6320 dump_printf (MSG_NOTE, "\n");
6323 if (CONSTANT_CLASS_P (operand))
6325 *dt = vect_constant_def;
6326 return true;
6329 if (is_gimple_min_invariant (operand))
6331 *def = operand;
6332 *dt = vect_external_def;
6333 return true;
6336 if (TREE_CODE (operand) == PAREN_EXPR)
6338 if (dump_enabled_p ())
6339 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
6340 operand = TREE_OPERAND (operand, 0);
6343 if (TREE_CODE (operand) != SSA_NAME)
6345 if (dump_enabled_p ())
6346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6347 "not ssa-name.\n");
6348 return false;
6351 *def_stmt = SSA_NAME_DEF_STMT (operand);
6352 if (*def_stmt == NULL)
6354 if (dump_enabled_p ())
6355 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6356 "no def_stmt.\n");
6357 return false;
6360 if (dump_enabled_p ())
6362 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6363 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
6364 dump_printf (MSG_NOTE, "\n");
6367 /* An empty stmt is expected only in the case of a function argument
6368 (otherwise we expect a phi_node or a GIMPLE_ASSIGN). */
6369 if (gimple_nop_p (*def_stmt))
6371 *def = operand;
6372 *dt = vect_external_def;
6373 return true;
6376 bb = gimple_bb (*def_stmt);
6378 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6379 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
6380 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
6381 *dt = vect_external_def;
6382 else
6384 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6385 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6388 if (*dt == vect_unknown_def_type
6389 || (stmt
6390 && *dt == vect_double_reduction_def
6391 && gimple_code (stmt) != GIMPLE_PHI))
6393 if (dump_enabled_p ())
6394 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6395 "Unsupported pattern.\n");
6396 return false;
6399 if (dump_enabled_p ())
6400 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
6402 switch (gimple_code (*def_stmt))
6404 case GIMPLE_PHI:
6405 *def = gimple_phi_result (*def_stmt);
6406 break;
6408 case GIMPLE_ASSIGN:
6409 *def = gimple_assign_lhs (*def_stmt);
6410 break;
6412 case GIMPLE_CALL:
6413 *def = gimple_call_lhs (*def_stmt);
6414 if (*def != NULL)
6415 break;
6416 /* FALLTHRU */
6417 default:
6418 if (dump_enabled_p ())
6419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6420 "unsupported defining stmt:\n");
6421 return false;
6424 return true;
6427 /* Function vect_is_simple_use_1.
6429 Same as vect_is_simple_use but also determines the vector operand
6430 type of OPERAND and stores it to *VECTYPE. If the definition of
6431 OPERAND is vect_uninitialized_def, vect_constant_def or
6432 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6433 is responsible to compute the best suited vector type for the
6434 scalar operand. */
6436 bool
6437 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6438 bb_vec_info bb_vinfo, gimple *def_stmt,
6439 tree *def, enum vect_def_type *dt, tree *vectype)
6441 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6442 def, dt))
6443 return false;
6445 /* Now get a vector type if the def is internal, otherwise supply
6446 NULL_TREE and leave it up to the caller to figure out a proper
6447 type for the use stmt. */
6448 if (*dt == vect_internal_def
6449 || *dt == vect_induction_def
6450 || *dt == vect_reduction_def
6451 || *dt == vect_double_reduction_def
6452 || *dt == vect_nested_cycle)
6454 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6456 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6457 && !STMT_VINFO_RELEVANT (stmt_info)
6458 && !STMT_VINFO_LIVE_P (stmt_info))
6459 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6461 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6462 gcc_assert (*vectype != NULL_TREE);
6464 else if (*dt == vect_uninitialized_def
6465 || *dt == vect_constant_def
6466 || *dt == vect_external_def)
6467 *vectype = NULL_TREE;
6468 else
6469 gcc_unreachable ();
6471 return true;
6475 /* Function supportable_widening_operation
6477 Check whether an operation represented by the code CODE is a
6478 widening operation that is supported by the target platform in
6479 vector form (i.e., when operating on arguments of type VECTYPE_IN
6480 producing a result of type VECTYPE_OUT).
6482 Widening operations we currently support are NOP (CONVERT), FLOAT,
6483 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations are supported
6484 by the target platform either directly (via vector tree-codes), or via
6485 target builtins.
6487 Output:
6488 - CODE1 and CODE2 are codes of vector operations to be used when
6489 vectorizing the operation, if available.
6490 - MULTI_STEP_CVT determines the number of required intermediate steps in
6491 case of multi-step conversion (like char->short->int - in that case
6492 MULTI_STEP_CVT will be 1).
6493 - INTERM_TYPES contains the intermediate type required to perform the
6494 widening operation (short in the above example). */
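/* Illustrative example, assuming a target with 128-bit vectors: widening
   char to int goes via short, i.e. V16QI -> V8HI -> V4SI using
   VEC_UNPACK_{LO,HI}_EXPR at each step, so *MULTI_STEP_CVT is 1 and
   *INTERM_TYPES holds the intermediate short vector type.  */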
6496 bool
6497 supportable_widening_operation (enum tree_code code, gimple stmt,
6498 tree vectype_out, tree vectype_in,
6499 enum tree_code *code1, enum tree_code *code2,
6500 int *multi_step_cvt,
6501 vec<tree> *interm_types)
6503 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6504 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6505 struct loop *vect_loop = NULL;
6506 enum machine_mode vec_mode;
6507 enum insn_code icode1, icode2;
6508 optab optab1, optab2;
6509 tree vectype = vectype_in;
6510 tree wide_vectype = vectype_out;
6511 enum tree_code c1, c2;
6512 int i;
6513 tree prev_type, intermediate_type;
6514 enum machine_mode intermediate_mode, prev_mode;
6515 optab optab3, optab4;
6517 *multi_step_cvt = 0;
6518 if (loop_info)
6519 vect_loop = LOOP_VINFO_LOOP (loop_info);
6521 switch (code)
6523 case WIDEN_MULT_EXPR:
6524 /* The result of a vectorized widening operation usually requires
6525 two vectors (because the widened results do not fit into one vector).
6526 The generated vector results would normally be expected to be
6527 generated in the same order as in the original scalar computation,
6528 i.e. if 8 results are generated in each vector iteration, they are
6529 to be organized as follows:
6530 vect1: [res1,res2,res3,res4],
6531 vect2: [res5,res6,res7,res8].
6533 However, in the special case that the result of the widening
6534 operation is used in a reduction computation only, the order doesn't
6535 matter (because when vectorizing a reduction we change the order of
6536 the computation). Some targets can take advantage of this and
6537 generate more efficient code. For example, targets like Altivec,
6538 that support widen_mult using a sequence of {mult_even,mult_odd}
6539 generate the following vectors:
6540 vect1: [res1,res3,res5,res7],
6541 vect2: [res2,res4,res6,res8].
6543 When vectorizing outer-loops, we execute the inner-loop sequentially
6544 (each vectorized inner-loop iteration contributes to VF outer-loop
6545 iterations in parallel). We therefore don't allow changing the
6546 order of the computation in the inner-loop during outer-loop
6547 vectorization. */
6548 /* TODO: Another case in which order doesn't *really* matter is when we
6549 widen and then contract again, e.g. (short)((int)x * y >> 8).
6550 Normally, pack_trunc performs an even/odd permute, whereas the
6551 repack from an even/odd expansion would be an interleave, which
6552 would be significantly simpler for e.g. AVX2. */
6553 /* In any case, in order to avoid duplicating the code below, recurse
6554 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6555 are properly set up for the caller. If we fail, we'll continue with
6556 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6557 if (vect_loop
6558 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6559 && !nested_in_vect_loop_p (vect_loop, stmt)
6560 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6561 stmt, vectype_out, vectype_in,
6562 code1, code2, multi_step_cvt,
6563 interm_types))
6564 return true;
6565 c1 = VEC_WIDEN_MULT_LO_EXPR;
6566 c2 = VEC_WIDEN_MULT_HI_EXPR;
6567 break;
6569 case VEC_WIDEN_MULT_EVEN_EXPR:
6570 /* Support the recursion induced just above. */
6571 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6572 c2 = VEC_WIDEN_MULT_ODD_EXPR;
6573 break;
6575 case WIDEN_LSHIFT_EXPR:
6576 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6577 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6578 break;
6580 CASE_CONVERT:
6581 c1 = VEC_UNPACK_LO_EXPR;
6582 c2 = VEC_UNPACK_HI_EXPR;
6583 break;
6585 case FLOAT_EXPR:
6586 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6587 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6588 break;
6590 case FIX_TRUNC_EXPR:
6591 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6592 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6593 computing the operation. */
6594 return false;
6596 default:
6597 gcc_unreachable ();
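/* On big-endian targets the LO/HI halves of a vector are swapped with
   respect to the element order, so exchange the two codes; the element-wise
   EVEN/ODD variants are endian-neutral.  */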
6600 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6602 enum tree_code ctmp = c1;
6603 c1 = c2;
6604 c2 = ctmp;
6607 if (code == FIX_TRUNC_EXPR)
6609 /* The signedness is determined from output operand. */
6610 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6611 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6613 else
6615 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6616 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6619 if (!optab1 || !optab2)
6620 return false;
6622 vec_mode = TYPE_MODE (vectype);
6623 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6624 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6625 return false;
6627 *code1 = c1;
6628 *code2 = c2;
6630 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6631 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6632 return true;
6634 /* Check if it's a multi-step conversion that can be done using intermediate
6635 types. */
6637 prev_type = vectype;
6638 prev_mode = vec_mode;
6640 if (!CONVERT_EXPR_CODE_P (code))
6641 return false;
6643 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6644 intermediate steps in the promotion sequence. We try
6645 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6646 not. */
6647 interm_types->create (MAX_INTERM_CVT_STEPS);
6648 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6650 intermediate_mode = insn_data[icode1].operand[0].mode;
6651 intermediate_type
6652 = lang_hooks.types.type_for_mode (intermediate_mode,
6653 TYPE_UNSIGNED (prev_type));
6654 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6655 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6657 if (!optab3 || !optab4
6658 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6659 || insn_data[icode1].operand[0].mode != intermediate_mode
6660 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6661 || insn_data[icode2].operand[0].mode != intermediate_mode
6662 || ((icode1 = optab_handler (optab3, intermediate_mode))
6663 == CODE_FOR_nothing)
6664 || ((icode2 = optab_handler (optab4, intermediate_mode))
6665 == CODE_FOR_nothing))
6666 break;
6668 interm_types->quick_push (intermediate_type);
6669 (*multi_step_cvt)++;
6671 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6672 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6673 return true;
6675 prev_type = intermediate_type;
6676 prev_mode = intermediate_mode;
6679 interm_types->release ();
6680 return false;
6684 /* Function supportable_narrowing_operation
6686 Check whether an operation represented by the code CODE is a
6687 narrowing operation that is supported by the target platform in
6688 vector form (i.e., when operating on arguments of type VECTYPE_IN
6689 and producing a result of type VECTYPE_OUT).
6691 Narrowing operations we currently support are NOP (CONVERT) and
6692 FIX_TRUNC. This function checks if these operations are supported by
6693 the target platform directly via vector tree-codes.
6695 Output:
6696 - CODE1 is the code of a vector operation to be used when
6697 vectorizing the operation, if available.
6698 - MULTI_STEP_CVT determines the number of required intermediate steps in
6699 case of multi-step conversion (like int->short->char - in that case
6700 MULTI_STEP_CVT will be 1).
6701 - INTERM_TYPES contains the intermediate type required to perform the
6702 narrowing operation (short in the above example). */
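/* Illustrative example, assuming a target with 128-bit vectors: narrowing
   int to char goes via short, i.e. V4SI -> V8HI -> V16QI using
   VEC_PACK_TRUNC_EXPR at each step, so *MULTI_STEP_CVT is 1 and
   *INTERM_TYPES holds the intermediate short vector type.  */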
6704 bool
6705 supportable_narrowing_operation (enum tree_code code,
6706 tree vectype_out, tree vectype_in,
6707 enum tree_code *code1, int *multi_step_cvt,
6708 vec<tree> *interm_types)
6710 enum machine_mode vec_mode;
6711 enum insn_code icode1;
6712 optab optab1, interm_optab;
6713 tree vectype = vectype_in;
6714 tree narrow_vectype = vectype_out;
6715 enum tree_code c1;
6716 tree intermediate_type;
6717 enum machine_mode intermediate_mode, prev_mode;
6718 int i;
6719 bool uns;
6721 *multi_step_cvt = 0;
6722 switch (code)
6724 CASE_CONVERT:
6725 c1 = VEC_PACK_TRUNC_EXPR;
6726 break;
6728 case FIX_TRUNC_EXPR:
6729 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6730 break;
6732 case FLOAT_EXPR:
6733 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6734 tree code and optabs used for computing the operation. */
6735 return false;
6737 default:
6738 gcc_unreachable ();
6741 if (code == FIX_TRUNC_EXPR)
6742 /* The signedness is determined from output operand. */
6743 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6744 else
6745 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6747 if (!optab1)
6748 return false;
6750 vec_mode = TYPE_MODE (vectype);
6751 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6752 return false;
6754 *code1 = c1;
6756 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6757 return true;
6759 /* Check if it's a multi-step conversion that can be done using intermediate
6760 types. */
6761 prev_mode = vec_mode;
6762 if (code == FIX_TRUNC_EXPR)
6763 uns = TYPE_UNSIGNED (vectype_out);
6764 else
6765 uns = TYPE_UNSIGNED (vectype);
6767 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6768 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6769 costly than signed. */
6770 if (code == FIX_TRUNC_EXPR && uns)
6772 enum insn_code icode2;
6774 intermediate_type
6775 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6776 interm_optab
6777 = optab_for_tree_code (c1, intermediate_type, optab_default);
6778 if (interm_optab != unknown_optab
6779 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6780 && insn_data[icode1].operand[0].mode
6781 == insn_data[icode2].operand[0].mode)
6783 uns = false;
6784 optab1 = interm_optab;
6785 icode1 = icode2;
6789 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6790 intermediate steps in the narrowing sequence. We try
6791 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6792 interm_types->create (MAX_INTERM_CVT_STEPS);
6793 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6795 intermediate_mode = insn_data[icode1].operand[0].mode;
6796 intermediate_type
6797 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6798 interm_optab
6799 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6800 optab_default);
6801 if (!interm_optab
6802 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6803 || insn_data[icode1].operand[0].mode != intermediate_mode
6804 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6805 == CODE_FOR_nothing))
6806 break;
6808 interm_types->quick_push (intermediate_type);
6809 (*multi_step_cvt)++;
6811 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6812 return true;
6814 prev_mode = intermediate_mode;
6815 optab1 = interm_optab;
6818 interm_types->release ();
6819 return false;