gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "gimple.h"
33 #include "gimple-ssa.h"
34 #include "tree-cfg.h"
35 #include "tree-phinodes.h"
36 #include "ssa-iterators.h"
37 #include "tree-ssanames.h"
38 #include "tree-ssa-loop-manip.h"
39 #include "cfgloop.h"
40 #include "expr.h"
41 #include "recog.h" /* FIXME: for insn_data */
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "tree-vectorizer.h"
45 #include "dumpfile.h"
47 /* For lang_hooks.types.type_for_mode. */
48 #include "langhooks.h"
50 /* Return the vectorized type for the given statement. */
52 tree
53 stmt_vectype (struct _stmt_vec_info *stmt_info)
55 return STMT_VINFO_VECTYPE (stmt_info);
58 /* Return TRUE iff the given statement is in an inner loop relative to
59 the loop being vectorized. */
60 bool
61 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
63 gimple stmt = STMT_VINFO_STMT (stmt_info);
64 basic_block bb = gimple_bb (stmt);
65 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
66 struct loop* loop;
68 if (!loop_vinfo)
69 return false;
71 loop = LOOP_VINFO_LOOP (loop_vinfo);
73 return (bb->loop_father == loop->inner);
76 /* Record the cost of a statement, either by directly informing the
77 target model or by saving it in a vector for later processing.
78 Return a preliminary estimate of the statement's cost. */
80 unsigned
81 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
82 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
83 int misalign, enum vect_cost_model_location where)
85 if (body_cost_vec)
87 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
88 add_stmt_info_to_vec (body_cost_vec, count, kind,
89 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
90 misalign);
91 return (unsigned)
92 (builtin_vectorization_cost (kind, vectype, misalign) * count);
95 else
97 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
98 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
99 void *target_cost_data;
101 if (loop_vinfo)
102 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
103 else
104 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
106 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
107 misalign, where);
111 /* Return a variable of type ELEM_TYPE[NELEMS]. */
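/* For example (illustrative only, assuming a V4SI vector type):
   create_vector_array (V4SI, 2) returns a temporary of type
   "vector(4) int[2]" named with the "vect_array" prefix, which the
   read_vector_array/write_vector_array helpers below index.  */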
113 static tree
114 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
117 "vect_array");
120 /* ARRAY is an array of vectors created by create_vector_array.
121 Return an SSA_NAME for the vector in index N. The reference
122 is part of the vectorization of STMT and the vector is associated
123 with scalar destination SCALAR_DEST. */
125 static tree
126 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
127 tree array, unsigned HOST_WIDE_INT n)
129 tree vect_type, vect, vect_name, array_ref;
130 gimple new_stmt;
132 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
133 vect_type = TREE_TYPE (TREE_TYPE (array));
134 vect = vect_create_destination_var (scalar_dest, vect_type);
135 array_ref = build4 (ARRAY_REF, vect_type, array,
136 build_int_cst (size_type_node, n),
137 NULL_TREE, NULL_TREE);
139 new_stmt = gimple_build_assign (vect, array_ref);
140 vect_name = make_ssa_name (vect, new_stmt);
141 gimple_assign_set_lhs (new_stmt, vect_name);
142 vect_finish_stmt_generation (stmt, new_stmt, gsi);
144 return vect_name;
147 /* ARRAY is an array of vectors created by create_vector_array.
148 Emit code to store SSA_NAME VECT in index N of the array.
149 The store is part of the vectorization of STMT. */
151 static void
152 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
153 tree array, unsigned HOST_WIDE_INT n)
155 tree array_ref;
156 gimple new_stmt;
158 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
159 build_int_cst (size_type_node, n),
160 NULL_TREE, NULL_TREE);
162 new_stmt = gimple_build_assign (array_ref, vect);
163 vect_finish_stmt_generation (stmt, new_stmt, gsi);
166 /* PTR is a pointer to an array of type TYPE. Return a representation
167 of *PTR. The memory reference replaces those in FIRST_DR
168 (and its group). */
170 static tree
171 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
173 tree mem_ref, alias_ptr_type;
175 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
176 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
177 /* Arrays have the same alignment as their type. */
178 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
179 return mem_ref;
182 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
184 /* Function vect_mark_relevant.
186 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
188 static void
189 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
190 enum vect_relevant relevant, bool live_p,
191 bool used_in_pattern)
193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
194 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
195 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
196 gimple pattern_stmt;
198 if (dump_enabled_p ())
199 dump_printf_loc (MSG_NOTE, vect_location,
200 "mark relevant %d, live %d.\n", relevant, live_p);
202 /* If this stmt is an original stmt in a pattern, we might need to mark its
203 related pattern stmt instead of the original stmt. However, such stmts
204 may have their own uses that are not in any pattern, in such cases the
205 stmt itself should be marked. */
206 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
208 bool found = false;
209 if (!used_in_pattern)
211 imm_use_iterator imm_iter;
212 use_operand_p use_p;
213 gimple use_stmt;
214 tree lhs;
215 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
216 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
218 if (is_gimple_assign (stmt))
219 lhs = gimple_assign_lhs (stmt);
220 else
221 lhs = gimple_call_lhs (stmt);
 223 /* This use is outside any pattern. If LHS also has uses that are
 224 pattern uses, we should mark the stmt itself, and not the pattern
 225 stmt. */
226 if (TREE_CODE (lhs) == SSA_NAME)
227 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
229 if (is_gimple_debug (USE_STMT (use_p)))
230 continue;
231 use_stmt = USE_STMT (use_p);
233 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
234 continue;
236 if (vinfo_for_stmt (use_stmt)
237 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
239 found = true;
240 break;
245 if (!found)
247 /* This is the last stmt in a sequence that was detected as a
248 pattern that can potentially be vectorized. Don't mark the stmt
249 as relevant/live because it's not going to be vectorized.
250 Instead mark the pattern-stmt that replaces it. */
252 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
254 if (dump_enabled_p ())
255 dump_printf_loc (MSG_NOTE, vect_location,
256 "last stmt in pattern. don't mark"
257 " relevant/live.\n");
258 stmt_info = vinfo_for_stmt (pattern_stmt);
259 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
260 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
261 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
262 stmt = pattern_stmt;
266 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
267 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
268 STMT_VINFO_RELEVANT (stmt_info) = relevant;
270 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
271 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE, vect_location,
275 "already marked relevant/live.\n");
276 return;
279 worklist->safe_push (stmt);
283 /* Function vect_stmt_relevant_p.
285 Return true if STMT in loop that is represented by LOOP_VINFO is
286 "relevant for vectorization".
288 A stmt is considered "relevant for vectorization" if:
289 - it has uses outside the loop.
290 - it has vdefs (it alters memory).
 291 - it is a control stmt in the loop (except for the exit condition).
293 CHECKME: what other side effects would the vectorizer allow? */
295 static bool
296 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
297 enum vect_relevant *relevant, bool *live_p)
299 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
300 ssa_op_iter op_iter;
301 imm_use_iterator imm_iter;
302 use_operand_p use_p;
303 def_operand_p def_p;
305 *relevant = vect_unused_in_scope;
306 *live_p = false;
308 /* cond stmt other than loop exit cond. */
309 if (is_ctrl_stmt (stmt)
310 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
311 != loop_exit_ctrl_vec_info_type)
312 *relevant = vect_used_in_scope;
314 /* changing memory. */
315 if (gimple_code (stmt) != GIMPLE_PHI)
316 if (gimple_vdef (stmt))
318 if (dump_enabled_p ())
319 dump_printf_loc (MSG_NOTE, vect_location,
320 "vec_stmt_relevant_p: stmt has vdefs.\n");
321 *relevant = vect_used_in_scope;
324 /* uses outside the loop. */
325 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
327 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
329 basic_block bb = gimple_bb (USE_STMT (use_p));
330 if (!flow_bb_inside_loop_p (loop, bb))
332 if (dump_enabled_p ())
333 dump_printf_loc (MSG_NOTE, vect_location,
334 "vec_stmt_relevant_p: used out of loop.\n");
336 if (is_gimple_debug (USE_STMT (use_p)))
337 continue;
339 /* We expect all such uses to be in the loop exit phis
340 (because of loop closed form) */
341 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
342 gcc_assert (bb == single_exit (loop)->dest);
344 *live_p = true;
349 return (*live_p || *relevant);
353 /* Function exist_non_indexing_operands_for_use_p
355 USE is one of the uses attached to STMT. Check if USE is
356 used in STMT for anything other than indexing an array. */
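   For example (illustrative): given the store a[i_1] = x_2, the use of
   x_2 is a non-indexing use (this function returns true for it), while
   the use of i_1 only feeds the array index, so it returns false.  */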
358 static bool
359 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
361 tree operand;
362 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
364 /* USE corresponds to some operand in STMT. If there is no data
365 reference in STMT, then any operand that corresponds to USE
366 is not indexing an array. */
367 if (!STMT_VINFO_DATA_REF (stmt_info))
368 return true;
 370 /* STMT has a data_ref. FORNOW this means that it is of one of
371 the following forms:
372 -1- ARRAY_REF = var
373 -2- var = ARRAY_REF
374 (This should have been verified in analyze_data_refs).
376 'var' in the second case corresponds to a def, not a use,
377 so USE cannot correspond to any operands that are not used
378 for array indexing.
380 Therefore, all we need to check is if STMT falls into the
381 first case, and whether var corresponds to USE. */
383 if (!gimple_assign_copy_p (stmt))
384 return false;
385 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
386 return false;
387 operand = gimple_assign_rhs1 (stmt);
388 if (TREE_CODE (operand) != SSA_NAME)
389 return false;
391 if (operand == use)
392 return true;
394 return false;
 399 /* Function process_use.
401 Inputs:
402 - a USE in STMT in a loop represented by LOOP_VINFO
403 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
404 that defined USE. This is done by calling mark_relevant and passing it
405 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
406 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
407 be performed.
409 Outputs:
410 Generally, LIVE_P and RELEVANT are used to define the liveness and
411 relevance info of the DEF_STMT of this USE:
412 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
413 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
414 Exceptions:
415 - case 1: If USE is used only for address computations (e.g. array indexing),
416 which does not need to be directly vectorized, then the liveness/relevance
417 of the respective DEF_STMT is left unchanged.
418 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 419 skip DEF_STMT because it has already been processed.
420 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
421 be modified accordingly.
423 Return true if everything is as expected. Return false otherwise. */
425 static bool
426 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
427 enum vect_relevant relevant, vec<gimple> *worklist,
428 bool force)
430 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
431 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
432 stmt_vec_info dstmt_vinfo;
433 basic_block bb, def_bb;
434 tree def;
435 gimple def_stmt;
436 enum vect_def_type dt;
438 /* case 1: we are only interested in uses that need to be vectorized. Uses
439 that are used for address computation are not considered relevant. */
440 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
441 return true;
443 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
445 if (dump_enabled_p ())
446 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
447 "not vectorized: unsupported use in stmt.\n");
448 return false;
451 if (!def_stmt || gimple_nop_p (def_stmt))
452 return true;
454 def_bb = gimple_bb (def_stmt);
455 if (!flow_bb_inside_loop_p (loop, def_bb))
457 if (dump_enabled_p ())
458 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
459 return true;
462 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
463 DEF_STMT must have already been processed, because this should be the
464 only way that STMT, which is a reduction-phi, was put in the worklist,
465 as there should be no other uses for DEF_STMT in the loop. So we just
466 check that everything is as expected, and we are done. */
467 dstmt_vinfo = vinfo_for_stmt (def_stmt);
468 bb = gimple_bb (stmt);
469 if (gimple_code (stmt) == GIMPLE_PHI
470 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
471 && gimple_code (def_stmt) != GIMPLE_PHI
472 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
473 && bb->loop_father == def_bb->loop_father)
475 if (dump_enabled_p ())
476 dump_printf_loc (MSG_NOTE, vect_location,
477 "reduc-stmt defining reduc-phi in the same nest.\n");
478 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
479 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
480 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
481 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
482 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
483 return true;
486 /* case 3a: outer-loop stmt defining an inner-loop stmt:
487 outer-loop-header-bb:
488 d = def_stmt
489 inner-loop:
490 stmt # use (d)
491 outer-loop-tail-bb:
492 ... */
493 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
495 if (dump_enabled_p ())
496 dump_printf_loc (MSG_NOTE, vect_location,
497 "outer-loop def-stmt defining inner-loop stmt.\n");
499 switch (relevant)
501 case vect_unused_in_scope:
502 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
503 vect_used_in_scope : vect_unused_in_scope;
504 break;
506 case vect_used_in_outer_by_reduction:
507 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
508 relevant = vect_used_by_reduction;
509 break;
511 case vect_used_in_outer:
512 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
513 relevant = vect_used_in_scope;
514 break;
516 case vect_used_in_scope:
517 break;
519 default:
520 gcc_unreachable ();
524 /* case 3b: inner-loop stmt defining an outer-loop stmt:
525 outer-loop-header-bb:
527 inner-loop:
528 d = def_stmt
529 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
530 stmt # use (d) */
531 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
533 if (dump_enabled_p ())
534 dump_printf_loc (MSG_NOTE, vect_location,
535 "inner-loop def-stmt defining outer-loop stmt.\n");
537 switch (relevant)
539 case vect_unused_in_scope:
540 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
541 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
542 vect_used_in_outer_by_reduction : vect_unused_in_scope;
543 break;
545 case vect_used_by_reduction:
546 relevant = vect_used_in_outer_by_reduction;
547 break;
549 case vect_used_in_scope:
550 relevant = vect_used_in_outer;
551 break;
553 default:
554 gcc_unreachable ();
558 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
559 is_pattern_stmt_p (stmt_vinfo));
560 return true;
564 /* Function vect_mark_stmts_to_be_vectorized.
566 Not all stmts in the loop need to be vectorized. For example:
568 for i...
569 for j...
570 1. T0 = i + j
571 2. T1 = a[T0]
573 3. j = j + 1
575 Stmt 1 and 3 do not need to be vectorized, because loop control and
576 addressing of vectorized data-refs are handled differently.
578 This pass detects such stmts. */
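/* The worklist scheme below works in two steps: first every phi and
   stmt that vect_stmt_relevant_p considers relevant is pushed onto the
   worklist; then stmts are popped and, via process_use, the stmts that
   define their uses are marked (and queued) until the worklist is
   empty.  */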
580 bool
581 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
583 vec<gimple> worklist;
584 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
585 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
586 unsigned int nbbs = loop->num_nodes;
587 gimple_stmt_iterator si;
588 gimple stmt;
589 unsigned int i;
590 stmt_vec_info stmt_vinfo;
591 basic_block bb;
592 gimple phi;
593 bool live_p;
594 enum vect_relevant relevant, tmp_relevant;
595 enum vect_def_type def_type;
597 if (dump_enabled_p ())
598 dump_printf_loc (MSG_NOTE, vect_location,
599 "=== vect_mark_stmts_to_be_vectorized ===\n");
601 worklist.create (64);
603 /* 1. Init worklist. */
604 for (i = 0; i < nbbs; i++)
606 bb = bbs[i];
607 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
609 phi = gsi_stmt (si);
610 if (dump_enabled_p ())
612 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
613 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
614 dump_printf (MSG_NOTE, "\n");
617 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
618 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
620 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
622 stmt = gsi_stmt (si);
623 if (dump_enabled_p ())
625 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
626 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
627 dump_printf (MSG_NOTE, "\n");
630 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
631 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
635 /* 2. Process_worklist */
636 while (worklist.length () > 0)
638 use_operand_p use_p;
639 ssa_op_iter iter;
641 stmt = worklist.pop ();
642 if (dump_enabled_p ())
644 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
645 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
646 dump_printf (MSG_NOTE, "\n");
649 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
650 (DEF_STMT) as relevant/irrelevant and live/dead according to the
651 liveness and relevance properties of STMT. */
652 stmt_vinfo = vinfo_for_stmt (stmt);
653 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
654 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
656 /* Generally, the liveness and relevance properties of STMT are
657 propagated as is to the DEF_STMTs of its USEs:
658 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
659 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
661 One exception is when STMT has been identified as defining a reduction
662 variable; in this case we set the liveness/relevance as follows:
663 live_p = false
664 relevant = vect_used_by_reduction
665 This is because we distinguish between two kinds of relevant stmts -
666 those that are used by a reduction computation, and those that are
667 (also) used by a regular computation. This allows us later on to
668 identify stmts that are used solely by a reduction, and therefore the
669 order of the results that they produce does not have to be kept. */
671 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
672 tmp_relevant = relevant;
673 switch (def_type)
675 case vect_reduction_def:
676 switch (tmp_relevant)
678 case vect_unused_in_scope:
679 relevant = vect_used_by_reduction;
680 break;
682 case vect_used_by_reduction:
683 if (gimple_code (stmt) == GIMPLE_PHI)
684 break;
685 /* fall through */
687 default:
688 if (dump_enabled_p ())
689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
690 "unsupported use of reduction.\n");
691 worklist.release ();
692 return false;
695 live_p = false;
696 break;
698 case vect_nested_cycle:
699 if (tmp_relevant != vect_unused_in_scope
700 && tmp_relevant != vect_used_in_outer_by_reduction
701 && tmp_relevant != vect_used_in_outer)
703 if (dump_enabled_p ())
704 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
705 "unsupported use of nested cycle.\n");
707 worklist.release ();
708 return false;
711 live_p = false;
712 break;
714 case vect_double_reduction_def:
715 if (tmp_relevant != vect_unused_in_scope
716 && tmp_relevant != vect_used_by_reduction)
718 if (dump_enabled_p ())
719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
720 "unsupported use of double reduction.\n");
722 worklist.release ();
723 return false;
726 live_p = false;
727 break;
729 default:
730 break;
733 if (is_pattern_stmt_p (stmt_vinfo))
735 /* Pattern statements are not inserted into the code, so
736 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
737 have to scan the RHS or function arguments instead. */
738 if (is_gimple_assign (stmt))
740 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
741 tree op = gimple_assign_rhs1 (stmt);
743 i = 1;
744 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
746 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
747 live_p, relevant, &worklist, false)
748 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
749 live_p, relevant, &worklist, false))
751 worklist.release ();
752 return false;
754 i = 2;
756 for (; i < gimple_num_ops (stmt); i++)
758 op = gimple_op (stmt, i);
759 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
760 &worklist, false))
762 worklist.release ();
763 return false;
767 else if (is_gimple_call (stmt))
769 for (i = 0; i < gimple_call_num_args (stmt); i++)
771 tree arg = gimple_call_arg (stmt, i);
772 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
773 &worklist, false))
775 worklist.release ();
776 return false;
781 else
782 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
784 tree op = USE_FROM_PTR (use_p);
785 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
786 &worklist, false))
788 worklist.release ();
789 return false;
793 if (STMT_VINFO_GATHER_P (stmt_vinfo))
795 tree off;
796 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
797 gcc_assert (decl);
798 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
799 &worklist, true))
801 worklist.release ();
802 return false;
805 } /* while worklist */
807 worklist.release ();
808 return true;
812 /* Function vect_model_simple_cost.
814 Models cost for simple operations, i.e. those that only emit ncopies of a
815 single op. Right now, this does not account for multiple insns that could
816 be generated for the single vector op. We will handle that shortly. */
818 void
819 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
820 enum vect_def_type *dt,
821 stmt_vector_for_cost *prologue_cost_vec,
822 stmt_vector_for_cost *body_cost_vec)
824 int i;
825 int inside_cost = 0, prologue_cost = 0;
827 /* The SLP costs were already calculated during SLP tree build. */
828 if (PURE_SLP_STMT (stmt_info))
829 return;
831 /* FORNOW: Assuming maximum 2 args per stmts. */
832 for (i = 0; i < 2; i++)
833 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
834 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
835 stmt_info, 0, vect_prologue);
837 /* Pass the inside-of-loop statements to the target-specific cost model. */
838 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
839 stmt_info, 0, vect_body);
841 if (dump_enabled_p ())
842 dump_printf_loc (MSG_NOTE, vect_location,
843 "vect_model_simple_cost: inside_cost = %d, "
844 "prologue_cost = %d .\n", inside_cost, prologue_cost);
848 /* Model cost for type demotion and promotion operations. PWR is normally
849 zero for single-step promotions and demotions. It will be one if
850 two-step promotion/demotion is required, and so on. Each additional
851 step doubles the number of instructions required. */
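/* For example (assuming vect_pow2 (N) evaluates to 2**N): a two-step
   promotion (PWR == 1) is costed as 2 + 4 = 6 vec_promote_demote
   stmts, while a two-step demotion with the same PWR is costed as
   1 + 2 = 3.  */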
853 static void
854 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
855 enum vect_def_type *dt, int pwr)
857 int i, tmp;
858 int inside_cost = 0, prologue_cost = 0;
859 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
860 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
861 void *target_cost_data;
863 /* The SLP costs were already calculated during SLP tree build. */
864 if (PURE_SLP_STMT (stmt_info))
865 return;
867 if (loop_vinfo)
868 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
869 else
870 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
872 for (i = 0; i < pwr + 1; i++)
874 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
875 (i + 1) : i;
876 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
877 vec_promote_demote, stmt_info, 0,
878 vect_body);
881 /* FORNOW: Assuming maximum 2 args per stmts. */
882 for (i = 0; i < 2; i++)
883 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
884 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
885 stmt_info, 0, vect_prologue);
887 if (dump_enabled_p ())
888 dump_printf_loc (MSG_NOTE, vect_location,
889 "vect_model_promotion_demotion_cost: inside_cost = %d, "
890 "prologue_cost = %d .\n", inside_cost, prologue_cost);
893 /* Function vect_cost_group_size
895 For grouped load or store, return the group_size only if it is the first
896 load or store of a group, else return 1. This ensures that group size is
897 only returned once per group. */
899 static int
900 vect_cost_group_size (stmt_vec_info stmt_info)
902 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
904 if (first_stmt == STMT_VINFO_STMT (stmt_info))
905 return GROUP_SIZE (stmt_info);
907 return 1;
911 /* Function vect_model_store_cost
913 Models cost for stores. In the case of grouped accesses, one access
914 has the overhead of the grouped access attributed to it. */
916 void
917 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
918 bool store_lanes_p, enum vect_def_type dt,
919 slp_tree slp_node,
920 stmt_vector_for_cost *prologue_cost_vec,
921 stmt_vector_for_cost *body_cost_vec)
923 int group_size;
924 unsigned int inside_cost = 0, prologue_cost = 0;
925 struct data_reference *first_dr;
926 gimple first_stmt;
928 /* The SLP costs were already calculated during SLP tree build. */
929 if (PURE_SLP_STMT (stmt_info))
930 return;
932 if (dt == vect_constant_def || dt == vect_external_def)
933 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
934 stmt_info, 0, vect_prologue);
936 /* Grouped access? */
937 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
939 if (slp_node)
941 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
942 group_size = 1;
944 else
946 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
947 group_size = vect_cost_group_size (stmt_info);
950 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
952 /* Not a grouped access. */
953 else
955 group_size = 1;
956 first_dr = STMT_VINFO_DATA_REF (stmt_info);
959 /* We assume that the cost of a single store-lanes instruction is
960 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
961 access is instead being provided by a permute-and-store operation,
962 include the cost of the permutes. */
963 if (!store_lanes_p && group_size > 1)
965 /* Uses a high and low interleave operation for each needed permute. */
967 int nstmts = ncopies * exact_log2 (group_size) * group_size;
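/* E.g. for GROUP_SIZE == 4 and NCOPIES == 1 this charges
   exact_log2 (4) * 4 == 8 vec_perm stmts for the interleaving.  */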
968 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
969 stmt_info, 0, vect_body);
971 if (dump_enabled_p ())
972 dump_printf_loc (MSG_NOTE, vect_location,
973 "vect_model_store_cost: strided group_size = %d .\n",
974 group_size);
977 /* Costs of the stores. */
978 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
980 if (dump_enabled_p ())
981 dump_printf_loc (MSG_NOTE, vect_location,
982 "vect_model_store_cost: inside_cost = %d, "
983 "prologue_cost = %d .\n", inside_cost, prologue_cost);
987 /* Calculate cost of DR's memory access. */
988 void
989 vect_get_store_cost (struct data_reference *dr, int ncopies,
990 unsigned int *inside_cost,
991 stmt_vector_for_cost *body_cost_vec)
993 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
994 gimple stmt = DR_STMT (dr);
995 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
997 switch (alignment_support_scheme)
999 case dr_aligned:
1001 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1002 vector_store, stmt_info, 0,
1003 vect_body);
1005 if (dump_enabled_p ())
1006 dump_printf_loc (MSG_NOTE, vect_location,
1007 "vect_model_store_cost: aligned.\n");
1008 break;
1011 case dr_unaligned_supported:
1013 /* Here, we assign an additional cost for the unaligned store. */
1014 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1015 unaligned_store, stmt_info,
1016 DR_MISALIGNMENT (dr), vect_body);
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_NOTE, vect_location,
1019 "vect_model_store_cost: unaligned supported by "
1020 "hardware.\n");
1021 break;
1024 case dr_unaligned_unsupported:
1026 *inside_cost = VECT_MAX_COST;
1028 if (dump_enabled_p ())
1029 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1030 "vect_model_store_cost: unsupported access.\n");
1031 break;
1034 default:
1035 gcc_unreachable ();
1040 /* Function vect_model_load_cost
1042 Models cost for loads. In the case of grouped accesses, the last access
1043 has the overhead of the grouped access attributed to it. Since unaligned
1044 accesses are supported for loads, we also account for the costs of the
1045 access scheme chosen. */
1047 void
1048 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1049 bool load_lanes_p, slp_tree slp_node,
1050 stmt_vector_for_cost *prologue_cost_vec,
1051 stmt_vector_for_cost *body_cost_vec)
1053 int group_size;
1054 gimple first_stmt;
1055 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1056 unsigned int inside_cost = 0, prologue_cost = 0;
1058 /* The SLP costs were already calculated during SLP tree build. */
1059 if (PURE_SLP_STMT (stmt_info))
1060 return;
1062 /* Grouped accesses? */
1063 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1064 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1066 group_size = vect_cost_group_size (stmt_info);
1067 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1069 /* Not a grouped access. */
1070 else
1072 group_size = 1;
1073 first_dr = dr;
1076 /* We assume that the cost of a single load-lanes instruction is
1077 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1078 access is instead being provided by a load-and-permute operation,
1079 include the cost of the permutes. */
1080 if (!load_lanes_p && group_size > 1)
1082 /* Uses an even and odd extract operations for each needed permute. */
1083 int nstmts = ncopies * exact_log2 (group_size) * group_size;
1084 inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1085 stmt_info, 0, vect_body);
1087 if (dump_enabled_p ())
1088 dump_printf_loc (MSG_NOTE, vect_location,
1089 "vect_model_load_cost: strided group_size = %d .\n",
1090 group_size);
1093 /* The loads themselves. */
1094 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1096 /* N scalar loads plus gathering them into a vector. */
1097 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1098 inside_cost += record_stmt_cost (body_cost_vec,
1099 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1100 scalar_load, stmt_info, 0, vect_body);
1101 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1102 stmt_info, 0, vect_body);
1104 else
1105 vect_get_load_cost (first_dr, ncopies,
1106 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1107 || group_size > 1 || slp_node),
1108 &inside_cost, &prologue_cost,
1109 prologue_cost_vec, body_cost_vec, true);
1111 if (dump_enabled_p ())
1112 dump_printf_loc (MSG_NOTE, vect_location,
1113 "vect_model_load_cost: inside_cost = %d, "
1114 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1118 /* Calculate cost of DR's memory access. */
1119 void
1120 vect_get_load_cost (struct data_reference *dr, int ncopies,
1121 bool add_realign_cost, unsigned int *inside_cost,
1122 unsigned int *prologue_cost,
1123 stmt_vector_for_cost *prologue_cost_vec,
1124 stmt_vector_for_cost *body_cost_vec,
1125 bool record_prologue_costs)
1127 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1128 gimple stmt = DR_STMT (dr);
1129 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1131 switch (alignment_support_scheme)
1133 case dr_aligned:
1135 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1136 stmt_info, 0, vect_body);
1138 if (dump_enabled_p ())
1139 dump_printf_loc (MSG_NOTE, vect_location,
1140 "vect_model_load_cost: aligned.\n");
1142 break;
1144 case dr_unaligned_supported:
1146 /* Here, we assign an additional cost for the unaligned load. */
1147 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1148 unaligned_load, stmt_info,
1149 DR_MISALIGNMENT (dr), vect_body);
1151 if (dump_enabled_p ())
1152 dump_printf_loc (MSG_NOTE, vect_location,
1153 "vect_model_load_cost: unaligned supported by "
1154 "hardware.\n");
1156 break;
1158 case dr_explicit_realign:
1160 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1161 vector_load, stmt_info, 0, vect_body);
1162 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1163 vec_perm, stmt_info, 0, vect_body);
1165 /* FIXME: If the misalignment remains fixed across the iterations of
1166 the containing loop, the following cost should be added to the
1167 prologue costs. */
1168 if (targetm.vectorize.builtin_mask_for_load)
1169 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1170 stmt_info, 0, vect_body);
1172 if (dump_enabled_p ())
1173 dump_printf_loc (MSG_NOTE, vect_location,
1174 "vect_model_load_cost: explicit realign\n");
1176 break;
1178 case dr_explicit_realign_optimized:
1180 if (dump_enabled_p ())
1181 dump_printf_loc (MSG_NOTE, vect_location,
1182 "vect_model_load_cost: unaligned software "
1183 "pipelined.\n");
1185 /* Unaligned software pipeline has a load of an address, an initial
1186 load, and possibly a mask operation to "prime" the loop. However,
1187 if this is an access in a group of loads, which provide grouped
1188 access, then the above cost should only be considered for one
1189 access in the group. Inside the loop, there is a load op
1190 and a realignment op. */
1192 if (add_realign_cost && record_prologue_costs)
1194 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1195 vector_stmt, stmt_info,
1196 0, vect_prologue);
1197 if (targetm.vectorize.builtin_mask_for_load)
1198 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1199 vector_stmt, stmt_info,
1200 0, vect_prologue);
1203 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1204 stmt_info, 0, vect_body);
1205 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1206 stmt_info, 0, vect_body);
1208 if (dump_enabled_p ())
1209 dump_printf_loc (MSG_NOTE, vect_location,
1210 "vect_model_load_cost: explicit realign optimized"
1211 "\n");
1213 break;
1216 case dr_unaligned_unsupported:
1218 *inside_cost = VECT_MAX_COST;
1220 if (dump_enabled_p ())
1221 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1222 "vect_model_load_cost: unsupported access.\n");
1223 break;
1226 default:
1227 gcc_unreachable ();
1231 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1232 the loop preheader for the vectorized stmt STMT. */
1234 static void
1235 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1237 if (gsi)
1238 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1239 else
1241 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1242 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1244 if (loop_vinfo)
1246 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1247 basic_block new_bb;
1248 edge pe;
1250 if (nested_in_vect_loop_p (loop, stmt))
1251 loop = loop->inner;
1253 pe = loop_preheader_edge (loop);
1254 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1255 gcc_assert (!new_bb);
1257 else
1259 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1260 basic_block bb;
1261 gimple_stmt_iterator gsi_bb_start;
1263 gcc_assert (bb_vinfo);
1264 bb = BB_VINFO_BB (bb_vinfo);
1265 gsi_bb_start = gsi_after_labels (bb);
1266 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1270 if (dump_enabled_p ())
1272 dump_printf_loc (MSG_NOTE, vect_location,
1273 "created new init_stmt: ");
1274 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1275 dump_printf (MSG_NOTE, "\n");
1279 /* Function vect_init_vector.
1281 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1282 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1283 vector type a vector with all elements equal to VAL is created first.
1284 Place the initialization at BSI if it is not NULL. Otherwise, place the
1285 initialization at the loop preheader.
1286 Return the DEF of INIT_STMT.
1287 It will be used in the vectorization of STMT. */
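   For example (illustrative): vectorizing x_1 = y_2 + 3 with a V4SI
   vectype, vect_init_vector (stmt, 3, V4SI, NULL) emits an init stmt
   assigning { 3, 3, 3, 3 } to a "cst_"-prefixed temporary in the loop
   preheader and returns its SSA def.  */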
1289 tree
1290 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1292 tree new_var;
1293 gimple init_stmt;
1294 tree vec_oprnd;
1295 tree new_temp;
1297 if (TREE_CODE (type) == VECTOR_TYPE
1298 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1300 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1302 if (CONSTANT_CLASS_P (val))
1303 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1304 else
1306 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1307 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1308 new_temp, val,
1309 NULL_TREE);
1310 vect_init_vector_1 (stmt, init_stmt, gsi);
1311 val = new_temp;
1314 val = build_vector_from_val (type, val);
1317 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1318 init_stmt = gimple_build_assign (new_var, val);
1319 new_temp = make_ssa_name (new_var, init_stmt);
1320 gimple_assign_set_lhs (init_stmt, new_temp);
1321 vect_init_vector_1 (stmt, init_stmt, gsi);
1322 vec_oprnd = gimple_assign_lhs (init_stmt);
1323 return vec_oprnd;
1327 /* Function vect_get_vec_def_for_operand.
1329 OP is an operand in STMT. This function returns a (vector) def that will be
1330 used in the vectorized stmt for STMT.
1332 In the case that OP is an SSA_NAME which is defined in the loop, then
1333 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1335 In case OP is an invariant or constant, a new stmt that creates a vector def
1336 needs to be introduced. */
1338 tree
1339 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1341 tree vec_oprnd;
1342 gimple vec_stmt;
1343 gimple def_stmt;
1344 stmt_vec_info def_stmt_info = NULL;
1345 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1346 unsigned int nunits;
1347 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1348 tree def;
1349 enum vect_def_type dt;
1350 bool is_simple_use;
1351 tree vector_type;
1353 if (dump_enabled_p ())
1355 dump_printf_loc (MSG_NOTE, vect_location,
1356 "vect_get_vec_def_for_operand: ");
1357 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1358 dump_printf (MSG_NOTE, "\n");
1361 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1362 &def_stmt, &def, &dt);
1363 gcc_assert (is_simple_use);
1364 if (dump_enabled_p ())
1366 int loc_printed = 0;
1367 if (def)
1369 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1370 loc_printed = 1;
1371 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1372 dump_printf (MSG_NOTE, "\n");
1374 if (def_stmt)
1376 if (loc_printed)
1377 dump_printf (MSG_NOTE, " def_stmt = ");
1378 else
1379 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1380 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1381 dump_printf (MSG_NOTE, "\n");
1385 switch (dt)
1387 /* Case 1: operand is a constant. */
1388 case vect_constant_def:
1390 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1391 gcc_assert (vector_type);
1392 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1394 if (scalar_def)
1395 *scalar_def = op;
1397 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1398 if (dump_enabled_p ())
1399 dump_printf_loc (MSG_NOTE, vect_location,
1400 "Create vector_cst. nunits = %d\n", nunits);
1402 return vect_init_vector (stmt, op, vector_type, NULL);
1405 /* Case 2: operand is defined outside the loop - loop invariant. */
1406 case vect_external_def:
1408 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1409 gcc_assert (vector_type);
1411 if (scalar_def)
1412 *scalar_def = def;
1414 /* Create 'vec_inv = {inv,inv,..,inv}' */
1415 if (dump_enabled_p ())
1416 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1418 return vect_init_vector (stmt, def, vector_type, NULL);
1421 /* Case 3: operand is defined inside the loop. */
1422 case vect_internal_def:
1424 if (scalar_def)
1425 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1427 /* Get the def from the vectorized stmt. */
1428 def_stmt_info = vinfo_for_stmt (def_stmt);
1430 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1431 /* Get vectorized pattern statement. */
1432 if (!vec_stmt
1433 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1434 && !STMT_VINFO_RELEVANT (def_stmt_info))
1435 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1436 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1437 gcc_assert (vec_stmt);
1438 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1439 vec_oprnd = PHI_RESULT (vec_stmt);
1440 else if (is_gimple_call (vec_stmt))
1441 vec_oprnd = gimple_call_lhs (vec_stmt);
1442 else
1443 vec_oprnd = gimple_assign_lhs (vec_stmt);
1444 return vec_oprnd;
1447 /* Case 4: operand is defined by a loop header phi - reduction */
1448 case vect_reduction_def:
1449 case vect_double_reduction_def:
1450 case vect_nested_cycle:
1452 struct loop *loop;
1454 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1455 loop = (gimple_bb (def_stmt))->loop_father;
1457 /* Get the def before the loop */
1458 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1459 return get_initial_def_for_reduction (stmt, op, scalar_def);
1462 /* Case 5: operand is defined by loop-header phi - induction. */
1463 case vect_induction_def:
1465 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1467 /* Get the def from the vectorized stmt. */
1468 def_stmt_info = vinfo_for_stmt (def_stmt);
1469 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1470 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1471 vec_oprnd = PHI_RESULT (vec_stmt);
1472 else
1473 vec_oprnd = gimple_get_lhs (vec_stmt);
1474 return vec_oprnd;
1477 default:
1478 gcc_unreachable ();
1483 /* Function vect_get_vec_def_for_stmt_copy
1485 Return a vector-def for an operand. This function is used when the
1486 vectorized stmt to be created (by the caller to this function) is a "copy"
1487 created in case the vectorized result cannot fit in one vector, and several
1488 copies of the vector-stmt are required. In this case the vector-def is
1489 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1490 of the stmt that defines VEC_OPRND.
1491 DT is the type of the vector def VEC_OPRND.
1493 Context:
1494 In case the vectorization factor (VF) is bigger than the number
1495 of elements that can fit in a vectype (nunits), we have to generate
1496 more than one vector stmt to vectorize the scalar stmt. This situation
1497 arises when there are multiple data-types operated upon in the loop; the
1498 smallest data-type determines the VF, and as a result, when vectorizing
1499 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1500 vector stmt (each computing a vector of 'nunits' results, and together
1501 computing 'VF' results in each iteration). This function is called when
1502 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1503 which VF=16 and nunits=4, so the number of copies required is 4):
1505 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1507 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1508 VS1.1: vx.1 = memref1 VS1.2
1509 VS1.2: vx.2 = memref2 VS1.3
1510 VS1.3: vx.3 = memref3
1512 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1513 VSnew.1: vz1 = vx.1 + ... VSnew.2
1514 VSnew.2: vz2 = vx.2 + ... VSnew.3
1515 VSnew.3: vz3 = vx.3 + ...
1517 The vectorization of S1 is explained in vectorizable_load.
1518 The vectorization of S2:
1519 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1520 the function 'vect_get_vec_def_for_operand' is called to
1521 get the relevant vector-def for each operand of S2. For operand x it
1522 returns the vector-def 'vx.0'.
1524 To create the remaining copies of the vector-stmt (VSnew.j), this
1525 function is called to get the relevant vector-def for each operand. It is
1526 obtained from the respective VS1.j stmt, which is recorded in the
1527 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1529 For example, to obtain the vector-def 'vx.1' in order to create the
1530 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1531 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1532 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1533 and return its def ('vx.1').
1534 Overall, to create the above sequence this function will be called 3 times:
1535 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1536 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1537 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1539 tree
1540 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1542 gimple vec_stmt_for_operand;
1543 stmt_vec_info def_stmt_info;
1545 /* Do nothing; can reuse same def. */
1546 if (dt == vect_external_def || dt == vect_constant_def )
1547 return vec_oprnd;
1549 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1550 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1551 gcc_assert (def_stmt_info);
1552 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1553 gcc_assert (vec_stmt_for_operand);
1554 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1555 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1556 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1557 else
1558 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1559 return vec_oprnd;
1563 /* Get vectorized definitions for the operands to create a copy of an original
1564 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1566 static void
1567 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1568 vec<tree> *vec_oprnds0,
1569 vec<tree> *vec_oprnds1)
1571 tree vec_oprnd = vec_oprnds0->pop ();
1573 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1574 vec_oprnds0->quick_push (vec_oprnd);
1576 if (vec_oprnds1 && vec_oprnds1->length ())
1578 vec_oprnd = vec_oprnds1->pop ();
1579 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1580 vec_oprnds1->quick_push (vec_oprnd);
1585 /* Get vectorized definitions for OP0 and OP1.
1586 REDUC_INDEX is the index of reduction operand in case of reduction,
1587 and -1 otherwise. */
1589 void
1590 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1591 vec<tree> *vec_oprnds0,
1592 vec<tree> *vec_oprnds1,
1593 slp_tree slp_node, int reduc_index)
1595 if (slp_node)
1597 int nops = (op1 == NULL_TREE) ? 1 : 2;
1598 vec<tree> ops;
1599 ops.create (nops);
1600 vec<vec<tree> > vec_defs;
1601 vec_defs.create (nops);
1603 ops.quick_push (op0);
1604 if (op1)
1605 ops.quick_push (op1);
1607 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1609 *vec_oprnds0 = vec_defs[0];
1610 if (op1)
1611 *vec_oprnds1 = vec_defs[1];
1613 ops.release ();
1614 vec_defs.release ();
1616 else
1618 tree vec_oprnd;
1620 vec_oprnds0->create (1);
1621 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1622 vec_oprnds0->quick_push (vec_oprnd);
1624 if (op1)
1626 vec_oprnds1->create (1);
1627 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1628 vec_oprnds1->quick_push (vec_oprnd);
1634 /* Function vect_finish_stmt_generation.
1636 Insert a new stmt. */
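   Besides inserting VEC_STMT before *GSI, this also creates its
   stmt_vec_info, copies STMT's location to it and, for stmts with
   memory operands, keeps the virtual SSA operands up to date so the
   SSA renamer does not have to be run.  */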
1638 void
1639 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1640 gimple_stmt_iterator *gsi)
1642 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1643 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1644 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1646 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1648 if (!gsi_end_p (*gsi)
1649 && gimple_has_mem_ops (vec_stmt))
1651 gimple at_stmt = gsi_stmt (*gsi);
1652 tree vuse = gimple_vuse (at_stmt);
1653 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1655 tree vdef = gimple_vdef (at_stmt);
1656 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1657 /* If we have an SSA vuse and insert a store, update virtual
1658 SSA form to avoid triggering the renamer. Do so only
1659 if we can easily see all uses - which is what almost always
1660 happens with the way vectorized stmts are inserted. */
1661 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1662 && ((is_gimple_assign (vec_stmt)
1663 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1664 || (is_gimple_call (vec_stmt)
1665 && !(gimple_call_flags (vec_stmt)
1666 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1668 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1669 gimple_set_vdef (vec_stmt, new_vdef);
1670 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1674 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1676 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1677 bb_vinfo));
1679 if (dump_enabled_p ())
1681 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1682 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1683 dump_printf (MSG_NOTE, "\n");
1686 gimple_set_location (vec_stmt, gimple_location (stmt));
1689 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1690 a function declaration if the target has a vectorized version
1691 of the function, or NULL_TREE if the function cannot be vectorized. */
1693 tree
1694 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1696 tree fndecl = gimple_call_fndecl (call);
1698 /* We only handle functions that do not read or clobber memory -- i.e.
1699 const or novops ones. */
1700 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1701 return NULL_TREE;
1703 if (!fndecl
1704 || TREE_CODE (fndecl) != FUNCTION_DECL
1705 || !DECL_BUILT_IN (fndecl))
1706 return NULL_TREE;
1708 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1709 vectype_in);
1712 /* Function vectorizable_call.
1714 Check if STMT performs a function call that can be vectorized.
1715 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1716 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1717 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1719 static bool
1720 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1721 slp_tree slp_node)
1723 tree vec_dest;
1724 tree scalar_dest;
1725 tree op, type;
1726 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1727 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1728 tree vectype_out, vectype_in;
1729 int nunits_in;
1730 int nunits_out;
1731 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1732 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1733 tree fndecl, new_temp, def, rhs_type;
1734 gimple def_stmt;
1735 enum vect_def_type dt[3]
1736 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1737 gimple new_stmt = NULL;
1738 int ncopies, j;
1739 vec<tree> vargs = vNULL;
1740 enum { NARROW, NONE, WIDEN } modifier;
1741 size_t i, nargs;
1742 tree lhs;
1744 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1745 return false;
1747 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1748 return false;
1750 /* Is STMT a vectorizable call? */
1751 if (!is_gimple_call (stmt))
1752 return false;
1754 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1755 return false;
1757 if (stmt_can_throw_internal (stmt))
1758 return false;
1760 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1762 /* Process function arguments. */
1763 rhs_type = NULL_TREE;
1764 vectype_in = NULL_TREE;
1765 nargs = gimple_call_num_args (stmt);
 1767 /* Bail out if the function has more than three arguments; we do not have
 1768 interesting builtin functions to vectorize with more than two arguments
 1769 except for fma. A call with no arguments is not supported either. */
1770 if (nargs == 0 || nargs > 3)
1771 return false;
1773 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
1774 if (gimple_call_internal_p (stmt)
1775 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
1777 nargs = 0;
1778 rhs_type = unsigned_type_node;
1781 for (i = 0; i < nargs; i++)
1783 tree opvectype;
1785 op = gimple_call_arg (stmt, i);
1787 /* We can only handle calls with arguments of the same type. */
1788 if (rhs_type
1789 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1791 if (dump_enabled_p ())
1792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1793 "argument types differ.\n");
1794 return false;
1796 if (!rhs_type)
1797 rhs_type = TREE_TYPE (op);
1799 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1800 &def_stmt, &def, &dt[i], &opvectype))
1802 if (dump_enabled_p ())
1803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1804 "use not simple.\n");
1805 return false;
1808 if (!vectype_in)
1809 vectype_in = opvectype;
1810 else if (opvectype
1811 && opvectype != vectype_in)
1813 if (dump_enabled_p ())
1814 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1815 "argument vector types differ.\n");
1816 return false;
1819 /* If all arguments are external or constant defs use a vector type with
1820 the same size as the output vector type. */
1821 if (!vectype_in)
1822 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1823 if (vec_stmt)
1824 gcc_assert (vectype_in);
1825 if (!vectype_in)
1827 if (dump_enabled_p ())
1829 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1830 "no vectype for scalar type ");
1831 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
1832 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1835 return false;
1838 /* FORNOW */
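/* Classify the call by comparing the element counts of the input and
   output vector types: equal counts give a plain (NONE) call, twice as
   many input elements as output elements gives WIDEN, half as many
   gives NARROW.  E.g. (illustrative) V8HI arguments with a V4SI result
   give nunits_in == 8, nunits_out == 4, hence WIDEN.  */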
1839 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1840 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1841 if (nunits_in == nunits_out / 2)
1842 modifier = NARROW;
1843 else if (nunits_out == nunits_in)
1844 modifier = NONE;
1845 else if (nunits_out == nunits_in / 2)
1846 modifier = WIDEN;
1847 else
1848 return false;
1850 /* For now, we only vectorize functions if a target specific builtin
1851 is available. TODO -- in some cases, it might be profitable to
1852 insert the calls for pieces of the vector, in order to be able
1853 to vectorize other operations in the loop. */
1854 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1855 if (fndecl == NULL_TREE)
1857 if (gimple_call_internal_p (stmt)
1858 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
1859 && !slp_node
1860 && loop_vinfo
1861 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
1862 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
1863 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
1864 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
1866 /* We can handle IFN_GOMP_SIMD_LANE by returning a
1867 { 0, 1, 2, ... vf - 1 } vector. */
1868 gcc_assert (nargs == 0);
1870 else
1872 if (dump_enabled_p ())
1873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1874 "function is not vectorizable.\n");
1875 return false;
1879 gcc_assert (!gimple_vuse (stmt));
1881 if (slp_node || PURE_SLP_STMT (stmt_info))
1882 ncopies = 1;
1883 else if (modifier == NARROW)
1884 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1885 else
1886 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1888 /* Sanity check: make sure that at least one copy of the vectorized stmt
1889 needs to be generated. */
1890 gcc_assert (ncopies >= 1);
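/* E.g. with a vectorization factor of 8 and V4SF operands (nunits_in == 4),
   ncopies == 2 vector calls are generated in the NONE case. */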
1892 if (!vec_stmt) /* transformation not required. */
1894 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1895 if (dump_enabled_p ())
1896 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
1897 "\n");
1898 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
1899 return true;
1902 /** Transform. **/
1904 if (dump_enabled_p ())
1905 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
1907 /* Handle def. */
1908 scalar_dest = gimple_call_lhs (stmt);
1909 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1911 prev_stmt_info = NULL;
1912 switch (modifier)
1914 case NONE:
1915 for (j = 0; j < ncopies; ++j)
1917 /* Build argument list for the vectorized call. */
1918 if (j == 0)
1919 vargs.create (nargs);
1920 else
1921 vargs.truncate (0);
1923 if (slp_node)
1925 vec<vec<tree> > vec_defs;
1926 vec_defs.create (nargs);
1927 vec<tree> vec_oprnds0;
1929 for (i = 0; i < nargs; i++)
1930 vargs.quick_push (gimple_call_arg (stmt, i));
1931 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1932 vec_oprnds0 = vec_defs[0];
1934 /* Arguments are ready. Create the new vector stmt. */
1935 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
1937 size_t k;
1938 for (k = 0; k < nargs; k++)
1940 vec<tree> vec_oprndsk = vec_defs[k];
1941 vargs[k] = vec_oprndsk[i];
1943 new_stmt = gimple_build_call_vec (fndecl, vargs);
1944 new_temp = make_ssa_name (vec_dest, new_stmt);
1945 gimple_call_set_lhs (new_stmt, new_temp);
1946 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1947 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
1950 for (i = 0; i < nargs; i++)
1952 vec<tree> vec_oprndsi = vec_defs[i];
1953 vec_oprndsi.release ();
1955 vec_defs.release ();
1956 continue;
1959 for (i = 0; i < nargs; i++)
1961 op = gimple_call_arg (stmt, i);
1962 if (j == 0)
1963 vec_oprnd0
1964 = vect_get_vec_def_for_operand (op, stmt, NULL);
1965 else
1967 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1968 vec_oprnd0
1969 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1972 vargs.quick_push (vec_oprnd0);
1975 if (gimple_call_internal_p (stmt)
1976 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
1978 tree *v = XALLOCAVEC (tree, nunits_out);
1979 int k;
1980 for (k = 0; k < nunits_out; ++k)
1981 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
1982 tree cst = build_vector (vectype_out, v);
1983 tree new_var
1984 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
1985 gimple init_stmt = gimple_build_assign (new_var, cst);
1986 new_temp = make_ssa_name (new_var, init_stmt);
1987 gimple_assign_set_lhs (init_stmt, new_temp);
1988 vect_init_vector_1 (stmt, init_stmt, NULL);
1989 new_temp = make_ssa_name (vec_dest, NULL);
1990 new_stmt = gimple_build_assign (new_temp,
1991 gimple_assign_lhs (init_stmt));
1993 else
1995 new_stmt = gimple_build_call_vec (fndecl, vargs);
1996 new_temp = make_ssa_name (vec_dest, new_stmt);
1997 gimple_call_set_lhs (new_stmt, new_temp);
1999 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2001 if (j == 0)
2002 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2003 else
2004 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2006 prev_stmt_info = vinfo_for_stmt (new_stmt);
2009 break;
2011 case NARROW:
2012 for (j = 0; j < ncopies; ++j)
2014 /* Build argument list for the vectorized call. */
2015 if (j == 0)
2016 vargs.create (nargs * 2);
2017 else
2018 vargs.truncate (0);
2020 if (slp_node)
2022 vec<vec<tree> > vec_defs;
2023 vec_defs.create (nargs);
2024 vec<tree> vec_oprnds0;
2026 for (i = 0; i < nargs; i++)
2027 vargs.quick_push (gimple_call_arg (stmt, i));
2028 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2029 vec_oprnds0 = vec_defs[0];
2031 /* Arguments are ready. Create the new vector stmt. */
2032 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2034 size_t k;
2035 vargs.truncate (0);
2036 for (k = 0; k < nargs; k++)
2038 vec<tree> vec_oprndsk = vec_defs[k];
2039 vargs.quick_push (vec_oprndsk[i]);
2040 vargs.quick_push (vec_oprndsk[i + 1]);
2042 new_stmt = gimple_build_call_vec (fndecl, vargs);
2043 new_temp = make_ssa_name (vec_dest, new_stmt);
2044 gimple_call_set_lhs (new_stmt, new_temp);
2045 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2046 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2049 for (i = 0; i < nargs; i++)
2051 vec<tree> vec_oprndsi = vec_defs[i];
2052 vec_oprndsi.release ();
2054 vec_defs.release ();
2055 continue;
2058 for (i = 0; i < nargs; i++)
2060 op = gimple_call_arg (stmt, i);
2061 if (j == 0)
2063 vec_oprnd0
2064 = vect_get_vec_def_for_operand (op, stmt, NULL);
2065 vec_oprnd1
2066 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2068 else
2070 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2071 vec_oprnd0
2072 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2073 vec_oprnd1
2074 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2077 vargs.quick_push (vec_oprnd0);
2078 vargs.quick_push (vec_oprnd1);
2081 new_stmt = gimple_build_call_vec (fndecl, vargs);
2082 new_temp = make_ssa_name (vec_dest, new_stmt);
2083 gimple_call_set_lhs (new_stmt, new_temp);
2084 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2086 if (j == 0)
2087 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2088 else
2089 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2091 prev_stmt_info = vinfo_for_stmt (new_stmt);
2094 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2096 break;
2098 case WIDEN:
2099 /* No current target implements this case. */
2100 return false;
2103 vargs.release ();
2105 /* Update the exception handling table with the vector stmt if necessary. */
2106 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2107 gimple_purge_dead_eh_edges (gimple_bb (stmt));
2109 /* The call in STMT might prevent it from being removed by DCE.
2110 However, we cannot remove it here, due to the way the SSA name
2111 it defines is mapped to the new definition. So just replace the
2112 rhs of the statement with something harmless. */
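/* For example, once the vector call has been emitted, the scalar call
   x_1 = foo (a) is rewritten as x_1 = 0 (a zero constant of the proper
   type), which DCE can delete once all uses of x_1 have been vectorized. */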
2114 if (slp_node)
2115 return true;
2117 type = TREE_TYPE (scalar_dest);
2118 if (is_pattern_stmt_p (stmt_info))
2119 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2120 else
2121 lhs = gimple_call_lhs (stmt);
2122 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2123 set_vinfo_for_stmt (new_stmt, stmt_info);
2124 set_vinfo_for_stmt (stmt, NULL);
2125 STMT_VINFO_STMT (stmt_info) = new_stmt;
2126 gsi_replace (gsi, new_stmt, false);
2127 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2129 return true;
2133 /* Function vect_gen_widened_results_half
2135 Create a vector stmt whose code is CODE, whose number of operands is
2136 OP_TYPE and whose result variable is VEC_DEST, with arguments VEC_OPRND0
2137 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
2138 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2139 needs to be created (DECL is a function-decl of a target-builtin).
2140 STMT is the original scalar stmt that we are vectorizing. */
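/* For example, a widening multiplication from V8HI to V4SI is emitted as
   two such halves, one using VEC_WIDEN_MULT_LO_EXPR and one using
   VEC_WIDEN_MULT_HI_EXPR, each producing one V4SI vector of results. */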
2142 static gimple
2143 vect_gen_widened_results_half (enum tree_code code,
2144 tree decl,
2145 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2146 tree vec_dest, gimple_stmt_iterator *gsi,
2147 gimple stmt)
2149 gimple new_stmt;
2150 tree new_temp;
2152 /* Generate half of the widened result: */
2153 if (code == CALL_EXPR)
2155 /* Target specific support */
2156 if (op_type == binary_op)
2157 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2158 else
2159 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2160 new_temp = make_ssa_name (vec_dest, new_stmt);
2161 gimple_call_set_lhs (new_stmt, new_temp);
2163 else
2165 /* Generic support */
2166 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2167 if (op_type != binary_op)
2168 vec_oprnd1 = NULL;
2169 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2170 vec_oprnd1);
2171 new_temp = make_ssa_name (vec_dest, new_stmt);
2172 gimple_assign_set_lhs (new_stmt, new_temp);
2174 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2176 return new_stmt;
2180 /* Get vectorized definitions for loop-based vectorization. For the first
2181 operand we call vect_get_vec_def_for_operand() (with OPRND containing the
2182 scalar operand), and for the rest we get a copy with
2183 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2184 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2185 The vectors are collected into VEC_OPRNDS. */
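/* E.g. for a two-step narrowing conversion this is called with
   MULTI_STEP_CVT == 1 and collects four vector defs in VEC_OPRNDS, two at
   each recursion level. */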
2187 static void
2188 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2189 vec<tree> *vec_oprnds, int multi_step_cvt)
2191 tree vec_oprnd;
2193 /* Get first vector operand. */
2194 /* All the vector operands except the very first one (that is scalar oprnd)
2195 are stmt copies. */
2196 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2197 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2198 else
2199 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2201 vec_oprnds->quick_push (vec_oprnd);
2203 /* Get second vector operand. */
2204 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2205 vec_oprnds->quick_push (vec_oprnd);
2207 *oprnd = vec_oprnd;
2209 /* For conversion in multiple steps, continue to get operands
2210 recursively. */
2211 if (multi_step_cvt)
2212 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
2216 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2217 For multi-step conversions store the resulting vectors and call the function
2218 recursively. */
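/* For example, when narrowing V4SI operands to V8HI, each generated stmt
   packs two V4SI operands into one V8HI result:
       vres = VEC_PACK_TRUNC_EXPR <vop0, vop1>;  */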
2220 static void
2221 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
2222 int multi_step_cvt, gimple stmt,
2223 vec<tree> vec_dsts,
2224 gimple_stmt_iterator *gsi,
2225 slp_tree slp_node, enum tree_code code,
2226 stmt_vec_info *prev_stmt_info)
2228 unsigned int i;
2229 tree vop0, vop1, new_tmp, vec_dest;
2230 gimple new_stmt;
2231 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2233 vec_dest = vec_dsts.pop ();
2235 for (i = 0; i < vec_oprnds->length (); i += 2)
2237 /* Create demotion operation. */
2238 vop0 = (*vec_oprnds)[i];
2239 vop1 = (*vec_oprnds)[i + 1];
2240 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2241 new_tmp = make_ssa_name (vec_dest, new_stmt);
2242 gimple_assign_set_lhs (new_stmt, new_tmp);
2243 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2245 if (multi_step_cvt)
2246 /* Store the resulting vector for next recursive call. */
2247 (*vec_oprnds)[i/2] = new_tmp;
2248 else
2250 /* This is the last step of the conversion sequence. Store the
2251 vectors in SLP_NODE or in vector info of the scalar statement
2252 (or in STMT_VINFO_RELATED_STMT chain). */
2253 if (slp_node)
2254 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2255 else
2257 if (!*prev_stmt_info)
2258 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2259 else
2260 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2262 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2267 /* For multi-step demotion operations we first generate demotion operations
2268 from the source type to the intermediate types, and then combine the
2269 results (stored in VEC_OPRNDS) in demotion operation to the destination
2270 type. */
2271 if (multi_step_cvt)
2273 /* At each level of recursion we have half of the operands we had at the
2274 previous level. */
2275 vec_oprnds->truncate ((i+1)/2);
2276 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2277 stmt, vec_dsts, gsi, slp_node,
2278 VEC_PACK_TRUNC_EXPR,
2279 prev_stmt_info);
2282 vec_dsts.quick_push (vec_dest);
2286 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2287 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2288 the resulting vectors and call the function recursively. */
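/* For example, when promoting V8HI operands to V4SI, each input vector
   yields two result vectors, one via CODE1 (the low half) and one via CODE2
   (the high half), so VEC_OPRNDS0 doubles in length at each step. */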
2290 static void
2291 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
2292 vec<tree> *vec_oprnds1,
2293 gimple stmt, tree vec_dest,
2294 gimple_stmt_iterator *gsi,
2295 enum tree_code code1,
2296 enum tree_code code2, tree decl1,
2297 tree decl2, int op_type)
2299 int i;
2300 tree vop0, vop1, new_tmp1, new_tmp2;
2301 gimple new_stmt1, new_stmt2;
2302 vec<tree> vec_tmp = vNULL;
2304 vec_tmp.create (vec_oprnds0->length () * 2);
2305 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
2307 if (op_type == binary_op)
2308 vop1 = (*vec_oprnds1)[i];
2309 else
2310 vop1 = NULL_TREE;
2312 /* Generate the two halves of promotion operation. */
2313 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2314 op_type, vec_dest, gsi, stmt);
2315 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2316 op_type, vec_dest, gsi, stmt);
2317 if (is_gimple_call (new_stmt1))
2319 new_tmp1 = gimple_call_lhs (new_stmt1);
2320 new_tmp2 = gimple_call_lhs (new_stmt2);
2322 else
2324 new_tmp1 = gimple_assign_lhs (new_stmt1);
2325 new_tmp2 = gimple_assign_lhs (new_stmt2);
2328 /* Store the results for the next step. */
2329 vec_tmp.quick_push (new_tmp1);
2330 vec_tmp.quick_push (new_tmp2);
2333 vec_oprnds0->release ();
2334 *vec_oprnds0 = vec_tmp;
2338 /* Check if STMT performs a conversion operation, that can be vectorized.
2339 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2340 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2341 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
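/* Conversions handled here include e.g. (float) i (FLOAT_EXPR),
   (int) f (FIX_TRUNC_EXPR), widening and narrowing integer casts
   (NOP_EXPR/CONVERT_EXPR), and the WIDEN_MULT_EXPR / WIDEN_LSHIFT_EXPR
   forms. */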
2343 static bool
2344 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2345 gimple *vec_stmt, slp_tree slp_node)
2347 tree vec_dest;
2348 tree scalar_dest;
2349 tree op0, op1 = NULL_TREE;
2350 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2351 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2352 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2353 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2354 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2355 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2356 tree new_temp;
2357 tree def;
2358 gimple def_stmt;
2359 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2360 gimple new_stmt = NULL;
2361 stmt_vec_info prev_stmt_info;
2362 int nunits_in;
2363 int nunits_out;
2364 tree vectype_out, vectype_in;
2365 int ncopies, i, j;
2366 tree lhs_type, rhs_type;
2367 enum { NARROW, NONE, WIDEN } modifier;
2368 vec<tree> vec_oprnds0 = vNULL;
2369 vec<tree> vec_oprnds1 = vNULL;
2370 tree vop0;
2371 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2372 int multi_step_cvt = 0;
2373 vec<tree> vec_dsts = vNULL;
2374 vec<tree> interm_types = vNULL;
2375 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2376 int op_type;
2377 enum machine_mode rhs_mode;
2378 unsigned short fltsz;
2380 /* Is STMT a vectorizable conversion? */
2382 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2383 return false;
2385 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2386 return false;
2388 if (!is_gimple_assign (stmt))
2389 return false;
2391 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2392 return false;
2394 code = gimple_assign_rhs_code (stmt);
2395 if (!CONVERT_EXPR_CODE_P (code)
2396 && code != FIX_TRUNC_EXPR
2397 && code != FLOAT_EXPR
2398 && code != WIDEN_MULT_EXPR
2399 && code != WIDEN_LSHIFT_EXPR)
2400 return false;
2402 op_type = TREE_CODE_LENGTH (code);
2404 /* Check types of lhs and rhs. */
2405 scalar_dest = gimple_assign_lhs (stmt);
2406 lhs_type = TREE_TYPE (scalar_dest);
2407 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2409 op0 = gimple_assign_rhs1 (stmt);
2410 rhs_type = TREE_TYPE (op0);
2412 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2413 && !((INTEGRAL_TYPE_P (lhs_type)
2414 && INTEGRAL_TYPE_P (rhs_type))
2415 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2416 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2417 return false;
2419 if ((INTEGRAL_TYPE_P (lhs_type)
2420 && (TYPE_PRECISION (lhs_type)
2421 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2422 || (INTEGRAL_TYPE_P (rhs_type)
2423 && (TYPE_PRECISION (rhs_type)
2424 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2426 if (dump_enabled_p ())
2427 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2428 "type conversion to/from bit-precision unsupported."
2429 "\n");
2430 return false;
2433 /* Check the operands of the operation. */
2434 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2435 &def_stmt, &def, &dt[0], &vectype_in))
2437 if (dump_enabled_p ())
2438 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2439 "use not simple.\n");
2440 return false;
2442 if (op_type == binary_op)
2444 bool ok;
2446 op1 = gimple_assign_rhs2 (stmt);
2447 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2448 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2449 OP1. */
2450 if (CONSTANT_CLASS_P (op0))
2451 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
2452 &def_stmt, &def, &dt[1], &vectype_in);
2453 else
2454 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
2455 &def, &dt[1]);
2457 if (!ok)
2459 if (dump_enabled_p ())
2460 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2461 "use not simple.\n");
2462 return false;
2466 /* If op0 is an external or constant def, use a vector type of
2467 the same size as the output vector type. */
2468 if (!vectype_in)
2469 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2470 if (vec_stmt)
2471 gcc_assert (vectype_in);
2472 if (!vectype_in)
2474 if (dump_enabled_p ())
2476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2477 "no vectype for scalar type ");
2478 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2479 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2482 return false;
2485 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2486 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2487 if (nunits_in < nunits_out)
2488 modifier = NARROW;
2489 else if (nunits_out == nunits_in)
2490 modifier = NONE;
2491 else
2492 modifier = WIDEN;
2494 /* Multiple types in SLP are handled by creating the appropriate number of
2495 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2496 case of SLP. */
2497 if (slp_node || PURE_SLP_STMT (stmt_info))
2498 ncopies = 1;
2499 else if (modifier == NARROW)
2500 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2501 else
2502 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2504 /* Sanity check: make sure that at least one copy of the vectorized stmt
2505 needs to be generated. */
2506 gcc_assert (ncopies >= 1);
2508 /* Supportable by target? */
2509 switch (modifier)
2511 case NONE:
2512 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2513 return false;
2514 if (supportable_convert_operation (code, vectype_out, vectype_in,
2515 &decl1, &code1))
2516 break;
2517 /* FALLTHRU */
2518 unsupported:
2519 if (dump_enabled_p ())
2520 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2521 "conversion not supported by target.\n");
2522 return false;
2524 case WIDEN:
2525 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2526 &code1, &code2, &multi_step_cvt,
2527 &interm_types))
2529 /* Binary widening operation can only be supported directly by the
2530 architecture. */
2531 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2532 break;
2535 if (code != FLOAT_EXPR
2536 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2537 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2538 goto unsupported;
2540 rhs_mode = TYPE_MODE (rhs_type);
2541 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2542 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2543 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2544 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2546 cvt_type
2547 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2548 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2549 if (cvt_type == NULL_TREE)
2550 goto unsupported;
2552 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2554 if (!supportable_convert_operation (code, vectype_out,
2555 cvt_type, &decl1, &codecvt1))
2556 goto unsupported;
2558 else if (!supportable_widening_operation (code, stmt, vectype_out,
2559 cvt_type, &codecvt1,
2560 &codecvt2, &multi_step_cvt,
2561 &interm_types))
2562 continue;
2563 else
2564 gcc_assert (multi_step_cvt == 0);
2566 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2567 vectype_in, &code1, &code2,
2568 &multi_step_cvt, &interm_types))
2569 break;
2572 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2573 goto unsupported;
2575 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2576 codecvt2 = ERROR_MARK;
2577 else
2579 multi_step_cvt++;
2580 interm_types.safe_push (cvt_type);
2581 cvt_type = NULL_TREE;
2583 break;
2585 case NARROW:
2586 gcc_assert (op_type == unary_op);
2587 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2588 &code1, &multi_step_cvt,
2589 &interm_types))
2590 break;
2592 if (code != FIX_TRUNC_EXPR
2593 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2594 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2595 goto unsupported;
2597 rhs_mode = TYPE_MODE (rhs_type);
2598 cvt_type
2599 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2600 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2601 if (cvt_type == NULL_TREE)
2602 goto unsupported;
2603 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2604 &decl1, &codecvt1))
2605 goto unsupported;
2606 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2607 &code1, &multi_step_cvt,
2608 &interm_types))
2609 break;
2610 goto unsupported;
2612 default:
2613 gcc_unreachable ();
2616 if (!vec_stmt) /* transformation not required. */
2618 if (dump_enabled_p ())
2619 dump_printf_loc (MSG_NOTE, vect_location,
2620 "=== vectorizable_conversion ===\n");
2621 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2623 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2624 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2626 else if (modifier == NARROW)
2628 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2629 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2631 else
2633 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2634 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2636 interm_types.release ();
2637 return true;
2640 /** Transform. **/
2641 if (dump_enabled_p ())
2642 dump_printf_loc (MSG_NOTE, vect_location,
2643 "transform conversion. ncopies = %d.\n", ncopies);
2645 if (op_type == binary_op)
2647 if (CONSTANT_CLASS_P (op0))
2648 op0 = fold_convert (TREE_TYPE (op1), op0);
2649 else if (CONSTANT_CLASS_P (op1))
2650 op1 = fold_convert (TREE_TYPE (op0), op1);
2653 /* In case of multi-step conversion, we first generate conversion operations
2654 to the intermediate types, and then from those types to the final one.
2655 We create vector destinations for the intermediate type (TYPES) received
2656 from supportable_*_operation, and store them in the correct order
2657 for future use in vect_create_vectorized_*_stmts (). */
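/* E.g. an (int -> char) narrowing conversion typically goes through an
   intermediate short type, int -> short -> char, so vector destinations are
   created both for the final char vectors and for the intermediate short
   vectors. */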
2658 vec_dsts.create (multi_step_cvt + 1);
2659 vec_dest = vect_create_destination_var (scalar_dest,
2660 (cvt_type && modifier == WIDEN)
2661 ? cvt_type : vectype_out);
2662 vec_dsts.quick_push (vec_dest);
2664 if (multi_step_cvt)
2666 for (i = interm_types.length () - 1;
2667 interm_types.iterate (i, &intermediate_type); i--)
2669 vec_dest = vect_create_destination_var (scalar_dest,
2670 intermediate_type);
2671 vec_dsts.quick_push (vec_dest);
2675 if (cvt_type)
2676 vec_dest = vect_create_destination_var (scalar_dest,
2677 modifier == WIDEN
2678 ? vectype_out : cvt_type);
2680 if (!slp_node)
2682 if (modifier == WIDEN)
2684 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
2685 if (op_type == binary_op)
2686 vec_oprnds1.create (1);
2688 else if (modifier == NARROW)
2689 vec_oprnds0.create (
2690 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2692 else if (code == WIDEN_LSHIFT_EXPR)
2693 vec_oprnds1.create (slp_node->vec_stmts_size);
2695 last_oprnd = op0;
2696 prev_stmt_info = NULL;
2697 switch (modifier)
2699 case NONE:
2700 for (j = 0; j < ncopies; j++)
2702 if (j == 0)
2703 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2704 -1);
2705 else
2706 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2708 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2710 /* Arguments are ready, create the new vector stmt. */
2711 if (code1 == CALL_EXPR)
2713 new_stmt = gimple_build_call (decl1, 1, vop0);
2714 new_temp = make_ssa_name (vec_dest, new_stmt);
2715 gimple_call_set_lhs (new_stmt, new_temp);
2717 else
2719 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2720 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2721 vop0, NULL);
2722 new_temp = make_ssa_name (vec_dest, new_stmt);
2723 gimple_assign_set_lhs (new_stmt, new_temp);
2726 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2727 if (slp_node)
2728 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2731 if (j == 0)
2732 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2733 else
2734 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2735 prev_stmt_info = vinfo_for_stmt (new_stmt);
2737 break;
2739 case WIDEN:
2740 /* In case the vectorization factor (VF) is bigger than the number
2741 of elements that we can fit in a vectype (nunits), we have to
2742 generate more than one vector stmt - i.e - we need to "unroll"
2743 the vector stmt by a factor VF/nunits. */
2744 for (j = 0; j < ncopies; j++)
2746 /* Handle uses. */
2747 if (j == 0)
2749 if (slp_node)
2751 if (code == WIDEN_LSHIFT_EXPR)
2753 unsigned int k;
2755 vec_oprnd1 = op1;
2756 /* Store vec_oprnd1 for every vector stmt to be created
2757 for SLP_NODE. We check during the analysis that all
2758 the shift arguments are the same. */
2759 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2760 vec_oprnds1.quick_push (vec_oprnd1);
2762 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2763 slp_node, -1);
2765 else
2766 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2767 &vec_oprnds1, slp_node, -1);
2769 else
2771 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2772 vec_oprnds0.quick_push (vec_oprnd0);
2773 if (op_type == binary_op)
2775 if (code == WIDEN_LSHIFT_EXPR)
2776 vec_oprnd1 = op1;
2777 else
2778 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2779 NULL);
2780 vec_oprnds1.quick_push (vec_oprnd1);
2784 else
2786 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2787 vec_oprnds0.truncate (0);
2788 vec_oprnds0.quick_push (vec_oprnd0);
2789 if (op_type == binary_op)
2791 if (code == WIDEN_LSHIFT_EXPR)
2792 vec_oprnd1 = op1;
2793 else
2794 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2795 vec_oprnd1);
2796 vec_oprnds1.truncate (0);
2797 vec_oprnds1.quick_push (vec_oprnd1);
2801 /* Arguments are ready. Create the new vector stmts. */
2802 for (i = multi_step_cvt; i >= 0; i--)
2804 tree this_dest = vec_dsts[i];
2805 enum tree_code c1 = code1, c2 = code2;
2806 if (i == 0 && codecvt2 != ERROR_MARK)
2808 c1 = codecvt1;
2809 c2 = codecvt2;
2811 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2812 &vec_oprnds1,
2813 stmt, this_dest, gsi,
2814 c1, c2, decl1, decl2,
2815 op_type);
2818 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2820 if (cvt_type)
2822 if (codecvt1 == CALL_EXPR)
2824 new_stmt = gimple_build_call (decl1, 1, vop0);
2825 new_temp = make_ssa_name (vec_dest, new_stmt);
2826 gimple_call_set_lhs (new_stmt, new_temp);
2828 else
2830 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2831 new_temp = make_ssa_name (vec_dest, NULL);
2832 new_stmt = gimple_build_assign_with_ops (codecvt1,
2833 new_temp,
2834 vop0, NULL);
2837 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2839 else
2840 new_stmt = SSA_NAME_DEF_STMT (vop0);
2842 if (slp_node)
2843 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2844 else
2846 if (!prev_stmt_info)
2847 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2848 else
2849 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2850 prev_stmt_info = vinfo_for_stmt (new_stmt);
2855 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2856 break;
2858 case NARROW:
2859 /* In case the vectorization factor (VF) is bigger than the number
2860 of elements that we can fit in a vectype (nunits), we have to
2861 generate more than one vector stmt - i.e - we need to "unroll"
2862 the vector stmt by a factor VF/nunits. */
2863 for (j = 0; j < ncopies; j++)
2865 /* Handle uses. */
2866 if (slp_node)
2867 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2868 slp_node, -1);
2869 else
2871 vec_oprnds0.truncate (0);
2872 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2873 vect_pow2 (multi_step_cvt) - 1);
2876 /* Arguments are ready. Create the new vector stmts. */
2877 if (cvt_type)
2878 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2880 if (codecvt1 == CALL_EXPR)
2882 new_stmt = gimple_build_call (decl1, 1, vop0);
2883 new_temp = make_ssa_name (vec_dest, new_stmt);
2884 gimple_call_set_lhs (new_stmt, new_temp);
2886 else
2888 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2889 new_temp = make_ssa_name (vec_dest, NULL);
2890 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2891 vop0, NULL);
2894 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2895 vec_oprnds0[i] = new_temp;
2898 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2899 stmt, vec_dsts, gsi,
2900 slp_node, code1,
2901 &prev_stmt_info);
2904 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2905 break;
2908 vec_oprnds0.release ();
2909 vec_oprnds1.release ();
2910 vec_dsts.release ();
2911 interm_types.release ();
2913 return true;
2917 /* Function vectorizable_assignment.
2919 Check if STMT performs an assignment (copy) that can be vectorized.
2920 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2921 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2922 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2924 static bool
2925 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2926 gimple *vec_stmt, slp_tree slp_node)
2928 tree vec_dest;
2929 tree scalar_dest;
2930 tree op;
2931 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2932 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2933 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2934 tree new_temp;
2935 tree def;
2936 gimple def_stmt;
2937 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2938 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2939 int ncopies;
2940 int i, j;
2941 vec<tree> vec_oprnds = vNULL;
2942 tree vop;
2943 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2944 gimple new_stmt = NULL;
2945 stmt_vec_info prev_stmt_info = NULL;
2946 enum tree_code code;
2947 tree vectype_in;
2949 /* Multiple types in SLP are handled by creating the appropriate number of
2950 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2951 case of SLP. */
2952 if (slp_node || PURE_SLP_STMT (stmt_info))
2953 ncopies = 1;
2954 else
2955 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2957 gcc_assert (ncopies >= 1);
2959 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2960 return false;
2962 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2963 return false;
2965 /* Is vectorizable assignment? */
2966 if (!is_gimple_assign (stmt))
2967 return false;
2969 scalar_dest = gimple_assign_lhs (stmt);
2970 if (TREE_CODE (scalar_dest) != SSA_NAME)
2971 return false;
2973 code = gimple_assign_rhs_code (stmt);
2974 if (gimple_assign_single_p (stmt)
2975 || code == PAREN_EXPR
2976 || CONVERT_EXPR_CODE_P (code))
2977 op = gimple_assign_rhs1 (stmt);
2978 else
2979 return false;
2981 if (code == VIEW_CONVERT_EXPR)
2982 op = TREE_OPERAND (op, 0);
2984 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2985 &def_stmt, &def, &dt[0], &vectype_in))
2987 if (dump_enabled_p ())
2988 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2989 "use not simple.\n");
2990 return false;
2993 /* We can handle NOP_EXPR conversions that do not change the number
2994 of elements or the vector size. */
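/* For example, an (int -> unsigned int) cast is handled here (same number
   of elements, same vector size), while an (int -> short) cast changes the
   vector size and is left to vectorizable_conversion instead. */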
2995 if ((CONVERT_EXPR_CODE_P (code)
2996 || code == VIEW_CONVERT_EXPR)
2997 && (!vectype_in
2998 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2999 || (GET_MODE_SIZE (TYPE_MODE (vectype))
3000 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
3001 return false;
3003 /* We do not handle bit-precision changes. */
3004 if ((CONVERT_EXPR_CODE_P (code)
3005 || code == VIEW_CONVERT_EXPR)
3006 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3007 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3008 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3009 || ((TYPE_PRECISION (TREE_TYPE (op))
3010 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
3011 /* But a conversion that does not change the bit-pattern is ok. */
3012 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3013 > TYPE_PRECISION (TREE_TYPE (op)))
3014 && TYPE_UNSIGNED (TREE_TYPE (op))))
3016 if (dump_enabled_p ())
3017 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3018 "type conversion to/from bit-precision "
3019 "unsupported.\n");
3020 return false;
3023 if (!vec_stmt) /* transformation not required. */
3025 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
3026 if (dump_enabled_p ())
3027 dump_printf_loc (MSG_NOTE, vect_location,
3028 "=== vectorizable_assignment ===\n");
3029 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3030 return true;
3033 /** Transform. **/
3034 if (dump_enabled_p ())
3035 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
3037 /* Handle def. */
3038 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3040 /* Handle use. */
3041 for (j = 0; j < ncopies; j++)
3043 /* Handle uses. */
3044 if (j == 0)
3045 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
3046 else
3047 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
3049 /* Arguments are ready. Create the new vector stmt. */
3050 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3052 if (CONVERT_EXPR_CODE_P (code)
3053 || code == VIEW_CONVERT_EXPR)
3054 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
3055 new_stmt = gimple_build_assign (vec_dest, vop);
3056 new_temp = make_ssa_name (vec_dest, new_stmt);
3057 gimple_assign_set_lhs (new_stmt, new_temp);
3058 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3059 if (slp_node)
3060 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3063 if (slp_node)
3064 continue;
3066 if (j == 0)
3067 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3068 else
3069 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3071 prev_stmt_info = vinfo_for_stmt (new_stmt);
3074 vec_oprnds.release ();
3075 return true;
3079 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
3080 either as shift by a scalar or by a vector. */
3082 bool
3083 vect_supportable_shift (enum tree_code code, tree scalar_type)
3086 enum machine_mode vec_mode;
3087 optab optab;
3088 int icode;
3089 tree vectype;
3091 vectype = get_vectype_for_scalar_type (scalar_type);
3092 if (!vectype)
3093 return false;
3095 optab = optab_for_tree_code (code, vectype, optab_scalar);
3096 if (!optab
3097 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
3099 optab = optab_for_tree_code (code, vectype, optab_vector);
3100 if (!optab
3101 || (optab_handler (optab, TYPE_MODE (vectype))
3102 == CODE_FOR_nothing))
3103 return false;
3106 vec_mode = TYPE_MODE (vectype);
3107 icode = (int) optab_handler (optab, vec_mode);
3108 if (icode == CODE_FOR_nothing)
3109 return false;
3111 return true;
3115 /* Function vectorizable_shift.
3117 Check if STMT performs a shift operation that can be vectorized.
3118 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3119 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3120 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
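/* For example:
       for (i = 0; i < n; i++) a[i] = b[i] << k;     <-- scalar shift amount
       for (i = 0; i < n; i++) a[i] = b[i] << c[i];  <-- vector shift amount
   The first form can use the vector/scalar shift optab, the second needs
   the vector/vector one. */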
3122 static bool
3123 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3124 gimple *vec_stmt, slp_tree slp_node)
3126 tree vec_dest;
3127 tree scalar_dest;
3128 tree op0, op1 = NULL;
3129 tree vec_oprnd1 = NULL_TREE;
3130 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3131 tree vectype;
3132 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3133 enum tree_code code;
3134 enum machine_mode vec_mode;
3135 tree new_temp;
3136 optab optab;
3137 int icode;
3138 enum machine_mode optab_op2_mode;
3139 tree def;
3140 gimple def_stmt;
3141 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3142 gimple new_stmt = NULL;
3143 stmt_vec_info prev_stmt_info;
3144 int nunits_in;
3145 int nunits_out;
3146 tree vectype_out;
3147 tree op1_vectype;
3148 int ncopies;
3149 int j, i;
3150 vec<tree> vec_oprnds0 = vNULL;
3151 vec<tree> vec_oprnds1 = vNULL;
3152 tree vop0, vop1;
3153 unsigned int k;
3154 bool scalar_shift_arg = true;
3155 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3156 int vf;
3158 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3159 return false;
3161 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3162 return false;
3164 /* Is STMT a vectorizable binary/unary operation? */
3165 if (!is_gimple_assign (stmt))
3166 return false;
3168 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3169 return false;
3171 code = gimple_assign_rhs_code (stmt);
3173 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3174 || code == RROTATE_EXPR))
3175 return false;
3177 scalar_dest = gimple_assign_lhs (stmt);
3178 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3179 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3180 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3182 if (dump_enabled_p ())
3183 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3184 "bit-precision shifts not supported.\n");
3185 return false;
3188 op0 = gimple_assign_rhs1 (stmt);
3189 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3190 &def_stmt, &def, &dt[0], &vectype))
3192 if (dump_enabled_p ())
3193 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3194 "use not simple.\n");
3195 return false;
3197 /* If op0 is an external or constant def, use a vector type with
3198 the same size as the output vector type. */
3199 if (!vectype)
3200 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3201 if (vec_stmt)
3202 gcc_assert (vectype);
3203 if (!vectype)
3205 if (dump_enabled_p ())
3206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3207 "no vectype for scalar type\n");
3208 return false;
3211 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3212 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3213 if (nunits_out != nunits_in)
3214 return false;
3216 op1 = gimple_assign_rhs2 (stmt);
3217 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3218 &def, &dt[1], &op1_vectype))
3220 if (dump_enabled_p ())
3221 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3222 "use not simple.\n");
3223 return false;
3226 if (loop_vinfo)
3227 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3228 else
3229 vf = 1;
3231 /* Multiple types in SLP are handled by creating the appropriate number of
3232 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3233 case of SLP. */
3234 if (slp_node || PURE_SLP_STMT (stmt_info))
3235 ncopies = 1;
3236 else
3237 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3239 gcc_assert (ncopies >= 1);
3241 /* Determine whether the shift amount is a vector or a scalar. If the
3242 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3244 if (dt[1] == vect_internal_def && !slp_node)
3245 scalar_shift_arg = false;
3246 else if (dt[1] == vect_constant_def
3247 || dt[1] == vect_external_def
3248 || dt[1] == vect_internal_def)
3250 /* In SLP, we need to check whether the shift count is the same in all
3251 the stmts. In loops, if it is a constant or invariant, it is always
3252 a scalar shift. */
3253 if (slp_node)
3255 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3256 gimple slpstmt;
3258 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
3259 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3260 scalar_shift_arg = false;
3263 else
3265 if (dump_enabled_p ())
3266 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3267 "operand mode requires invariant argument.\n");
3268 return false;
3271 /* Vector shifted by vector. */
3272 if (!scalar_shift_arg)
3274 optab = optab_for_tree_code (code, vectype, optab_vector);
3275 if (dump_enabled_p ())
3276 dump_printf_loc (MSG_NOTE, vect_location,
3277 "vector/vector shift/rotate found.\n");
3279 if (!op1_vectype)
3280 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3281 if (op1_vectype == NULL_TREE
3282 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3284 if (dump_enabled_p ())
3285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3286 "unusable type for last operand in"
3287 " vector/vector shift/rotate.\n");
3288 return false;
3291 /* See if the machine has a vector-shifted-by-scalar insn, and if not,
3292 see whether it has a vector-shifted-by-vector insn. */
3293 else
3295 optab = optab_for_tree_code (code, vectype, optab_scalar);
3296 if (optab
3297 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3299 if (dump_enabled_p ())
3300 dump_printf_loc (MSG_NOTE, vect_location,
3301 "vector/scalar shift/rotate found.\n");
3303 else
3305 optab = optab_for_tree_code (code, vectype, optab_vector);
3306 if (optab
3307 && (optab_handler (optab, TYPE_MODE (vectype))
3308 != CODE_FOR_nothing))
3310 scalar_shift_arg = false;
3312 if (dump_enabled_p ())
3313 dump_printf_loc (MSG_NOTE, vect_location,
3314 "vector/vector shift/rotate found.\n");
3316 /* Unlike the other binary operators, shifts/rotates have
3317 the rhs being int, instead of the same type as the lhs,
3318 so make sure the scalar is the right type if we are
3319 dealing with vectors of long long/long/short/char. */
3320 if (dt[1] == vect_constant_def)
3321 op1 = fold_convert (TREE_TYPE (vectype), op1);
3322 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3323 TREE_TYPE (op1)))
3325 if (slp_node
3326 && TYPE_MODE (TREE_TYPE (vectype))
3327 != TYPE_MODE (TREE_TYPE (op1)))
3329 if (dump_enabled_p ())
3330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3331 "unusable type for last operand in"
3332 " vector/vector shift/rotate.\n");
3333 return false;
3335 if (vec_stmt && !slp_node)
3337 op1 = fold_convert (TREE_TYPE (vectype), op1);
3338 op1 = vect_init_vector (stmt, op1,
3339 TREE_TYPE (vectype), NULL);
3346 /* Supportable by target? */
3347 if (!optab)
3349 if (dump_enabled_p ())
3350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3351 "no optab.\n");
3352 return false;
3354 vec_mode = TYPE_MODE (vectype);
3355 icode = (int) optab_handler (optab, vec_mode);
3356 if (icode == CODE_FOR_nothing)
3358 if (dump_enabled_p ())
3359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3360 "op not supported by target.\n");
3361 /* Check only during analysis. */
3362 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3363 || (vf < vect_min_worthwhile_factor (code)
3364 && !vec_stmt))
3365 return false;
3366 if (dump_enabled_p ())
3367 dump_printf_loc (MSG_NOTE, vect_location,
3368 "proceeding using word mode.\n");
3371 /* Worthwhile without SIMD support? Check only during analysis. */
3372 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3373 && vf < vect_min_worthwhile_factor (code)
3374 && !vec_stmt)
3376 if (dump_enabled_p ())
3377 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3378 "not worthwhile without SIMD support.\n");
3379 return false;
3382 if (!vec_stmt) /* transformation not required. */
3384 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3385 if (dump_enabled_p ())
3386 dump_printf_loc (MSG_NOTE, vect_location,
3387 "=== vectorizable_shift ===\n");
3388 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3389 return true;
3392 /** Transform. **/
3394 if (dump_enabled_p ())
3395 dump_printf_loc (MSG_NOTE, vect_location,
3396 "transform binary/unary operation.\n");
3398 /* Handle def. */
3399 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3401 prev_stmt_info = NULL;
3402 for (j = 0; j < ncopies; j++)
3404 /* Handle uses. */
3405 if (j == 0)
3407 if (scalar_shift_arg)
3409 /* Vector shl and shr insn patterns can be defined with scalar
3410 operand 2 (shift operand). In this case, use constant or loop
3411 invariant op1 directly, without extending it to vector mode
3412 first. */
3413 optab_op2_mode = insn_data[icode].operand[2].mode;
3414 if (!VECTOR_MODE_P (optab_op2_mode))
3416 if (dump_enabled_p ())
3417 dump_printf_loc (MSG_NOTE, vect_location,
3418 "operand 1 using scalar mode.\n");
3419 vec_oprnd1 = op1;
3420 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
3421 vec_oprnds1.quick_push (vec_oprnd1);
3422 if (slp_node)
3424 /* Store vec_oprnd1 for every vector stmt to be created
3425 for SLP_NODE. We check during the analysis that all
3426 the shift arguments are the same.
3427 TODO: Allow different constants for different vector
3428 stmts generated for an SLP instance. */
3429 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3430 vec_oprnds1.quick_push (vec_oprnd1);
3435 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3436 (a special case for certain kinds of vector shifts); otherwise,
3437 operand 1 should be of a vector type (the usual case). */
3438 if (vec_oprnd1)
3439 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3440 slp_node, -1);
3441 else
3442 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3443 slp_node, -1);
3445 else
3446 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3448 /* Arguments are ready. Create the new vector stmt. */
3449 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3451 vop1 = vec_oprnds1[i];
3452 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3453 new_temp = make_ssa_name (vec_dest, new_stmt);
3454 gimple_assign_set_lhs (new_stmt, new_temp);
3455 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3456 if (slp_node)
3457 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3460 if (slp_node)
3461 continue;
3463 if (j == 0)
3464 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3465 else
3466 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3467 prev_stmt_info = vinfo_for_stmt (new_stmt);
3470 vec_oprnds0.release ();
3471 vec_oprnds1.release ();
3473 return true;
3477 static tree permute_vec_elements (tree, tree, tree, gimple,
3478 gimple_stmt_iterator *);
3481 /* Function vectorizable_operation.
3483 Check if STMT performs a binary, unary or ternary operation that can
3484 be vectorized.
3485 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3486 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3487 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
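/* Typical examples are a[i] = b[i] + c[i] (binary PLUS_EXPR),
   a[i] = -b[i] (unary NEGATE_EXPR), and e.g. FMA_EXPR as a ternary op. */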
3489 static bool
3490 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3491 gimple *vec_stmt, slp_tree slp_node)
3493 tree vec_dest;
3494 tree scalar_dest;
3495 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3496 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3497 tree vectype;
3498 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3499 enum tree_code code;
3500 enum machine_mode vec_mode;
3501 tree new_temp;
3502 int op_type;
3503 optab optab;
3504 int icode;
3505 tree def;
3506 gimple def_stmt;
3507 enum vect_def_type dt[3]
3508 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3509 gimple new_stmt = NULL;
3510 stmt_vec_info prev_stmt_info;
3511 int nunits_in;
3512 int nunits_out;
3513 tree vectype_out;
3514 int ncopies;
3515 int j, i;
3516 vec<tree> vec_oprnds0 = vNULL;
3517 vec<tree> vec_oprnds1 = vNULL;
3518 vec<tree> vec_oprnds2 = vNULL;
3519 tree vop0, vop1, vop2;
3520 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3521 int vf;
3523 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3524 return false;
3526 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3527 return false;
3529 /* Is STMT a vectorizable binary/unary operation? */
3530 if (!is_gimple_assign (stmt))
3531 return false;
3533 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3534 return false;
3536 code = gimple_assign_rhs_code (stmt);
3538 /* For pointer addition, we should use the normal plus for
3539 the vector addition. */
3540 if (code == POINTER_PLUS_EXPR)
3541 code = PLUS_EXPR;
3543 /* Support only unary, binary and ternary operations. */
3544 op_type = TREE_CODE_LENGTH (code);
3545 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3547 if (dump_enabled_p ())
3548 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3549 "num. args = %d (not unary/binary/ternary op).\n",
3550 op_type);
3551 return false;
3554 scalar_dest = gimple_assign_lhs (stmt);
3555 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3557 /* Most operations cannot handle bit-precision types without extra
3558 truncations. */
3559 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3560 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3561 /* Exceptions are bitwise binary operations. */
3562 && code != BIT_IOR_EXPR
3563 && code != BIT_XOR_EXPR
3564 && code != BIT_AND_EXPR)
3566 if (dump_enabled_p ())
3567 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3568 "bit-precision arithmetic not supported.\n");
3569 return false;
3572 op0 = gimple_assign_rhs1 (stmt);
3573 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3574 &def_stmt, &def, &dt[0], &vectype))
3576 if (dump_enabled_p ())
3577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3578 "use not simple.\n");
3579 return false;
3581 /* If op0 is an external or constant def, use a vector type with
3582 the same size as the output vector type. */
3583 if (!vectype)
3584 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3585 if (vec_stmt)
3586 gcc_assert (vectype);
3587 if (!vectype)
3589 if (dump_enabled_p ())
3591 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3592 "no vectype for scalar type ");
3593 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3594 TREE_TYPE (op0));
3595 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3598 return false;
3601 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3602 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3603 if (nunits_out != nunits_in)
3604 return false;
3606 if (op_type == binary_op || op_type == ternary_op)
3608 op1 = gimple_assign_rhs2 (stmt);
3609 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3610 &def, &dt[1]))
3612 if (dump_enabled_p ())
3613 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3614 "use not simple.\n");
3615 return false;
3618 if (op_type == ternary_op)
3620 op2 = gimple_assign_rhs3 (stmt);
3621 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3622 &def, &dt[2]))
3624 if (dump_enabled_p ())
3625 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3626 "use not simple.\n");
3627 return false;
3631 if (loop_vinfo)
3632 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3633 else
3634 vf = 1;
3636 /* Multiple types in SLP are handled by creating the appropriate number of
3637 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3638 case of SLP. */
3639 if (slp_node || PURE_SLP_STMT (stmt_info))
3640 ncopies = 1;
3641 else
3642 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3644 gcc_assert (ncopies >= 1);
3646 /* Shifts are handled in vectorizable_shift (). */
3647 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3648 || code == RROTATE_EXPR)
3649 return false;
3651 /* Supportable by target? */
3653 vec_mode = TYPE_MODE (vectype);
3654 if (code == MULT_HIGHPART_EXPR)
3656 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
3657 icode = LAST_INSN_CODE;
3658 else
3659 icode = CODE_FOR_nothing;
3661 else
3663 optab = optab_for_tree_code (code, vectype, optab_default);
3664 if (!optab)
3666 if (dump_enabled_p ())
3667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3668 "no optab.\n");
3669 return false;
3671 icode = (int) optab_handler (optab, vec_mode);
3674 if (icode == CODE_FOR_nothing)
3676 if (dump_enabled_p ())
3677 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3678 "op not supported by target.\n");
3679 /* Check only during analysis. */
3680 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3681 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
3682 return false;
3683 if (dump_enabled_p ())
3684 dump_printf_loc (MSG_NOTE, vect_location,
3685 "proceeding using word mode.\n");
3688 /* Worthwhile without SIMD support? Check only during analysis. */
3689 if (!VECTOR_MODE_P (vec_mode)
3690 && !vec_stmt
3691 && vf < vect_min_worthwhile_factor (code))
3693 if (dump_enabled_p ())
3694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3695 "not worthwhile without SIMD support.\n");
3696 return false;
3699 if (!vec_stmt) /* transformation not required. */
3701 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3702 if (dump_enabled_p ())
3703 dump_printf_loc (MSG_NOTE, vect_location,
3704 "=== vectorizable_operation ===\n");
3705 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3706 return true;
3709 /** Transform. **/
3711 if (dump_enabled_p ())
3712 dump_printf_loc (MSG_NOTE, vect_location,
3713 "transform binary/unary operation.\n");
3715 /* Handle def. */
3716 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3718 /* In case the vectorization factor (VF) is bigger than the number
3719 of elements that we can fit in a vectype (nunits), we have to generate
3720 more than one vector stmt - i.e - we need to "unroll" the
3721 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3722 from one copy of the vector stmt to the next, in the field
3723 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3724 stages to find the correct vector defs to be used when vectorizing
3725 stmts that use the defs of the current stmt. The example below
3726 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3727 we need to create 4 vectorized stmts):
3729 before vectorization:
3730 RELATED_STMT VEC_STMT
3731 S1: x = memref - -
3732 S2: z = x + 1 - -
3734 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3735 there):
3736 RELATED_STMT VEC_STMT
3737 VS1_0: vx0 = memref0 VS1_1 -
3738 VS1_1: vx1 = memref1 VS1_2 -
3739 VS1_2: vx2 = memref2 VS1_3 -
3740 VS1_3: vx3 = memref3 - -
3741 S1: x = load - VS1_0
3742 S2: z = x + 1 - -
3744 step2: vectorize stmt S2 (done here):
3745 To vectorize stmt S2 we first need to find the relevant vector
3746 def for the first operand 'x'. This is, as usual, obtained from
3747 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3748 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3749 relevant vector def 'vx0'. Having found 'vx0' we can generate
3750 the vector stmt VS2_0, and as usual, record it in the
3751 STMT_VINFO_VEC_STMT of stmt S2.
3752 When creating the second copy (VS2_1), we obtain the relevant vector
3753 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3754 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3755 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3756 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3757 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3758 chain of stmts and pointers:
3759 RELATED_STMT VEC_STMT
3760 VS1_0: vx0 = memref0 VS1_1 -
3761 VS1_1: vx1 = memref1 VS1_2 -
3762 VS1_2: vx2 = memref2 VS1_3 -
3763 VS1_3: vx3 = memref3 - -
3764 S1: x = load - VS1_0
3765 VS2_0: vz0 = vx0 + v1 VS2_1 -
3766 VS2_1: vz1 = vx1 + v1 VS2_2 -
3767 VS2_2: vz2 = vx2 + v1 VS2_3 -
3768 VS2_3: vz3 = vx3 + v1 - -
3769 S2: z = x + 1 - VS2_0 */
3771 prev_stmt_info = NULL;
3772 for (j = 0; j < ncopies; j++)
3774 /* Handle uses. */
3775 if (j == 0)
3777 if (op_type == binary_op || op_type == ternary_op)
3778 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3779 slp_node, -1);
3780 else
3781 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3782 slp_node, -1);
3783 if (op_type == ternary_op)
3785 vec_oprnds2.create (1);
3786 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
3787 stmt,
3788 NULL));
3791 else
3793 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3794 if (op_type == ternary_op)
3796 tree vec_oprnd = vec_oprnds2.pop ();
3797 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
3798 vec_oprnd));
3802 /* Arguments are ready. Create the new vector stmt. */
3803 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3805 vop1 = ((op_type == binary_op || op_type == ternary_op)
3806 ? vec_oprnds1[i] : NULL_TREE);
3807 vop2 = ((op_type == ternary_op)
3808 ? vec_oprnds2[i] : NULL_TREE);
3809 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
3810 vop0, vop1, vop2);
3811 new_temp = make_ssa_name (vec_dest, new_stmt);
3812 gimple_assign_set_lhs (new_stmt, new_temp);
3813 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3814 if (slp_node)
3815 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3818 if (slp_node)
3819 continue;
3821 if (j == 0)
3822 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3823 else
3824 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3825 prev_stmt_info = vinfo_for_stmt (new_stmt);
3828 vec_oprnds0.release ();
3829 vec_oprnds1.release ();
3830 vec_oprnds2.release ();
3832 return true;
3835 /* A helper function to ensure data reference DR's base alignment
3836 for STMT_INFO. */
3838 static void
3839 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
3841 if (!dr->aux)
3842 return;
3844 if (((dataref_aux *)dr->aux)->base_misaligned)
3846 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3847 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
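      /* Raise the alignment of the declaration underlying the data
         reference to the vector type's alignment, and mark it as a
         user-requested alignment, so that the vectorized accesses to
         this base become aligned accesses.  */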
3849 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
3850 DECL_USER_ALIGN (base_decl) = 1;
3851 ((dataref_aux *)dr->aux)->base_misaligned = false;
3856 /* Function vectorizable_store.
3858 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3859 can be vectorized.
3860 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3861 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3862 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3864 static bool
3865 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3866 slp_tree slp_node)
3868 tree scalar_dest;
3869 tree data_ref;
3870 tree op;
3871 tree vec_oprnd = NULL_TREE;
3872 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3873 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3874 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3875 tree elem_type;
3876 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3877 struct loop *loop = NULL;
3878 enum machine_mode vec_mode;
3879 tree dummy;
3880 enum dr_alignment_support alignment_support_scheme;
3881 tree def;
3882 gimple def_stmt;
3883 enum vect_def_type dt;
3884 stmt_vec_info prev_stmt_info = NULL;
3885 tree dataref_ptr = NULL_TREE;
3886 tree dataref_offset = NULL_TREE;
3887 gimple ptr_incr = NULL;
3888 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3889 int ncopies;
3890 int j;
3891 gimple next_stmt, first_stmt = NULL;
3892 bool grouped_store = false;
3893 bool store_lanes_p = false;
3894 unsigned int group_size, i;
3895 vec<tree> dr_chain = vNULL;
3896 vec<tree> oprnds = vNULL;
3897 vec<tree> result_chain = vNULL;
3898 bool inv_p;
3899 vec<tree> vec_oprnds = vNULL;
3900 bool slp = (slp_node != NULL);
3901 unsigned int vec_num;
3902 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3903 tree aggr_type;
3905 if (loop_vinfo)
3906 loop = LOOP_VINFO_LOOP (loop_vinfo);
3908 /* Multiple types in SLP are handled by creating the appropriate number of
3909 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3910 case of SLP. */
3911 if (slp || PURE_SLP_STMT (stmt_info))
3912 ncopies = 1;
3913 else
3914 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3916 gcc_assert (ncopies >= 1);
3918 /* FORNOW. This restriction should be relaxed. */
3919 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3921 if (dump_enabled_p ())
3922 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3923 "multiple types in nested loop.\n");
3924 return false;
3927 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3928 return false;
3930 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3931 return false;
3933 /* Is vectorizable store? */
3935 if (!is_gimple_assign (stmt))
3936 return false;
3938 scalar_dest = gimple_assign_lhs (stmt);
3939 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3940 && is_pattern_stmt_p (stmt_info))
3941 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3942 if (TREE_CODE (scalar_dest) != ARRAY_REF
3943 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
3944 && TREE_CODE (scalar_dest) != INDIRECT_REF
3945 && TREE_CODE (scalar_dest) != COMPONENT_REF
3946 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3947 && TREE_CODE (scalar_dest) != REALPART_EXPR
3948 && TREE_CODE (scalar_dest) != MEM_REF)
3949 return false;
3951 gcc_assert (gimple_assign_single_p (stmt));
3952 op = gimple_assign_rhs1 (stmt);
3953 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3954 &def, &dt))
3956 if (dump_enabled_p ())
3957 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3958 "use not simple.\n");
3959 return false;
3962 elem_type = TREE_TYPE (vectype);
3963 vec_mode = TYPE_MODE (vectype);
3965 /* FORNOW. In some cases can vectorize even if data-type not supported
3966 (e.g. - array initialization with 0). */
3967 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3968 return false;
3970 if (!STMT_VINFO_DATA_REF (stmt_info))
3971 return false;
3973 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3974 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3975 size_zero_node) < 0)
3977 if (dump_enabled_p ())
3978 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3979 "negative step for store.\n");
3980 return false;
3983 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
3985 grouped_store = true;
3986 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3987 if (!slp && !PURE_SLP_STMT (stmt_info))
3989 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3990 if (vect_store_lanes_supported (vectype, group_size))
3991 store_lanes_p = true;
3992 else if (!vect_grouped_store_supported (vectype, group_size))
3993 return false;
3996 if (first_stmt == stmt)
3998 /* STMT is the leader of the group. Check the operands of all the
3999 stmts of the group. */
4000 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
4001 while (next_stmt)
4003 gcc_assert (gimple_assign_single_p (next_stmt));
4004 op = gimple_assign_rhs1 (next_stmt);
4005 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
4006 &def_stmt, &def, &dt))
4008 if (dump_enabled_p ())
4009 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4010 "use not simple.\n");
4011 return false;
4013 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4018 if (!vec_stmt) /* transformation not required. */
4020 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
4021 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
4022 NULL, NULL, NULL);
4023 return true;
4026 /** Transform. **/
4028 ensure_base_align (stmt_info, dr);
4030 if (grouped_store)
4032 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4033 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4035 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
4037 /* FORNOW */
4038 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
4040 /* We vectorize all the stmts of the interleaving group when we
4041 reach the last stmt in the group. */
4042 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
4043 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
4044 && !slp)
4046 *vec_stmt = NULL;
4047 return true;
4050 if (slp)
4052 grouped_store = false;
4053 /* VEC_NUM is the number of vect stmts to be created for this
4054 group. */
4055 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4056 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4057 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4058 op = gimple_assign_rhs1 (first_stmt);
4060 else
4061 /* VEC_NUM is the number of vect stmts to be created for this
4062 group. */
4063 vec_num = group_size;
4065 else
4067 first_stmt = stmt;
4068 first_dr = dr;
4069 group_size = vec_num = 1;
4072 if (dump_enabled_p ())
4073 dump_printf_loc (MSG_NOTE, vect_location,
4074 "transform store. ncopies = %d\n", ncopies);
4076 dr_chain.create (group_size);
4077 oprnds.create (group_size);
4079 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4080 gcc_assert (alignment_support_scheme);
4081 /* Targets with store-lane instructions must not require explicit
4082 realignment. */
4083 gcc_assert (!store_lanes_p
4084 || alignment_support_scheme == dr_aligned
4085 || alignment_support_scheme == dr_unaligned_supported);
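  /* For store-lanes each generated access writes the whole interleaving
     group at once, so the aggregate type is an array with room for
     VEC_NUM * NUNITS elements; otherwise one vector is written per
     access.  */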
4087 if (store_lanes_p)
4088 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4089 else
4090 aggr_type = vectype;
4092 /* In case the vectorization factor (VF) is bigger than the number
4093 of elements that we can fit in a vectype (nunits), we have to generate
4094 more than one vector stmt - i.e., we need to "unroll" the
4095 vector stmt by a factor VF/nunits. For more details see the documentation
4096 in vect_get_vec_def_for_stmt_copy. */
4098 /* In case of interleaving (non-unit grouped access):
4100 S1: &base + 2 = x2
4101 S2: &base = x0
4102 S3: &base + 1 = x1
4103 S4: &base + 3 = x3
4105 We create vectorized stores starting from the base address (the access of
4106 the first stmt in the chain, S2 in the above example) when the last store
4107 stmt of the chain (S4) is reached:
4109 VS1: &base = vx2
4110 VS2: &base + vec_size*1 = vx0
4111 VS3: &base + vec_size*2 = vx1
4112 VS4: &base + vec_size*3 = vx3
4114 Then permutation statements are generated:
4116 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4117 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
4120 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4121 (the order of the data-refs in the output of vect_permute_store_chain
4122 corresponds to the order of scalar stmts in the interleaving chain - see
4123 the documentation of vect_permute_store_chain()).
4125 In case of both multiple types and interleaving, the above vector stores and
4126 permutation stmts are created for every copy. The result vector stmts are
4127 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4128 STMT_VINFO_RELATED_STMT for the next copies.
4131 prev_stmt_info = NULL;
4132 for (j = 0; j < ncopies; j++)
4134 gimple new_stmt;
4136 if (j == 0)
4138 if (slp)
4140 /* Get vectorized arguments for SLP_NODE. */
4141 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4142 NULL, slp_node, -1);
4144 vec_oprnd = vec_oprnds[0];
4146 else
4148 /* For interleaved stores we collect vectorized defs for all the
4149 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4150 used as an input to vect_permute_store_chain(), and OPRNDS as
4151 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4153 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4154 OPRNDS are of size 1. */
4155 next_stmt = first_stmt;
4156 for (i = 0; i < group_size; i++)
4158 /* Since gaps are not supported for interleaved stores,
4159 GROUP_SIZE is the exact number of stmts in the chain.
4160 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4161 there is no interleaving, GROUP_SIZE is 1, and only one
4162 iteration of the loop will be executed. */
4163 gcc_assert (next_stmt
4164 && gimple_assign_single_p (next_stmt));
4165 op = gimple_assign_rhs1 (next_stmt);
4167 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4168 NULL);
4169 dr_chain.quick_push (vec_oprnd);
4170 oprnds.quick_push (vec_oprnd);
4171 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4175 /* We should have caught mismatched types earlier. */
4176 gcc_assert (useless_type_conversion_p (vectype,
4177 TREE_TYPE (vec_oprnd)));
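          /* For a SIMD-lane access whose base is a plain local variable
             with zero offset and init, the base address is invariant; in
             that case use it directly and keep the running offset in
             DATAREF_OFFSET as a compile-time constant instead of
             creating and bumping a data-ref pointer.  */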
4178 bool simd_lane_access_p
4179 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
4180 if (simd_lane_access_p
4181 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
4182 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
4183 && integer_zerop (DR_OFFSET (first_dr))
4184 && integer_zerop (DR_INIT (first_dr))
4185 && alias_sets_conflict_p (get_alias_set (aggr_type),
4186 get_alias_set (DR_REF (first_dr))))
4188 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
4189 dataref_offset = build_int_cst (reference_alias_ptr_type
4190 (DR_REF (first_dr)), 0);
4191 inv_p = false;
4193 else
4194 dataref_ptr
4195 = vect_create_data_ref_ptr (first_stmt, aggr_type,
4196 simd_lane_access_p ? loop : NULL,
4197 NULL_TREE, &dummy, gsi, &ptr_incr,
4198 simd_lane_access_p, &inv_p);
4199 gcc_assert (bb_vinfo || !inv_p);
4201 else
4203 /* For interleaved stores we created vectorized defs for all the
4204 defs stored in OPRNDS in the previous iteration (previous copy).
4205 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4206 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4207 next copy.
4208 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4209 OPRNDS are of size 1. */
4210 for (i = 0; i < group_size; i++)
4212 op = oprnds[i];
4213 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4214 &def, &dt);
4215 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4216 dr_chain[i] = vec_oprnd;
4217 oprnds[i] = vec_oprnd;
4219 if (dataref_offset)
4220 dataref_offset
4221 = int_const_binop (PLUS_EXPR, dataref_offset,
4222 TYPE_SIZE_UNIT (aggr_type));
4223 else
4224 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4225 TYPE_SIZE_UNIT (aggr_type));
4228 if (store_lanes_p)
4230 tree vec_array;
4232 /* Combine all the vectors into an array. */
4233 vec_array = create_vector_array (vectype, vec_num);
4234 for (i = 0; i < vec_num; i++)
4236 vec_oprnd = dr_chain[i];
4237 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4240 /* Emit:
4241 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4242 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4243 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4244 gimple_call_set_lhs (new_stmt, data_ref);
4245 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4247 else
4249 new_stmt = NULL;
4250 if (grouped_store)
4252 if (j == 0)
4253 result_chain.create (group_size);
4254 /* Permute. */
4255 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4256 &result_chain);
4259 next_stmt = first_stmt;
4260 for (i = 0; i < vec_num; i++)
4262 unsigned align, misalign;
4264 if (i > 0)
4265 /* Bump the vector pointer. */
4266 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4267 stmt, NULL_TREE);
4269 if (slp)
4270 vec_oprnd = vec_oprnds[i];
4271 else if (grouped_store)
4272 /* For grouped stores vectorized defs are interleaved in
4273 vect_permute_store_chain(). */
4274 vec_oprnd = result_chain[i];
4276 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4277 dataref_offset
4278 ? dataref_offset
4279 : build_int_cst (reference_alias_ptr_type
4280 (DR_REF (first_dr)), 0));
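          /* Record what is known about the alignment of this access:
             full vector alignment, unknown misalignment (fall back to
             element alignment), or a known misalignment.  */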
4281 align = TYPE_ALIGN_UNIT (vectype);
4282 if (aligned_access_p (first_dr))
4283 misalign = 0;
4284 else if (DR_MISALIGNMENT (first_dr) == -1)
4286 TREE_TYPE (data_ref)
4287 = build_aligned_type (TREE_TYPE (data_ref),
4288 TYPE_ALIGN (elem_type));
4289 align = TYPE_ALIGN_UNIT (elem_type);
4290 misalign = 0;
4292 else
4294 TREE_TYPE (data_ref)
4295 = build_aligned_type (TREE_TYPE (data_ref),
4296 TYPE_ALIGN (elem_type));
4297 misalign = DR_MISALIGNMENT (first_dr);
4299 if (dataref_offset == NULL_TREE)
4300 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4301 misalign);
4303 /* Arguments are ready. Create the new vector stmt. */
4304 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4305 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4307 if (slp)
4308 continue;
4310 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4311 if (!next_stmt)
4312 break;
4315 if (!slp)
4317 if (j == 0)
4318 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4319 else
4320 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4321 prev_stmt_info = vinfo_for_stmt (new_stmt);
4325 dr_chain.release ();
4326 oprnds.release ();
4327 result_chain.release ();
4328 vec_oprnds.release ();
4330 return true;
4333 /* Given a vector type VECTYPE and permutation SEL returns
4334 the VECTOR_CST mask that implements the permutation of the
4335 vector elements. If that is impossible to do, returns NULL. */
4337 tree
4338 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4340 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
4341 int i, nunits;
4343 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4345 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4346 return NULL;
4348 mask_elt_type = lang_hooks.types.type_for_mode
4349 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
4350 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4352 mask_elts = XALLOCAVEC (tree, nunits);
4353 for (i = nunits - 1; i >= 0; i--)
4354 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4355 mask_vec = build_vector (mask_type, mask_elts);
4357 return mask_vec;
4360 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4361 reversal of the vector elements. If that is impossible to do,
4362 returns NULL. */
4364 static tree
4365 perm_mask_for_reverse (tree vectype)
4367 int i, nunits;
4368 unsigned char *sel;
4370 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4371 sel = XALLOCAVEC (unsigned char, nunits);
4373 for (i = 0; i < nunits; ++i)
4374 sel[i] = nunits - 1 - i;
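  /* E.g. for a four-element vector this builds the selector
     {3, 2, 1, 0}, i.e. a full reversal of the elements.  */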
4376 return vect_gen_perm_mask (vectype, sel);
4379 /* Given vector variables X and Y that were generated for the scalar
4380 STMT, generate instructions to permute the vector elements of X and Y
4381 using permutation mask MASK_VEC, insert them at *GSI and return the
4382 permuted vector variable. */
4384 static tree
4385 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4386 gimple_stmt_iterator *gsi)
4388 tree vectype = TREE_TYPE (x);
4389 tree perm_dest, data_ref;
4390 gimple perm_stmt;
4392 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4393 data_ref = make_ssa_name (perm_dest, NULL);
4395 /* Generate the permute statement. */
4396 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4397 x, y, mask_vec);
4398 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4400 return data_ref;
4403 /* vectorizable_load.
4405 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4406 can be vectorized.
4407 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4408 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4409 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4411 static bool
4412 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4413 slp_tree slp_node, slp_instance slp_node_instance)
4415 tree scalar_dest;
4416 tree vec_dest = NULL;
4417 tree data_ref = NULL;
4418 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4419 stmt_vec_info prev_stmt_info;
4420 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4421 struct loop *loop = NULL;
4422 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4423 bool nested_in_vect_loop = false;
4424 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4425 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4426 tree elem_type;
4427 tree new_temp;
4428 enum machine_mode mode;
4429 gimple new_stmt = NULL;
4430 tree dummy;
4431 enum dr_alignment_support alignment_support_scheme;
4432 tree dataref_ptr = NULL_TREE;
4433 tree dataref_offset = NULL_TREE;
4434 gimple ptr_incr = NULL;
4435 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4436 int ncopies;
4437 int i, j, group_size, group_gap;
4438 tree msq = NULL_TREE, lsq;
4439 tree offset = NULL_TREE;
4440 tree realignment_token = NULL_TREE;
4441 gimple phi = NULL;
4442 vec<tree> dr_chain = vNULL;
4443 bool grouped_load = false;
4444 bool load_lanes_p = false;
4445 gimple first_stmt;
4446 bool inv_p;
4447 bool negative = false;
4448 bool compute_in_loop = false;
4449 struct loop *at_loop;
4450 int vec_num;
4451 bool slp = (slp_node != NULL);
4452 bool slp_perm = false;
4453 enum tree_code code;
4454 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4455 int vf;
4456 tree aggr_type;
4457 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4458 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4459 int gather_scale = 1;
4460 enum vect_def_type gather_dt = vect_unknown_def_type;
4462 if (loop_vinfo)
4464 loop = LOOP_VINFO_LOOP (loop_vinfo);
4465 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4466 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4468 else
4469 vf = 1;
4471 /* Multiple types in SLP are handled by creating the appropriate number of
4472 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4473 case of SLP. */
4474 if (slp || PURE_SLP_STMT (stmt_info))
4475 ncopies = 1;
4476 else
4477 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4479 gcc_assert (ncopies >= 1);
4481 /* FORNOW. This restriction should be relaxed. */
4482 if (nested_in_vect_loop && ncopies > 1)
4484 if (dump_enabled_p ())
4485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4486 "multiple types in nested loop.\n");
4487 return false;
4490 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4491 return false;
4493 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4494 return false;
4496 /* Is vectorizable load? */
4497 if (!is_gimple_assign (stmt))
4498 return false;
4500 scalar_dest = gimple_assign_lhs (stmt);
4501 if (TREE_CODE (scalar_dest) != SSA_NAME)
4502 return false;
4504 code = gimple_assign_rhs_code (stmt);
4505 if (code != ARRAY_REF
4506 && code != BIT_FIELD_REF
4507 && code != INDIRECT_REF
4508 && code != COMPONENT_REF
4509 && code != IMAGPART_EXPR
4510 && code != REALPART_EXPR
4511 && code != MEM_REF
4512 && TREE_CODE_CLASS (code) != tcc_declaration)
4513 return false;
4515 if (!STMT_VINFO_DATA_REF (stmt_info))
4516 return false;
4518 elem_type = TREE_TYPE (vectype);
4519 mode = TYPE_MODE (vectype);
4521 /* FORNOW. In some cases can vectorize even if data-type not supported
4522 (e.g. - data copies). */
4523 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4525 if (dump_enabled_p ())
4526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4527 "Aligned load, but unsupported type.\n");
4528 return false;
4531 /* Check if the load is a part of an interleaving chain. */
4532 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
4534 grouped_load = true;
4535 /* FORNOW */
4536 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4538 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4539 if (!slp && !PURE_SLP_STMT (stmt_info))
4541 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4542 if (vect_load_lanes_supported (vectype, group_size))
4543 load_lanes_p = true;
4544 else if (!vect_grouped_load_supported (vectype, group_size))
4545 return false;
4550 if (STMT_VINFO_GATHER_P (stmt_info))
4552 gimple def_stmt;
4553 tree def;
4554 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4555 &gather_off, &gather_scale);
4556 gcc_assert (gather_decl);
4557 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4558 &def_stmt, &def, &gather_dt,
4559 &gather_off_vectype))
4561 if (dump_enabled_p ())
4562 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4563 "gather index use not simple.\n");
4564 return false;
4567 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4569 else
4571 negative = tree_int_cst_compare (nested_in_vect_loop
4572 ? STMT_VINFO_DR_STEP (stmt_info)
4573 : DR_STEP (dr),
4574 size_zero_node) < 0;
4575 if (negative && ncopies > 1)
4577 if (dump_enabled_p ())
4578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4579 "multiple types with negative step.\n");
4580 return false;
4583 if (negative)
4585 if (grouped_load)
4587 if (dump_enabled_p ())
4588 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4589 "negative step for group load not supported"
4590 "\n");
4591 return false;
4593 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4594 if (alignment_support_scheme != dr_aligned
4595 && alignment_support_scheme != dr_unaligned_supported)
4597 if (dump_enabled_p ())
4598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4599 "negative step but alignment required.\n");
4600 return false;
4602 if (!perm_mask_for_reverse (vectype))
4604 if (dump_enabled_p ())
4605 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4606 "negative step and reversing not supported."
4607 "\n");
4608 return false;
4613 if (!vec_stmt) /* transformation not required. */
4615 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4616 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
4617 return true;
4620 if (dump_enabled_p ())
4621 dump_printf_loc (MSG_NOTE, vect_location,
4622 "transform load. ncopies = %d\n", ncopies);
4624 /** Transform. **/
4626 ensure_base_align (stmt_info, dr);
4628 if (STMT_VINFO_GATHER_P (stmt_info))
4630 tree vec_oprnd0 = NULL_TREE, op;
4631 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4632 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4633 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4634 edge pe = loop_preheader_edge (loop);
4635 gimple_seq seq;
4636 basic_block new_bb;
4637 enum { NARROW, NONE, WIDEN } modifier;
4638 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4640 if (nunits == gather_off_nunits)
4641 modifier = NONE;
4642 else if (nunits == gather_off_nunits / 2)
4644 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4645 modifier = WIDEN;
4647 for (i = 0; i < gather_off_nunits; ++i)
4648 sel[i] = i | nunits;
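      /* E.g. for NUNITS == 4 and GATHER_OFF_NUNITS == 8 this builds the
         selector {4, 5, 6, 7, 4, 5, 6, 7}, which makes the upper half of
         the offset vector available in the low element positions for the
         odd-numbered copies.  */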
4650 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4651 gcc_assert (perm_mask != NULL_TREE);
4653 else if (nunits == gather_off_nunits * 2)
4655 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4656 modifier = NARROW;
4658 for (i = 0; i < nunits; ++i)
4659 sel[i] = i < gather_off_nunits
4660 ? i : i + nunits - gather_off_nunits;
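      /* E.g. for NUNITS == 8 and GATHER_OFF_NUNITS == 4 this builds the
         selector {0, 1, 2, 3, 8, 9, 10, 11}, which concatenates the low
         halves of two consecutive gather results into one full vector.  */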
4662 perm_mask = vect_gen_perm_mask (vectype, sel);
4663 gcc_assert (perm_mask != NULL_TREE);
4664 ncopies *= 2;
4666 else
4667 gcc_unreachable ();
4669 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4670 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4671 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4672 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4673 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4674 scaletype = TREE_VALUE (arglist);
4675 gcc_checking_assert (types_compatible_p (srctype, rettype)
4676 && types_compatible_p (srctype, masktype));
4678 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4680 ptr = fold_convert (ptrtype, gather_base);
4681 if (!is_gimple_min_invariant (ptr))
4683 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4684 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4685 gcc_assert (!new_bb);
4688 /* Currently we support only unconditional gather loads,
4689 so mask should be all ones. */
4690 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4691 mask = build_int_cst (TREE_TYPE (masktype), -1);
4692 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4694 REAL_VALUE_TYPE r;
4695 long tmp[6];
4696 for (j = 0; j < 6; ++j)
4697 tmp[j] = -1;
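          /* Build a floating-point value whose bit pattern is all ones;
             together with the integer case above this yields an all-ones
             mask, i.e. every element active.  */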
4698 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4699 mask = build_real (TREE_TYPE (masktype), r);
4701 else
4702 gcc_unreachable ();
4703 mask = build_vector_from_val (masktype, mask);
4704 mask = vect_init_vector (stmt, mask, masktype, NULL);
4706 scale = build_int_cst (scaletype, gather_scale);
4708 prev_stmt_info = NULL;
4709 for (j = 0; j < ncopies; ++j)
4711 if (modifier == WIDEN && (j & 1))
4712 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4713 perm_mask, stmt, gsi);
4714 else if (j == 0)
4715 op = vec_oprnd0
4716 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4717 else
4718 op = vec_oprnd0
4719 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4721 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4723 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4724 == TYPE_VECTOR_SUBPARTS (idxtype));
4725 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4726 var = make_ssa_name (var, NULL);
4727 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4728 new_stmt
4729 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4730 op, NULL_TREE);
4731 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4732 op = var;
4735 new_stmt
4736 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4738 if (!useless_type_conversion_p (vectype, rettype))
4740 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4741 == TYPE_VECTOR_SUBPARTS (rettype));
4742 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4743 op = make_ssa_name (var, new_stmt);
4744 gimple_call_set_lhs (new_stmt, op);
4745 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4746 var = make_ssa_name (vec_dest, NULL);
4747 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4748 new_stmt
4749 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4750 NULL_TREE);
4752 else
4754 var = make_ssa_name (vec_dest, new_stmt);
4755 gimple_call_set_lhs (new_stmt, var);
4758 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4760 if (modifier == NARROW)
4762 if ((j & 1) == 0)
4764 prev_res = var;
4765 continue;
4767 var = permute_vec_elements (prev_res, var,
4768 perm_mask, stmt, gsi);
4769 new_stmt = SSA_NAME_DEF_STMT (var);
4772 if (prev_stmt_info == NULL)
4773 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4774 else
4775 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4776 prev_stmt_info = vinfo_for_stmt (new_stmt);
4778 return true;
4780 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4782 gimple_stmt_iterator incr_gsi;
4783 bool insert_after;
4784 gimple incr;
4785 tree offvar;
4786 tree ivstep;
4787 tree running_off;
4788 vec<constructor_elt, va_gc> *v = NULL;
4789 gimple_seq stmts = NULL;
4790 tree stride_base, stride_step, alias_off;
4792 gcc_assert (!nested_in_vect_loop);
4794 stride_base
4795 = fold_build_pointer_plus
4796 (unshare_expr (DR_BASE_ADDRESS (dr)),
4797 size_binop (PLUS_EXPR,
4798 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
4799 convert_to_ptrofftype (DR_INIT (dr))));
4800 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
4802 /* For a load with loop-invariant (but other than power-of-2)
4803 stride (i.e. not a grouped access) like so:
4805 for (i = 0; i < n; i += stride)
4806 ... = array[i];
4808 we generate a new induction variable and new accesses to
4809 form a new vector (or vectors, depending on ncopies):
4811 for (j = 0; ; j += VF*stride)
4812 tmp1 = array[j];
4813 tmp2 = array[j + stride];
4815 vectemp = {tmp1, tmp2, ...}
4818 ivstep = stride_step;
4819 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4820 build_int_cst (TREE_TYPE (ivstep), vf));
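      /* Create an induction variable that starts at STRIDE_BASE and is
         bumped by VF * STRIDE_STEP bytes per vector loop iteration; the
         scalar loads below then walk it in STRIDE_STEP increments.  */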
4822 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4824 create_iv (stride_base, ivstep, NULL,
4825 loop, &incr_gsi, insert_after,
4826 &offvar, NULL);
4827 incr = gsi_stmt (incr_gsi);
4828 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4830 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4831 if (stmts)
4832 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4834 prev_stmt_info = NULL;
4835 running_off = offvar;
4836 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
4837 for (j = 0; j < ncopies; j++)
4839 tree vec_inv;
4841 vec_alloc (v, nunits);
4842 for (i = 0; i < nunits; i++)
4844 tree newref, newoff;
4845 gimple incr;
4846 newref = build2 (MEM_REF, TREE_TYPE (vectype),
4847 running_off, alias_off);
4849 newref = force_gimple_operand_gsi (gsi, newref, true,
4850 NULL_TREE, true,
4851 GSI_SAME_STMT);
4852 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
4853 newoff = copy_ssa_name (running_off, NULL);
4854 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4855 running_off, stride_step);
4856 vect_finish_stmt_generation (stmt, incr, gsi);
4858 running_off = newoff;
4861 vec_inv = build_constructor (vectype, v);
4862 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4863 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4865 if (j == 0)
4866 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4867 else
4868 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4869 prev_stmt_info = vinfo_for_stmt (new_stmt);
4871 return true;
4874 if (grouped_load)
4876 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4877 if (slp
4878 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
4879 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
4880 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4882 /* Check if the chain of loads is already vectorized. */
4883 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
4884 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
4885 ??? But we can only do so if there is exactly one
4886 as we have no way to get at the rest. Leave the CSE
4887 opportunity alone.
4888 ??? With the group load eventually participating
4889 in multiple different permutations (having multiple
4890 slp nodes which refer to the same group) the CSE
4891 is even wrong code. See PR56270. */
4892 && !slp)
4894 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4895 return true;
4897 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4898 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4900 /* VEC_NUM is the number of vect stmts to be created for this group. */
4901 if (slp)
4903 grouped_load = false;
4904 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4905 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4906 slp_perm = true;
4907 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
4909 else
4911 vec_num = group_size;
4912 group_gap = 0;
4915 else
4917 first_stmt = stmt;
4918 first_dr = dr;
4919 group_size = vec_num = 1;
4920 group_gap = 0;
4923 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4924 gcc_assert (alignment_support_scheme);
4925 /* Targets with load-lane instructions must not require explicit
4926 realignment. */
4927 gcc_assert (!load_lanes_p
4928 || alignment_support_scheme == dr_aligned
4929 || alignment_support_scheme == dr_unaligned_supported);
4931 /* In case the vectorization factor (VF) is bigger than the number
4932 of elements that we can fit in a vectype (nunits), we have to generate
4933 more than one vector stmt - i.e., we need to "unroll" the
4934 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4935 from one copy of the vector stmt to the next, in the field
4936 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4937 stages to find the correct vector defs to be used when vectorizing
4938 stmts that use the defs of the current stmt. The example below
4939 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4940 need to create 4 vectorized stmts):
4942 before vectorization:
4943 RELATED_STMT VEC_STMT
4944 S1: x = memref - -
4945 S2: z = x + 1 - -
4947 step 1: vectorize stmt S1:
4948 We first create the vector stmt VS1_0, and, as usual, record a
4949 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4950 Next, we create the vector stmt VS1_1, and record a pointer to
4951 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4952 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4953 stmts and pointers:
4954 RELATED_STMT VEC_STMT
4955 VS1_0: vx0 = memref0 VS1_1 -
4956 VS1_1: vx1 = memref1 VS1_2 -
4957 VS1_2: vx2 = memref2 VS1_3 -
4958 VS1_3: vx3 = memref3 - -
4959 S1: x = load - VS1_0
4960 S2: z = x + 1 - -
4962 See the documentation of vect_get_vec_def_for_stmt_copy for how the
4963 information recorded in the RELATED_STMT field is used to vectorize
4964 stmt S2. */
4966 /* In case of interleaving (non-unit grouped access):
4968 S1: x2 = &base + 2
4969 S2: x0 = &base
4970 S3: x1 = &base + 1
4971 S4: x3 = &base + 3
4973 Vectorized loads are created in the order of memory accesses
4974 starting from the access of the first stmt of the chain:
4976 VS1: vx0 = &base
4977 VS2: vx1 = &base + vec_size*1
4978 VS3: vx2 = &base + vec_size*2
4979 VS4: vx3 = &base + vec_size*3
4981 Then permutation statements are generated:
4983 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4984 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4987 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4988 (the order of the data-refs in the output of vect_permute_load_chain
4989 corresponds to the order of scalar stmts in the interleaving chain - see
4990 the documentation of vect_permute_load_chain()).
4991 The generation of permutation stmts and recording them in
4992 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4994 In case of both multiple types and interleaving, the vector loads and
4995 permutation stmts above are created for every copy. The result vector
4996 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4997 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4999 /* If the data reference is aligned (dr_aligned) or potentially unaligned
5000 on a target that supports unaligned accesses (dr_unaligned_supported)
5001 we generate the following code:
5002 p = initial_addr;
5003 indx = 0;
5004 loop {
5005 p = p + indx * vectype_size;
5006 vec_dest = *(p);
5007 indx = indx + 1;
5010 Otherwise, the data reference is potentially unaligned on a target that
5011 does not support unaligned accesses (dr_explicit_realign_optimized) -
5012 then generate the following code, in which the data in each iteration is
5013 obtained by two vector loads, one from the previous iteration, and one
5014 from the current iteration:
5015 p1 = initial_addr;
5016 msq_init = *(floor(p1))
5017 p2 = initial_addr + VS - 1;
5018 realignment_token = call target_builtin;
5019 indx = 0;
5020 loop {
5021 p2 = p2 + indx * vectype_size
5022 lsq = *(floor(p2))
5023 vec_dest = realign_load (msq, lsq, realignment_token)
5024 indx = indx + 1;
5025 msq = lsq;
5026 } */
5028 /* If the misalignment remains the same throughout the execution of the
5029 loop, we can create the init_addr and permutation mask at the loop
5030 preheader. Otherwise, it needs to be created inside the loop.
5031 This can only occur when vectorizing memory accesses in the inner-loop
5032 nested within an outer-loop that is being vectorized. */
5034 if (nested_in_vect_loop
5035 && (TREE_INT_CST_LOW (DR_STEP (dr))
5036 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
5038 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
5039 compute_in_loop = true;
5042 if ((alignment_support_scheme == dr_explicit_realign_optimized
5043 || alignment_support_scheme == dr_explicit_realign)
5044 && !compute_in_loop)
5046 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
5047 alignment_support_scheme, NULL_TREE,
5048 &at_loop);
5049 if (alignment_support_scheme == dr_explicit_realign_optimized)
5051 phi = SSA_NAME_DEF_STMT (msq);
5052 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5055 else
5056 at_loop = loop;
5058 if (negative)
5059 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5061 if (load_lanes_p)
5062 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5063 else
5064 aggr_type = vectype;
5066 prev_stmt_info = NULL;
5067 for (j = 0; j < ncopies; j++)
5069 /* 1. Create the vector or array pointer update chain. */
5070 if (j == 0)
5072 bool simd_lane_access_p
5073 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5074 if (simd_lane_access_p
5075 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5076 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5077 && integer_zerop (DR_OFFSET (first_dr))
5078 && integer_zerop (DR_INIT (first_dr))
5079 && alias_sets_conflict_p (get_alias_set (aggr_type),
5080 get_alias_set (DR_REF (first_dr)))
5081 && (alignment_support_scheme == dr_aligned
5082 || alignment_support_scheme == dr_unaligned_supported))
5084 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5085 dataref_offset = build_int_cst (reference_alias_ptr_type
5086 (DR_REF (first_dr)), 0);
5087 inv_p = false;
5089 else
5090 dataref_ptr
5091 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
5092 offset, &dummy, gsi, &ptr_incr,
5093 simd_lane_access_p, &inv_p);
5095 else if (dataref_offset)
5096 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
5097 TYPE_SIZE_UNIT (aggr_type));
5098 else
5099 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5100 TYPE_SIZE_UNIT (aggr_type));
5102 if (grouped_load || slp_perm)
5103 dr_chain.create (vec_num);
5105 if (load_lanes_p)
5107 tree vec_array;
5109 vec_array = create_vector_array (vectype, vec_num);
5111 /* Emit:
5112 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
5113 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5114 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
5115 gimple_call_set_lhs (new_stmt, vec_array);
5116 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5118 /* Extract each vector into an SSA_NAME. */
5119 for (i = 0; i < vec_num; i++)
5121 new_temp = read_vector_array (stmt, gsi, scalar_dest,
5122 vec_array, i);
5123 dr_chain.quick_push (new_temp);
5126 /* Record the mapping between SSA_NAMEs and statements. */
5127 vect_record_grouped_load_vectors (stmt, dr_chain);
5129 else
5131 for (i = 0; i < vec_num; i++)
5133 if (i > 0)
5134 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5135 stmt, NULL_TREE);
5137 /* 2. Create the vector-load in the loop. */
5138 switch (alignment_support_scheme)
5140 case dr_aligned:
5141 case dr_unaligned_supported:
5143 unsigned int align, misalign;
5145 data_ref
5146 = build2 (MEM_REF, vectype, dataref_ptr,
5147 dataref_offset
5148 ? dataref_offset
5149 : build_int_cst (reference_alias_ptr_type
5150 (DR_REF (first_dr)), 0));
5151 align = TYPE_ALIGN_UNIT (vectype);
5152 if (alignment_support_scheme == dr_aligned)
5154 gcc_assert (aligned_access_p (first_dr));
5155 misalign = 0;
5157 else if (DR_MISALIGNMENT (first_dr) == -1)
5159 TREE_TYPE (data_ref)
5160 = build_aligned_type (TREE_TYPE (data_ref),
5161 TYPE_ALIGN (elem_type));
5162 align = TYPE_ALIGN_UNIT (elem_type);
5163 misalign = 0;
5165 else
5167 TREE_TYPE (data_ref)
5168 = build_aligned_type (TREE_TYPE (data_ref),
5169 TYPE_ALIGN (elem_type));
5170 misalign = DR_MISALIGNMENT (first_dr);
5172 if (dataref_offset == NULL_TREE)
5173 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5174 align, misalign);
5175 break;
5177 case dr_explicit_realign:
5179 tree ptr, bump;
5180 tree vs_minus_1;
5182 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5184 if (compute_in_loop)
5185 msq = vect_setup_realignment (first_stmt, gsi,
5186 &realignment_token,
5187 dr_explicit_realign,
5188 dataref_ptr, NULL);
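              /* Compute floor (DATAREF_PTR) by masking off the low-order
                 alignment bits, and load the first, aligned vector from
                 there.  */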
5190 ptr = copy_ssa_name (dataref_ptr, NULL);
5191 new_stmt = gimple_build_assign_with_ops
5192 (BIT_AND_EXPR, ptr, dataref_ptr,
5193 build_int_cst
5194 (TREE_TYPE (dataref_ptr),
5195 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5196 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5197 data_ref
5198 = build2 (MEM_REF, vectype, ptr,
5199 build_int_cst (reference_alias_ptr_type
5200 (DR_REF (first_dr)), 0));
5201 vec_dest = vect_create_destination_var (scalar_dest,
5202 vectype);
5203 new_stmt = gimple_build_assign (vec_dest, data_ref);
5204 new_temp = make_ssa_name (vec_dest, new_stmt);
5205 gimple_assign_set_lhs (new_stmt, new_temp);
5206 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5207 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5208 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5209 msq = new_temp;
5211 bump = size_binop (MULT_EXPR, vs_minus_1,
5212 TYPE_SIZE_UNIT (elem_type));
5213 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5214 new_stmt = gimple_build_assign_with_ops
5215 (BIT_AND_EXPR, NULL_TREE, ptr,
5216 build_int_cst
5217 (TREE_TYPE (ptr),
5218 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5219 ptr = copy_ssa_name (dataref_ptr, new_stmt);
5220 gimple_assign_set_lhs (new_stmt, ptr);
5221 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5222 data_ref
5223 = build2 (MEM_REF, vectype, ptr,
5224 build_int_cst (reference_alias_ptr_type
5225 (DR_REF (first_dr)), 0));
5226 break;
5228 case dr_explicit_realign_optimized:
5229 new_temp = copy_ssa_name (dataref_ptr, NULL);
5230 new_stmt = gimple_build_assign_with_ops
5231 (BIT_AND_EXPR, new_temp, dataref_ptr,
5232 build_int_cst
5233 (TREE_TYPE (dataref_ptr),
5234 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5235 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5236 data_ref
5237 = build2 (MEM_REF, vectype, new_temp,
5238 build_int_cst (reference_alias_ptr_type
5239 (DR_REF (first_dr)), 0));
5240 break;
5241 default:
5242 gcc_unreachable ();
5244 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5245 new_stmt = gimple_build_assign (vec_dest, data_ref);
5246 new_temp = make_ssa_name (vec_dest, new_stmt);
5247 gimple_assign_set_lhs (new_stmt, new_temp);
5248 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5250 /* 3. Handle explicit realignment if necessary/supported.
5251 Create in loop:
5252 vec_dest = realign_load (msq, lsq, realignment_token) */
5253 if (alignment_support_scheme == dr_explicit_realign_optimized
5254 || alignment_support_scheme == dr_explicit_realign)
5256 lsq = gimple_assign_lhs (new_stmt);
5257 if (!realignment_token)
5258 realignment_token = dataref_ptr;
5259 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5260 new_stmt
5261 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5262 vec_dest, msq, lsq,
5263 realignment_token);
5264 new_temp = make_ssa_name (vec_dest, new_stmt);
5265 gimple_assign_set_lhs (new_stmt, new_temp);
5266 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5268 if (alignment_support_scheme == dr_explicit_realign_optimized)
5270 gcc_assert (phi);
5271 if (i == vec_num - 1 && j == ncopies - 1)
5272 add_phi_arg (phi, lsq,
5273 loop_latch_edge (containing_loop),
5274 UNKNOWN_LOCATION);
5275 msq = lsq;
5279 /* 4. Handle invariant-load. */
5280 if (inv_p && !bb_vinfo)
5282 gimple_stmt_iterator gsi2 = *gsi;
5283 gcc_assert (!grouped_load);
5284 gsi_next (&gsi2);
5285 new_temp = vect_init_vector (stmt, scalar_dest,
5286 vectype, &gsi2);
5287 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5290 if (negative)
5292 tree perm_mask = perm_mask_for_reverse (vectype);
5293 new_temp = permute_vec_elements (new_temp, new_temp,
5294 perm_mask, stmt, gsi);
5295 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5298 /* Collect vector loads and later create their permutation in
5299 vect_transform_grouped_load (). */
5300 if (grouped_load || slp_perm)
5301 dr_chain.quick_push (new_temp);
5303 /* Store vector loads in the corresponding SLP_NODE. */
5304 if (slp && !slp_perm)
5305 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5307 /* Bump the vector pointer to account for a gap. */
5308 if (slp && group_gap != 0)
5310 tree bump = size_binop (MULT_EXPR,
5311 TYPE_SIZE_UNIT (elem_type),
5312 size_int (group_gap));
5313 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5314 stmt, bump);
5318 if (slp && !slp_perm)
5319 continue;
5321 if (slp_perm)
5323 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
5324 slp_node_instance, false))
5326 dr_chain.release ();
5327 return false;
5330 else
5332 if (grouped_load)
5334 if (!load_lanes_p)
5335 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
5336 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5338 else
5340 if (j == 0)
5341 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5342 else
5343 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5344 prev_stmt_info = vinfo_for_stmt (new_stmt);
5347 dr_chain.release ();
5350 return true;
5353 /* Function vect_is_simple_cond.
5355 Input:
5356 LOOP - the loop that is being vectorized.
5357 COND - Condition that is checked for simple use.
5359 Output:
5360 *COMP_VECTYPE - the vector type for the comparison.
5362 Returns whether a COND can be vectorized. Checks whether
5363 condition operands are supportable using vect_is_simple_use. */
5365 static bool
5366 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5367 bb_vec_info bb_vinfo, tree *comp_vectype)
5369 tree lhs, rhs;
5370 tree def;
5371 enum vect_def_type dt;
5372 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5374 if (!COMPARISON_CLASS_P (cond))
5375 return false;
5377 lhs = TREE_OPERAND (cond, 0);
5378 rhs = TREE_OPERAND (cond, 1);
5380 if (TREE_CODE (lhs) == SSA_NAME)
5382 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5383 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5384 &lhs_def_stmt, &def, &dt, &vectype1))
5385 return false;
5387 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5388 && TREE_CODE (lhs) != FIXED_CST)
5389 return false;
5391 if (TREE_CODE (rhs) == SSA_NAME)
5393 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5394 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5395 &rhs_def_stmt, &def, &dt, &vectype2))
5396 return false;
5398 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5399 && TREE_CODE (rhs) != FIXED_CST)
5400 return false;
5402 *comp_vectype = vectype1 ? vectype1 : vectype2;
5403 return true;
5406 /* vectorizable_condition.
5408 Check if STMT is a conditional modify expression that can be vectorized.
5409 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5410 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5411 at GSI.
5413 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
5414 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
5415 the else clause if it is 2).
5417 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5419 bool
5420 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5421 gimple *vec_stmt, tree reduc_def, int reduc_index,
5422 slp_tree slp_node)
5424 tree scalar_dest = NULL_TREE;
5425 tree vec_dest = NULL_TREE;
5426 tree cond_expr, then_clause, else_clause;
5427 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5428 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5429 tree comp_vectype = NULL_TREE;
5430 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5431 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5432 tree vec_compare, vec_cond_expr;
5433 tree new_temp;
5434 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5435 tree def;
5436 enum vect_def_type dt, dts[4];
5437 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5438 int ncopies;
5439 enum tree_code code;
5440 stmt_vec_info prev_stmt_info = NULL;
5441 int i, j;
5442 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5443 vec<tree> vec_oprnds0 = vNULL;
5444 vec<tree> vec_oprnds1 = vNULL;
5445 vec<tree> vec_oprnds2 = vNULL;
5446 vec<tree> vec_oprnds3 = vNULL;
5447 tree vec_cmp_type;
5449 if (slp_node || PURE_SLP_STMT (stmt_info))
5450 ncopies = 1;
5451 else
5452 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5454 gcc_assert (ncopies >= 1);
5455 if (reduc_index && ncopies > 1)
5456 return false; /* FORNOW */
5458 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5459 return false;
5461 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5462 return false;
5464 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5465 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5466 && reduc_def))
5467 return false;
5469 /* FORNOW: not yet supported. */
5470 if (STMT_VINFO_LIVE_P (stmt_info))
5472 if (dump_enabled_p ())
5473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5474 "value used after loop.\n");
5475 return false;
5478 /* Is vectorizable conditional operation? */
5479 if (!is_gimple_assign (stmt))
5480 return false;
5482 code = gimple_assign_rhs_code (stmt);
5484 if (code != COND_EXPR)
5485 return false;
5487 cond_expr = gimple_assign_rhs1 (stmt);
5488 then_clause = gimple_assign_rhs2 (stmt);
5489 else_clause = gimple_assign_rhs3 (stmt);
5491 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5492 &comp_vectype)
5493 || !comp_vectype)
5494 return false;
5496 if (TREE_CODE (then_clause) == SSA_NAME)
5498 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5499 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5500 &then_def_stmt, &def, &dt))
5501 return false;
5503 else if (TREE_CODE (then_clause) != INTEGER_CST
5504 && TREE_CODE (then_clause) != REAL_CST
5505 && TREE_CODE (then_clause) != FIXED_CST)
5506 return false;
5508 if (TREE_CODE (else_clause) == SSA_NAME)
5510 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5511 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5512 &else_def_stmt, &def, &dt))
5513 return false;
5515 else if (TREE_CODE (else_clause) != INTEGER_CST
5516 && TREE_CODE (else_clause) != REAL_CST
5517 && TREE_CODE (else_clause) != FIXED_CST)
5518 return false;
5520 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
5521 /* The result of a vector comparison should be of signed integer type. */
5522 tree cmp_type = build_nonstandard_integer_type (prec, 0);
5523 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
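  /* E.g. for a V4SF comparison VEC_CMP_TYPE is V4SI: a signed integer
     vector with the same number of same-sized elements.  */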
5524 if (vec_cmp_type == NULL_TREE)
5525 return false;
5527 if (!vec_stmt)
5529 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5530 return expand_vec_cond_expr_p (vectype, comp_vectype);
5533 /* Transform. */
5535 if (!slp_node)
5537 vec_oprnds0.create (1);
5538 vec_oprnds1.create (1);
5539 vec_oprnds2.create (1);
5540 vec_oprnds3.create (1);
5543 /* Handle def. */
5544 scalar_dest = gimple_assign_lhs (stmt);
5545 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5547 /* Handle cond expr. */
5548 for (j = 0; j < ncopies; j++)
5550 gimple new_stmt = NULL;
5551 if (j == 0)
5553 if (slp_node)
5555 vec<tree> ops;
5556 ops.create (4);
5557 vec<vec<tree> > vec_defs;
5559 vec_defs.create (4);
5560 ops.safe_push (TREE_OPERAND (cond_expr, 0));
5561 ops.safe_push (TREE_OPERAND (cond_expr, 1));
5562 ops.safe_push (then_clause);
5563 ops.safe_push (else_clause);
5564 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5565 vec_oprnds3 = vec_defs.pop ();
5566 vec_oprnds2 = vec_defs.pop ();
5567 vec_oprnds1 = vec_defs.pop ();
5568 vec_oprnds0 = vec_defs.pop ();
5570 ops.release ();
5571 vec_defs.release ();
5573 else
5575 gimple gtemp;
5576 vec_cond_lhs =
5577 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5578 stmt, NULL);
5579 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5580 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5582 vec_cond_rhs =
5583 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5584 stmt, NULL);
5585 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5586 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5587 if (reduc_index == 1)
5588 vec_then_clause = reduc_def;
5589 else
5591 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5592 stmt, NULL);
5593 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5594 NULL, &gtemp, &def, &dts[2]);
5596 if (reduc_index == 2)
5597 vec_else_clause = reduc_def;
5598 else
5600 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5601 stmt, NULL);
5602 vect_is_simple_use (else_clause, stmt, loop_vinfo,
5603 NULL, &gtemp, &def, &dts[3]);
5607 else
5609 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5610 vec_oprnds0.pop ());
5611 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5612 vec_oprnds1.pop ());
5613 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5614 vec_oprnds2.pop ());
5615 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5616 vec_oprnds3.pop ());
5619 if (!slp_node)
5621 vec_oprnds0.quick_push (vec_cond_lhs);
5622 vec_oprnds1.quick_push (vec_cond_rhs);
5623 vec_oprnds2.quick_push (vec_then_clause);
5624 vec_oprnds3.quick_push (vec_else_clause);
5627 /* Arguments are ready. Create the new vector stmt. */
5628 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
5630 vec_cond_rhs = vec_oprnds1[i];
5631 vec_then_clause = vec_oprnds2[i];
5632 vec_else_clause = vec_oprnds3[i];
5634 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
5635 vec_cond_lhs, vec_cond_rhs);
5636 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5637 vec_compare, vec_then_clause, vec_else_clause);
5639 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5640 new_temp = make_ssa_name (vec_dest, new_stmt);
5641 gimple_assign_set_lhs (new_stmt, new_temp);
5642 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5643 if (slp_node)
5644 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5647 if (slp_node)
5648 continue;
5650 if (j == 0)
5651 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5652 else
5653 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5655 prev_stmt_info = vinfo_for_stmt (new_stmt);
5658 vec_oprnds0.release ();
5659 vec_oprnds1.release ();
5660 vec_oprnds2.release ();
5661 vec_oprnds3.release ();
5663 return true;
5667 /* Make sure the statement is vectorizable. */
5669 bool
5670 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5672 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5673 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5674 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5675 bool ok;
5676 tree scalar_type, vectype;
5677 gimple pattern_stmt;
5678 gimple_seq pattern_def_seq;
5680 if (dump_enabled_p ())
5682 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5683 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5684 dump_printf (MSG_NOTE, "\n");
5687 if (gimple_has_volatile_ops (stmt))
5689 if (dump_enabled_p ())
5690 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5691 "not vectorized: stmt has volatile operands\n");
5693 return false;
5696 /* Skip stmts that do not need to be vectorized. In loops this is expected
5697 to include:
5698 - the COND_EXPR which is the loop exit condition
5699 - any LABEL_EXPRs in the loop
5700 - computations that are used only for array indexing or loop control.
5701 In basic blocks we only analyze statements that are a part of some SLP
5702 instance; therefore, all the statements are relevant.
5704 A pattern statement needs to be analyzed instead of the original statement
5705 if the original statement is not relevant. Otherwise, we analyze both
5706 statements. In basic blocks we are called from some SLP instance
5707 traversal, so we do not analyze pattern stmts instead of the original ones;
5708 the pattern stmts are already part of the SLP instance. */
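/* For illustration only (a sketch, not taken from the pattern recognizer
   itself): given a scalar sequence such as

     S1: a_T = (TYPE) a_t;
     S2: b_T = (TYPE) b_t;
     S3: prod_T = a_T * b_T;

   S3 may be marked as being "in a pattern" with a related pattern stmt along
   the lines of

     S3': prod_T = WIDEN_MULT_EXPR <a_t, b_t>;

   If S3 itself is irrelevant, only S3' is examined below; if S3 is relevant,
   both are analyzed.  */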
5710 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5711 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5712 && !STMT_VINFO_LIVE_P (stmt_info))
5714 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5715 && pattern_stmt
5716 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5717 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5719 /* Analyze PATTERN_STMT instead of the original stmt. */
5720 stmt = pattern_stmt;
5721 stmt_info = vinfo_for_stmt (pattern_stmt);
5722 if (dump_enabled_p ())
5724 dump_printf_loc (MSG_NOTE, vect_location,
5725 "==> examining pattern statement: ");
5726 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5727 dump_printf (MSG_NOTE, "\n");
5730 else
5732 if (dump_enabled_p ())
5733 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
5735 return true;
5738 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5739 && node == NULL
5740 && pattern_stmt
5741 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5742 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5744 /* Analyze PATTERN_STMT too. */
5745 if (dump_enabled_p ())
5747 dump_printf_loc (MSG_NOTE, vect_location,
5748 "==> examining pattern statement: ");
5749 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5750 dump_printf (MSG_NOTE, "\n");
5753 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5754 return false;
5757 if (is_pattern_stmt_p (stmt_info)
5758 && node == NULL
5759 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5761 gimple_stmt_iterator si;
5763 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5765 gimple pattern_def_stmt = gsi_stmt (si);
5766 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5767 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5769 /* Analyze def stmt of STMT if it's a pattern stmt. */
5770 if (dump_enabled_p ())
5772 dump_printf_loc (MSG_NOTE, vect_location,
5773 "==> examining pattern def statement: ");
5774 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
5775 dump_printf (MSG_NOTE, "\n");
5778 if (!vect_analyze_stmt (pattern_def_stmt,
5779 need_to_vectorize, node))
5780 return false;
5785 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5787 case vect_internal_def:
5788 break;
5790 case vect_reduction_def:
5791 case vect_nested_cycle:
5792 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5793 || relevance == vect_used_in_outer_by_reduction
5794 || relevance == vect_unused_in_scope));
5795 break;
5797 case vect_induction_def:
5798 case vect_constant_def:
5799 case vect_external_def:
5800 case vect_unknown_def_type:
5801 default:
5802 gcc_unreachable ();
5805 if (bb_vinfo)
5807 gcc_assert (PURE_SLP_STMT (stmt_info));
5809 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5810 if (dump_enabled_p ())
5812 dump_printf_loc (MSG_NOTE, vect_location,
5813 "get vectype for scalar type: ");
5814 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
5815 dump_printf (MSG_NOTE, "\n");
5818 vectype = get_vectype_for_scalar_type (scalar_type);
5819 if (!vectype)
5821 if (dump_enabled_p ())
5823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5824 "not SLPed: unsupported data-type ");
5825 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5826 scalar_type);
5827 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5829 return false;
5832 if (dump_enabled_p ())
5834 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
5835 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
5836 dump_printf (MSG_NOTE, "\n");
5839 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5842 if (STMT_VINFO_RELEVANT_P (stmt_info))
5844 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5845 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5846 *need_to_vectorize = true;
5849 ok = true;
5850 if (!bb_vinfo
5851 && (STMT_VINFO_RELEVANT_P (stmt_info)
5852 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5853 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5854 || vectorizable_shift (stmt, NULL, NULL, NULL)
5855 || vectorizable_operation (stmt, NULL, NULL, NULL)
5856 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5857 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5858 || vectorizable_call (stmt, NULL, NULL, NULL)
5859 || vectorizable_store (stmt, NULL, NULL, NULL)
5860 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5861 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5862 else
5864 if (bb_vinfo)
5865 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5866 || vectorizable_shift (stmt, NULL, NULL, node)
5867 || vectorizable_operation (stmt, NULL, NULL, node)
5868 || vectorizable_assignment (stmt, NULL, NULL, node)
5869 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5870 || vectorizable_call (stmt, NULL, NULL, node)
5871 || vectorizable_store (stmt, NULL, NULL, node)
5872 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5875 if (!ok)
5877 if (dump_enabled_p ())
5879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5880 "not vectorized: relevant stmt not ");
5881 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5882 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5883 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5886 return false;
5889 if (bb_vinfo)
5890 return true;
5892 /* Stmts that are (also) "live" (i.e., that are used outside the loop)
5893 need extra handling, except for vectorizable reductions. */
5894 if (STMT_VINFO_LIVE_P (stmt_info)
5895 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5896 ok = vectorizable_live_operation (stmt, NULL, NULL);
5898 if (!ok)
5900 if (dump_enabled_p ())
5902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5903 "not vectorized: live stmt not ");
5904 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5905 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5906 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5909 return false;
5912 return true;
5916 /* Function vect_transform_stmt.
5918 Create a vectorized stmt to replace STMT, and insert it at GSI. */
5920 bool
5921 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5922 bool *grouped_store, slp_tree slp_node,
5923 slp_instance slp_node_instance)
5925 bool is_store = false;
5926 gimple vec_stmt = NULL;
5927 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5928 bool done;
5930 switch (STMT_VINFO_TYPE (stmt_info))
5932 case type_demotion_vec_info_type:
5933 case type_promotion_vec_info_type:
5934 case type_conversion_vec_info_type:
5935 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5936 gcc_assert (done);
5937 break;
5939 case induc_vec_info_type:
5940 gcc_assert (!slp_node);
5941 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5942 gcc_assert (done);
5943 break;
5945 case shift_vec_info_type:
5946 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5947 gcc_assert (done);
5948 break;
5950 case op_vec_info_type:
5951 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5952 gcc_assert (done);
5953 break;
5955 case assignment_vec_info_type:
5956 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5957 gcc_assert (done);
5958 break;
5960 case load_vec_info_type:
5961 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5962 slp_node_instance);
5963 gcc_assert (done);
5964 break;
5966 case store_vec_info_type:
5967 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5968 gcc_assert (done);
5969 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5971 /* In case of interleaving, the whole chain is vectorized when the
5972 last store in the chain is reached. Store stmts before the last
5973 one are skipped, and their vec_stmt_info shouldn't be freed
5974 meanwhile. */
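/* Rough illustration (not from the sources): for an interleaved group like

     a[2*i]   = x;   <-- earlier member: nothing is emitted for it here
     a[2*i+1] = y;   <-- last member: the whole chain is vectorized

   only the call for the last group member records a vectorized stmt, which
   is why IS_STORE is set from STMT_VINFO_VEC_STMT rather than
   unconditionally.  */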
5975 *grouped_store = true;
5976 if (STMT_VINFO_VEC_STMT (stmt_info))
5977 is_store = true;
5979 else
5980 is_store = true;
5981 break;
5983 case condition_vec_info_type:
5984 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5985 gcc_assert (done);
5986 break;
5988 case call_vec_info_type:
5989 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5990 stmt = gsi_stmt (*gsi);
5991 break;
5993 case reduc_vec_info_type:
5994 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5995 gcc_assert (done);
5996 break;
5998 default:
5999 if (!STMT_VINFO_LIVE_P (stmt_info))
6001 if (dump_enabled_p ())
6002 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6003 "stmt not supported.\n");
6004 gcc_unreachable ();
6008 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
6009 is being vectorized, but outside the immediately enclosing loop. */
6010 if (vec_stmt
6011 && STMT_VINFO_LOOP_VINFO (stmt_info)
6012 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
6013 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
6014 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
6015 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
6016 || STMT_VINFO_RELEVANT (stmt_info) ==
6017 vect_used_in_outer_by_reduction))
6019 struct loop *innerloop = LOOP_VINFO_LOOP (
6020 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
6021 imm_use_iterator imm_iter;
6022 use_operand_p use_p;
6023 tree scalar_dest;
6024 gimple exit_phi;
6026 if (dump_enabled_p ())
6027 dump_printf_loc (MSG_NOTE, vect_location,
6028 "Record the vdef for outer-loop vectorization.\n");
6030 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
6031 (to be used when vectorizing outer-loop stmts that use the DEF of
6032 STMT). */
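/* Illustrative sketch (hypothetical loop nest): when vectorizing the outer
   loop of

     for (i = 0; i < N; i++)
       {
         for (j = 0; j < M; j++)
           s_j = ...;          <-- inner-loop def
         ... = use (s);        <-- outer-loop use of that def
       }

   the vectorized def created for the inner-loop statement is stored on the
   inner-loop exit phi of s, which is where the outer-loop use will later
   pick it up.  */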
6033 if (gimple_code (stmt) == GIMPLE_PHI)
6034 scalar_dest = PHI_RESULT (stmt);
6035 else
6036 scalar_dest = gimple_assign_lhs (stmt);
6038 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
6040 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
6042 exit_phi = USE_STMT (use_p);
6043 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
6048 /* Handle stmts whose DEF is used outside the loop-nest that is
6049 being vectorized. */
6050 if (STMT_VINFO_LIVE_P (stmt_info)
6051 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
6053 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
6054 gcc_assert (done);
6057 if (vec_stmt)
6058 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
6060 return is_store;
6064 /* Remove a group of stores (for SLP or interleaving) and free their
6065 stmt_vec_info. */
6067 void
6068 vect_remove_stores (gimple first_stmt)
6070 gimple next = first_stmt;
6071 gimple tmp;
6072 gimple_stmt_iterator next_si;
6074 while (next)
6076 stmt_vec_info stmt_info = vinfo_for_stmt (next);
6078 tmp = GROUP_NEXT_ELEMENT (stmt_info);
6079 if (is_pattern_stmt_p (stmt_info))
6080 next = STMT_VINFO_RELATED_STMT (stmt_info);
6081 /* Free the attached stmt_vec_info and remove the stmt. */
6082 next_si = gsi_for_stmt (next);
6083 unlink_stmt_vdef (next);
6084 gsi_remove (&next_si, true);
6085 release_defs (next);
6086 free_stmt_vec_info (next);
6087 next = tmp;
6092 /* Function new_stmt_vec_info.
6094 Create and initialize a new stmt_vec_info struct for STMT. */
6096 stmt_vec_info
6097 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
6098 bb_vec_info bb_vinfo)
6100 stmt_vec_info res;
6101 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
6103 STMT_VINFO_TYPE (res) = undef_vec_info_type;
6104 STMT_VINFO_STMT (res) = stmt;
6105 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
6106 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
6107 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
6108 STMT_VINFO_LIVE_P (res) = false;
6109 STMT_VINFO_VECTYPE (res) = NULL;
6110 STMT_VINFO_VEC_STMT (res) = NULL;
6111 STMT_VINFO_VECTORIZABLE (res) = true;
6112 STMT_VINFO_IN_PATTERN_P (res) = false;
6113 STMT_VINFO_RELATED_STMT (res) = NULL;
6114 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
6115 STMT_VINFO_DATA_REF (res) = NULL;
6117 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
6118 STMT_VINFO_DR_OFFSET (res) = NULL;
6119 STMT_VINFO_DR_INIT (res) = NULL;
6120 STMT_VINFO_DR_STEP (res) = NULL;
6121 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
6123 if (gimple_code (stmt) == GIMPLE_PHI
6124 && is_loop_header_bb_p (gimple_bb (stmt)))
6125 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
6126 else
6127 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
6129 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
6130 STMT_SLP_TYPE (res) = loop_vect;
6131 GROUP_FIRST_ELEMENT (res) = NULL;
6132 GROUP_NEXT_ELEMENT (res) = NULL;
6133 GROUP_SIZE (res) = 0;
6134 GROUP_STORE_COUNT (res) = 0;
6135 GROUP_GAP (res) = 0;
6136 GROUP_SAME_DR_STMT (res) = NULL;
6138 return res;
6142 /* Create the vector holding stmt_vec_info structs (indexed by stmt UID). */
6144 void
6145 init_stmt_vec_info_vec (void)
6147 gcc_assert (!stmt_vec_info_vec.exists ());
6148 stmt_vec_info_vec.create (50);
6152 /* Free the vector holding stmt_vec_info structs. */
6154 void
6155 free_stmt_vec_info_vec (void)
6157 unsigned int i;
6158 vec_void_p info;
6159 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
6160 if (info != NULL)
6161 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
6162 gcc_assert (stmt_vec_info_vec.exists ());
6163 stmt_vec_info_vec.release ();
6167 /* Free stmt vectorization related info. */
6169 void
6170 free_stmt_vec_info (gimple stmt)
6172 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6174 if (!stmt_info)
6175 return;
6177 /* Check if this statement has a related "pattern stmt"
6178 (introduced by the vectorizer during the pattern recognition
6179 pass). Free the pattern's stmt_vec_info and its def stmts'
6180 stmt_vec_infos too. */
6181 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6183 stmt_vec_info patt_info
6184 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6185 if (patt_info)
6187 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6188 if (seq)
6190 gimple_stmt_iterator si;
6191 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6192 free_stmt_vec_info (gsi_stmt (si));
6194 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6198 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6199 set_vinfo_for_stmt (stmt, NULL);
6200 free (stmt_info);
6204 /* Function get_vectype_for_scalar_type_and_size.
6206 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
6207 by the target. */
6209 static tree
6210 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
6212 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
6213 enum machine_mode simd_mode;
6214 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
6215 int nunits;
6216 tree vectype;
6218 if (nbytes == 0)
6219 return NULL_TREE;
6221 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6222 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6223 return NULL_TREE;
6225 /* For vector types of elements whose mode precision doesn't
6226 match their type's precision we use an element type of mode
6227 precision. The vectorization routines will have to make sure
6228 they support the proper result truncation/extension.
6229 We also make sure to build vector types with INTEGER_TYPE
6230 component type only. */
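/* Example (illustrative): a _Bool element has TYPE_PRECISION 1 but QImode
   (8 bits) as its mode, so the element type is replaced here by an 8-bit
   nonstandard integer type; likewise an integral type that is not an
   INTEGER_TYPE (e.g. an enumeral type) is mapped to an integer type of the
   mode's width.  */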
6231 if (INTEGRAL_TYPE_P (scalar_type)
6232 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6233 || TREE_CODE (scalar_type) != INTEGER_TYPE))
6234 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6235 TYPE_UNSIGNED (scalar_type));
6237 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6238 When the component mode passes the above test simply use a type
6239 corresponding to that mode. The theory is that any use that
6240 would cause problems with this will disable vectorization anyway. */
6241 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
6242 && !INTEGRAL_TYPE_P (scalar_type))
6243 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6245 /* We can't build a vector type of elements with alignment bigger than
6246 their size. */
6247 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
6248 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
6249 TYPE_UNSIGNED (scalar_type));
6251 /* If we fell back to using the mode, fail if there was
6252 no scalar type for it. */
6253 if (scalar_type == NULL_TREE)
6254 return NULL_TREE;
6256 /* If no size was supplied use the mode the target prefers. Otherwise
6257 look up a vector mode of the specified size. */
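/* Worked example (assuming a target where int has SImode, 4 bytes): with
   SIZE == 16 the code below asks for mode_for_vector (SImode, 16/4), i.e.
   V4SImode, giving NUNITS == 16/4 == 4 and ultimately a V4SI vector type.
   With SIZE == 0 the target's preferred_simd_mode hook chooses the mode.  */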
6258 if (size == 0)
6259 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6260 else
6261 simd_mode = mode_for_vector (inner_mode, size / nbytes);
6262 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6263 if (nunits <= 1)
6264 return NULL_TREE;
6266 vectype = build_vector_type (scalar_type, nunits);
6268 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6269 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6270 return NULL_TREE;
6272 return vectype;
6275 unsigned int current_vector_size;
6277 /* Function get_vectype_for_scalar_type.
6279 Returns the vector type corresponding to SCALAR_TYPE as supported
6280 by the target. */
6282 tree
6283 get_vectype_for_scalar_type (tree scalar_type)
6285 tree vectype;
6286 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6287 current_vector_size);
6288 if (vectype
6289 && current_vector_size == 0)
6290 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6291 return vectype;
6294 /* Function get_same_sized_vectype.
6296 Returns a vector type corresponding to SCALAR_TYPE with the same size as
6297 VECTOR_TYPE, if supported by the target. */
6299 tree
6300 get_same_sized_vectype (tree scalar_type, tree vector_type)
6302 return get_vectype_for_scalar_type_and_size
6303 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6306 /* Function vect_is_simple_use.
6308 Input:
6309 LOOP_VINFO - the vect info of the loop that is being vectorized.
6310 BB_VINFO - the vect info of the basic block that is being vectorized.
6311 OPERAND - operand of STMT in the loop or bb.
6312 DEF_STMT, DEF, DT - outputs receiving the defining stmt, the defined value and the vect def type of OPERAND.
6314 Returns whether a stmt with OPERAND can be vectorized.
6315 For loops, supportable operands are constants, loop invariants, and operands
6316 that are defined by the current iteration of the loop. Unsupportable
6317 operands are those that are defined by a previous iteration of the loop (as
6318 is the case in reduction/induction computations).
6319 For basic blocks, supportable operands are constants and bb invariants.
6320 For now, operands defined outside the basic block are not supported. */
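/* Typical call (sketch only; the variable names are illustrative), similar
   to the uses in vectorizable_condition above:

     gimple def_stmt;
     tree def;
     enum vect_def_type dt;

     if (!vect_is_simple_use (op, stmt, loop_vinfo, NULL,
                              &def_stmt, &def, &dt))
       return false;                        <-- OP cannot be handled
     if (dt == vect_constant_def || dt == vect_external_def)
       ...                                  <-- OP is invariant here
*/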
6322 bool
6323 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6324 bb_vec_info bb_vinfo, gimple *def_stmt,
6325 tree *def, enum vect_def_type *dt)
6327 basic_block bb;
6328 stmt_vec_info stmt_vinfo;
6329 struct loop *loop = NULL;
6331 if (loop_vinfo)
6332 loop = LOOP_VINFO_LOOP (loop_vinfo);
6334 *def_stmt = NULL;
6335 *def = NULL_TREE;
6337 if (dump_enabled_p ())
6339 dump_printf_loc (MSG_NOTE, vect_location,
6340 "vect_is_simple_use: operand ");
6341 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
6342 dump_printf (MSG_NOTE, "\n");
6345 if (CONSTANT_CLASS_P (operand))
6347 *dt = vect_constant_def;
6348 return true;
6351 if (is_gimple_min_invariant (operand))
6353 *def = operand;
6354 *dt = vect_external_def;
6355 return true;
6358 if (TREE_CODE (operand) == PAREN_EXPR)
6360 if (dump_enabled_p ())
6361 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
6362 operand = TREE_OPERAND (operand, 0);
6365 if (TREE_CODE (operand) != SSA_NAME)
6367 if (dump_enabled_p ())
6368 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6369 "not ssa-name.\n");
6370 return false;
6373 *def_stmt = SSA_NAME_DEF_STMT (operand);
6374 if (*def_stmt == NULL)
6376 if (dump_enabled_p ())
6377 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6378 "no def_stmt.\n");
6379 return false;
6382 if (dump_enabled_p ())
6384 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6385 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
6386 dump_printf (MSG_NOTE, "\n");
6389 /* An empty stmt is expected only in the case of a function argument.
6390 (Otherwise we expect a phi_node or a GIMPLE_ASSIGN.) */
6391 if (gimple_nop_p (*def_stmt))
6393 *def = operand;
6394 *dt = vect_external_def;
6395 return true;
6398 bb = gimple_bb (*def_stmt);
6400 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6401 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
6402 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
6403 *dt = vect_external_def;
6404 else
6406 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6407 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6410 if (*dt == vect_unknown_def_type
6411 || (stmt
6412 && *dt == vect_double_reduction_def
6413 && gimple_code (stmt) != GIMPLE_PHI))
6415 if (dump_enabled_p ())
6416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6417 "Unsupported pattern.\n");
6418 return false;
6421 if (dump_enabled_p ())
6422 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
6424 switch (gimple_code (*def_stmt))
6426 case GIMPLE_PHI:
6427 *def = gimple_phi_result (*def_stmt);
6428 break;
6430 case GIMPLE_ASSIGN:
6431 *def = gimple_assign_lhs (*def_stmt);
6432 break;
6434 case GIMPLE_CALL:
6435 *def = gimple_call_lhs (*def_stmt);
6436 if (*def != NULL)
6437 break;
6438 /* FALLTHRU */
6439 default:
6440 if (dump_enabled_p ())
6441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6442 "unsupported defining stmt:\n");
6443 return false;
6446 return true;
6449 /* Function vect_is_simple_use_1.
6451 Same as vect_is_simple_use but also determines the vector operand
6452 type of OPERAND and stores it to *VECTYPE. If the definition of
6453 OPERAND is vect_uninitialized_def, vect_constant_def or
6454 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
6455 is responsible for computing the best suited vector type for the
6456 scalar operand. */
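/* Sketch of a typical call (illustrative names):

     gimple def_stmt;
     tree def, vectype;
     enum vect_def_type dt;

     if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                &def_stmt, &def, &dt, &vectype))
       return false;
     if (vectype == NULL_TREE)
       ...   <-- constant/external def: caller picks a suitable vector type
*/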
6458 bool
6459 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6460 bb_vec_info bb_vinfo, gimple *def_stmt,
6461 tree *def, enum vect_def_type *dt, tree *vectype)
6463 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6464 def, dt))
6465 return false;
6467 /* Now get a vector type if the def is internal, otherwise supply
6468 NULL_TREE and leave it up to the caller to figure out a proper
6469 type for the use stmt. */
6470 if (*dt == vect_internal_def
6471 || *dt == vect_induction_def
6472 || *dt == vect_reduction_def
6473 || *dt == vect_double_reduction_def
6474 || *dt == vect_nested_cycle)
6476 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6478 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6479 && !STMT_VINFO_RELEVANT (stmt_info)
6480 && !STMT_VINFO_LIVE_P (stmt_info))
6481 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6483 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6484 gcc_assert (*vectype != NULL_TREE);
6486 else if (*dt == vect_uninitialized_def
6487 || *dt == vect_constant_def
6488 || *dt == vect_external_def)
6489 *vectype = NULL_TREE;
6490 else
6491 gcc_unreachable ();
6493 return true;
6497 /* Function supportable_widening_operation
6499 Check whether an operation represented by the code CODE is a
6500 widening operation that is supported by the target platform in
6501 vector form (i.e., when operating on arguments of type VECTYPE_IN
6502 producing a result of type VECTYPE_OUT).
6504 Widening operations we currently support are NOP (CONVERT), FLOAT,
6505 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations
6506 are supported by the target platform either directly (via vector
6507 tree-codes), or via target builtins.
6509 Output:
6510 - CODE1 and CODE2 are codes of vector operations to be used when
6511 vectorizing the operation, if available.
6512 - MULTI_STEP_CVT determines the number of required intermediate steps in
6513 case of multi-step conversion (like char->short->int - in that case
6514 MULTI_STEP_CVT will be 1).
6515 - INTERM_TYPES contains the intermediate type required to perform the
6516 widening operation (short in the above example). */
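/* Hypothetical worked example: converting a vector of chars to a vector of
   ints.  One unpack step only reaches short, so on success the routine sets
   CODE1/CODE2 to VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR, *MULTI_STEP_CVT to 1
   and INTERM_TYPES to { vector short }, i.e. char->short->int is done with
   two rounds of unpacking, provided the target supplies the needed optabs.  */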
6518 bool
6519 supportable_widening_operation (enum tree_code code, gimple stmt,
6520 tree vectype_out, tree vectype_in,
6521 enum tree_code *code1, enum tree_code *code2,
6522 int *multi_step_cvt,
6523 vec<tree> *interm_types)
6525 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6526 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6527 struct loop *vect_loop = NULL;
6528 enum machine_mode vec_mode;
6529 enum insn_code icode1, icode2;
6530 optab optab1, optab2;
6531 tree vectype = vectype_in;
6532 tree wide_vectype = vectype_out;
6533 enum tree_code c1, c2;
6534 int i;
6535 tree prev_type, intermediate_type;
6536 enum machine_mode intermediate_mode, prev_mode;
6537 optab optab3, optab4;
6539 *multi_step_cvt = 0;
6540 if (loop_info)
6541 vect_loop = LOOP_VINFO_LOOP (loop_info);
6543 switch (code)
6545 case WIDEN_MULT_EXPR:
6546 /* The result of a vectorized widening operation usually requires
6547 two vectors (because the widened results do not fit into one vector).
6548 The generated vector results would normally be expected to be
6549 generated in the same order as in the original scalar computation,
6550 i.e. if 8 results are generated in each vector iteration, they are
6551 to be organized as follows:
6552 vect1: [res1,res2,res3,res4],
6553 vect2: [res5,res6,res7,res8].
6555 However, in the special case that the result of the widening
6556 operation is used in a reduction computation only, the order doesn't
6557 matter (because when vectorizing a reduction we change the order of
6558 the computation). Some targets can take advantage of this and
6559 generate more efficient code. For example, targets like Altivec,
6560 that support widen_mult using a sequence of {mult_even,mult_odd}
6561 generate the following vectors:
6562 vect1: [res1,res3,res5,res7],
6563 vect2: [res2,res4,res6,res8].
6565 When vectorizing outer-loops, we execute the inner-loop sequentially
6566 (each vectorized inner-loop iteration contributes to VF outer-loop
6567 iterations in parallel). We therefore don't allow changing the
6568 order of the computation in the inner-loop during outer-loop
6569 vectorization. */
6570 /* TODO: Another case in which order doesn't *really* matter is when we
6571 widen and then contract again, e.g. (short)((int)x * y >> 8).
6572 Normally, pack_trunc performs an even/odd permute, whereas the
6573 repack from an even/odd expansion would be an interleave, which
6574 would be significantly simpler for e.g. AVX2. */
6575 /* In any case, in order to avoid duplicating the code below, recurse
6576 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6577 are properly set up for the caller. If we fail, we'll continue with
6578 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6579 if (vect_loop
6580 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6581 && !nested_in_vect_loop_p (vect_loop, stmt)
6582 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6583 stmt, vectype_out, vectype_in,
6584 code1, code2, multi_step_cvt,
6585 interm_types))
6586 return true;
6587 c1 = VEC_WIDEN_MULT_LO_EXPR;
6588 c2 = VEC_WIDEN_MULT_HI_EXPR;
6589 break;
6591 case VEC_WIDEN_MULT_EVEN_EXPR:
6592 /* Support the recursion induced just above. */
6593 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6594 c2 = VEC_WIDEN_MULT_ODD_EXPR;
6595 break;
6597 case WIDEN_LSHIFT_EXPR:
6598 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6599 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6600 break;
6602 CASE_CONVERT:
6603 c1 = VEC_UNPACK_LO_EXPR;
6604 c2 = VEC_UNPACK_HI_EXPR;
6605 break;
6607 case FLOAT_EXPR:
6608 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6609 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6610 break;
6612 case FIX_TRUNC_EXPR:
6613 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6614 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6615 computing the operation. */
6616 return false;
6618 default:
6619 gcc_unreachable ();
6622 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6624 enum tree_code ctmp = c1;
6625 c1 = c2;
6626 c2 = ctmp;
6629 if (code == FIX_TRUNC_EXPR)
6631 /* The signedness is determined from the output operand. */
6632 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6633 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6635 else
6637 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6638 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6641 if (!optab1 || !optab2)
6642 return false;
6644 vec_mode = TYPE_MODE (vectype);
6645 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6646 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6647 return false;
6649 *code1 = c1;
6650 *code2 = c2;
6652 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6653 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6654 return true;
6656 /* Check if it's a multi-step conversion that can be done using intermediate
6657 types. */
6659 prev_type = vectype;
6660 prev_mode = vec_mode;
6662 if (!CONVERT_EXPR_CODE_P (code))
6663 return false;
6665 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6666 intermediate steps in the promotion sequence. We try
6667 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6668 not. */
6669 interm_types->create (MAX_INTERM_CVT_STEPS);
6670 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6672 intermediate_mode = insn_data[icode1].operand[0].mode;
6673 intermediate_type
6674 = lang_hooks.types.type_for_mode (intermediate_mode,
6675 TYPE_UNSIGNED (prev_type));
6676 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6677 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6679 if (!optab3 || !optab4
6680 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6681 || insn_data[icode1].operand[0].mode != intermediate_mode
6682 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6683 || insn_data[icode2].operand[0].mode != intermediate_mode
6684 || ((icode1 = optab_handler (optab3, intermediate_mode))
6685 == CODE_FOR_nothing)
6686 || ((icode2 = optab_handler (optab4, intermediate_mode))
6687 == CODE_FOR_nothing))
6688 break;
6690 interm_types->quick_push (intermediate_type);
6691 (*multi_step_cvt)++;
6693 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6694 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6695 return true;
6697 prev_type = intermediate_type;
6698 prev_mode = intermediate_mode;
6701 interm_types->release ();
6702 return false;
6706 /* Function supportable_narrowing_operation
6708 Check whether an operation represented by the code CODE is a
6709 narrowing operation that is supported by the target platform in
6710 vector form (i.e., when operating on arguments of type VECTYPE_IN
6711 and producing a result of type VECTYPE_OUT).
6713 Narrowing operations we currently support are NOP (CONVERT) and
6714 FIX_TRUNC. This function checks if these operations are supported by
6715 the target platform directly via vector tree-codes.
6717 Output:
6718 - CODE1 is the code of a vector operation to be used when
6719 vectorizing the operation, if available.
6720 - MULTI_STEP_CVT determines the number of required intermediate steps in
6721 case of multi-step conversion (like int->short->char - in that case
6722 MULTI_STEP_CVT will be 1).
6723 - INTERM_TYPES contains the intermediate type required to perform the
6724 narrowing operation (short in the above example). */
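/* Hypothetical worked example: narrowing vector int to vector char.  CODE1
   is VEC_PACK_TRUNC_EXPR; if the target cannot pack int directly down to
   char, the loop below tries an intermediate vector short, so
   *MULTI_STEP_CVT becomes 1 and INTERM_TYPES holds { vector short }
   (int->short->char in two pack rounds), provided the pack optabs exist.  */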
6726 bool
6727 supportable_narrowing_operation (enum tree_code code,
6728 tree vectype_out, tree vectype_in,
6729 enum tree_code *code1, int *multi_step_cvt,
6730 vec<tree> *interm_types)
6732 enum machine_mode vec_mode;
6733 enum insn_code icode1;
6734 optab optab1, interm_optab;
6735 tree vectype = vectype_in;
6736 tree narrow_vectype = vectype_out;
6737 enum tree_code c1;
6738 tree intermediate_type;
6739 enum machine_mode intermediate_mode, prev_mode;
6740 int i;
6741 bool uns;
6743 *multi_step_cvt = 0;
6744 switch (code)
6746 CASE_CONVERT:
6747 c1 = VEC_PACK_TRUNC_EXPR;
6748 break;
6750 case FIX_TRUNC_EXPR:
6751 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6752 break;
6754 case FLOAT_EXPR:
6755 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6756 tree code and optabs used for computing the operation. */
6757 return false;
6759 default:
6760 gcc_unreachable ();
6763 if (code == FIX_TRUNC_EXPR)
6764 /* The signedness is determined from the output operand. */
6765 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6766 else
6767 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6769 if (!optab1)
6770 return false;
6772 vec_mode = TYPE_MODE (vectype);
6773 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6774 return false;
6776 *code1 = c1;
6778 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6779 return true;
6781 /* Check if it's a multi-step conversion that can be done using intermediate
6782 types. */
6783 prev_mode = vec_mode;
6784 if (code == FIX_TRUNC_EXPR)
6785 uns = TYPE_UNSIGNED (vectype_out);
6786 else
6787 uns = TYPE_UNSIGNED (vectype);
6789 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6790 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6791 costly than signed. */
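/* E.g. (illustrative): for a double -> unsigned short conversion, only the
   low 16 bits of each intermediate integer survive the final packing, so a
   signed float-to-int step yields the same packed result; when such a
   signed variant exists with a matching result mode, the code below flips
   UNS to false and uses it.  */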
6792 if (code == FIX_TRUNC_EXPR && uns)
6794 enum insn_code icode2;
6796 intermediate_type
6797 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6798 interm_optab
6799 = optab_for_tree_code (c1, intermediate_type, optab_default);
6800 if (interm_optab != unknown_optab
6801 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6802 && insn_data[icode1].operand[0].mode
6803 == insn_data[icode2].operand[0].mode)
6805 uns = false;
6806 optab1 = interm_optab;
6807 icode1 = icode2;
6811 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6812 intermediate steps in the narrowing sequence. We try
6813 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6814 interm_types->create (MAX_INTERM_CVT_STEPS);
6815 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6817 intermediate_mode = insn_data[icode1].operand[0].mode;
6818 intermediate_type
6819 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6820 interm_optab
6821 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6822 optab_default);
6823 if (!interm_optab
6824 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6825 || insn_data[icode1].operand[0].mode != intermediate_mode
6826 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6827 == CODE_FOR_nothing))
6828 break;
6830 interm_types->quick_push (intermediate_type);
6831 (*multi_step_cvt)++;
6833 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6834 return true;
6836 prev_mode = intermediate_mode;
6837 optab1 = interm_optab;
6840 interm_types->release ();
6841 return false;