gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "dumpfile.h"
27 #include "tm.h"
28 #include "ggc.h"
29 #include "tree.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "recog.h" /* FIXME: for insn_data */
37 #include "optabs.h"
38 #include "diagnostic-core.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
42 /* For lang_hooks.types.type_for_mode. */
43 #include "langhooks.h"
45 /* Return the vectorized type for the given statement. */
47 tree
48 stmt_vectype (struct _stmt_vec_info *stmt_info)
50 return STMT_VINFO_VECTYPE (stmt_info);
53 /* Return TRUE iff the given statement is in an inner loop relative to
54 the loop being vectorized. */
55 bool
56 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
58 gimple stmt = STMT_VINFO_STMT (stmt_info);
59 basic_block bb = gimple_bb (stmt);
60 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
61 struct loop* loop;
63 if (!loop_vinfo)
64 return false;
66 loop = LOOP_VINFO_LOOP (loop_vinfo);
68 return (bb->loop_father == loop->inner);
71 /* Record the cost of a statement, either by directly informing the
72 target model or by saving it in a vector for later processing.
73 Return a preliminary estimate of the statement's cost. */
75 unsigned
76 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
77 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
78 int misalign, enum vect_cost_model_location where)
80 if (body_cost_vec)
82 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
83 add_stmt_info_to_vec (body_cost_vec, count, kind,
84 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
85 misalign);
86 return (unsigned)
87 (builtin_vectorization_cost (kind, vectype, misalign) * count);
90 else
92 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
93 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
94 void *target_cost_data;
96 if (loop_vinfo)
97 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
98 else
99 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
101 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
102 misalign, where);
106 /* Return a variable of type ELEM_TYPE[NELEMS]. */
108 static tree
109 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
111 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
112 "vect_array");
115 /* ARRAY is an array of vectors created by create_vector_array.
116 Return an SSA_NAME for the vector in index N. The reference
117 is part of the vectorization of STMT and the vector is associated
118 with scalar destination SCALAR_DEST. */
120 static tree
121 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
122 tree array, unsigned HOST_WIDE_INT n)
124 tree vect_type, vect, vect_name, array_ref;
125 gimple new_stmt;
127 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
128 vect_type = TREE_TYPE (TREE_TYPE (array));
129 vect = vect_create_destination_var (scalar_dest, vect_type);
130 array_ref = build4 (ARRAY_REF, vect_type, array,
131 build_int_cst (size_type_node, n),
132 NULL_TREE, NULL_TREE);
134 new_stmt = gimple_build_assign (vect, array_ref);
135 vect_name = make_ssa_name (vect, new_stmt);
136 gimple_assign_set_lhs (new_stmt, vect_name);
137 vect_finish_stmt_generation (stmt, new_stmt, gsi);
139 return vect_name;
142 /* ARRAY is an array of vectors created by create_vector_array.
143 Emit code to store SSA_NAME VECT in index N of the array.
144 The store is part of the vectorization of STMT. */
146 static void
147 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
148 tree array, unsigned HOST_WIDE_INT n)
150 tree array_ref;
151 gimple new_stmt;
153 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
154 build_int_cst (size_type_node, n),
155 NULL_TREE, NULL_TREE);
157 new_stmt = gimple_build_assign (array_ref, vect);
158 vect_finish_stmt_generation (stmt, new_stmt, gsi);
161 /* PTR is a pointer to an array of type TYPE. Return a representation
162 of *PTR. The memory reference replaces those in FIRST_DR
163 (and its group). */
165 static tree
166 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
168 tree mem_ref, alias_ptr_type;
170 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
171 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
172 /* Arrays have the same alignment as their type. */
173 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
174 return mem_ref;
177 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
179 /* Function vect_mark_relevant.
181 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
183 static void
184 vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
185 enum vect_relevant relevant, bool live_p,
186 bool used_in_pattern)
188 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
189 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
190 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
191 gimple pattern_stmt;
193 if (dump_enabled_p ())
194 dump_printf_loc (MSG_NOTE, vect_location,
195 "mark relevant %d, live %d.", relevant, live_p);
197 /* If this stmt is an original stmt in a pattern, we might need to mark its
198 related pattern stmt instead of the original stmt. However, such stmts
199 may have their own uses that are not in any pattern; in such cases the
200 stmt itself should be marked. */
201 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
203 bool found = false;
204 if (!used_in_pattern)
206 imm_use_iterator imm_iter;
207 use_operand_p use_p;
208 gimple use_stmt;
209 tree lhs;
210 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
211 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
213 if (is_gimple_assign (stmt))
214 lhs = gimple_assign_lhs (stmt);
215 else
216 lhs = gimple_call_lhs (stmt);
218 /* This use is outside the pattern. If LHS has other uses that are
219 pattern uses, we should mark the stmt itself, and not the pattern
220 stmt. */
221 if (TREE_CODE (lhs) == SSA_NAME)
222 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
224 if (is_gimple_debug (USE_STMT (use_p)))
225 continue;
226 use_stmt = USE_STMT (use_p);
228 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
229 continue;
231 if (vinfo_for_stmt (use_stmt)
232 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
234 found = true;
235 break;
240 if (!found)
242 /* This is the last stmt in a sequence that was detected as a
243 pattern that can potentially be vectorized. Don't mark the stmt
244 as relevant/live because it's not going to be vectorized.
245 Instead mark the pattern-stmt that replaces it. */
247 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
249 if (dump_enabled_p ())
250 dump_printf_loc (MSG_NOTE, vect_location,
251 "last stmt in pattern. don't mark"
252 " relevant/live.");
253 stmt_info = vinfo_for_stmt (pattern_stmt);
254 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
255 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
256 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
257 stmt = pattern_stmt;
261 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
262 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
263 STMT_VINFO_RELEVANT (stmt_info) = relevant;
265 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
266 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
268 if (dump_enabled_p ())
269 dump_printf_loc (MSG_NOTE, vect_location,
270 "already marked relevant/live.");
271 return;
274 VEC_safe_push (gimple, heap, *worklist, stmt);
278 /* Function vect_stmt_relevant_p.
280 Return true if STMT in loop that is represented by LOOP_VINFO is
281 "relevant for vectorization".
283 A stmt is considered "relevant for vectorization" if:
284 - it has uses outside the loop.
285 - it has vdefs (it alters memory).
286 - it is a control stmt in the loop (other than the loop exit condition).
288 CHECKME: what other side effects would the vectorizer allow? */
290 static bool
291 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
292 enum vect_relevant *relevant, bool *live_p)
294 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
295 ssa_op_iter op_iter;
296 imm_use_iterator imm_iter;
297 use_operand_p use_p;
298 def_operand_p def_p;
300 *relevant = vect_unused_in_scope;
301 *live_p = false;
303 /* cond stmt other than loop exit cond. */
304 if (is_ctrl_stmt (stmt)
305 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
306 != loop_exit_ctrl_vec_info_type)
307 *relevant = vect_used_in_scope;
309 /* changing memory. */
310 if (gimple_code (stmt) != GIMPLE_PHI)
311 if (gimple_vdef (stmt))
313 if (dump_enabled_p ())
314 dump_printf_loc (MSG_NOTE, vect_location,
315 "vec_stmt_relevant_p: stmt has vdefs.");
316 *relevant = vect_used_in_scope;
319 /* uses outside the loop. */
320 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
322 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
324 basic_block bb = gimple_bb (USE_STMT (use_p));
325 if (!flow_bb_inside_loop_p (loop, bb))
327 if (dump_enabled_p ())
328 dump_printf_loc (MSG_NOTE, vect_location,
329 "vec_stmt_relevant_p: used out of loop.");
331 if (is_gimple_debug (USE_STMT (use_p)))
332 continue;
334 /* We expect all such uses to be in the loop exit phis
335 (because of loop-closed SSA form). */
336 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
337 gcc_assert (bb == single_exit (loop)->dest);
339 *live_p = true;
344 return (*live_p || *relevant);
348 /* Function exist_non_indexing_operands_for_use_p
350 USE is one of the uses attached to STMT. Check if USE is
351 used in STMT for anything other than indexing an array. */
353 static bool
354 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
356 tree operand;
357 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
359 /* USE corresponds to some operand in STMT. If there is no data
360 reference in STMT, then any operand that corresponds to USE
361 is not indexing an array. */
362 if (!STMT_VINFO_DATA_REF (stmt_info))
363 return true;
365 /* STMT has a data_ref. FORNOW this means that it is one of
366 the following forms:
367 -1- ARRAY_REF = var
368 -2- var = ARRAY_REF
369 (This should have been verified in analyze_data_refs).
371 'var' in the second case corresponds to a def, not a use,
372 so USE cannot correspond to any operands that are not used
373 for array indexing.
375 Therefore, all we need to check is if STMT falls into the
376 first case, and whether var corresponds to USE. */
378 if (!gimple_assign_copy_p (stmt))
379 return false;
380 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
381 return false;
382 operand = gimple_assign_rhs1 (stmt);
383 if (TREE_CODE (operand) != SSA_NAME)
384 return false;
386 if (operand == use)
387 return true;
389 return false;
394 /* Function process_use.
396 Inputs:
397 - a USE in STMT in a loop represented by LOOP_VINFO
398 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
399 that defined USE. This is done by calling mark_relevant and passing it
400 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
401 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
402 be performed.
404 Outputs:
405 Generally, LIVE_P and RELEVANT are used to define the liveness and
406 relevance info of the DEF_STMT of this USE:
407 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
408 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
409 Exceptions:
410 - case 1: If USE is used only for address computations (e.g. array indexing),
411 which does not need to be directly vectorized, then the liveness/relevance
412 of the respective DEF_STMT is left unchanged.
413 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
414 skip DEF_STMT because it has already been processed.
415 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
416 be modified accordingly.
418 Return true if everything is as expected. Return false otherwise. */
420 static bool
421 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
422 enum vect_relevant relevant, VEC(gimple,heap) **worklist,
423 bool force)
425 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
426 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
427 stmt_vec_info dstmt_vinfo;
428 basic_block bb, def_bb;
429 tree def;
430 gimple def_stmt;
431 enum vect_def_type dt;
433 /* case 1: we are only interested in uses that need to be vectorized. Uses
434 that are used for address computation are not considered relevant. */
435 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
436 return true;
438 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
440 if (dump_enabled_p ())
441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
442 "not vectorized: unsupported use in stmt.");
443 return false;
446 if (!def_stmt || gimple_nop_p (def_stmt))
447 return true;
449 def_bb = gimple_bb (def_stmt);
450 if (!flow_bb_inside_loop_p (loop, def_bb))
452 if (dump_enabled_p ())
453 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.");
454 return true;
457 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
458 DEF_STMT must have already been processed, because this should be the
459 only way that STMT, which is a reduction-phi, was put in the worklist,
460 as there should be no other uses for DEF_STMT in the loop. So we just
461 check that everything is as expected, and we are done. */
462 dstmt_vinfo = vinfo_for_stmt (def_stmt);
463 bb = gimple_bb (stmt);
464 if (gimple_code (stmt) == GIMPLE_PHI
465 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
466 && gimple_code (def_stmt) != GIMPLE_PHI
467 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
468 && bb->loop_father == def_bb->loop_father)
470 if (dump_enabled_p ())
471 dump_printf_loc (MSG_NOTE, vect_location,
472 "reduc-stmt defining reduc-phi in the same nest.");
473 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
474 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
475 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
476 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
477 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
478 return true;
481 /* case 3a: outer-loop stmt defining an inner-loop stmt:
482 outer-loop-header-bb:
483 d = def_stmt
484 inner-loop:
485 stmt # use (d)
486 outer-loop-tail-bb:
487 ... */
488 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
490 if (dump_enabled_p ())
491 dump_printf_loc (MSG_NOTE, vect_location,
492 "outer-loop def-stmt defining inner-loop stmt.");
494 switch (relevant)
496 case vect_unused_in_scope:
497 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
498 vect_used_in_scope : vect_unused_in_scope;
499 break;
501 case vect_used_in_outer_by_reduction:
502 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
503 relevant = vect_used_by_reduction;
504 break;
506 case vect_used_in_outer:
507 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
508 relevant = vect_used_in_scope;
509 break;
511 case vect_used_in_scope:
512 break;
514 default:
515 gcc_unreachable ();
519 /* case 3b: inner-loop stmt defining an outer-loop stmt:
520 outer-loop-header-bb:
522 inner-loop:
523 d = def_stmt
524 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
525 stmt # use (d) */
526 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
528 if (dump_enabled_p ())
529 dump_printf_loc (MSG_NOTE, vect_location,
530 "inner-loop def-stmt defining outer-loop stmt.");
532 switch (relevant)
534 case vect_unused_in_scope:
535 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
536 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
537 vect_used_in_outer_by_reduction : vect_unused_in_scope;
538 break;
540 case vect_used_by_reduction:
541 relevant = vect_used_in_outer_by_reduction;
542 break;
544 case vect_used_in_scope:
545 relevant = vect_used_in_outer;
546 break;
548 default:
549 gcc_unreachable ();
553 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
554 is_pattern_stmt_p (stmt_vinfo));
555 return true;
559 /* Function vect_mark_stmts_to_be_vectorized.
561 Not all stmts in the loop need to be vectorized. For example:
563 for i...
564 for j...
565 1. T0 = i + j
566 2. T1 = a[T0]
568 3. j = j + 1
570 Stmts 1 and 3 do not need to be vectorized, because loop control and
571 addressing of vectorized data-refs are handled differently.
573 This pass detects such stmts. */
575 bool
576 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
578 VEC(gimple,heap) *worklist;
579 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
580 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
581 unsigned int nbbs = loop->num_nodes;
582 gimple_stmt_iterator si;
583 gimple stmt;
584 unsigned int i;
585 stmt_vec_info stmt_vinfo;
586 basic_block bb;
587 gimple phi;
588 bool live_p;
589 enum vect_relevant relevant, tmp_relevant;
590 enum vect_def_type def_type;
592 if (dump_enabled_p ())
593 dump_printf_loc (MSG_NOTE, vect_location,
594 "=== vect_mark_stmts_to_be_vectorized ===");
596 worklist = VEC_alloc (gimple, heap, 64);
598 /* 1. Init worklist. */
599 for (i = 0; i < nbbs; i++)
601 bb = bbs[i];
602 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
604 phi = gsi_stmt (si);
605 if (dump_enabled_p ())
607 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
608 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
611 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
612 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
614 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
616 stmt = gsi_stmt (si);
617 if (dump_enabled_p ())
619 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
620 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
623 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
624 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
628 /* 2. Process_worklist */
629 while (VEC_length (gimple, worklist) > 0)
631 use_operand_p use_p;
632 ssa_op_iter iter;
634 stmt = VEC_pop (gimple, worklist);
635 if (dump_enabled_p ())
637 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
638 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
641 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
642 (DEF_STMT) as relevant/irrelevant and live/dead according to the
643 liveness and relevance properties of STMT. */
644 stmt_vinfo = vinfo_for_stmt (stmt);
645 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
646 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
648 /* Generally, the liveness and relevance properties of STMT are
649 propagated as is to the DEF_STMTs of its USEs:
650 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
651 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
653 One exception is when STMT has been identified as defining a reduction
654 variable; in this case we set the liveness/relevance as follows:
655 live_p = false
656 relevant = vect_used_by_reduction
657 This is because we distinguish between two kinds of relevant stmts -
658 those that are used by a reduction computation, and those that are
659 (also) used by a regular computation. This allows us later on to
660 identify stmts that are used solely by a reduction, and therefore the
661 order of the results that they produce does not have to be kept. */
663 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
664 tmp_relevant = relevant;
665 switch (def_type)
667 case vect_reduction_def:
668 switch (tmp_relevant)
670 case vect_unused_in_scope:
671 relevant = vect_used_by_reduction;
672 break;
674 case vect_used_by_reduction:
675 if (gimple_code (stmt) == GIMPLE_PHI)
676 break;
677 /* fall through */
679 default:
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
682 "unsupported use of reduction.");
683 VEC_free (gimple, heap, worklist);
684 return false;
687 live_p = false;
688 break;
690 case vect_nested_cycle:
691 if (tmp_relevant != vect_unused_in_scope
692 && tmp_relevant != vect_used_in_outer_by_reduction
693 && tmp_relevant != vect_used_in_outer)
695 if (dump_enabled_p ())
696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
697 "unsupported use of nested cycle.");
699 VEC_free (gimple, heap, worklist);
700 return false;
703 live_p = false;
704 break;
706 case vect_double_reduction_def:
707 if (tmp_relevant != vect_unused_in_scope
708 && tmp_relevant != vect_used_by_reduction)
710 if (dump_enabled_p ())
711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
712 "unsupported use of double reduction.");
714 VEC_free (gimple, heap, worklist);
715 return false;
718 live_p = false;
719 break;
721 default:
722 break;
725 if (is_pattern_stmt_p (stmt_vinfo))
727 /* Pattern statements are not inserted into the code, so
728 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
729 have to scan the RHS or function arguments instead. */
730 if (is_gimple_assign (stmt))
732 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
733 tree op = gimple_assign_rhs1 (stmt);
735 i = 1;
736 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
738 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
739 live_p, relevant, &worklist, false)
740 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
741 live_p, relevant, &worklist, false))
743 VEC_free (gimple, heap, worklist);
744 return false;
746 i = 2;
748 for (; i < gimple_num_ops (stmt); i++)
750 op = gimple_op (stmt, i);
751 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
752 &worklist, false))
754 VEC_free (gimple, heap, worklist);
755 return false;
759 else if (is_gimple_call (stmt))
761 for (i = 0; i < gimple_call_num_args (stmt); i++)
763 tree arg = gimple_call_arg (stmt, i);
764 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
765 &worklist, false))
767 VEC_free (gimple, heap, worklist);
768 return false;
773 else
774 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
776 tree op = USE_FROM_PTR (use_p);
777 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
778 &worklist, false))
780 VEC_free (gimple, heap, worklist);
781 return false;
785 if (STMT_VINFO_GATHER_P (stmt_vinfo))
787 tree off;
788 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
789 gcc_assert (decl);
790 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
791 &worklist, true))
793 VEC_free (gimple, heap, worklist);
794 return false;
797 } /* while worklist */
799 VEC_free (gimple, heap, worklist);
800 return true;
804 /* Function vect_model_simple_cost.
806 Models cost for simple operations, i.e. those that only emit ncopies of a
807 single op. Right now, this does not account for multiple insns that could
808 be generated for the single vector op. We will handle that shortly. */
810 void
811 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
812 enum vect_def_type *dt,
813 stmt_vector_for_cost *prologue_cost_vec,
814 stmt_vector_for_cost *body_cost_vec)
816 int i;
817 int inside_cost = 0, prologue_cost = 0;
819 /* The SLP costs were already calculated during SLP tree build. */
820 if (PURE_SLP_STMT (stmt_info))
821 return;
823 /* FORNOW: Assuming maximum 2 args per stmt. */
824 for (i = 0; i < 2; i++)
825 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
826 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
827 stmt_info, 0, vect_prologue);
829 /* Pass the inside-of-loop statements to the target-specific cost model. */
830 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
831 stmt_info, 0, vect_body);
833 if (dump_enabled_p ())
834 dump_printf_loc (MSG_NOTE, vect_location,
835 "vect_model_simple_cost: inside_cost = %d, "
836 "prologue_cost = %d .", inside_cost, prologue_cost);
840 /* Model cost for type demotion and promotion operations. PWR is normally
841 zero for single-step promotions and demotions. It will be one if
842 two-step promotion/demotion is required, and so on. Each additional
843 step doubles the number of instructions required. */
845 static void
846 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
847 enum vect_def_type *dt, int pwr)
849 int i, tmp;
850 int inside_cost = 0, prologue_cost = 0;
851 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
852 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
853 void *target_cost_data;
855 /* The SLP costs were already calculated during SLP tree build. */
856 if (PURE_SLP_STMT (stmt_info))
857 return;
859 if (loop_vinfo)
860 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
861 else
862 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
864 for (i = 0; i < pwr + 1; i++)
866 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
867 (i + 1) : i;
868 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
869 vec_promote_demote, stmt_info, 0,
870 vect_body);
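/* For a single-step promotion (PWR == 0) the loop above counts
   vect_pow2 (1) == 2 vec_promote_demote stmts (the high and low halves
   of the widened result); for a single-step demotion it counts
   vect_pow2 (0) == 1.  */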
873 /* FORNOW: Assuming maximum 2 args per stmt. */
874 for (i = 0; i < 2; i++)
875 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
876 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
877 stmt_info, 0, vect_prologue);
879 if (dump_enabled_p ())
880 dump_printf_loc (MSG_NOTE, vect_location,
881 "vect_model_promotion_demotion_cost: inside_cost = %d, "
882 "prologue_cost = %d .", inside_cost, prologue_cost);
885 /* Function vect_cost_group_size
887 For grouped load or store, return the group_size only if it is the first
888 load or store of a group, else return 1. This ensures that group size is
889 only returned once per group. */
891 static int
892 vect_cost_group_size (stmt_vec_info stmt_info)
894 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
896 if (first_stmt == STMT_VINFO_STMT (stmt_info))
897 return GROUP_SIZE (stmt_info);
899 return 1;
903 /* Function vect_model_store_cost
905 Models cost for stores. In the case of grouped accesses, one access
906 has the overhead of the grouped access attributed to it. */
908 void
909 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
910 bool store_lanes_p, enum vect_def_type dt,
911 slp_tree slp_node,
912 stmt_vector_for_cost *prologue_cost_vec,
913 stmt_vector_for_cost *body_cost_vec)
915 int group_size;
916 unsigned int inside_cost = 0, prologue_cost = 0;
917 struct data_reference *first_dr;
918 gimple first_stmt;
920 /* The SLP costs were already calculated during SLP tree build. */
921 if (PURE_SLP_STMT (stmt_info))
922 return;
924 if (dt == vect_constant_def || dt == vect_external_def)
925 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
926 stmt_info, 0, vect_prologue);
928 /* Grouped access? */
929 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
931 if (slp_node)
933 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
934 group_size = 1;
936 else
938 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
939 group_size = vect_cost_group_size (stmt_info);
942 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
944 /* Not a grouped access. */
945 else
947 group_size = 1;
948 first_dr = STMT_VINFO_DATA_REF (stmt_info);
951 /* We assume that the cost of a single store-lanes instruction is
952 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
953 access is instead being provided by a permute-and-store operation,
954 include the cost of the permutes. */
955 if (!store_lanes_p && group_size > 1)
957 /* Uses a high and low interleave operation for each needed permute. */
959 int nstmts = ncopies * exact_log2 (group_size) * group_size;
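/* E.g. with ncopies == 1 and group_size == 4 this counts
   1 * exact_log2 (4) * 4 == 8 interleave (vec_perm) stmts.  */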
960 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
961 stmt_info, 0, vect_body);
963 if (dump_enabled_p ())
964 dump_printf_loc (MSG_NOTE, vect_location,
965 "vect_model_store_cost: strided group_size = %d .",
966 group_size);
969 /* Costs of the stores. */
970 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
972 if (dump_enabled_p ())
973 dump_printf_loc (MSG_NOTE, vect_location,
974 "vect_model_store_cost: inside_cost = %d, "
975 "prologue_cost = %d .", inside_cost, prologue_cost);
979 /* Calculate cost of DR's memory access. */
980 void
981 vect_get_store_cost (struct data_reference *dr, int ncopies,
982 unsigned int *inside_cost,
983 stmt_vector_for_cost *body_cost_vec)
985 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
986 gimple stmt = DR_STMT (dr);
987 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
989 switch (alignment_support_scheme)
991 case dr_aligned:
993 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
994 vector_store, stmt_info, 0,
995 vect_body);
997 if (dump_enabled_p ())
998 dump_printf_loc (MSG_NOTE, vect_location,
999 "vect_model_store_cost: aligned.");
1000 break;
1003 case dr_unaligned_supported:
1005 /* Here, we assign an additional cost for the unaligned store. */
1006 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1007 unaligned_store, stmt_info,
1008 DR_MISALIGNMENT (dr), vect_body);
1009 if (dump_enabled_p ())
1010 dump_printf_loc (MSG_NOTE, vect_location,
1011 "vect_model_store_cost: unaligned supported by "
1012 "hardware.");
1013 break;
1016 case dr_unaligned_unsupported:
1018 *inside_cost = VECT_MAX_COST;
1020 if (dump_enabled_p ())
1021 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1022 "vect_model_store_cost: unsupported access.");
1023 break;
1026 default:
1027 gcc_unreachable ();
1032 /* Function vect_model_load_cost
1034 Models cost for loads. In the case of grouped accesses, the last access
1035 has the overhead of the grouped access attributed to it. Since unaligned
1036 accesses are supported for loads, we also account for the costs of the
1037 access scheme chosen. */
1039 void
1040 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1041 bool load_lanes_p, slp_tree slp_node,
1042 stmt_vector_for_cost *prologue_cost_vec,
1043 stmt_vector_for_cost *body_cost_vec)
1045 int group_size;
1046 gimple first_stmt;
1047 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1048 unsigned int inside_cost = 0, prologue_cost = 0;
1050 /* The SLP costs were already calculated during SLP tree build. */
1051 if (PURE_SLP_STMT (stmt_info))
1052 return;
1054 /* Grouped accesses? */
1055 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1056 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1058 group_size = vect_cost_group_size (stmt_info);
1059 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1061 /* Not a grouped access. */
1062 else
1064 group_size = 1;
1065 first_dr = dr;
1068 /* We assume that the cost of a single load-lanes instruction is
1069 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1070 access is instead being provided by a load-and-permute operation,
1071 include the cost of the permutes. */
1072 if (!load_lanes_p && group_size > 1)
1074 /* Uses even and odd extract operations for each needed permute. */
1075 int nstmts = ncopies * exact_log2 (group_size) * group_size;
1076 inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1077 stmt_info, 0, vect_body);
1079 if (dump_enabled_p ())
1080 dump_printf_loc (MSG_NOTE, vect_location,
1081 "vect_model_load_cost: strided group_size = %d .",
1082 group_size);
1085 /* The loads themselves. */
1086 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1088 /* N scalar loads plus gathering them into a vector. */
1089 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1090 inside_cost += record_stmt_cost (body_cost_vec,
1091 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1092 scalar_load, stmt_info, 0, vect_body);
1093 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1094 stmt_info, 0, vect_body);
1096 else
1097 vect_get_load_cost (first_dr, ncopies,
1098 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1099 || group_size > 1 || slp_node),
1100 &inside_cost, &prologue_cost,
1101 prologue_cost_vec, body_cost_vec, true);
1103 if (dump_enabled_p ())
1104 dump_printf_loc (MSG_NOTE, vect_location,
1105 "vect_model_load_cost: inside_cost = %d, "
1106 "prologue_cost = %d .", inside_cost, prologue_cost);
1110 /* Calculate cost of DR's memory access. */
1111 void
1112 vect_get_load_cost (struct data_reference *dr, int ncopies,
1113 bool add_realign_cost, unsigned int *inside_cost,
1114 unsigned int *prologue_cost,
1115 stmt_vector_for_cost *prologue_cost_vec,
1116 stmt_vector_for_cost *body_cost_vec,
1117 bool record_prologue_costs)
1119 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1120 gimple stmt = DR_STMT (dr);
1121 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1123 switch (alignment_support_scheme)
1125 case dr_aligned:
1127 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1128 stmt_info, 0, vect_body);
1130 if (dump_enabled_p ())
1131 dump_printf_loc (MSG_NOTE, vect_location,
1132 "vect_model_load_cost: aligned.");
1134 break;
1136 case dr_unaligned_supported:
1138 /* Here, we assign an additional cost for the unaligned load. */
1139 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1140 unaligned_load, stmt_info,
1141 DR_MISALIGNMENT (dr), vect_body);
1143 if (dump_enabled_p ())
1144 dump_printf_loc (MSG_NOTE, vect_location,
1145 "vect_model_load_cost: unaligned supported by "
1146 "hardware.");
1148 break;
1150 case dr_explicit_realign:
1152 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1153 vector_load, stmt_info, 0, vect_body);
1154 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1155 vec_perm, stmt_info, 0, vect_body);
1157 /* FIXME: If the misalignment remains fixed across the iterations of
1158 the containing loop, the following cost should be added to the
1159 prologue costs. */
1160 if (targetm.vectorize.builtin_mask_for_load)
1161 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1162 stmt_info, 0, vect_body);
1164 if (dump_enabled_p ())
1165 dump_printf_loc (MSG_NOTE, vect_location,
1166 "vect_model_load_cost: explicit realign");
1168 break;
1170 case dr_explicit_realign_optimized:
1172 if (dump_enabled_p ())
1173 dump_printf_loc (MSG_NOTE, vect_location,
1174 "vect_model_load_cost: unaligned software "
1175 "pipelined.");
1177 /* An unaligned software-pipelined access has a load of an address, an
1178 initial load, and possibly a mask operation to "prime" the loop.
1179 However, if this is an access in a group of loads that provide a
1180 grouped access, the above cost should only be considered for one
1181 access in the group. Inside the loop, there is a load op
1182 and a realignment op. */
1184 if (add_realign_cost && record_prologue_costs)
1186 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1187 vector_stmt, stmt_info,
1188 0, vect_prologue);
1189 if (targetm.vectorize.builtin_mask_for_load)
1190 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1191 vector_stmt, stmt_info,
1192 0, vect_prologue);
1195 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1196 stmt_info, 0, vect_body);
1197 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1198 stmt_info, 0, vect_body);
1200 if (dump_enabled_p ())
1201 dump_printf_loc (MSG_NOTE, vect_location,
1202 "vect_model_load_cost: explicit realign optimized");
1204 break;
1207 case dr_unaligned_unsupported:
1209 *inside_cost = VECT_MAX_COST;
1211 if (dump_enabled_p ())
1212 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1213 "vect_model_load_cost: unsupported access.");
1214 break;
1217 default:
1218 gcc_unreachable ();
1222 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1223 the loop preheader for the vectorized stmt STMT. */
1225 static void
1226 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1228 if (gsi)
1229 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1230 else
1232 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1233 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1235 if (loop_vinfo)
1237 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1238 basic_block new_bb;
1239 edge pe;
1241 if (nested_in_vect_loop_p (loop, stmt))
1242 loop = loop->inner;
1244 pe = loop_preheader_edge (loop);
1245 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1246 gcc_assert (!new_bb);
1248 else
1250 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1251 basic_block bb;
1252 gimple_stmt_iterator gsi_bb_start;
1254 gcc_assert (bb_vinfo);
1255 bb = BB_VINFO_BB (bb_vinfo);
1256 gsi_bb_start = gsi_after_labels (bb);
1257 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1261 if (dump_enabled_p ())
1263 dump_printf_loc (MSG_NOTE, vect_location,
1264 "created new init_stmt: ");
1265 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1269 /* Function vect_init_vector.
1271 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1272 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1273 vector type a vector with all elements equal to VAL is created first.
1274 Place the initialization at BSI if it is not NULL. Otherwise, place the
1275 initialization at the loop preheader.
1276 Return the DEF of INIT_STMT.
1277 It will be used in the vectorization of STMT. */
1279 tree
1280 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1282 tree new_var;
1283 gimple init_stmt;
1284 tree vec_oprnd;
1285 tree new_temp;
1287 if (TREE_CODE (type) == VECTOR_TYPE
1288 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1290 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1292 if (CONSTANT_CLASS_P (val))
1293 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1294 else
1296 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1297 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1298 new_temp, val,
1299 NULL_TREE);
1300 vect_init_vector_1 (stmt, init_stmt, gsi);
1301 val = new_temp;
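/* Broadcast VAL to all elements, e.g. an INTEGER_CST 5 with a V4SI TYPE
   becomes the vector constant { 5, 5, 5, 5 }.  */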
1304 val = build_vector_from_val (type, val);
1307 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1308 init_stmt = gimple_build_assign (new_var, val);
1309 new_temp = make_ssa_name (new_var, init_stmt);
1310 gimple_assign_set_lhs (init_stmt, new_temp);
1311 vect_init_vector_1 (stmt, init_stmt, gsi);
1312 vec_oprnd = gimple_assign_lhs (init_stmt);
1313 return vec_oprnd;
1317 /* Function vect_get_vec_def_for_operand.
1319 OP is an operand in STMT. This function returns a (vector) def that will be
1320 used in the vectorized stmt for STMT.
1322 In the case that OP is an SSA_NAME which is defined in the loop, then
1323 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1325 In case OP is an invariant or constant, a new stmt that creates a vector def
1326 needs to be introduced. */
1328 tree
1329 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1331 tree vec_oprnd;
1332 gimple vec_stmt;
1333 gimple def_stmt;
1334 stmt_vec_info def_stmt_info = NULL;
1335 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1336 unsigned int nunits;
1337 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1338 tree def;
1339 enum vect_def_type dt;
1340 bool is_simple_use;
1341 tree vector_type;
1343 if (dump_enabled_p ())
1345 dump_printf_loc (MSG_NOTE, vect_location,
1346 "vect_get_vec_def_for_operand: ");
1347 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1350 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1351 &def_stmt, &def, &dt);
1352 gcc_assert (is_simple_use);
1353 if (dump_enabled_p ())
1355 int loc_printed = 0;
1356 if (def)
1358 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1359 loc_printed = 1;
1360 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1362 if (def_stmt)
1364 if (loc_printed)
1365 dump_printf (MSG_NOTE, " def_stmt = ");
1366 else
1367 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1368 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1372 switch (dt)
1374 /* Case 1: operand is a constant. */
1375 case vect_constant_def:
1377 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1378 gcc_assert (vector_type);
1379 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1381 if (scalar_def)
1382 *scalar_def = op;
1384 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1385 if (dump_enabled_p ())
1386 dump_printf_loc (MSG_NOTE, vect_location,
1387 "Create vector_cst. nunits = %d", nunits);
1389 return vect_init_vector (stmt, op, vector_type, NULL);
1392 /* Case 2: operand is defined outside the loop - loop invariant. */
1393 case vect_external_def:
1395 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1396 gcc_assert (vector_type);
1398 if (scalar_def)
1399 *scalar_def = def;
1401 /* Create 'vec_inv = {inv,inv,..,inv}' */
1402 if (dump_enabled_p ())
1403 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.");
1405 return vect_init_vector (stmt, def, vector_type, NULL);
1408 /* Case 3: operand is defined inside the loop. */
1409 case vect_internal_def:
1411 if (scalar_def)
1412 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1414 /* Get the def from the vectorized stmt. */
1415 def_stmt_info = vinfo_for_stmt (def_stmt);
1417 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1418 /* Get vectorized pattern statement. */
1419 if (!vec_stmt
1420 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1421 && !STMT_VINFO_RELEVANT (def_stmt_info))
1422 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1423 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1424 gcc_assert (vec_stmt);
1425 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1426 vec_oprnd = PHI_RESULT (vec_stmt);
1427 else if (is_gimple_call (vec_stmt))
1428 vec_oprnd = gimple_call_lhs (vec_stmt);
1429 else
1430 vec_oprnd = gimple_assign_lhs (vec_stmt);
1431 return vec_oprnd;
1434 /* Case 4: operand is defined by a loop header phi - reduction */
1435 case vect_reduction_def:
1436 case vect_double_reduction_def:
1437 case vect_nested_cycle:
1439 struct loop *loop;
1441 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1442 loop = (gimple_bb (def_stmt))->loop_father;
1444 /* Get the def before the loop */
1445 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1446 return get_initial_def_for_reduction (stmt, op, scalar_def);
1449 /* Case 5: operand is defined by loop-header phi - induction. */
1450 case vect_induction_def:
1452 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1454 /* Get the def from the vectorized stmt. */
1455 def_stmt_info = vinfo_for_stmt (def_stmt);
1456 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1457 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1458 vec_oprnd = PHI_RESULT (vec_stmt);
1459 else
1460 vec_oprnd = gimple_get_lhs (vec_stmt);
1461 return vec_oprnd;
1464 default:
1465 gcc_unreachable ();
1470 /* Function vect_get_vec_def_for_stmt_copy
1472 Return a vector-def for an operand. This function is used when the
1473 vectorized stmt to be created (by the caller to this function) is a "copy"
1474 created in case the vectorized result cannot fit in one vector, and several
1475 copies of the vector-stmt are required. In this case the vector-def is
1476 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1477 of the stmt that defines VEC_OPRND.
1478 DT is the type of the vector def VEC_OPRND.
1480 Context:
1481 In case the vectorization factor (VF) is bigger than the number
1482 of elements that can fit in a vectype (nunits), we have to generate
1483 more than one vector stmt to vectorize the scalar stmt. This situation
1484 arises when there are multiple data-types operated upon in the loop; the
1485 smallest data-type determines the VF, and as a result, when vectorizing
1486 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1487 vector stmt (each computing a vector of 'nunits' results, and together
1488 computing 'VF' results in each iteration). This function is called when
1489 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1490 which VF=16 and nunits=4, so the number of copies required is 4):
1492 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1494 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1495 VS1.1: vx.1 = memref1 VS1.2
1496 VS1.2: vx.2 = memref2 VS1.3
1497 VS1.3: vx.3 = memref3
1499 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1500 VSnew.1: vz1 = vx.1 + ... VSnew.2
1501 VSnew.2: vz2 = vx.2 + ... VSnew.3
1502 VSnew.3: vz3 = vx.3 + ...
1504 The vectorization of S1 is explained in vectorizable_load.
1505 The vectorization of S2:
1506 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1507 the function 'vect_get_vec_def_for_operand' is called to
1508 get the relevant vector-def for each operand of S2. For operand x it
1509 returns the vector-def 'vx.0'.
1511 To create the remaining copies of the vector-stmt (VSnew.j), this
1512 function is called to get the relevant vector-def for each operand. It is
1513 obtained from the respective VS1.j stmt, which is recorded in the
1514 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1516 For example, to obtain the vector-def 'vx.1' in order to create the
1517 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1518 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1519 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1520 and return its def ('vx.1').
1521 Overall, to create the above sequence this function will be called 3 times:
1522 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1523 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1524 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1526 tree
1527 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1529 gimple vec_stmt_for_operand;
1530 stmt_vec_info def_stmt_info;
1532 /* Do nothing; can reuse same def. */
1533 if (dt == vect_external_def || dt == vect_constant_def )
1534 return vec_oprnd;
1536 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1537 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1538 gcc_assert (def_stmt_info);
1539 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1540 gcc_assert (vec_stmt_for_operand);
1541 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1542 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1543 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1544 else
1545 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1546 return vec_oprnd;
1550 /* Get vectorized definitions for the operands to create a copy of an original
1551 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1553 static void
1554 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1555 VEC(tree,heap) **vec_oprnds0,
1556 VEC(tree,heap) **vec_oprnds1)
1558 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1560 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1561 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1563 if (vec_oprnds1 && *vec_oprnds1)
1565 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1566 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1567 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1572 /* Get vectorized definitions for OP0 and OP1.
1573 REDUC_INDEX is the index of reduction operand in case of reduction,
1574 and -1 otherwise. */
1576 void
1577 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1578 VEC (tree, heap) **vec_oprnds0,
1579 VEC (tree, heap) **vec_oprnds1,
1580 slp_tree slp_node, int reduc_index)
1582 if (slp_node)
1584 int nops = (op1 == NULL_TREE) ? 1 : 2;
1585 VEC (tree, heap) *ops = VEC_alloc (tree, heap, nops);
1586 VEC (slp_void_p, heap) *vec_defs = VEC_alloc (slp_void_p, heap, nops);
1588 VEC_quick_push (tree, ops, op0);
1589 if (op1)
1590 VEC_quick_push (tree, ops, op1);
1592 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1594 *vec_oprnds0 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1595 if (op1)
1596 *vec_oprnds1 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 1);
1598 VEC_free (tree, heap, ops);
1599 VEC_free (slp_void_p, heap, vec_defs);
1601 else
1603 tree vec_oprnd;
1605 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1606 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1607 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1609 if (op1)
1611 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1612 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1613 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1619 /* Function vect_finish_stmt_generation.
1621 Insert a new stmt. */
1623 void
1624 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1625 gimple_stmt_iterator *gsi)
1627 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1628 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1629 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1631 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1633 if (!gsi_end_p (*gsi)
1634 && gimple_has_mem_ops (vec_stmt))
1636 gimple at_stmt = gsi_stmt (*gsi);
1637 tree vuse = gimple_vuse (at_stmt);
1638 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1640 tree vdef = gimple_vdef (at_stmt);
1641 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1642 /* If we have an SSA vuse and insert a store, update virtual
1643 SSA form to avoid triggering the renamer. Do so only
1644 if we can easily see all uses - which is what almost always
1645 happens with the way vectorized stmts are inserted. */
1646 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1647 && ((is_gimple_assign (vec_stmt)
1648 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1649 || (is_gimple_call (vec_stmt)
1650 && !(gimple_call_flags (vec_stmt)
1651 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1653 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1654 gimple_set_vdef (vec_stmt, new_vdef);
1655 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1659 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1661 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1662 bb_vinfo));
1664 if (dump_enabled_p ())
1666 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1667 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1670 gimple_set_location (vec_stmt, gimple_location (stmt));
1673 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1674 a function declaration if the target has a vectorized version
1675 of the function, or NULL_TREE if the function cannot be vectorized. */
1677 tree
1678 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1680 tree fndecl = gimple_call_fndecl (call);
1682 /* We only handle functions that do not read or clobber memory -- i.e.
1683 const or novops ones. */
1684 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1685 return NULL_TREE;
1687 if (!fndecl
1688 || TREE_CODE (fndecl) != FUNCTION_DECL
1689 || !DECL_BUILT_IN (fndecl))
1690 return NULL_TREE;
1692 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1693 vectype_in);
1696 /* Function vectorizable_call.
1698 Check if STMT performs a function call that can be vectorized.
1699 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1700 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1701 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1703 static bool
1704 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1705 slp_tree slp_node)
1707 tree vec_dest;
1708 tree scalar_dest;
1709 tree op, type;
1710 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1711 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1712 tree vectype_out, vectype_in;
1713 int nunits_in;
1714 int nunits_out;
1715 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1716 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1717 tree fndecl, new_temp, def, rhs_type;
1718 gimple def_stmt;
1719 enum vect_def_type dt[3]
1720 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1721 gimple new_stmt = NULL;
1722 int ncopies, j;
1723 VEC(tree, heap) *vargs = NULL;
1724 enum { NARROW, NONE, WIDEN } modifier;
1725 size_t i, nargs;
1726 tree lhs;
1728 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1729 return false;
1731 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1732 return false;
1734 /* Is STMT a vectorizable call? */
1735 if (!is_gimple_call (stmt))
1736 return false;
1738 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1739 return false;
1741 if (stmt_can_throw_internal (stmt))
1742 return false;
1744 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1746 /* Process function arguments. */
1747 rhs_type = NULL_TREE;
1748 vectype_in = NULL_TREE;
1749 nargs = gimple_call_num_args (stmt);
1751 /* Bail out if the function has more than three arguments; we do not have
1752 interesting builtin functions to vectorize with more than two arguments
1753 except for fma. Zero arguments is not supported either. */
1754 if (nargs == 0 || nargs > 3)
1755 return false;
1757 for (i = 0; i < nargs; i++)
1759 tree opvectype;
1761 op = gimple_call_arg (stmt, i);
1763 /* We can only handle calls with arguments of the same type. */
1764 if (rhs_type
1765 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1767 if (dump_enabled_p ())
1768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1769 "argument types differ.");
1770 return false;
1772 if (!rhs_type)
1773 rhs_type = TREE_TYPE (op);
1775 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1776 &def_stmt, &def, &dt[i], &opvectype))
1778 if (dump_enabled_p ())
1779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1780 "use not simple.");
1781 return false;
1784 if (!vectype_in)
1785 vectype_in = opvectype;
1786 else if (opvectype
1787 && opvectype != vectype_in)
1789 if (dump_enabled_p ())
1790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1791 "argument vector types differ.");
1792 return false;
1795 /* If all arguments are external or constant defs use a vector type with
1796 the same size as the output vector type. */
1797 if (!vectype_in)
1798 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1799 if (vec_stmt)
1800 gcc_assert (vectype_in);
1801 if (!vectype_in)
1803 if (dump_enabled_p ())
1805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1806 "no vectype for scalar type ");
1807 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
1810 return false;
1813 /* FORNOW */
1814 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1815 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1816 if (nunits_in == nunits_out / 2)
1817 modifier = NARROW;
1818 else if (nunits_out == nunits_in)
1819 modifier = NONE;
1820 else if (nunits_out == nunits_in / 2)
1821 modifier = WIDEN;
1822 else
1823 return false;
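/* E.g. a call on V4SI arguments producing a V8HI result (nunits_in == 4,
   nunits_out == 8) is a NARROW case: two vectors per argument are consumed
   for each vector of results. The opposite ratio is a WIDEN case.  */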
1825 /* For now, we only vectorize functions if a target specific builtin
1826 is available. TODO -- in some cases, it might be profitable to
1827 insert the calls for pieces of the vector, in order to be able
1828 to vectorize other operations in the loop. */
1829 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1830 if (fndecl == NULL_TREE)
1832 if (dump_enabled_p ())
1833 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1834 "function is not vectorizable.");
1836 return false;
1839 gcc_assert (!gimple_vuse (stmt));
1841 if (slp_node || PURE_SLP_STMT (stmt_info))
1842 ncopies = 1;
1843 else if (modifier == NARROW)
1844 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1845 else
1846 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
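/* E.g. with a vectorization factor of 8 and nunits_in == 4, ncopies == 2.  */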
1848 /* Sanity check: make sure that at least one copy of the vectorized stmt
1849 needs to be generated. */
1850 gcc_assert (ncopies >= 1);
1852 if (!vec_stmt) /* transformation not required. */
1854 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1855 if (dump_enabled_p ())
1856 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ===");
1857 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
1858 return true;
1861 /** Transform. **/
1863 if (dump_enabled_p ())
1864 dump_printf_loc (MSG_NOTE, vect_location, "transform call.");
1866 /* Handle def. */
1867 scalar_dest = gimple_call_lhs (stmt);
1868 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1870 prev_stmt_info = NULL;
1871 switch (modifier)
1873 case NONE:
1874 for (j = 0; j < ncopies; ++j)
1876 /* Build argument list for the vectorized call. */
1877 if (j == 0)
1878 vargs = VEC_alloc (tree, heap, nargs);
1879 else
1880 VEC_truncate (tree, vargs, 0);
1882 if (slp_node)
1884 VEC (slp_void_p, heap) *vec_defs
1885 = VEC_alloc (slp_void_p, heap, nargs);
1886 VEC (tree, heap) *vec_oprnds0;
1888 for (i = 0; i < nargs; i++)
1889 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1890 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1891 vec_oprnds0
1892 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1894 /* Arguments are ready. Create the new vector stmt. */
1895 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_oprnd0)
1897 size_t k;
1898 for (k = 0; k < nargs; k++)
1900 VEC (tree, heap) *vec_oprndsk
1901 = (VEC (tree, heap) *)
1902 VEC_index (slp_void_p, vec_defs, k);
1903 VEC_replace (tree, vargs, k,
1904 VEC_index (tree, vec_oprndsk, i));
1906 new_stmt = gimple_build_call_vec (fndecl, vargs);
1907 new_temp = make_ssa_name (vec_dest, new_stmt);
1908 gimple_call_set_lhs (new_stmt, new_temp);
1909 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1910 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1911 new_stmt);
1914 for (i = 0; i < nargs; i++)
1916 VEC (tree, heap) *vec_oprndsi
1917 = (VEC (tree, heap) *)
1918 VEC_index (slp_void_p, vec_defs, i);
1919 VEC_free (tree, heap, vec_oprndsi);
1921 VEC_free (slp_void_p, heap, vec_defs);
1922 continue;
1925 for (i = 0; i < nargs; i++)
1927 op = gimple_call_arg (stmt, i);
1928 if (j == 0)
1929 vec_oprnd0
1930 = vect_get_vec_def_for_operand (op, stmt, NULL);
1931 else
1933 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1934 vec_oprnd0
1935 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1938 VEC_quick_push (tree, vargs, vec_oprnd0);
1941 new_stmt = gimple_build_call_vec (fndecl, vargs);
1942 new_temp = make_ssa_name (vec_dest, new_stmt);
1943 gimple_call_set_lhs (new_stmt, new_temp);
1944 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1946 if (j == 0)
1947 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1948 else
1949 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1951 prev_stmt_info = vinfo_for_stmt (new_stmt);
1954 break;
1956 case NARROW:
1957 for (j = 0; j < ncopies; ++j)
1959 /* Build argument list for the vectorized call. */
1960 if (j == 0)
1961 vargs = VEC_alloc (tree, heap, nargs * 2);
1962 else
1963 VEC_truncate (tree, vargs, 0);
1965 if (slp_node)
1967 VEC (slp_void_p, heap) *vec_defs
1968 = VEC_alloc (slp_void_p, heap, nargs);
1969 VEC (tree, heap) *vec_oprnds0;
1971 for (i = 0; i < nargs; i++)
1972 VEC_quick_push (tree, vargs, gimple_call_arg (stmt, i));
1973 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1974 vec_oprnds0
1975 = (VEC (tree, heap) *) VEC_index (slp_void_p, vec_defs, 0);
1977 /* Arguments are ready. Create the new vector stmt. */
1978 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vec_oprnd0);
1979 i += 2)
1981 size_t k;
1982 VEC_truncate (tree, vargs, 0);
1983 for (k = 0; k < nargs; k++)
1985 VEC (tree, heap) *vec_oprndsk
1986 = (VEC (tree, heap) *)
1987 VEC_index (slp_void_p, vec_defs, k);
1988 VEC_quick_push (tree, vargs,
1989 VEC_index (tree, vec_oprndsk, i));
1990 VEC_quick_push (tree, vargs,
1991 VEC_index (tree, vec_oprndsk, i + 1));
1993 new_stmt = gimple_build_call_vec (fndecl, vargs);
1994 new_temp = make_ssa_name (vec_dest, new_stmt);
1995 gimple_call_set_lhs (new_stmt, new_temp);
1996 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1997 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
1998 new_stmt);
2001 for (i = 0; i < nargs; i++)
2003 VEC (tree, heap) *vec_oprndsi
2004 = (VEC (tree, heap) *)
2005 VEC_index (slp_void_p, vec_defs, i);
2006 VEC_free (tree, heap, vec_oprndsi);
2008 VEC_free (slp_void_p, heap, vec_defs);
2009 continue;
2012 for (i = 0; i < nargs; i++)
2014 op = gimple_call_arg (stmt, i);
2015 if (j == 0)
2017 vec_oprnd0
2018 = vect_get_vec_def_for_operand (op, stmt, NULL);
2019 vec_oprnd1
2020 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2022 else
2024 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2025 vec_oprnd0
2026 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2027 vec_oprnd1
2028 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2031 VEC_quick_push (tree, vargs, vec_oprnd0);
2032 VEC_quick_push (tree, vargs, vec_oprnd1);
2035 new_stmt = gimple_build_call_vec (fndecl, vargs);
2036 new_temp = make_ssa_name (vec_dest, new_stmt);
2037 gimple_call_set_lhs (new_stmt, new_temp);
2038 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2040 if (j == 0)
2041 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2042 else
2043 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2045 prev_stmt_info = vinfo_for_stmt (new_stmt);
2048 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2050 break;
2052 case WIDEN:
2053 /* No current target implements this case. */
2054 return false;
2057 VEC_free (tree, heap, vargs);
2059 /* Update the exception handling table with the vector stmt if necessary. */
2060 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2061 gimple_purge_dead_eh_edges (gimple_bb (stmt));
2063 /* The call in STMT might prevent it from being removed in dce.
2064 However, we cannot remove it here, due to the way the ssa name
2065 it defines is mapped to the new definition. So just replace the
2066 rhs of the statement with something harmless. */
2068 if (slp_node)
2069 return true;
2071 type = TREE_TYPE (scalar_dest);
2072 if (is_pattern_stmt_p (stmt_info))
2073 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2074 else
2075 lhs = gimple_call_lhs (stmt);
2076 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2077 set_vinfo_for_stmt (new_stmt, stmt_info);
2078 set_vinfo_for_stmt (stmt, NULL);
2079 STMT_VINFO_STMT (stmt_info) = new_stmt;
2080 gsi_replace (gsi, new_stmt, false);
2081 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
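/* Illustrative sketch of the replacement above, using hypothetical GIMPLE:
     before:  x_5 = __builtin_foo (a_3);
     after:   x_5 = 0;
   The vectorized stmts already compute the actual values; the scalar stmt
   is kept only as a harmless assignment so that x_5 retains a definition
   until later passes clean it up.  */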
2083 return true;
2087 /* Function vect_gen_widened_results_half
2089 Create a vector stmt whose code, number of arguments, and result
2090 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
2091 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
2092 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2093 needs to be created (DECL is a function-decl of a target-builtin).
2094 STMT is the original scalar stmt that we are vectorizing. */
2096 static gimple
2097 vect_gen_widened_results_half (enum tree_code code,
2098 tree decl,
2099 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2100 tree vec_dest, gimple_stmt_iterator *gsi,
2101 gimple stmt)
2103 gimple new_stmt;
2104 tree new_temp;
2106 /* Generate half of the widened result: */
2107 if (code == CALL_EXPR)
2109 /* Target specific support */
2110 if (op_type == binary_op)
2111 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2112 else
2113 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2114 new_temp = make_ssa_name (vec_dest, new_stmt);
2115 gimple_call_set_lhs (new_stmt, new_temp);
2117 else
2119 /* Generic support */
2120 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2121 if (op_type != binary_op)
2122 vec_oprnd1 = NULL;
2123 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2124 vec_oprnd1);
2125 new_temp = make_ssa_name (vec_dest, new_stmt);
2126 gimple_assign_set_lhs (new_stmt, new_temp);
2128 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2130 return new_stmt;
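/* Illustrative example (assumed types and codes): when widening V8HI
   operands to V4SI results, this helper is invoked twice per input
   vector, once with the "lo" code and once with the "hi" code returned
   by supportable_widening_operation (e.g. VEC_UNPACK_LO_EXPR and
   VEC_UNPACK_HI_EXPR), each call producing one V4SI half of the widened
   result.  */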
2134 /* Get vectorized definitions for loop-based vectorization. For the first
2135 operand we call vect_get_vec_def_for_operand() (with OPRND containing the
2136 scalar operand), and for the rest we get a copy with
2137 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2138 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2139 The vectors are collected into VEC_OPRNDS. */
2141 static void
2142 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2143 VEC (tree, heap) **vec_oprnds, int multi_step_cvt)
2145 tree vec_oprnd;
2147 /* Get first vector operand. */
2148 /* All the vector operands except the very first one (that is scalar oprnd)
2149 are stmt copies. */
2150 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2151 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2152 else
2153 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2155 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2157 /* Get second vector operand. */
2158 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2159 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2161 *oprnd = vec_oprnd;
2163 /* For conversion in multiple steps, continue to get operands
2164 recursively. */
2165 if (multi_step_cvt)
2166 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
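/* Worked example, not part of the original source: the NARROW path of
   vectorizable_conversion calls this with MULTI_STEP_CVT set to
   vect_pow2 (multi_step_cvt) - 1.  For a two-step demotion
   (multi_step_cvt == 1) that value is 1, so the function runs twice and
   collects 2 + 2 = 4 vector defs in VEC_OPRNDS, i.e. the four input
   vectors needed to produce one final narrowed vector.  */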
2170 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2171 For multi-step conversions store the resulting vectors and call the function
2172 recursively. */
2174 static void
2175 vect_create_vectorized_demotion_stmts (VEC (tree, heap) **vec_oprnds,
2176 int multi_step_cvt, gimple stmt,
2177 VEC (tree, heap) *vec_dsts,
2178 gimple_stmt_iterator *gsi,
2179 slp_tree slp_node, enum tree_code code,
2180 stmt_vec_info *prev_stmt_info)
2182 unsigned int i;
2183 tree vop0, vop1, new_tmp, vec_dest;
2184 gimple new_stmt;
2185 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2187 vec_dest = VEC_pop (tree, vec_dsts);
2189 for (i = 0; i < VEC_length (tree, *vec_oprnds); i += 2)
2191 /* Create demotion operation. */
2192 vop0 = VEC_index (tree, *vec_oprnds, i);
2193 vop1 = VEC_index (tree, *vec_oprnds, i + 1);
2194 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2195 new_tmp = make_ssa_name (vec_dest, new_stmt);
2196 gimple_assign_set_lhs (new_stmt, new_tmp);
2197 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2199 if (multi_step_cvt)
2200 /* Store the resulting vector for next recursive call. */
2201 VEC_replace (tree, *vec_oprnds, i/2, new_tmp);
2202 else
2204 /* This is the last step of the conversion sequence. Store the
2205 vectors in SLP_NODE or in vector info of the scalar statement
2206 (or in STMT_VINFO_RELATED_STMT chain). */
2207 if (slp_node)
2208 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
2209 else
2211 if (!*prev_stmt_info)
2212 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2213 else
2214 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2216 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2221 /* For multi-step demotion operations we first generate demotion operations
2222 from the source type to the intermediate types, and then combine the
2223 results (stored in VEC_OPRNDS) in a demotion operation to the destination
2224 type. */
2225 if (multi_step_cvt)
2227 /* At each level of recursion we have half of the operands we had at the
2228 previous level. */
2229 VEC_truncate (tree, *vec_oprnds, (i+1)/2);
2230 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2231 stmt, vec_dsts, gsi, slp_node,
2232 VEC_PACK_TRUNC_EXPR,
2233 prev_stmt_info);
2236 VEC_quick_push (tree, vec_dsts, vec_dest);
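/* Illustrative single-step example (assumed types): demoting int to short
   with V4SI operands and a V8HI destination packs the operands pairwise,
   so two defs vx0 and vx1 yield roughly
     vy0 = VEC_PACK_TRUNC_EXPR <vx0, vx1>;
   For a multi-step conversion the intermediate results are stored back
   into VEC_OPRNDS and the function recurses with VEC_PACK_TRUNC_EXPR
   until the destination type in VEC_DSTS is reached.  */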
2240 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2241 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2242 the resulting vectors and call the function recursively. */
2244 static void
2245 vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
2246 VEC (tree, heap) **vec_oprnds1,
2247 gimple stmt, tree vec_dest,
2248 gimple_stmt_iterator *gsi,
2249 enum tree_code code1,
2250 enum tree_code code2, tree decl1,
2251 tree decl2, int op_type)
2253 int i;
2254 tree vop0, vop1, new_tmp1, new_tmp2;
2255 gimple new_stmt1, new_stmt2;
2256 VEC (tree, heap) *vec_tmp = NULL;
2258 vec_tmp = VEC_alloc (tree, heap, VEC_length (tree, *vec_oprnds0) * 2);
2259 FOR_EACH_VEC_ELT (tree, *vec_oprnds0, i, vop0)
2261 if (op_type == binary_op)
2262 vop1 = VEC_index (tree, *vec_oprnds1, i);
2263 else
2264 vop1 = NULL_TREE;
2266 /* Generate the two halves of promotion operation. */
2267 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2268 op_type, vec_dest, gsi, stmt);
2269 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2270 op_type, vec_dest, gsi, stmt);
2271 if (is_gimple_call (new_stmt1))
2273 new_tmp1 = gimple_call_lhs (new_stmt1);
2274 new_tmp2 = gimple_call_lhs (new_stmt2);
2276 else
2278 new_tmp1 = gimple_assign_lhs (new_stmt1);
2279 new_tmp2 = gimple_assign_lhs (new_stmt2);
2282 /* Store the results for the next step. */
2283 VEC_quick_push (tree, vec_tmp, new_tmp1);
2284 VEC_quick_push (tree, vec_tmp, new_tmp2);
2287 VEC_free (tree, heap, *vec_oprnds0);
2288 *vec_oprnds0 = vec_tmp;
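/* Illustrative example (assumed types): promoting short to int with V8HI
   operands and V4SI destinations, each operand vx0 yields two halves,
   roughly
     vlo = <lo code> <vx0>;
     vhi = <hi code> <vx0>;
   where the lo/hi codes come from supportable_widening_operation
   (e.g. VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR).  On return *VEC_OPRNDS0
   therefore holds twice as many defs as on entry.  */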
2292 /* Check if STMT performs a conversion operation, that can be vectorized.
2293 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2294 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2295 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2297 static bool
2298 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2299 gimple *vec_stmt, slp_tree slp_node)
2301 tree vec_dest;
2302 tree scalar_dest;
2303 tree op0, op1 = NULL_TREE;
2304 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2305 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2306 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2307 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2308 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2309 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2310 tree new_temp;
2311 tree def;
2312 gimple def_stmt;
2313 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2314 gimple new_stmt = NULL;
2315 stmt_vec_info prev_stmt_info;
2316 int nunits_in;
2317 int nunits_out;
2318 tree vectype_out, vectype_in;
2319 int ncopies, i, j;
2320 tree lhs_type, rhs_type;
2321 enum { NARROW, NONE, WIDEN } modifier;
2322 VEC (tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
2323 tree vop0;
2324 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2325 int multi_step_cvt = 0;
2326 VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL;
2327 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2328 int op_type;
2329 enum machine_mode rhs_mode;
2330 unsigned short fltsz;
2332 /* Is STMT a vectorizable conversion? */
2334 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2335 return false;
2337 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2338 return false;
2340 if (!is_gimple_assign (stmt))
2341 return false;
2343 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2344 return false;
2346 code = gimple_assign_rhs_code (stmt);
2347 if (!CONVERT_EXPR_CODE_P (code)
2348 && code != FIX_TRUNC_EXPR
2349 && code != FLOAT_EXPR
2350 && code != WIDEN_MULT_EXPR
2351 && code != WIDEN_LSHIFT_EXPR)
2352 return false;
2354 op_type = TREE_CODE_LENGTH (code);
2356 /* Check types of lhs and rhs. */
2357 scalar_dest = gimple_assign_lhs (stmt);
2358 lhs_type = TREE_TYPE (scalar_dest);
2359 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2361 op0 = gimple_assign_rhs1 (stmt);
2362 rhs_type = TREE_TYPE (op0);
2364 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2365 && !((INTEGRAL_TYPE_P (lhs_type)
2366 && INTEGRAL_TYPE_P (rhs_type))
2367 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2368 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2369 return false;
2371 if ((INTEGRAL_TYPE_P (lhs_type)
2372 && (TYPE_PRECISION (lhs_type)
2373 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2374 || (INTEGRAL_TYPE_P (rhs_type)
2375 && (TYPE_PRECISION (rhs_type)
2376 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2380 "type conversion to/from bit-precision unsupported.");
2381 return false;
2384 /* Check the operands of the operation. */
2385 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2386 &def_stmt, &def, &dt[0], &vectype_in))
2388 if (dump_enabled_p ())
2389 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2390 "use not simple.");
2391 return false;
2393 if (op_type == binary_op)
2395 bool ok;
2397 op1 = gimple_assign_rhs2 (stmt);
2398 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2399 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2400 OP1. */
2401 if (CONSTANT_CLASS_P (op0))
2402 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
2403 &def_stmt, &def, &dt[1], &vectype_in);
2404 else
2405 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
2406 &def, &dt[1]);
2408 if (!ok)
2410 if (dump_enabled_p ())
2411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2412 "use not simple.");
2413 return false;
2417 /* If op0 is an external or constant def, use a vector type of
2418 the same size as the output vector type. */
2419 if (!vectype_in)
2420 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2421 if (vec_stmt)
2422 gcc_assert (vectype_in);
2423 if (!vectype_in)
2425 if (dump_enabled_p ())
2427 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2428 "no vectype for scalar type ");
2429 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2432 return false;
2435 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2436 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2437 if (nunits_in < nunits_out)
2438 modifier = NARROW;
2439 else if (nunits_out == nunits_in)
2440 modifier = NONE;
2441 else
2442 modifier = WIDEN;
2444 /* Multiple types in SLP are handled by creating the appropriate number of
2445 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2446 case of SLP. */
2447 if (slp_node || PURE_SLP_STMT (stmt_info))
2448 ncopies = 1;
2449 else if (modifier == NARROW)
2450 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2451 else
2452 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2454 /* Sanity check: make sure that at least one copy of the vectorized stmt
2455 needs to be generated. */
2456 gcc_assert (ncopies >= 1);
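/* Worked example, not part of the original source: a float-to-double
   conversion with vectype_in V4SF and vectype_out V2DF has
   nunits_in = 4 > nunits_out = 2, so MODIFIER == WIDEN and, with a
   vectorization factor of 8, ncopies = 8 / 4 = 2.  The reverse
   double-to-float conversion has nunits_in = 2 < nunits_out = 4, so
   MODIFIER == NARROW and ncopies = 8 / 4 = 2, computed from nunits_out.  */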
2458 /* Supportable by target? */
2459 switch (modifier)
2461 case NONE:
2462 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2463 return false;
2464 if (supportable_convert_operation (code, vectype_out, vectype_in,
2465 &decl1, &code1))
2466 break;
2467 /* FALLTHRU */
2468 unsupported:
2469 if (dump_enabled_p ())
2470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2471 "conversion not supported by target.");
2472 return false;
2474 case WIDEN:
2475 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2476 &code1, &code2, &multi_step_cvt,
2477 &interm_types))
2479 /* Binary widening operation can only be supported directly by the
2480 architecture. */
2481 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2482 break;
2485 if (code != FLOAT_EXPR
2486 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2487 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2488 goto unsupported;
2490 rhs_mode = TYPE_MODE (rhs_type);
2491 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2492 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2493 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2494 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2496 cvt_type
2497 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2498 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2499 if (cvt_type == NULL_TREE)
2500 goto unsupported;
2502 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2504 if (!supportable_convert_operation (code, vectype_out,
2505 cvt_type, &decl1, &codecvt1))
2506 goto unsupported;
2508 else if (!supportable_widening_operation (code, stmt, vectype_out,
2509 cvt_type, &codecvt1,
2510 &codecvt2, &multi_step_cvt,
2511 &interm_types))
2512 continue;
2513 else
2514 gcc_assert (multi_step_cvt == 0);
2516 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2517 vectype_in, &code1, &code2,
2518 &multi_step_cvt, &interm_types))
2519 break;
2522 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2523 goto unsupported;
2525 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2526 codecvt2 = ERROR_MARK;
2527 else
2529 multi_step_cvt++;
2530 VEC_safe_push (tree, heap, interm_types, cvt_type);
2531 cvt_type = NULL_TREE;
2533 break;
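/* Illustrative walk-through of the loop above (assumed types): for a
   short-to-double FLOAT_EXPR, rhs_mode starts at SImode (twice the size
   of HImode), so CVT_TYPE becomes a 32-bit integer vector type; the
   int-to-double step is checked with supportable_widening_operation (or
   with supportable_convert_operation once rhs_mode reaches the size of
   the lhs mode), and the remaining short-to-int step with
   supportable_widening_operation (NOP_EXPR, ...), giving a two-level
   conversion short -> int -> double.  */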
2535 case NARROW:
2536 gcc_assert (op_type == unary_op);
2537 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2538 &code1, &multi_step_cvt,
2539 &interm_types))
2540 break;
2542 if (code != FIX_TRUNC_EXPR
2543 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2544 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2545 goto unsupported;
2547 rhs_mode = TYPE_MODE (rhs_type);
2548 cvt_type
2549 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2550 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2551 if (cvt_type == NULL_TREE)
2552 goto unsupported;
2553 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2554 &decl1, &codecvt1))
2555 goto unsupported;
2556 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2557 &code1, &multi_step_cvt,
2558 &interm_types))
2559 break;
2560 goto unsupported;
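/* Illustrative walk-through (assumed types): for a double-to-short
   FIX_TRUNC_EXPR, CVT_TYPE is built as a vector of 64-bit integers of the
   same size as VECTYPE_IN (e.g. V2DI for V2DF), the double-to-long step
   is checked with supportable_convert_operation, and the remaining
   long-to-short narrowing is requested from
   supportable_narrowing_operation (NOP_EXPR, ...), possibly as a
   multi-step packing sequence.  */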
2562 default:
2563 gcc_unreachable ();
2566 if (!vec_stmt) /* transformation not required. */
2568 if (dump_enabled_p ())
2569 dump_printf_loc (MSG_NOTE, vect_location,
2570 "=== vectorizable_conversion ===");
2571 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2573 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2574 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2576 else if (modifier == NARROW)
2578 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2579 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2581 else
2583 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2584 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2586 VEC_free (tree, heap, interm_types);
2587 return true;
2590 /** Transform. **/
2591 if (dump_enabled_p ())
2592 dump_printf_loc (MSG_NOTE, vect_location,
2593 "transform conversion. ncopies = %d.", ncopies);
2595 if (op_type == binary_op)
2597 if (CONSTANT_CLASS_P (op0))
2598 op0 = fold_convert (TREE_TYPE (op1), op0);
2599 else if (CONSTANT_CLASS_P (op1))
2600 op1 = fold_convert (TREE_TYPE (op0), op1);
2603 /* In case of multi-step conversion, we first generate conversion operations
2604 to the intermediate types, and then from those types to the final one.
2605 We create vector destinations for the intermediate types (TYPES) received
2606 from supportable_*_operation, and store them in the correct order
2607 for future use in vect_create_vectorized_*_stmts (). */
2608 vec_dsts = VEC_alloc (tree, heap, multi_step_cvt + 1);
2609 vec_dest = vect_create_destination_var (scalar_dest,
2610 (cvt_type && modifier == WIDEN)
2611 ? cvt_type : vectype_out);
2612 VEC_quick_push (tree, vec_dsts, vec_dest);
2614 if (multi_step_cvt)
2616 for (i = VEC_length (tree, interm_types) - 1;
2617 VEC_iterate (tree, interm_types, i, intermediate_type); i--)
2619 vec_dest = vect_create_destination_var (scalar_dest,
2620 intermediate_type);
2621 VEC_quick_push (tree, vec_dsts, vec_dest);
2625 if (cvt_type)
2626 vec_dest = vect_create_destination_var (scalar_dest,
2627 modifier == WIDEN
2628 ? vectype_out : cvt_type);
2630 if (!slp_node)
2632 if (modifier == NONE)
2633 vec_oprnds0 = VEC_alloc (tree, heap, 1);
2634 else if (modifier == WIDEN)
2636 vec_oprnds0 = VEC_alloc (tree, heap,
2637 (multi_step_cvt
2638 ? vect_pow2 (multi_step_cvt) : 1));
2639 if (op_type == binary_op)
2640 vec_oprnds1 = VEC_alloc (tree, heap, 1);
2642 else
2643 vec_oprnds0 = VEC_alloc (tree, heap,
2644 2 * (multi_step_cvt
2645 ? vect_pow2 (multi_step_cvt) : 1));
2647 else if (code == WIDEN_LSHIFT_EXPR)
2648 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
2650 last_oprnd = op0;
2651 prev_stmt_info = NULL;
2652 switch (modifier)
2654 case NONE:
2655 for (j = 0; j < ncopies; j++)
2657 if (j == 0)
2658 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2659 -1);
2660 else
2661 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2663 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2665 /* Arguments are ready, create the new vector stmt. */
2666 if (code1 == CALL_EXPR)
2668 new_stmt = gimple_build_call (decl1, 1, vop0);
2669 new_temp = make_ssa_name (vec_dest, new_stmt);
2670 gimple_call_set_lhs (new_stmt, new_temp);
2672 else
2674 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2675 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2676 vop0, NULL);
2677 new_temp = make_ssa_name (vec_dest, new_stmt);
2678 gimple_assign_set_lhs (new_stmt, new_temp);
2681 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2682 if (slp_node)
2683 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2684 new_stmt);
2687 if (j == 0)
2688 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2689 else
2690 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2691 prev_stmt_info = vinfo_for_stmt (new_stmt);
2693 break;
2695 case WIDEN:
2696 /* In case the vectorization factor (VF) is bigger than the number
2697 of elements that we can fit in a vectype (nunits), we have to
2698 generate more than one vector stmt - i.e - we need to "unroll"
2699 the vector stmt by a factor VF/nunits. */
2700 for (j = 0; j < ncopies; j++)
2702 /* Handle uses. */
2703 if (j == 0)
2705 if (slp_node)
2707 if (code == WIDEN_LSHIFT_EXPR)
2709 unsigned int k;
2711 vec_oprnd1 = op1;
2712 /* Store vec_oprnd1 for every vector stmt to be created
2713 for SLP_NODE. We check during the analysis that all
2714 the shift arguments are the same. */
2715 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2716 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2718 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2719 slp_node, -1);
2721 else
2722 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2723 &vec_oprnds1, slp_node, -1);
2725 else
2727 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2728 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2729 if (op_type == binary_op)
2731 if (code == WIDEN_LSHIFT_EXPR)
2732 vec_oprnd1 = op1;
2733 else
2734 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2735 NULL);
2736 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2740 else
2742 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2743 VEC_truncate (tree, vec_oprnds0, 0);
2744 VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
2745 if (op_type == binary_op)
2747 if (code == WIDEN_LSHIFT_EXPR)
2748 vec_oprnd1 = op1;
2749 else
2750 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2751 vec_oprnd1);
2752 VEC_truncate (tree, vec_oprnds1, 0);
2753 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
2757 /* Arguments are ready. Create the new vector stmts. */
2758 for (i = multi_step_cvt; i >= 0; i--)
2760 tree this_dest = VEC_index (tree, vec_dsts, i);
2761 enum tree_code c1 = code1, c2 = code2;
2762 if (i == 0 && codecvt2 != ERROR_MARK)
2764 c1 = codecvt1;
2765 c2 = codecvt2;
2767 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2768 &vec_oprnds1,
2769 stmt, this_dest, gsi,
2770 c1, c2, decl1, decl2,
2771 op_type);
2774 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2776 if (cvt_type)
2778 if (codecvt1 == CALL_EXPR)
2780 new_stmt = gimple_build_call (decl1, 1, vop0);
2781 new_temp = make_ssa_name (vec_dest, new_stmt);
2782 gimple_call_set_lhs (new_stmt, new_temp);
2784 else
2786 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2787 new_temp = make_ssa_name (vec_dest, NULL);
2788 new_stmt = gimple_build_assign_with_ops (codecvt1,
2789 new_temp,
2790 vop0, NULL);
2793 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2795 else
2796 new_stmt = SSA_NAME_DEF_STMT (vop0);
2798 if (slp_node)
2799 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
2800 new_stmt);
2801 else
2803 if (!prev_stmt_info)
2804 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2805 else
2806 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2807 prev_stmt_info = vinfo_for_stmt (new_stmt);
2812 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2813 break;
2815 case NARROW:
2816 /* In case the vectorization factor (VF) is bigger than the number
2817 of elements that we can fit in a vectype (nunits), we have to
2818 generate more than one vector stmt, i.e., we need to "unroll"
2819 the vector stmt by a factor VF/nunits. */
2820 for (j = 0; j < ncopies; j++)
2822 /* Handle uses. */
2823 if (slp_node)
2824 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2825 slp_node, -1);
2826 else
2828 VEC_truncate (tree, vec_oprnds0, 0);
2829 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2830 vect_pow2 (multi_step_cvt) - 1);
2833 /* Arguments are ready. Create the new vector stmts. */
2834 if (cvt_type)
2835 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
2837 if (codecvt1 == CALL_EXPR)
2839 new_stmt = gimple_build_call (decl1, 1, vop0);
2840 new_temp = make_ssa_name (vec_dest, new_stmt);
2841 gimple_call_set_lhs (new_stmt, new_temp);
2843 else
2845 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2846 new_temp = make_ssa_name (vec_dest, NULL);
2847 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2848 vop0, NULL);
2851 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2852 VEC_replace (tree, vec_oprnds0, i, new_temp);
2855 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2856 stmt, vec_dsts, gsi,
2857 slp_node, code1,
2858 &prev_stmt_info);
2861 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2862 break;
2865 VEC_free (tree, heap, vec_oprnds0);
2866 VEC_free (tree, heap, vec_oprnds1);
2867 VEC_free (tree, heap, vec_dsts);
2868 VEC_free (tree, heap, interm_types);
2870 return true;
2874 /* Function vectorizable_assignment.
2876 Check if STMT performs an assignment (copy) that can be vectorized.
2877 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2878 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2879 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2881 static bool
2882 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2883 gimple *vec_stmt, slp_tree slp_node)
2885 tree vec_dest;
2886 tree scalar_dest;
2887 tree op;
2888 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2889 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2890 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2891 tree new_temp;
2892 tree def;
2893 gimple def_stmt;
2894 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2895 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2896 int ncopies;
2897 int i, j;
2898 VEC(tree,heap) *vec_oprnds = NULL;
2899 tree vop;
2900 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2901 gimple new_stmt = NULL;
2902 stmt_vec_info prev_stmt_info = NULL;
2903 enum tree_code code;
2904 tree vectype_in;
2906 /* Multiple types in SLP are handled by creating the appropriate number of
2907 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2908 case of SLP. */
2909 if (slp_node || PURE_SLP_STMT (stmt_info))
2910 ncopies = 1;
2911 else
2912 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2914 gcc_assert (ncopies >= 1);
2916 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2917 return false;
2919 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2920 return false;
2922 /* Is vectorizable assignment? */
2923 if (!is_gimple_assign (stmt))
2924 return false;
2926 scalar_dest = gimple_assign_lhs (stmt);
2927 if (TREE_CODE (scalar_dest) != SSA_NAME)
2928 return false;
2930 code = gimple_assign_rhs_code (stmt);
2931 if (gimple_assign_single_p (stmt)
2932 || code == PAREN_EXPR
2933 || CONVERT_EXPR_CODE_P (code))
2934 op = gimple_assign_rhs1 (stmt);
2935 else
2936 return false;
2938 if (code == VIEW_CONVERT_EXPR)
2939 op = TREE_OPERAND (op, 0);
2941 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2942 &def_stmt, &def, &dt[0], &vectype_in))
2944 if (dump_enabled_p ())
2945 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2946 "use not simple.");
2947 return false;
2950 /* We can handle NOP_EXPR conversions that do not change the number
2951 of elements or the vector size. */
2952 if ((CONVERT_EXPR_CODE_P (code)
2953 || code == VIEW_CONVERT_EXPR)
2954 && (!vectype_in
2955 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2956 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2957 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2958 return false;
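/* For example (illustrative): a cast between int and unsigned int
   vectorized with V4SI on both sides keeps the element count and vector
   size and is accepted here, while an int-to-short cast changes the
   element size and is rejected, as such conversions are handled by
   vectorizable_conversion instead.  */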
2960 /* We do not handle bit-precision changes. */
2961 if ((CONVERT_EXPR_CODE_P (code)
2962 || code == VIEW_CONVERT_EXPR)
2963 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2964 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2965 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2966 || ((TYPE_PRECISION (TREE_TYPE (op))
2967 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2968 /* But a conversion that does not change the bit-pattern is ok. */
2969 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2970 > TYPE_PRECISION (TREE_TYPE (op)))
2971 && TYPE_UNSIGNED (TREE_TYPE (op))))
2973 if (dump_enabled_p ())
2974 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2975 "type conversion to/from bit-precision "
2976 "unsupported.");
2977 return false;
2980 if (!vec_stmt) /* transformation not required. */
2982 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2983 if (dump_enabled_p ())
2984 dump_printf_loc (MSG_NOTE, vect_location,
2985 "=== vectorizable_assignment ===");
2986 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2987 return true;
2990 /** Transform. **/
2991 if (dump_enabled_p ())
2992 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.");
2994 /* Handle def. */
2995 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2997 /* Handle use. */
2998 for (j = 0; j < ncopies; j++)
3000 /* Handle uses. */
3001 if (j == 0)
3002 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
3003 else
3004 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
3006 /* Arguments are ready. Create the new vector stmt. */
3007 FOR_EACH_VEC_ELT (tree, vec_oprnds, i, vop)
3009 if (CONVERT_EXPR_CODE_P (code)
3010 || code == VIEW_CONVERT_EXPR)
3011 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
3012 new_stmt = gimple_build_assign (vec_dest, vop);
3013 new_temp = make_ssa_name (vec_dest, new_stmt);
3014 gimple_assign_set_lhs (new_stmt, new_temp);
3015 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3016 if (slp_node)
3017 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3020 if (slp_node)
3021 continue;
3023 if (j == 0)
3024 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3025 else
3026 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3028 prev_stmt_info = vinfo_for_stmt (new_stmt);
3031 VEC_free (tree, heap, vec_oprnds);
3032 return true;
3036 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
3037 either as a shift by a scalar or by a vector. */
3039 bool
3040 vect_supportable_shift (enum tree_code code, tree scalar_type)
3043 enum machine_mode vec_mode;
3044 optab optab;
3045 int icode;
3046 tree vectype;
3048 vectype = get_vectype_for_scalar_type (scalar_type);
3049 if (!vectype)
3050 return false;
3052 optab = optab_for_tree_code (code, vectype, optab_scalar);
3053 if (!optab
3054 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
3056 optab = optab_for_tree_code (code, vectype, optab_vector);
3057 if (!optab
3058 || (optab_handler (optab, TYPE_MODE (vectype))
3059 == CODE_FOR_nothing))
3060 return false;
3063 vec_mode = TYPE_MODE (vectype);
3064 icode = (int) optab_handler (optab, vec_mode);
3065 if (icode == CODE_FOR_nothing)
3066 return false;
3068 return true;
3072 /* Function vectorizable_shift.
3074 Check if STMT performs a shift operation that can be vectorized.
3075 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3076 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3077 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3079 static bool
3080 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3081 gimple *vec_stmt, slp_tree slp_node)
3083 tree vec_dest;
3084 tree scalar_dest;
3085 tree op0, op1 = NULL;
3086 tree vec_oprnd1 = NULL_TREE;
3087 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3088 tree vectype;
3089 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3090 enum tree_code code;
3091 enum machine_mode vec_mode;
3092 tree new_temp;
3093 optab optab;
3094 int icode;
3095 enum machine_mode optab_op2_mode;
3096 tree def;
3097 gimple def_stmt;
3098 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3099 gimple new_stmt = NULL;
3100 stmt_vec_info prev_stmt_info;
3101 int nunits_in;
3102 int nunits_out;
3103 tree vectype_out;
3104 tree op1_vectype;
3105 int ncopies;
3106 int j, i;
3107 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3108 tree vop0, vop1;
3109 unsigned int k;
3110 bool scalar_shift_arg = true;
3111 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3112 int vf;
3114 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3115 return false;
3117 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3118 return false;
3120 /* Is STMT a vectorizable binary/unary operation? */
3121 if (!is_gimple_assign (stmt))
3122 return false;
3124 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3125 return false;
3127 code = gimple_assign_rhs_code (stmt);
3129 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3130 || code == RROTATE_EXPR))
3131 return false;
3133 scalar_dest = gimple_assign_lhs (stmt);
3134 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3135 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3136 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3138 if (dump_enabled_p ())
3139 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3140 "bit-precision shifts not supported.");
3141 return false;
3144 op0 = gimple_assign_rhs1 (stmt);
3145 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3146 &def_stmt, &def, &dt[0], &vectype))
3148 if (dump_enabled_p ())
3149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3150 "use not simple.");
3151 return false;
3153 /* If op0 is an external or constant def, use a vector type with
3154 the same size as the output vector type. */
3155 if (!vectype)
3156 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3157 if (vec_stmt)
3158 gcc_assert (vectype);
3159 if (!vectype)
3161 if (dump_enabled_p ())
3162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3163 "no vectype for scalar type ");
3164 return false;
3167 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3168 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3169 if (nunits_out != nunits_in)
3170 return false;
3172 op1 = gimple_assign_rhs2 (stmt);
3173 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3174 &def, &dt[1], &op1_vectype))
3176 if (dump_enabled_p ())
3177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3178 "use not simple.");
3179 return false;
3182 if (loop_vinfo)
3183 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3184 else
3185 vf = 1;
3187 /* Multiple types in SLP are handled by creating the appropriate number of
3188 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3189 case of SLP. */
3190 if (slp_node || PURE_SLP_STMT (stmt_info))
3191 ncopies = 1;
3192 else
3193 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3195 gcc_assert (ncopies >= 1);
3197 /* Determine whether the shift amount is a vector or a scalar. If the
3198 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3200 if (dt[1] == vect_internal_def && !slp_node)
3201 scalar_shift_arg = false;
3202 else if (dt[1] == vect_constant_def
3203 || dt[1] == vect_external_def
3204 || dt[1] == vect_internal_def)
3206 /* In SLP, we need to check whether the shift count is the same in all
3207 the stmts of the node; in loops, if it is a constant or invariant,
3208 it is always a scalar shift. */
3209 if (slp_node)
3211 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3212 gimple slpstmt;
3214 FOR_EACH_VEC_ELT (gimple, stmts, k, slpstmt)
3215 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3216 scalar_shift_arg = false;
3219 else
3221 if (dump_enabled_p ())
3222 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3223 "operand mode requires invariant argument.");
3224 return false;
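/* Illustrative examples of the classification above: in a loop,
   x[i] << 3 and x[i] << n (with n loop-invariant) use a scalar shift
   amount, whereas x[i] << y[i] defines the shift amount inside the loop
   (vect_internal_def) and therefore needs the vector/vector shift optab.
   For SLP, a constant count is treated as scalar only if all stmts in
   the node shift by the same amount.  */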
3227 /* Vector shifted by vector. */
3228 if (!scalar_shift_arg)
3230 optab = optab_for_tree_code (code, vectype, optab_vector);
3231 if (dump_enabled_p ())
3232 dump_printf_loc (MSG_NOTE, vect_location,
3233 "vector/vector shift/rotate found.");
3235 if (!op1_vectype)
3236 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3237 if (op1_vectype == NULL_TREE
3238 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3240 if (dump_enabled_p ())
3241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3242 "unusable type for last operand in"
3243 " vector/vector shift/rotate.");
3244 return false;
3247 /* See if the machine has a vector-shifted-by-scalar insn, and if not,
3248 then see if it has a vector-shifted-by-vector insn. */
3249 else
3251 optab = optab_for_tree_code (code, vectype, optab_scalar);
3252 if (optab
3253 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3255 if (dump_enabled_p ())
3256 dump_printf_loc (MSG_NOTE, vect_location,
3257 "vector/scalar shift/rotate found.");
3259 else
3261 optab = optab_for_tree_code (code, vectype, optab_vector);
3262 if (optab
3263 && (optab_handler (optab, TYPE_MODE (vectype))
3264 != CODE_FOR_nothing))
3266 scalar_shift_arg = false;
3268 if (dump_enabled_p ())
3269 dump_printf_loc (MSG_NOTE, vect_location,
3270 "vector/vector shift/rotate found.");
3272 /* Unlike the other binary operators, shifts/rotates take an int rhs
3273 instead of an rhs of the same type as the lhs, so make sure the
3274 scalar is of the right type if we are dealing with vectors of
3275 long long/long/short/char. */
3276 if (dt[1] == vect_constant_def)
3277 op1 = fold_convert (TREE_TYPE (vectype), op1);
3278 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3279 TREE_TYPE (op1)))
3281 if (slp_node
3282 && TYPE_MODE (TREE_TYPE (vectype))
3283 != TYPE_MODE (TREE_TYPE (op1)))
3285 if (dump_enabled_p ())
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3287 "unusable type for last operand in"
3288 " vector/vector shift/rotate.");
3289 return false;
3291 if (vec_stmt && !slp_node)
3293 op1 = fold_convert (TREE_TYPE (vectype), op1);
3294 op1 = vect_init_vector (stmt, op1,
3295 TREE_TYPE (vectype), NULL);
3302 /* Supportable by target? */
3303 if (!optab)
3305 if (dump_enabled_p ())
3306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3307 "no optab.");
3308 return false;
3310 vec_mode = TYPE_MODE (vectype);
3311 icode = (int) optab_handler (optab, vec_mode);
3312 if (icode == CODE_FOR_nothing)
3314 if (dump_enabled_p ())
3315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3316 "op not supported by target.");
3317 /* Check only during analysis. */
3318 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3319 || (vf < vect_min_worthwhile_factor (code)
3320 && !vec_stmt))
3321 return false;
3322 if (dump_enabled_p ())
3323 dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
3326 /* Worthwhile without SIMD support? Check only during analysis. */
3327 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3328 && vf < vect_min_worthwhile_factor (code)
3329 && !vec_stmt)
3331 if (dump_enabled_p ())
3332 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3333 "not worthwhile without SIMD support.");
3334 return false;
3337 if (!vec_stmt) /* transformation not required. */
3339 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3340 if (dump_enabled_p ())
3341 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_shift ===");
3342 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3343 return true;
3346 /** Transform. **/
3348 if (dump_enabled_p ())
3349 dump_printf_loc (MSG_NOTE, vect_location,
3350 "transform binary/unary operation.");
3352 /* Handle def. */
3353 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3355 /* Allocate VECs for vector operands. In case of SLP, vector operands are
3356 created in the previous stages of the recursion, so no allocation is
3357 needed, except for the case of shift with scalar shift argument. In that
3358 case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
3359 be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
3360 In case of loop-based vectorization we allocate VECs of size 1. We
3361 allocate VEC_OPRNDS1 only in case of binary operation. */
3362 if (!slp_node)
3364 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3365 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3367 else if (scalar_shift_arg)
3368 vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
3370 prev_stmt_info = NULL;
3371 for (j = 0; j < ncopies; j++)
3373 /* Handle uses. */
3374 if (j == 0)
3376 if (scalar_shift_arg)
3378 /* Vector shl and shr insn patterns can be defined with scalar
3379 operand 2 (shift operand). In this case, use constant or loop
3380 invariant op1 directly, without extending it to vector mode
3381 first. */
3382 optab_op2_mode = insn_data[icode].operand[2].mode;
3383 if (!VECTOR_MODE_P (optab_op2_mode))
3385 if (dump_enabled_p ())
3386 dump_printf_loc (MSG_NOTE, vect_location,
3387 "operand 1 using scalar mode.");
3388 vec_oprnd1 = op1;
3389 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3390 if (slp_node)
3392 /* Store vec_oprnd1 for every vector stmt to be created
3393 for SLP_NODE. We check during the analysis that all
3394 the shift arguments are the same.
3395 TODO: Allow different constants for different vector
3396 stmts generated for an SLP instance. */
3397 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3398 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3403 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3404 (a special case for certain kinds of vector shifts); otherwise,
3405 operand 1 should be of a vector type (the usual case). */
3406 if (vec_oprnd1)
3407 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3408 slp_node, -1);
3409 else
3410 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3411 slp_node, -1);
3413 else
3414 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3416 /* Arguments are ready. Create the new vector stmt. */
3417 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3419 vop1 = VEC_index (tree, vec_oprnds1, i);
3420 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3421 new_temp = make_ssa_name (vec_dest, new_stmt);
3422 gimple_assign_set_lhs (new_stmt, new_temp);
3423 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3424 if (slp_node)
3425 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3428 if (slp_node)
3429 continue;
3431 if (j == 0)
3432 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3433 else
3434 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3435 prev_stmt_info = vinfo_for_stmt (new_stmt);
3438 VEC_free (tree, heap, vec_oprnds0);
3439 VEC_free (tree, heap, vec_oprnds1);
3441 return true;
3445 static tree permute_vec_elements (tree, tree, tree, gimple,
3446 gimple_stmt_iterator *);
3449 /* Function vectorizable_operation.
3451 Check if STMT performs a binary, unary or ternary operation that can
3452 be vectorized.
3453 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3454 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3455 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3457 static bool
3458 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3459 gimple *vec_stmt, slp_tree slp_node)
3461 tree vec_dest;
3462 tree scalar_dest;
3463 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3464 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3465 tree vectype;
3466 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3467 enum tree_code code;
3468 enum machine_mode vec_mode;
3469 tree new_temp;
3470 int op_type;
3471 optab optab;
3472 int icode;
3473 tree def;
3474 gimple def_stmt;
3475 enum vect_def_type dt[3]
3476 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3477 gimple new_stmt = NULL;
3478 stmt_vec_info prev_stmt_info;
3479 int nunits_in;
3480 int nunits_out;
3481 tree vectype_out;
3482 int ncopies;
3483 int j, i;
3484 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vec_oprnds2 = NULL;
3485 tree vop0, vop1, vop2;
3486 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3487 int vf;
3489 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3490 return false;
3492 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3493 return false;
3495 /* Is STMT a vectorizable binary/unary operation? */
3496 if (!is_gimple_assign (stmt))
3497 return false;
3499 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3500 return false;
3502 code = gimple_assign_rhs_code (stmt);
3504 /* For pointer addition, we should use the normal plus for
3505 the vector addition. */
3506 if (code == POINTER_PLUS_EXPR)
3507 code = PLUS_EXPR;
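/* For example (illustrative): a stmt such as q_2 = p_1 + off_3
   (POINTER_PLUS_EXPR) is emitted as a PLUS_EXPR on the corresponding
   vectors, since the vector addition does not distinguish pointer
   operands.  */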
3509 /* Support only unary, binary and ternary operations. */
3510 op_type = TREE_CODE_LENGTH (code);
3511 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3513 if (dump_enabled_p ())
3514 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3515 "num. args = %d (not unary/binary/ternary op).",
3516 op_type);
3517 return false;
3520 scalar_dest = gimple_assign_lhs (stmt);
3521 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3523 /* Most operations cannot handle bit-precision types without extra
3524 truncations. */
3525 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3526 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3527 /* Exceptions are bitwise binary operations. */
3528 && code != BIT_IOR_EXPR
3529 && code != BIT_XOR_EXPR
3530 && code != BIT_AND_EXPR)
3532 if (dump_enabled_p ())
3533 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3534 "bit-precision arithmetic not supported.");
3535 return false;
3538 op0 = gimple_assign_rhs1 (stmt);
3539 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3540 &def_stmt, &def, &dt[0], &vectype))
3542 if (dump_enabled_p ())
3543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3544 "use not simple.");
3545 return false;
3547 /* If op0 is an external or constant def use a vector type with
3548 the same size as the output vector type. */
3549 if (!vectype)
3550 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3551 if (vec_stmt)
3552 gcc_assert (vectype);
3553 if (!vectype)
3555 if (dump_enabled_p ())
3557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3558 "no vectype for scalar type ");
3559 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3560 TREE_TYPE (op0));
3563 return false;
3566 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3567 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3568 if (nunits_out != nunits_in)
3569 return false;
3571 if (op_type == binary_op || op_type == ternary_op)
3573 op1 = gimple_assign_rhs2 (stmt);
3574 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3575 &def, &dt[1]))
3577 if (dump_enabled_p ())
3578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3579 "use not simple.");
3580 return false;
3583 if (op_type == ternary_op)
3585 op2 = gimple_assign_rhs3 (stmt);
3586 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3587 &def, &dt[2]))
3589 if (dump_enabled_p ())
3590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3591 "use not simple.");
3592 return false;
3596 if (loop_vinfo)
3597 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3598 else
3599 vf = 1;
3601 /* Multiple types in SLP are handled by creating the appropriate number of
3602 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3603 case of SLP. */
3604 if (slp_node || PURE_SLP_STMT (stmt_info))
3605 ncopies = 1;
3606 else
3607 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3609 gcc_assert (ncopies >= 1);
3611 /* Shifts are handled in vectorizable_shift (). */
3612 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3613 || code == RROTATE_EXPR)
3614 return false;
3616 /* Supportable by target? */
3618 vec_mode = TYPE_MODE (vectype);
3619 if (code == MULT_HIGHPART_EXPR)
3621 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
3622 icode = LAST_INSN_CODE;
3623 else
3624 icode = CODE_FOR_nothing;
3626 else
3628 optab = optab_for_tree_code (code, vectype, optab_default);
3629 if (!optab)
3631 if (dump_enabled_p ())
3632 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3633 "no optab.");
3634 return false;
3636 icode = (int) optab_handler (optab, vec_mode);
3639 if (icode == CODE_FOR_nothing)
3641 if (dump_enabled_p ())
3642 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3643 "op not supported by target.");
3644 /* Check only during analysis. */
3645 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3646 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
3647 return false;
3648 if (dump_enabled_p ())
3649 dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
3652 /* Worthwhile without SIMD support? Check only during analysis. */
3653 if (!VECTOR_MODE_P (vec_mode)
3654 && !vec_stmt
3655 && vf < vect_min_worthwhile_factor (code))
3657 if (dump_enabled_p ())
3658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3659 "not worthwhile without SIMD support.");
3660 return false;
3663 if (!vec_stmt) /* transformation not required. */
3665 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3666 if (dump_enabled_p ())
3667 dump_printf_loc (MSG_NOTE, vect_location,
3668 "=== vectorizable_operation ===");
3669 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3670 return true;
3673 /** Transform. **/
3675 if (dump_enabled_p ())
3676 dump_printf_loc (MSG_NOTE, vect_location,
3677 "transform binary/unary operation.");
3679 /* Handle def. */
3680 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3682 /* In case the vectorization factor (VF) is bigger than the number
3683 of elements that we can fit in a vectype (nunits), we have to generate
3684 more than one vector stmt, i.e., we need to "unroll" the
3685 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3686 from one copy of the vector stmt to the next, in the field
3687 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3688 stages to find the correct vector defs to be used when vectorizing
3689 stmts that use the defs of the current stmt. The example below
3690 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3691 we need to create 4 vectorized stmts):
3693 before vectorization:
3694 RELATED_STMT VEC_STMT
3695 S1: x = memref - -
3696 S2: z = x + 1 - -
3698 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3699 there):
3700 RELATED_STMT VEC_STMT
3701 VS1_0: vx0 = memref0 VS1_1 -
3702 VS1_1: vx1 = memref1 VS1_2 -
3703 VS1_2: vx2 = memref2 VS1_3 -
3704 VS1_3: vx3 = memref3 - -
3705 S1: x = load - VS1_0
3706 S2: z = x + 1 - -
3708 step2: vectorize stmt S2 (done here):
3709 To vectorize stmt S2 we first need to find the relevant vector
3710 def for the first operand 'x'. This is, as usual, obtained from
3711 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3712 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3713 relevant vector def 'vx0'. Having found 'vx0' we can generate
3714 the vector stmt VS2_0, and as usual, record it in the
3715 STMT_VINFO_VEC_STMT of stmt S2.
3716 When creating the second copy (VS2_1), we obtain the relevant vector
3717 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3718 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3719 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3720 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3721 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3722 chain of stmts and pointers:
3723 RELATED_STMT VEC_STMT
3724 VS1_0: vx0 = memref0 VS1_1 -
3725 VS1_1: vx1 = memref1 VS1_2 -
3726 VS1_2: vx2 = memref2 VS1_3 -
3727 VS1_3: vx3 = memref3 - -
3728 S1: x = load - VS1_0
3729 VS2_0: vz0 = vx0 + v1 VS2_1 -
3730 VS2_1: vz1 = vx1 + v1 VS2_2 -
3731 VS2_2: vz2 = vx2 + v1 VS2_3 -
3732 VS2_3: vz3 = vx3 + v1 - -
3733 S2: z = x + 1 - VS2_0 */
3735 prev_stmt_info = NULL;
3736 for (j = 0; j < ncopies; j++)
3738 /* Handle uses. */
3739 if (j == 0)
3741 if (op_type == binary_op || op_type == ternary_op)
3742 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3743 slp_node, -1);
3744 else
3745 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3746 slp_node, -1);
3747 if (op_type == ternary_op)
3749 vec_oprnds2 = VEC_alloc (tree, heap, 1);
3750 VEC_quick_push (tree, vec_oprnds2,
3751 vect_get_vec_def_for_operand (op2, stmt, NULL));
3754 else
3756 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3757 if (op_type == ternary_op)
3759 tree vec_oprnd = VEC_pop (tree, vec_oprnds2);
3760 VEC_quick_push (tree, vec_oprnds2,
3761 vect_get_vec_def_for_stmt_copy (dt[2],
3762 vec_oprnd));
3766 /* Arguments are ready. Create the new vector stmt. */
3767 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vop0)
3769 vop1 = ((op_type == binary_op || op_type == ternary_op)
3770 ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
3771 vop2 = ((op_type == ternary_op)
3772 ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
3773 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
3774 vop0, vop1, vop2);
3775 new_temp = make_ssa_name (vec_dest, new_stmt);
3776 gimple_assign_set_lhs (new_stmt, new_temp);
3777 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3778 if (slp_node)
3779 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3782 if (slp_node)
3783 continue;
3785 if (j == 0)
3786 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3787 else
3788 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3789 prev_stmt_info = vinfo_for_stmt (new_stmt);
3792 VEC_free (tree, heap, vec_oprnds0);
3793 if (vec_oprnds1)
3794 VEC_free (tree, heap, vec_oprnds1);
3795 if (vec_oprnds2)
3796 VEC_free (tree, heap, vec_oprnds2);
3798 return true;
3802 /* Function vectorizable_store.
3804 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3805 can be vectorized.
3806 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3807 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3808 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3810 static bool
3811 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3812 slp_tree slp_node)
3814 tree scalar_dest;
3815 tree data_ref;
3816 tree op;
3817 tree vec_oprnd = NULL_TREE;
3818 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3819 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3820 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3821 tree elem_type;
3822 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3823 struct loop *loop = NULL;
3824 enum machine_mode vec_mode;
3825 tree dummy;
3826 enum dr_alignment_support alignment_support_scheme;
3827 tree def;
3828 gimple def_stmt;
3829 enum vect_def_type dt;
3830 stmt_vec_info prev_stmt_info = NULL;
3831 tree dataref_ptr = NULL_TREE;
3832 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3833 int ncopies;
3834 int j;
3835 gimple next_stmt, first_stmt = NULL;
3836 bool grouped_store = false;
3837 bool store_lanes_p = false;
3838 unsigned int group_size, i;
3839 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3840 bool inv_p;
3841 VEC(tree,heap) *vec_oprnds = NULL;
3842 bool slp = (slp_node != NULL);
3843 unsigned int vec_num;
3844 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3845 tree aggr_type;
3847 if (loop_vinfo)
3848 loop = LOOP_VINFO_LOOP (loop_vinfo);
3850 /* Multiple types in SLP are handled by creating the appropriate number of
3851 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3852 case of SLP. */
3853 if (slp || PURE_SLP_STMT (stmt_info))
3854 ncopies = 1;
3855 else
3856 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3858 gcc_assert (ncopies >= 1);
3860 /* FORNOW. This restriction should be relaxed. */
3861 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3863 if (dump_enabled_p ())
3864 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3865 "multiple types in nested loop.");
3866 return false;
3869 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3870 return false;
3872 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3873 return false;
3875 /* Is vectorizable store? */
3877 if (!is_gimple_assign (stmt))
3878 return false;
3880 scalar_dest = gimple_assign_lhs (stmt);
3881 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3882 && is_pattern_stmt_p (stmt_info))
3883 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3884 if (TREE_CODE (scalar_dest) != ARRAY_REF
3885 && TREE_CODE (scalar_dest) != INDIRECT_REF
3886 && TREE_CODE (scalar_dest) != COMPONENT_REF
3887 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3888 && TREE_CODE (scalar_dest) != REALPART_EXPR
3889 && TREE_CODE (scalar_dest) != MEM_REF)
3890 return false;
3892 gcc_assert (gimple_assign_single_p (stmt));
3893 op = gimple_assign_rhs1 (stmt);
3894 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3895 &def, &dt))
3897 if (dump_enabled_p ())
3898 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3899 "use not simple.");
3900 return false;
3903 elem_type = TREE_TYPE (vectype);
3904 vec_mode = TYPE_MODE (vectype);
3906 /* FORNOW. In some cases can vectorize even if data-type not supported
3907 (e.g. - array initialization with 0). */
3908 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3909 return false;
3911 if (!STMT_VINFO_DATA_REF (stmt_info))
3912 return false;
3914 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3915 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3916 size_zero_node) < 0)
3918 if (dump_enabled_p ())
3919 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3920 "negative step for store.");
3921 return false;
3924 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
3926 grouped_store = true;
3927 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3928 if (!slp && !PURE_SLP_STMT (stmt_info))
3930 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3931 if (vect_store_lanes_supported (vectype, group_size))
3932 store_lanes_p = true;
3933 else if (!vect_grouped_store_supported (vectype, group_size))
3934 return false;
3937 if (first_stmt == stmt)
3939 /* STMT is the leader of the group. Check the operands of all the
3940 stmts of the group. */
3941 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3942 while (next_stmt)
3944 gcc_assert (gimple_assign_single_p (next_stmt));
3945 op = gimple_assign_rhs1 (next_stmt);
3946 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3947 &def_stmt, &def, &dt))
3949 if (dump_enabled_p ())
3950 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3951 "use not simple.");
3952 return false;
3954 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3959 if (!vec_stmt) /* transformation not required. */
3961 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3962 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
3963 NULL, NULL, NULL);
3964 return true;
3967 /** Transform. **/
3969 if (grouped_store)
3971 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3972 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3974 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3976 /* FORNOW */
3977 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3979 /* We vectorize all the stmts of the interleaving group when we
3980 reach the last stmt in the group. */
3981 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3982 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3983 && !slp)
3985 *vec_stmt = NULL;
3986 return true;
3989 if (slp)
3991 grouped_store = false;
3992 /* VEC_NUM is the number of vect stmts to be created for this
3993 group. */
3994 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3995 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
3996 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3997 op = gimple_assign_rhs1 (first_stmt);
3999 else
4000 /* VEC_NUM is the number of vect stmts to be created for this
4001 group. */
4002 vec_num = group_size;
4004 else
4006 first_stmt = stmt;
4007 first_dr = dr;
4008 group_size = vec_num = 1;
4011 if (dump_enabled_p ())
4012 dump_printf_loc (MSG_NOTE, vect_location,
4013 "transform store. ncopies = %d", ncopies);
4015 dr_chain = VEC_alloc (tree, heap, group_size);
4016 oprnds = VEC_alloc (tree, heap, group_size);
4018 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4019 gcc_assert (alignment_support_scheme);
4020 /* Targets with store-lane instructions must not require explicit
4021 realignment. */
4022 gcc_assert (!store_lanes_p
4023 || alignment_support_scheme == dr_aligned
4024 || alignment_support_scheme == dr_unaligned_supported);
4026 if (store_lanes_p)
4027 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4028 else
4029 aggr_type = vectype;
4031 /* In case the vectorization factor (VF) is bigger than the number
4032 of elements that we can fit in a vectype (nunits), we have to generate
4033 more than one vector stmt - i.e - we need to "unroll" the
4034 vector stmt by a factor VF/nunits. For more details see documentation in
4035 vect_get_vec_def_for_copy_stmt. */
4037 /* In case of interleaving (non-unit grouped access):
4039 S1: &base + 2 = x2
4040 S2: &base = x0
4041 S3: &base + 1 = x1
4042 S4: &base + 3 = x3
4044 We create vectorized stores starting from the base address (the access of
4045 the first stmt in the chain, S2 in the above example) when the last store
4046 stmt of the chain (S4) is reached:
4048 VS1: &base = vx2
4049 VS2: &base + vec_size*1 = vx0
4050 VS3: &base + vec_size*2 = vx1
4051 VS4: &base + vec_size*3 = vx3
4053 Then permutation statements are generated:
4055 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4056 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
4059 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4060 (the order of the data-refs in the output of vect_permute_store_chain
4061 corresponds to the order of scalar stmts in the interleaving chain - see
4062 the documentation of vect_permute_store_chain()).
4064 In case of both multiple types and interleaving, above vector stores and
4065 permutation stmts are created for every copy. The result vector stmts are
4066 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4067 STMT_VINFO_RELATED_STMT for the next copies.
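     As a concrete sketch (assuming a group of two interleaved stores and
     4-lane vectors), a scalar loop such as

       for (i = 0; i < n; i++)
         {
           a[2*i]   = x[i];
           a[2*i+1] = y[i];
         }

     emits two permuted vector stores per vectorized iteration:

       *p       = VEC_PERM_EXPR <vx, vy, {0, 4, 1, 5}>;
       *(p + 1) = VEC_PERM_EXPR <vx, vy, {2, 6, 3, 7}>;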
4070 prev_stmt_info = NULL;
4071 for (j = 0; j < ncopies; j++)
4073 gimple new_stmt;
4074 gimple ptr_incr;
4076 if (j == 0)
4078 if (slp)
4080 /* Get vectorized arguments for SLP_NODE. */
4081 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4082 NULL, slp_node, -1);
4084 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
4086 else
4088 /* For interleaved stores we collect vectorized defs for all the
4089 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4090 used as an input to vect_permute_store_chain(), and OPRNDS as
4091 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4093 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4094 OPRNDS are of size 1. */
4095 next_stmt = first_stmt;
4096 for (i = 0; i < group_size; i++)
4098 /* Since gaps are not supported for interleaved stores,
4099 GROUP_SIZE is the exact number of stmts in the chain.
4100 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4101 there is no interleaving, GROUP_SIZE is 1, and only one
4102 iteration of the loop will be executed. */
4103 gcc_assert (next_stmt
4104 && gimple_assign_single_p (next_stmt));
4105 op = gimple_assign_rhs1 (next_stmt);
4107 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4108 NULL);
4109 VEC_quick_push(tree, dr_chain, vec_oprnd);
4110 VEC_quick_push(tree, oprnds, vec_oprnd);
4111 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4115 /* We should have caught mismatched types earlier. */
4116 gcc_assert (useless_type_conversion_p (vectype,
4117 TREE_TYPE (vec_oprnd)));
4118 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
4119 NULL_TREE, &dummy, gsi,
4120 &ptr_incr, false, &inv_p);
4121 gcc_assert (bb_vinfo || !inv_p);
4123 else
4125 /* For interleaved stores we created vectorized defs for all the
4126 defs stored in OPRNDS in the previous iteration (previous copy).
4127 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4128 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4129 next copy.
4130 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4131 OPRNDS are of size 1. */
4132 for (i = 0; i < group_size; i++)
4134 op = VEC_index (tree, oprnds, i);
4135 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4136 &def, &dt);
4137 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4138 VEC_replace(tree, dr_chain, i, vec_oprnd);
4139 VEC_replace(tree, oprnds, i, vec_oprnd);
4141 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4142 TYPE_SIZE_UNIT (aggr_type));
4145 if (store_lanes_p)
4147 tree vec_array;
4149 /* Combine all the vectors into an array. */
4150 vec_array = create_vector_array (vectype, vec_num);
4151 for (i = 0; i < vec_num; i++)
4153 vec_oprnd = VEC_index (tree, dr_chain, i);
4154 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4157 /* Emit:
4158 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4159 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4160 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4161 gimple_call_set_lhs (new_stmt, data_ref);
4162 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4164 else
4166 new_stmt = NULL;
4167 if (grouped_store)
4169 result_chain = VEC_alloc (tree, heap, group_size);
4170 /* Permute. */
4171 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4172 &result_chain);
4175 next_stmt = first_stmt;
4176 for (i = 0; i < vec_num; i++)
4178 unsigned align, misalign;
4180 if (i > 0)
4181 /* Bump the vector pointer. */
4182 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4183 stmt, NULL_TREE);
4185 if (slp)
4186 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4187 else if (grouped_store)
4188 /* For grouped stores vectorized defs are interleaved in
4189 vect_permute_store_chain(). */
4190 vec_oprnd = VEC_index (tree, result_chain, i);
4192 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4193 build_int_cst (reference_alias_ptr_type
4194 (DR_REF (first_dr)), 0));
4195 align = TYPE_ALIGN_UNIT (vectype);
4196 if (aligned_access_p (first_dr))
4197 misalign = 0;
4198 else if (DR_MISALIGNMENT (first_dr) == -1)
4200 TREE_TYPE (data_ref)
4201 = build_aligned_type (TREE_TYPE (data_ref),
4202 TYPE_ALIGN (elem_type));
4203 align = TYPE_ALIGN_UNIT (elem_type);
4204 misalign = 0;
4206 else
4208 TREE_TYPE (data_ref)
4209 = build_aligned_type (TREE_TYPE (data_ref),
4210 TYPE_ALIGN (elem_type));
4211 misalign = DR_MISALIGNMENT (first_dr);
4213 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4214 misalign);
4216 /* Arguments are ready. Create the new vector stmt. */
4217 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4218 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4220 if (slp)
4221 continue;
4223 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4224 if (!next_stmt)
4225 break;
4228 if (!slp)
4230 if (j == 0)
4231 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4232 else
4233 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4234 prev_stmt_info = vinfo_for_stmt (new_stmt);
4238 VEC_free (tree, heap, dr_chain);
4239 VEC_free (tree, heap, oprnds);
4240 if (result_chain)
4241 VEC_free (tree, heap, result_chain);
4242 if (vec_oprnds)
4243 VEC_free (tree, heap, vec_oprnds);
4245 return true;
4248 /* Given a vector type VECTYPE and permutation SEL returns
4249 the VECTOR_CST mask that implements the permutation of the
4250 vector elements. If that is impossible to do, returns NULL. */
4252 tree
4253 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4255 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
4256 int i, nunits;
4258 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4260 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4261 return NULL;
4263 mask_elt_type = lang_hooks.types.type_for_mode
4264 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
4265 mask_type = get_vectype_for_scalar_type (mask_elt_type);
4267 mask_elts = XALLOCAVEC (tree, nunits);
4268 for (i = nunits - 1; i >= 0; i--)
4269 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4270 mask_vec = build_vector (mask_type, mask_elts);
4272 return mask_vec;
4275 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4276 reversal of the vector elements. If that is impossible to do,
4277 returns NULL. */
4279 static tree
4280 perm_mask_for_reverse (tree vectype)
4282 int i, nunits;
4283 unsigned char *sel;
4285 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4286 sel = XALLOCAVEC (unsigned char, nunits);
4288 for (i = 0; i < nunits; ++i)
4289 sel[i] = nunits - 1 - i;
4291 return vect_gen_perm_mask (vectype, sel);
4294 /* Given vector variables X and Y that were generated for the scalar
4295 STMT, generate instructions to permute the vector elements of X and Y
4296 using permutation mask MASK_VEC, insert them at *GSI and return the
4297 permuted vector variable. */
4299 static tree
4300 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4301 gimple_stmt_iterator *gsi)
4303 tree vectype = TREE_TYPE (x);
4304 tree perm_dest, data_ref;
4305 gimple perm_stmt;
4307 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4308 data_ref = make_ssa_name (perm_dest, NULL);
4310 /* Generate the permute statement. */
4311 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4312 x, y, mask_vec);
4313 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4315 return data_ref;
4318 /* vectorizable_load.
4320 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4321 can be vectorized.
4322 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4323 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4324 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4326 static bool
4327 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4328 slp_tree slp_node, slp_instance slp_node_instance)
4330 tree scalar_dest;
4331 tree vec_dest = NULL;
4332 tree data_ref = NULL;
4333 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4334 stmt_vec_info prev_stmt_info;
4335 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4336 struct loop *loop = NULL;
4337 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4338 bool nested_in_vect_loop = false;
4339 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4340 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4341 tree elem_type;
4342 tree new_temp;
4343 enum machine_mode mode;
4344 gimple new_stmt = NULL;
4345 tree dummy;
4346 enum dr_alignment_support alignment_support_scheme;
4347 tree dataref_ptr = NULL_TREE;
4348 gimple ptr_incr;
4349 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4350 int ncopies;
4351 int i, j, group_size;
4352 tree msq = NULL_TREE, lsq;
4353 tree offset = NULL_TREE;
4354 tree realignment_token = NULL_TREE;
4355 gimple phi = NULL;
4356 VEC(tree,heap) *dr_chain = NULL;
4357 bool grouped_load = false;
4358 bool load_lanes_p = false;
4359 gimple first_stmt;
4360 bool inv_p;
4361 bool negative = false;
4362 bool compute_in_loop = false;
4363 struct loop *at_loop;
4364 int vec_num;
4365 bool slp = (slp_node != NULL);
4366 bool slp_perm = false;
4367 enum tree_code code;
4368 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4369 int vf;
4370 tree aggr_type;
4371 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4372 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4373 tree stride_base, stride_step;
4374 int gather_scale = 1;
4375 enum vect_def_type gather_dt = vect_unknown_def_type;
4377 if (loop_vinfo)
4379 loop = LOOP_VINFO_LOOP (loop_vinfo);
4380 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4381 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4383 else
4384 vf = 1;
4386 /* Multiple types in SLP are handled by creating the appropriate number of
4387 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4388 case of SLP. */
4389 if (slp || PURE_SLP_STMT (stmt_info))
4390 ncopies = 1;
4391 else
4392 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4394 gcc_assert (ncopies >= 1);
4396 /* FORNOW. This restriction should be relaxed. */
4397 if (nested_in_vect_loop && ncopies > 1)
4399 if (dump_enabled_p ())
4400 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4401 "multiple types in nested loop.");
4402 return false;
4405 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4406 return false;
4408 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4409 return false;
4411 /* Is vectorizable load? */
4412 if (!is_gimple_assign (stmt))
4413 return false;
4415 scalar_dest = gimple_assign_lhs (stmt);
4416 if (TREE_CODE (scalar_dest) != SSA_NAME)
4417 return false;
4419 code = gimple_assign_rhs_code (stmt);
4420 if (code != ARRAY_REF
4421 && code != INDIRECT_REF
4422 && code != COMPONENT_REF
4423 && code != IMAGPART_EXPR
4424 && code != REALPART_EXPR
4425 && code != MEM_REF
4426 && TREE_CODE_CLASS (code) != tcc_declaration)
4427 return false;
4429 if (!STMT_VINFO_DATA_REF (stmt_info))
4430 return false;
4432 elem_type = TREE_TYPE (vectype);
4433 mode = TYPE_MODE (vectype);
4435 /* FORNOW. In some cases can vectorize even if data-type not supported
4436 (e.g. - data copies). */
4437 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4439 if (dump_enabled_p ())
4440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4441 "Aligned load, but unsupported type.");
4442 return false;
4445 /* Check if the load is a part of an interleaving chain. */
4446 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
4448 grouped_load = true;
4449 /* FORNOW */
4450 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4452 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4453 if (!slp && !PURE_SLP_STMT (stmt_info))
4455 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4456 if (vect_load_lanes_supported (vectype, group_size))
4457 load_lanes_p = true;
4458 else if (!vect_grouped_load_supported (vectype, group_size))
4459 return false;
4464 if (STMT_VINFO_GATHER_P (stmt_info))
4466 gimple def_stmt;
4467 tree def;
4468 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4469 &gather_off, &gather_scale);
4470 gcc_assert (gather_decl);
4471 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4472 &def_stmt, &def, &gather_dt,
4473 &gather_off_vectype))
4475 if (dump_enabled_p ())
4476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4477 "gather index use not simple.");
4478 return false;
4481 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4483 if (!vect_check_strided_load (stmt, loop_vinfo,
4484 &stride_base, &stride_step))
4485 return false;
4487 else
4489 negative = tree_int_cst_compare (nested_in_vect_loop
4490 ? STMT_VINFO_DR_STEP (stmt_info)
4491 : DR_STEP (dr),
4492 size_zero_node) < 0;
4493 if (negative && ncopies > 1)
4495 if (dump_enabled_p ())
4496 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4497 "multiple types with negative step.");
4498 return false;
4501 if (negative)
4503 gcc_assert (!grouped_load);
4504 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4505 if (alignment_support_scheme != dr_aligned
4506 && alignment_support_scheme != dr_unaligned_supported)
4508 if (dump_enabled_p ())
4509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4510 "negative step but alignment required.");
4511 return false;
4513 if (!perm_mask_for_reverse (vectype))
4515 if (dump_enabled_p ())
4516 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4517 "negative step and reversing not supported.");
4518 return false;
4523 if (!vec_stmt) /* transformation not required. */
4525 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4526 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
4527 return true;
4530 if (dump_enabled_p ())
4531 dump_printf_loc (MSG_NOTE, vect_location,
4532 "transform load. ncopies = %d", ncopies);
4534 /** Transform. **/
4536 if (STMT_VINFO_GATHER_P (stmt_info))
4538 tree vec_oprnd0 = NULL_TREE, op;
4539 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4540 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4541 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4542 edge pe = loop_preheader_edge (loop);
4543 gimple_seq seq;
4544 basic_block new_bb;
4545 enum { NARROW, NONE, WIDEN } modifier;
4546 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4548 if (nunits == gather_off_nunits)
4549 modifier = NONE;
4550 else if (nunits == gather_off_nunits / 2)
4552 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4553 modifier = WIDEN;
4555 for (i = 0; i < gather_off_nunits; ++i)
4556 sel[i] = i | nunits;
4558 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4559 gcc_assert (perm_mask != NULL_TREE);
4561 else if (nunits == gather_off_nunits * 2)
4563 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4564 modifier = NARROW;
4566 for (i = 0; i < nunits; ++i)
4567 sel[i] = i < gather_off_nunits
4568 ? i : i + nunits - gather_off_nunits;
4570 perm_mask = vect_gen_perm_mask (vectype, sel);
4571 gcc_assert (perm_mask != NULL_TREE);
4572 ncopies *= 2;
4574 else
4575 gcc_unreachable ();
4577 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4578 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4579 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4580 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4581 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4582 scaletype = TREE_VALUE (arglist);
4583 gcc_checking_assert (types_compatible_p (srctype, rettype)
4584 && types_compatible_p (srctype, masktype));
4586 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4588 ptr = fold_convert (ptrtype, gather_base);
4589 if (!is_gimple_min_invariant (ptr))
4591 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4592 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4593 gcc_assert (!new_bb);
4596 /* Currently we support only unconditional gather loads,
4597 so mask should be all ones. */
4598 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4599 mask = build_int_cst (TREE_TYPE (masktype), -1);
4600 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4602 REAL_VALUE_TYPE r;
4603 long tmp[6];
4604 for (j = 0; j < 6; ++j)
4605 tmp[j] = -1;
4606 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4607 mask = build_real (TREE_TYPE (masktype), r);
4609 else
4610 gcc_unreachable ();
4611 mask = build_vector_from_val (masktype, mask);
4612 mask = vect_init_vector (stmt, mask, masktype, NULL);
4614 scale = build_int_cst (scaletype, gather_scale);
4616 prev_stmt_info = NULL;
4617 for (j = 0; j < ncopies; ++j)
4619 if (modifier == WIDEN && (j & 1))
4620 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4621 perm_mask, stmt, gsi);
4622 else if (j == 0)
4623 op = vec_oprnd0
4624 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4625 else
4626 op = vec_oprnd0
4627 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4629 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4631 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4632 == TYPE_VECTOR_SUBPARTS (idxtype));
4633 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4634 var = make_ssa_name (var, NULL);
4635 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4636 new_stmt
4637 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4638 op, NULL_TREE);
4639 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4640 op = var;
4643 new_stmt
4644 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4646 if (!useless_type_conversion_p (vectype, rettype))
4648 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4649 == TYPE_VECTOR_SUBPARTS (rettype));
4650 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4651 op = make_ssa_name (var, new_stmt);
4652 gimple_call_set_lhs (new_stmt, op);
4653 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4654 var = make_ssa_name (vec_dest, NULL);
4655 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4656 new_stmt
4657 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4658 NULL_TREE);
4660 else
4662 var = make_ssa_name (vec_dest, new_stmt);
4663 gimple_call_set_lhs (new_stmt, var);
4666 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4668 if (modifier == NARROW)
4670 if ((j & 1) == 0)
4672 prev_res = var;
4673 continue;
4675 var = permute_vec_elements (prev_res, var,
4676 perm_mask, stmt, gsi);
4677 new_stmt = SSA_NAME_DEF_STMT (var);
4680 if (prev_stmt_info == NULL)
4681 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4682 else
4683 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4684 prev_stmt_info = vinfo_for_stmt (new_stmt);
4686 return true;
4688 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4690 gimple_stmt_iterator incr_gsi;
4691 bool insert_after;
4692 gimple incr;
4693 tree offvar;
4694 tree ref = DR_REF (dr);
4695 tree ivstep;
4696 tree running_off;
4697 VEC(constructor_elt, gc) *v = NULL;
4698 gimple_seq stmts = NULL;
4700 gcc_assert (stride_base && stride_step);
4702 /* For a load with loop-invariant (but other than power-of-2)
4703 stride (i.e. not a grouped access) like so:
4705 for (i = 0; i < n; i += stride)
4706 ... = array[i];
4708 we generate a new induction variable and new accesses to
4709 form a new vector (or vectors, depending on ncopies):
4711 for (j = 0; ; j += VF*stride)
4712 tmp1 = array[j];
4713 tmp2 = array[j + stride];
4715 vectemp = {tmp1, tmp2, ...}
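     For instance (a sketch assuming stride == 3, 4-lane vectors and
     ncopies == 1), each vectorized iteration loads

       tmp1 = array[j];
       tmp2 = array[j + 3];
       tmp3 = array[j + 6];
       tmp4 = array[j + 9];
       vectemp = {tmp1, tmp2, tmp3, tmp4};

     and the new induction variable advances by VF*stride == 12.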
4718 ivstep = stride_step;
4719 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4720 build_int_cst (TREE_TYPE (ivstep), vf));
4722 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4724 create_iv (stride_base, ivstep, NULL,
4725 loop, &incr_gsi, insert_after,
4726 &offvar, NULL);
4727 incr = gsi_stmt (incr_gsi);
4728 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4730 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4731 if (stmts)
4732 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4734 prev_stmt_info = NULL;
4735 running_off = offvar;
4736 for (j = 0; j < ncopies; j++)
4738 tree vec_inv;
4740 v = VEC_alloc (constructor_elt, gc, nunits);
4741 for (i = 0; i < nunits; i++)
4743 tree newref, newoff;
4744 gimple incr;
4745 if (TREE_CODE (ref) == ARRAY_REF)
4747 newref = build4 (ARRAY_REF, TREE_TYPE (ref),
4748 unshare_expr (TREE_OPERAND (ref, 0)),
4749 running_off,
4750 NULL_TREE, NULL_TREE);
4751 if (!useless_type_conversion_p (TREE_TYPE (vectype),
4752 TREE_TYPE (newref)))
4753 newref = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype),
4754 newref);
4756 else
4757 newref = build2 (MEM_REF, TREE_TYPE (vectype),
4758 running_off,
4759 TREE_OPERAND (ref, 1));
4761 newref = force_gimple_operand_gsi (gsi, newref, true,
4762 NULL_TREE, true,
4763 GSI_SAME_STMT);
4764 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
4765 newoff = copy_ssa_name (running_off, NULL);
4766 if (POINTER_TYPE_P (TREE_TYPE (newoff)))
4767 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4768 running_off, stride_step);
4769 else
4770 incr = gimple_build_assign_with_ops (PLUS_EXPR, newoff,
4771 running_off, stride_step);
4772 vect_finish_stmt_generation (stmt, incr, gsi);
4774 running_off = newoff;
4777 vec_inv = build_constructor (vectype, v);
4778 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4779 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4781 if (j == 0)
4782 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4783 else
4784 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4785 prev_stmt_info = vinfo_for_stmt (new_stmt);
4787 return true;
4790 if (grouped_load)
4792 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4793 if (slp
4794 && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance)
4795 && first_stmt != VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0))
4796 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
4798 /* Check if the chain of loads is already vectorized. */
4799 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4801 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4802 return true;
4804 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4805 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4807 /* VEC_NUM is the number of vect stmts to be created for this group. */
4808 if (slp)
4810 grouped_load = false;
4811 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4812 if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance))
4813 slp_perm = true;
4815 else
4816 vec_num = group_size;
4818 else
4820 first_stmt = stmt;
4821 first_dr = dr;
4822 group_size = vec_num = 1;
4825 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4826 gcc_assert (alignment_support_scheme);
4827 /* Targets with load-lane instructions must not require explicit
4828 realignment. */
4829 gcc_assert (!load_lanes_p
4830 || alignment_support_scheme == dr_aligned
4831 || alignment_support_scheme == dr_unaligned_supported);
4833 /* In case the vectorization factor (VF) is bigger than the number
4834 of elements that we can fit in a vectype (nunits), we have to generate
4835 more than one vector stmt - i.e - we need to "unroll" the
4836 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4837 from one copy of the vector stmt to the next, in the field
4838 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4839 stages to find the correct vector defs to be used when vectorizing
4840 stmts that use the defs of the current stmt. The example below
4841 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4842 need to create 4 vectorized stmts):
4844 before vectorization:
4845 RELATED_STMT VEC_STMT
4846 S1: x = memref - -
4847 S2: z = x + 1 - -
4849 step 1: vectorize stmt S1:
4850 We first create the vector stmt VS1_0, and, as usual, record a
4851 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4852 Next, we create the vector stmt VS1_1, and record a pointer to
4853 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4854 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4855 stmts and pointers:
4856 RELATED_STMT VEC_STMT
4857 VS1_0: vx0 = memref0 VS1_1 -
4858 VS1_1: vx1 = memref1 VS1_2 -
4859 VS1_2: vx2 = memref2 VS1_3 -
4860 VS1_3: vx3 = memref3 - -
4861 S1: x = load - VS1_0
4862 S2: z = x + 1 - -
4864 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4865 information we recorded in RELATED_STMT field is used to vectorize
4866 stmt S2. */
4868 /* In case of interleaving (non-unit grouped access):
4870 S1: x2 = &base + 2
4871 S2: x0 = &base
4872 S3: x1 = &base + 1
4873 S4: x3 = &base + 3
4875 Vectorized loads are created in the order of memory accesses
4876 starting from the access of the first stmt of the chain:
4878 VS1: vx0 = &base
4879 VS2: vx1 = &base + vec_size*1
4880 VS3: vx2 = &base + vec_size*2
4881 VS4: vx3 = &base + vec_size*3
4883 Then permutation statements are generated:
4885 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4886 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4889 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4890 (the order of the data-refs in the output of vect_permute_load_chain
4891 corresponds to the order of scalar stmts in the interleaving chain - see
4892 the documentation of vect_permute_load_chain()).
4893 The generation of permutation stmts and recording them in
4894 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4896 In case of both multiple types and interleaving, the vector loads and
4897 permutation stmts above are created for every copy. The result vector
4898 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4899 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
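     /* As a concrete sketch (assuming a group of two interleaved loads and
        4-lane vectors), a scalar loop such as

          for (i = 0; i < n; i++)
            {
              x = a[2*i];
              y = a[2*i+1];
            }

        loads two consecutive vectors v0 = {a0,a1,a2,a3} and
        v1 = {a4,a5,a6,a7} per vectorized iteration and then extracts the
        even and odd elements:

          vx = VEC_PERM_EXPR <v0, v1, {0, 2, 4, 6}>;
          vy = VEC_PERM_EXPR <v0, v1, {1, 3, 5, 7}>;  */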
4901 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4902 on a target that supports unaligned accesses (dr_unaligned_supported)
4903 we generate the following code:
4904 p = initial_addr;
4905 indx = 0;
4906 loop {
4907 p = p + indx * vectype_size;
4908 vec_dest = *(p);
4909 indx = indx + 1;
4912 Otherwise, the data reference is potentially unaligned on a target that
4913 does not support unaligned accesses (dr_explicit_realign_optimized) -
4914 then generate the following code, in which the data in each iteration is
4915 obtained by two vector loads, one from the previous iteration, and one
4916 from the current iteration:
4917 p1 = initial_addr;
4918 msq_init = *(floor(p1))
4919 p2 = initial_addr + VS - 1;
4920 realignment_token = call target_builtin;
4921 indx = 0;
4922 loop {
4923 p2 = p2 + indx * vectype_size
4924 lsq = *(floor(p2))
4925 vec_dest = realign_load (msq, lsq, realignment_token)
4926 indx = indx + 1;
4927 msq = lsq;
4928 } */
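     /* A small worked example of the second scheme (assuming 16-byte vectors
        and an address p misaligned by 4 bytes): msq is loaded from the
        16-byte boundary just below p and lsq from the next boundary, and
        realign_load (msq, lsq, realignment_token) concatenates the upper 12
        bytes of msq with the lower 4 bytes of lsq to reconstruct the 16
        bytes starting at p.  Inside the loop only one new aligned load (lsq)
        is needed per iteration, since msq is carried over from the previous
        one.  */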
4930 /* If the misalignment remains the same throughout the execution of the
4931 loop, we can create the init_addr and permutation mask at the loop
4932 preheader. Otherwise, it needs to be created inside the loop.
4933 This can only occur when vectorizing memory accesses in the inner-loop
4934 nested within an outer-loop that is being vectorized. */
4936 if (nested_in_vect_loop
4937 && (TREE_INT_CST_LOW (DR_STEP (dr))
4938 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4940 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4941 compute_in_loop = true;
4944 if ((alignment_support_scheme == dr_explicit_realign_optimized
4945 || alignment_support_scheme == dr_explicit_realign)
4946 && !compute_in_loop)
4948 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4949 alignment_support_scheme, NULL_TREE,
4950 &at_loop);
4951 if (alignment_support_scheme == dr_explicit_realign_optimized)
4953 phi = SSA_NAME_DEF_STMT (msq);
4954 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4957 else
4958 at_loop = loop;
4960 if (negative)
4961 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4963 if (load_lanes_p)
4964 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4965 else
4966 aggr_type = vectype;
4968 prev_stmt_info = NULL;
4969 for (j = 0; j < ncopies; j++)
4971 /* 1. Create the vector or array pointer update chain. */
4972 if (j == 0)
4973 dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4974 offset, &dummy, gsi,
4975 &ptr_incr, false, &inv_p);
4976 else
4977 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4978 TYPE_SIZE_UNIT (aggr_type));
4980 if (grouped_load || slp_perm)
4981 dr_chain = VEC_alloc (tree, heap, vec_num);
4983 if (load_lanes_p)
4985 tree vec_array;
4987 vec_array = create_vector_array (vectype, vec_num);
4989 /* Emit:
4990 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
4991 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4992 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4993 gimple_call_set_lhs (new_stmt, vec_array);
4994 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4996 /* Extract each vector into an SSA_NAME. */
4997 for (i = 0; i < vec_num; i++)
4999 new_temp = read_vector_array (stmt, gsi, scalar_dest,
5000 vec_array, i);
5001 VEC_quick_push (tree, dr_chain, new_temp);
5004 /* Record the mapping between SSA_NAMEs and statements. */
5005 vect_record_grouped_load_vectors (stmt, dr_chain);
5007 else
5009 for (i = 0; i < vec_num; i++)
5011 if (i > 0)
5012 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5013 stmt, NULL_TREE);
5015 /* 2. Create the vector-load in the loop. */
5016 switch (alignment_support_scheme)
5018 case dr_aligned:
5019 case dr_unaligned_supported:
5021 unsigned int align, misalign;
5023 data_ref
5024 = build2 (MEM_REF, vectype, dataref_ptr,
5025 build_int_cst (reference_alias_ptr_type
5026 (DR_REF (first_dr)), 0));
5027 align = TYPE_ALIGN_UNIT (vectype);
5028 if (alignment_support_scheme == dr_aligned)
5030 gcc_assert (aligned_access_p (first_dr));
5031 misalign = 0;
5033 else if (DR_MISALIGNMENT (first_dr) == -1)
5035 TREE_TYPE (data_ref)
5036 = build_aligned_type (TREE_TYPE (data_ref),
5037 TYPE_ALIGN (elem_type));
5038 align = TYPE_ALIGN_UNIT (elem_type);
5039 misalign = 0;
5041 else
5043 TREE_TYPE (data_ref)
5044 = build_aligned_type (TREE_TYPE (data_ref),
5045 TYPE_ALIGN (elem_type));
5046 misalign = DR_MISALIGNMENT (first_dr);
5048 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5049 align, misalign);
5050 break;
5052 case dr_explicit_realign:
5054 tree ptr, bump;
5055 tree vs_minus_1;
5057 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5059 if (compute_in_loop)
5060 msq = vect_setup_realignment (first_stmt, gsi,
5061 &realignment_token,
5062 dr_explicit_realign,
5063 dataref_ptr, NULL);
5065 ptr = copy_ssa_name (dataref_ptr, NULL);
5066 new_stmt = gimple_build_assign_with_ops
5067 (BIT_AND_EXPR, ptr, dataref_ptr,
5068 build_int_cst
5069 (TREE_TYPE (dataref_ptr),
5070 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5071 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5072 data_ref
5073 = build2 (MEM_REF, vectype, ptr,
5074 build_int_cst (reference_alias_ptr_type
5075 (DR_REF (first_dr)), 0));
5076 vec_dest = vect_create_destination_var (scalar_dest,
5077 vectype);
5078 new_stmt = gimple_build_assign (vec_dest, data_ref);
5079 new_temp = make_ssa_name (vec_dest, new_stmt);
5080 gimple_assign_set_lhs (new_stmt, new_temp);
5081 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5082 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5083 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5084 msq = new_temp;
5086 bump = size_binop (MULT_EXPR, vs_minus_1,
5087 TYPE_SIZE_UNIT (elem_type));
5088 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5089 new_stmt = gimple_build_assign_with_ops
5090 (BIT_AND_EXPR, NULL_TREE, ptr,
5091 build_int_cst
5092 (TREE_TYPE (ptr),
5093 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5094 ptr = copy_ssa_name (dataref_ptr, new_stmt);
5095 gimple_assign_set_lhs (new_stmt, ptr);
5096 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5097 data_ref
5098 = build2 (MEM_REF, vectype, ptr,
5099 build_int_cst (reference_alias_ptr_type
5100 (DR_REF (first_dr)), 0));
5101 break;
5103 case dr_explicit_realign_optimized:
5104 new_temp = copy_ssa_name (dataref_ptr, NULL);
5105 new_stmt = gimple_build_assign_with_ops
5106 (BIT_AND_EXPR, new_temp, dataref_ptr,
5107 build_int_cst
5108 (TREE_TYPE (dataref_ptr),
5109 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5110 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5111 data_ref
5112 = build2 (MEM_REF, vectype, new_temp,
5113 build_int_cst (reference_alias_ptr_type
5114 (DR_REF (first_dr)), 0));
5115 break;
5116 default:
5117 gcc_unreachable ();
5119 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5120 new_stmt = gimple_build_assign (vec_dest, data_ref);
5121 new_temp = make_ssa_name (vec_dest, new_stmt);
5122 gimple_assign_set_lhs (new_stmt, new_temp);
5123 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5125 /* 3. Handle explicit realignment if necessary/supported.
5126 Create in loop:
5127 vec_dest = realign_load (msq, lsq, realignment_token) */
5128 if (alignment_support_scheme == dr_explicit_realign_optimized
5129 || alignment_support_scheme == dr_explicit_realign)
5131 lsq = gimple_assign_lhs (new_stmt);
5132 if (!realignment_token)
5133 realignment_token = dataref_ptr;
5134 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5135 new_stmt
5136 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5137 vec_dest, msq, lsq,
5138 realignment_token);
5139 new_temp = make_ssa_name (vec_dest, new_stmt);
5140 gimple_assign_set_lhs (new_stmt, new_temp);
5141 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5143 if (alignment_support_scheme == dr_explicit_realign_optimized)
5145 gcc_assert (phi);
5146 if (i == vec_num - 1 && j == ncopies - 1)
5147 add_phi_arg (phi, lsq,
5148 loop_latch_edge (containing_loop),
5149 UNKNOWN_LOCATION);
5150 msq = lsq;
5154 /* 4. Handle invariant-load. */
5155 if (inv_p && !bb_vinfo)
5157 gimple_stmt_iterator gsi2 = *gsi;
5158 gcc_assert (!grouped_load);
5159 gsi_next (&gsi2);
5160 new_temp = vect_init_vector (stmt, scalar_dest,
5161 vectype, &gsi2);
5162 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5165 if (negative)
5167 tree perm_mask = perm_mask_for_reverse (vectype);
5168 new_temp = permute_vec_elements (new_temp, new_temp,
5169 perm_mask, stmt, gsi);
5170 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5173 /* Collect vector loads and later create their permutation in
5174 vect_transform_grouped_load (). */
5175 if (grouped_load || slp_perm)
5176 VEC_quick_push (tree, dr_chain, new_temp);
5178 /* Store vector loads in the corresponding SLP_NODE. */
5179 if (slp && !slp_perm)
5180 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
5181 new_stmt);
5185 if (slp && !slp_perm)
5186 continue;
5188 if (slp_perm)
5190 if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
5191 slp_node_instance, false))
5193 VEC_free (tree, heap, dr_chain);
5194 return false;
5197 else
5199 if (grouped_load)
5201 if (!load_lanes_p)
5202 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
5203 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5205 else
5207 if (j == 0)
5208 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5209 else
5210 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5211 prev_stmt_info = vinfo_for_stmt (new_stmt);
5214 if (dr_chain)
5215 VEC_free (tree, heap, dr_chain);
5218 return true;
5221 /* Function vect_is_simple_cond.
5223 Input:
5224 LOOP - the loop that is being vectorized.
5225 COND - Condition that is checked for simple use.
5227 Output:
5228 *COMP_VECTYPE - the vector type for the comparison.
5230 Returns whether a COND can be vectorized. Checks whether
5231 condition operands are supportable using vect_is_simple_use. */
5233 static bool
5234 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5235 bb_vec_info bb_vinfo, tree *comp_vectype)
5237 tree lhs, rhs;
5238 tree def;
5239 enum vect_def_type dt;
5240 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5242 if (!COMPARISON_CLASS_P (cond))
5243 return false;
5245 lhs = TREE_OPERAND (cond, 0);
5246 rhs = TREE_OPERAND (cond, 1);
5248 if (TREE_CODE (lhs) == SSA_NAME)
5250 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5251 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5252 &lhs_def_stmt, &def, &dt, &vectype1))
5253 return false;
5255 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5256 && TREE_CODE (lhs) != FIXED_CST)
5257 return false;
5259 if (TREE_CODE (rhs) == SSA_NAME)
5261 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5262 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5263 &rhs_def_stmt, &def, &dt, &vectype2))
5264 return false;
5266 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5267 && TREE_CODE (rhs) != FIXED_CST)
5268 return false;
5270 *comp_vectype = vectype1 ? vectype1 : vectype2;
5271 return true;
5274 /* vectorizable_condition.
5276 Check if STMT is a conditional modify expression that can be vectorized.
5277 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5278 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5279 at GSI.
5281 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
5282 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
5283 the else clause if it is 2).
5285 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
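   /* For instance (a hedged sketch assuming 4-lane integer vectors), the
      scalar statement

        a = b < c ? d : e;

      becomes

        va = VEC_COND_EXPR <vb < vc, vd, ve>;

      with one such statement per copy when ncopies > 1.  */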
5287 bool
5288 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5289 gimple *vec_stmt, tree reduc_def, int reduc_index,
5290 slp_tree slp_node)
5292 tree scalar_dest = NULL_TREE;
5293 tree vec_dest = NULL_TREE;
5294 tree cond_expr, then_clause, else_clause;
5295 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5296 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5297 tree comp_vectype = NULL_TREE;
5298 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5299 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5300 tree vec_compare, vec_cond_expr;
5301 tree new_temp;
5302 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5303 tree def;
5304 enum vect_def_type dt, dts[4];
5305 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5306 int ncopies;
5307 enum tree_code code;
5308 stmt_vec_info prev_stmt_info = NULL;
5309 int i, j;
5310 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5311 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
5312 VEC (tree, heap) *vec_oprnds2 = NULL, *vec_oprnds3 = NULL;
5314 if (slp_node || PURE_SLP_STMT (stmt_info))
5315 ncopies = 1;
5316 else
5317 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5319 gcc_assert (ncopies >= 1);
5320 if (reduc_index && ncopies > 1)
5321 return false; /* FORNOW */
5323 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5324 return false;
5326 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5327 return false;
5329 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5330 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5331 && reduc_def))
5332 return false;
5334 /* FORNOW: not yet supported. */
5335 if (STMT_VINFO_LIVE_P (stmt_info))
5337 if (dump_enabled_p ())
5338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5339 "value used after loop.");
5340 return false;
5343 /* Is vectorizable conditional operation? */
5344 if (!is_gimple_assign (stmt))
5345 return false;
5347 code = gimple_assign_rhs_code (stmt);
5349 if (code != COND_EXPR)
5350 return false;
5352 cond_expr = gimple_assign_rhs1 (stmt);
5353 then_clause = gimple_assign_rhs2 (stmt);
5354 else_clause = gimple_assign_rhs3 (stmt);
5356 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5357 &comp_vectype)
5358 || !comp_vectype)
5359 return false;
5361 if (TREE_CODE (then_clause) == SSA_NAME)
5363 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5364 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5365 &then_def_stmt, &def, &dt))
5366 return false;
5368 else if (TREE_CODE (then_clause) != INTEGER_CST
5369 && TREE_CODE (then_clause) != REAL_CST
5370 && TREE_CODE (then_clause) != FIXED_CST)
5371 return false;
5373 if (TREE_CODE (else_clause) == SSA_NAME)
5375 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5376 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5377 &else_def_stmt, &def, &dt))
5378 return false;
5380 else if (TREE_CODE (else_clause) != INTEGER_CST
5381 && TREE_CODE (else_clause) != REAL_CST
5382 && TREE_CODE (else_clause) != FIXED_CST)
5383 return false;
5385 if (!vec_stmt)
5387 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5388 return expand_vec_cond_expr_p (vectype, comp_vectype);
5391 /* Transform. */
5393 if (!slp_node)
5395 vec_oprnds0 = VEC_alloc (tree, heap, 1);
5396 vec_oprnds1 = VEC_alloc (tree, heap, 1);
5397 vec_oprnds2 = VEC_alloc (tree, heap, 1);
5398 vec_oprnds3 = VEC_alloc (tree, heap, 1);
5401 /* Handle def. */
5402 scalar_dest = gimple_assign_lhs (stmt);
5403 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5405 /* Handle cond expr. */
5406 for (j = 0; j < ncopies; j++)
5408 gimple new_stmt = NULL;
5409 if (j == 0)
5411 if (slp_node)
5413 VEC (tree, heap) *ops = VEC_alloc (tree, heap, 4);
5414 VEC (slp_void_p, heap) *vec_defs;
5416 vec_defs = VEC_alloc (slp_void_p, heap, 4);
5417 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 0));
5418 VEC_safe_push (tree, heap, ops, TREE_OPERAND (cond_expr, 1));
5419 VEC_safe_push (tree, heap, ops, then_clause);
5420 VEC_safe_push (tree, heap, ops, else_clause);
5421 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5422 vec_oprnds3 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5423 vec_oprnds2 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5424 vec_oprnds1 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5425 vec_oprnds0 = (VEC (tree, heap) *) VEC_pop (slp_void_p, vec_defs);
5427 VEC_free (tree, heap, ops);
5428 VEC_free (slp_void_p, heap, vec_defs);
5430 else
5432 gimple gtemp;
5433 vec_cond_lhs =
5434 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5435 stmt, NULL);
5436 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5437 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5439 vec_cond_rhs =
5440 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5441 stmt, NULL);
5442 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5443 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5444 if (reduc_index == 1)
5445 vec_then_clause = reduc_def;
5446 else
5448 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5449 stmt, NULL);
5450 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5451 NULL, &gtemp, &def, &dts[2]);
5453 if (reduc_index == 2)
5454 vec_else_clause = reduc_def;
5455 else
5457 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5458 stmt, NULL);
5459 vect_is_simple_use (else_clause, stmt, loop_vinfo,
5460 NULL, &gtemp, &def, &dts[3]);
5464 else
5466 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5467 VEC_pop (tree, vec_oprnds0));
5468 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5469 VEC_pop (tree, vec_oprnds1));
5470 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5471 VEC_pop (tree, vec_oprnds2));
5472 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5473 VEC_pop (tree, vec_oprnds3));
5476 if (!slp_node)
5478 VEC_quick_push (tree, vec_oprnds0, vec_cond_lhs);
5479 VEC_quick_push (tree, vec_oprnds1, vec_cond_rhs);
5480 VEC_quick_push (tree, vec_oprnds2, vec_then_clause);
5481 VEC_quick_push (tree, vec_oprnds3, vec_else_clause);
5484 /* Arguments are ready. Create the new vector stmt. */
5485 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, vec_cond_lhs)
5487 vec_cond_rhs = VEC_index (tree, vec_oprnds1, i);
5488 vec_then_clause = VEC_index (tree, vec_oprnds2, i);
5489 vec_else_clause = VEC_index (tree, vec_oprnds3, i);
5491 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
5492 vec_cond_lhs, vec_cond_rhs);
5493 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5494 vec_compare, vec_then_clause, vec_else_clause);
5496 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5497 new_temp = make_ssa_name (vec_dest, new_stmt);
5498 gimple_assign_set_lhs (new_stmt, new_temp);
5499 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5500 if (slp_node)
5501 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5504 if (slp_node)
5505 continue;
5507 if (j == 0)
5508 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5509 else
5510 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5512 prev_stmt_info = vinfo_for_stmt (new_stmt);
5515 VEC_free (tree, heap, vec_oprnds0);
5516 VEC_free (tree, heap, vec_oprnds1);
5517 VEC_free (tree, heap, vec_oprnds2);
5518 VEC_free (tree, heap, vec_oprnds3);
5520 return true;
5524 /* Make sure the statement is vectorizable. */
5526 bool
5527 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5529 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5530 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5531 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5532 bool ok;
5533 tree scalar_type, vectype;
5534 gimple pattern_stmt;
5535 gimple_seq pattern_def_seq;
5537 if (dump_enabled_p ())
5539 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5540 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5543 if (gimple_has_volatile_ops (stmt))
5545 if (dump_enabled_p ())
5546 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5547 "not vectorized: stmt has volatile operands");
5549 return false;
5552 /* Skip stmts that do not need to be vectorized. In loops this is expected
5553 to include:
5554 - the COND_EXPR which is the loop exit condition
5555 - any LABEL_EXPRs in the loop
5556 - computations that are used only for array indexing or loop control.
5557 In basic blocks we only analyze statements that are a part of some SLP
5558 instance, therefore, all the statements are relevant.
5560 A pattern statement needs to be analyzed instead of the original statement
5561 if the original statement is not relevant. Otherwise, we analyze both
5562 statements. In basic blocks we are called from some SLP instance
5563 traversal; there we don't analyze pattern stmts instead of the original
5564 ones, since the pattern stmts will already be part of the SLP instance. */
5566 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5567 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5568 && !STMT_VINFO_LIVE_P (stmt_info))
5570 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5571 && pattern_stmt
5572 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5573 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5575 /* Analyze PATTERN_STMT instead of the original stmt. */
5576 stmt = pattern_stmt;
5577 stmt_info = vinfo_for_stmt (pattern_stmt);
5578 if (dump_enabled_p ())
5580 dump_printf_loc (MSG_NOTE, vect_location,
5581 "==> examining pattern statement: ");
5582 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5585 else
5587 if (dump_enabled_p ())
5588 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.");
5590 return true;
5593 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5594 && node == NULL
5595 && pattern_stmt
5596 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5597 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5599 /* Analyze PATTERN_STMT too. */
5600 if (dump_enabled_p ())
5602 dump_printf_loc (MSG_NOTE, vect_location,
5603 "==> examining pattern statement: ");
5604 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5607 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5608 return false;
5611 if (is_pattern_stmt_p (stmt_info)
5612 && node == NULL
5613 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5615 gimple_stmt_iterator si;
5617 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5619 gimple pattern_def_stmt = gsi_stmt (si);
5620 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5621 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5623 /* Analyze def stmt of STMT if it's a pattern stmt. */
5624 if (dump_enabled_p ())
5626 dump_printf_loc (MSG_NOTE, vect_location,
5627 "==> examining pattern def statement: ");
5628 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
5631 if (!vect_analyze_stmt (pattern_def_stmt,
5632 need_to_vectorize, node))
5633 return false;
5638 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5640 case vect_internal_def:
5641 break;
5643 case vect_reduction_def:
5644 case vect_nested_cycle:
5645 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5646 || relevance == vect_used_in_outer_by_reduction
5647 || relevance == vect_unused_in_scope));
5648 break;
5650 case vect_induction_def:
5651 case vect_constant_def:
5652 case vect_external_def:
5653 case vect_unknown_def_type:
5654 default:
5655 gcc_unreachable ();
5658 if (bb_vinfo)
5660 gcc_assert (PURE_SLP_STMT (stmt_info));
5662 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5663 if (dump_enabled_p ())
5665 dump_printf_loc (MSG_NOTE, vect_location,
5666 "get vectype for scalar type: ");
5667 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
5670 vectype = get_vectype_for_scalar_type (scalar_type);
5671 if (!vectype)
5673 if (dump_enabled_p ())
5675 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5676 "not SLPed: unsupported data-type ");
5677 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5678 scalar_type);
5680 return false;
5683 if (dump_enabled_p ())
5685 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
5686 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
5689 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5692 if (STMT_VINFO_RELEVANT_P (stmt_info))
5694 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5695 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5696 *need_to_vectorize = true;
5699 ok = true;
5700 if (!bb_vinfo
5701 && (STMT_VINFO_RELEVANT_P (stmt_info)
5702 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5703 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5704 || vectorizable_shift (stmt, NULL, NULL, NULL)
5705 || vectorizable_operation (stmt, NULL, NULL, NULL)
5706 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5707 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5708 || vectorizable_call (stmt, NULL, NULL, NULL)
5709 || vectorizable_store (stmt, NULL, NULL, NULL)
5710 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5711 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5712 else
5714 if (bb_vinfo)
5715 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5716 || vectorizable_shift (stmt, NULL, NULL, node)
5717 || vectorizable_operation (stmt, NULL, NULL, node)
5718 || vectorizable_assignment (stmt, NULL, NULL, node)
5719 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5720 || vectorizable_call (stmt, NULL, NULL, node)
5721 || vectorizable_store (stmt, NULL, NULL, node)
5722 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5725 if (!ok)
5727 if (dump_enabled_p ())
5729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5730 "not vectorized: relevant stmt not ");
5731 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5732 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5735 return false;
5738 if (bb_vinfo)
5739 return true;
5741 /* Stmts that are (also) "live" (i.e. used outside of the loop)
5742 need extra handling, except for vectorizable reductions. */
5743 if (STMT_VINFO_LIVE_P (stmt_info)
5744 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5745 ok = vectorizable_live_operation (stmt, NULL, NULL);
5747 if (!ok)
5749 if (dump_enabled_p ())
5751 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5752 "not vectorized: live stmt not ");
5753 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5754 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5757 return false;
5760 return true;
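/* A sketch of how an analysis driver (e.g. in tree-vect-loop.c) is
   expected to call the routine above for every statement of a basic
   block; the exact caller code lives elsewhere, so this is only
   illustrative:

     bool need_to_vectorize = false;
     gimple_stmt_iterator si;

     for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
       if (!vect_analyze_stmt (gsi_stmt (si), &need_to_vectorize, NULL))
         return false;

   A single failing statement makes the whole loop (or SLP instance)
   unvectorizable.  */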
5764 /* Function vect_transform_stmt.
5766 Create a vectorized stmt to replace STMT, and insert it at GSI. */
5768 bool
5769 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5770 bool *grouped_store, slp_tree slp_node,
5771 slp_instance slp_node_instance)
5773 bool is_store = false;
5774 gimple vec_stmt = NULL;
5775 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5776 bool done;
5778 switch (STMT_VINFO_TYPE (stmt_info))
5780 case type_demotion_vec_info_type:
5781 case type_promotion_vec_info_type:
5782 case type_conversion_vec_info_type:
5783 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5784 gcc_assert (done);
5785 break;
5787 case induc_vec_info_type:
5788 gcc_assert (!slp_node);
5789 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5790 gcc_assert (done);
5791 break;
5793 case shift_vec_info_type:
5794 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5795 gcc_assert (done);
5796 break;
5798 case op_vec_info_type:
5799 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5800 gcc_assert (done);
5801 break;
5803 case assignment_vec_info_type:
5804 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5805 gcc_assert (done);
5806 break;
5808 case load_vec_info_type:
5809 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5810 slp_node_instance);
5811 gcc_assert (done);
5812 break;
5814 case store_vec_info_type:
5815 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5816 gcc_assert (done);
5817 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5819 /* In case of interleaving, the whole chain is vectorized when the
5820 last store in the chain is reached. Store stmts before the last
5821 one are skipped, and their vec_stmt_info shouldn't be freed
5822 meanwhile. */
5823 *grouped_store = true;
5824 if (STMT_VINFO_VEC_STMT (stmt_info))
5825 is_store = true;
5827 else
5828 is_store = true;
5829 break;
5831 case condition_vec_info_type:
5832 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5833 gcc_assert (done);
5834 break;
5836 case call_vec_info_type:
5837 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5838 stmt = gsi_stmt (*gsi);
5839 break;
5841 case reduc_vec_info_type:
5842 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5843 gcc_assert (done);
5844 break;
5846 default:
5847 if (!STMT_VINFO_LIVE_P (stmt_info))
5849 if (dump_enabled_p ())
5850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5851 "stmt not supported.");
5852 gcc_unreachable ();
5856 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5857 is being vectorized, but outside the immediately enclosing loop. */
5858 if (vec_stmt
5859 && STMT_VINFO_LOOP_VINFO (stmt_info)
5860 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5861 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5862 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5863 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5864 || STMT_VINFO_RELEVANT (stmt_info) ==
5865 vect_used_in_outer_by_reduction))
5867 struct loop *innerloop = LOOP_VINFO_LOOP (
5868 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5869 imm_use_iterator imm_iter;
5870 use_operand_p use_p;
5871 tree scalar_dest;
5872 gimple exit_phi;
5874 if (dump_enabled_p ())
5875 dump_printf_loc (MSG_NOTE, vect_location,
5876 "Record the vdef for outer-loop vectorization.");
5878 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5879 (to be used when vectorizing outer-loop stmts that use the DEF of
5880 STMT). */
5881 if (gimple_code (stmt) == GIMPLE_PHI)
5882 scalar_dest = PHI_RESULT (stmt);
5883 else
5884 scalar_dest = gimple_assign_lhs (stmt);
5886 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5888 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5890 exit_phi = USE_STMT (use_p);
5891 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5896 /* Handle stmts whose DEF is used outside the loop-nest that is
5897 being vectorized. */
5898 if (STMT_VINFO_LIVE_P (stmt_info)
5899 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5901 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5902 gcc_assert (done);
5905 if (vec_stmt)
5906 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5908 return is_store;
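/* A sketch of the expected use of vect_transform_stmt from the transform
   phase; the caller lives elsewhere, and the variable names here are
   hypothetical:

     bool grouped_store = false;
     bool is_store;

     is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);

   When IS_STORE is true the caller removes the original scalar store(s),
   e.g. via vect_remove_stores below for grouped accesses; other scalar
   statements are left for later dead-code elimination.  */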
5912 /* Remove a group of stores (for SLP or interleaving), free their
5913 stmt_vec_info. */
5915 void
5916 vect_remove_stores (gimple first_stmt)
5918 gimple next = first_stmt;
5919 gimple tmp;
5920 gimple_stmt_iterator next_si;
5922 while (next)
5924 stmt_vec_info stmt_info = vinfo_for_stmt (next);
5926 tmp = GROUP_NEXT_ELEMENT (stmt_info);
5927 if (is_pattern_stmt_p (stmt_info))
5928 next = STMT_VINFO_RELATED_STMT (stmt_info);
5929 /* Free the attached stmt_vec_info and remove the stmt. */
5930 next_si = gsi_for_stmt (next);
5931 unlink_stmt_vdef (next);
5932 gsi_remove (&next_si, true);
5933 release_defs (next);
5934 free_stmt_vec_info (next);
5935 next = tmp;
5940 /* Function new_stmt_vec_info.
5942 Create and initialize a new stmt_vec_info struct for STMT. */
5944 stmt_vec_info
5945 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5946 bb_vec_info bb_vinfo)
5948 stmt_vec_info res;
5949 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5951 STMT_VINFO_TYPE (res) = undef_vec_info_type;
5952 STMT_VINFO_STMT (res) = stmt;
5953 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5954 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5955 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5956 STMT_VINFO_LIVE_P (res) = false;
5957 STMT_VINFO_VECTYPE (res) = NULL;
5958 STMT_VINFO_VEC_STMT (res) = NULL;
5959 STMT_VINFO_VECTORIZABLE (res) = true;
5960 STMT_VINFO_IN_PATTERN_P (res) = false;
5961 STMT_VINFO_RELATED_STMT (res) = NULL;
5962 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5963 STMT_VINFO_DATA_REF (res) = NULL;
5965 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5966 STMT_VINFO_DR_OFFSET (res) = NULL;
5967 STMT_VINFO_DR_INIT (res) = NULL;
5968 STMT_VINFO_DR_STEP (res) = NULL;
5969 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5971 if (gimple_code (stmt) == GIMPLE_PHI
5972 && is_loop_header_bb_p (gimple_bb (stmt)))
5973 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5974 else
5975 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5977 STMT_VINFO_SAME_ALIGN_REFS (res) = NULL;
5978 STMT_SLP_TYPE (res) = loop_vect;
5979 GROUP_FIRST_ELEMENT (res) = NULL;
5980 GROUP_NEXT_ELEMENT (res) = NULL;
5981 GROUP_SIZE (res) = 0;
5982 GROUP_STORE_COUNT (res) = 0;
5983 GROUP_GAP (res) = 0;
5984 GROUP_SAME_DR_STMT (res) = NULL;
5985 GROUP_READ_WRITE_DEPENDENCE (res) = false;
5987 return res;
5991 /* Create the vector for holding stmt_vec_info structs. */
5993 void
5994 init_stmt_vec_info_vec (void)
5996 gcc_assert (!stmt_vec_info_vec);
5997 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
6001 /* Free the vector holding stmt_vec_info structs. */
6003 void
6004 free_stmt_vec_info_vec (void)
6006 gcc_assert (stmt_vec_info_vec);
6007 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
6011 /* Free stmt vectorization related info. */
6013 void
6014 free_stmt_vec_info (gimple stmt)
6016 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6018 if (!stmt_info)
6019 return;
6021 /* Check if this statement has a related "pattern stmt"
6022 (introduced by the vectorizer during the pattern recognition
6023 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
6024 too. */
6025 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6027 stmt_vec_info patt_info
6028 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6029 if (patt_info)
6031 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6032 if (seq)
6034 gimple_stmt_iterator si;
6035 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6036 free_stmt_vec_info (gsi_stmt (si));
6038 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6042 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
6043 set_vinfo_for_stmt (stmt, NULL);
6044 free (stmt_info);
6048 /* Function get_vectype_for_scalar_type_and_size.
6050 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
6051 by the target. */
6053 static tree
6054 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
6056 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
6057 enum machine_mode simd_mode;
6058 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
6059 int nunits;
6060 tree vectype;
6062 if (nbytes == 0)
6063 return NULL_TREE;
6065 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6066 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6067 return NULL_TREE;
6069 /* For vector types of elements whose mode precision doesn't
6070 match their type's precision we use an element type of mode
6071 precision. The vectorization routines will have to make sure
6072 they support the proper result truncation/extension.
6073 We also make sure to build vector types with INTEGER_TYPE
6074 component type only. */
6075 if (INTEGRAL_TYPE_P (scalar_type)
6076 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6077 || TREE_CODE (scalar_type) != INTEGER_TYPE))
6078 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6079 TYPE_UNSIGNED (scalar_type));
6081 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6082 When the component mode passes the above test simply use a type
6083 corresponding to that mode. The theory is that any use that
6084 would cause problems with this will disable vectorization anyway. */
6085 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
6086 && !INTEGRAL_TYPE_P (scalar_type)
6087 && !POINTER_TYPE_P (scalar_type))
6088 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6090 /* We can't build a vector type of elements with alignment bigger than
6091 their size. */
6092 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
6093 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6095 /* If we fell back to using the mode, fail if there was
6096 no scalar type for it. */
6097 if (scalar_type == NULL_TREE)
6098 return NULL_TREE;
6100 /* If no size was supplied use the mode the target prefers. Otherwise
6101 look up a vector mode of the specified size. */
6102 if (size == 0)
6103 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6104 else
6105 simd_mode = mode_for_vector (inner_mode, size / nbytes);
6106 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6107 if (nunits <= 1)
6108 return NULL_TREE;
6110 vectype = build_vector_type (scalar_type, nunits);
6111 if (dump_enabled_p ())
6113 dump_printf_loc (MSG_NOTE, vect_location,
6114 "get vectype with %d units of type ", nunits);
6115 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
6118 if (!vectype)
6119 return NULL_TREE;
6121 if (dump_enabled_p ())
6123 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
6124 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
6127 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6128 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6130 if (dump_enabled_p ())
6131 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6132 "mode not supported by target.");
6133 return NULL_TREE;
6136 return vectype;
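/* A worked example of the lookup above (the exact modes are
   target-dependent, so take this as a sketch): for a 32-bit "int" scalar
   type and SIZE == 16 bytes,

     tree vt = get_vectype_for_scalar_type_and_size (integer_type_node, 16);

   would look up a 16-byte integer vector mode (e.g. V4SImode on a target
   with 16-byte vector registers) and return a vector type with
   nunits == 4.  With SIZE == 0 the target's preferred SIMD mode is used
   instead.  */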
6139 unsigned int current_vector_size;
6141 /* Function get_vectype_for_scalar_type.
6143 Returns the vector type corresponding to SCALAR_TYPE as supported
6144 by the target. */
6146 tree
6147 get_vectype_for_scalar_type (tree scalar_type)
6149 tree vectype;
6150 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6151 current_vector_size);
6152 if (vectype
6153 && current_vector_size == 0)
6154 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6155 return vectype;
6158 /* Function get_same_sized_vectype
6160 Returns a vector type corresponding to SCALAR_TYPE of size
6161 VECTOR_TYPE if supported by the target. */
6163 tree
6164 get_same_sized_vectype (tree scalar_type, tree vector_type)
6166 return get_vectype_for_scalar_type_and_size
6167 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6170 /* Function vect_is_simple_use.
6172 Input:
6173 LOOP_VINFO - the vect info of the loop that is being vectorized.
6174 BB_VINFO - the vect info of the basic block that is being vectorized.
6175 OPERAND - operand of STMT in the loop or bb.
6176 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6178 Returns whether a stmt with OPERAND can be vectorized.
6179 For loops, supportable operands are constants, loop invariants, and operands
6180 that are defined by the current iteration of the loop. Unsupportable
6181 operands are those that are defined by a previous iteration of the loop (as
6182 is the case in reduction/induction computations).
6183 For basic blocks, supportable operands are constants and bb invariants.
6184 For now, operands defined outside the basic block are not supported. */
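/* A sketch of a typical query (hypothetical names); for

     for (i = 0; i < n; i++)
       a[i] = b[i] * k;

   asking about the SSA name loaded from b[i] yields dt == vect_internal_def
   (defined inside the loop), asking about the loop-invariant "k" yields
   dt == vect_external_def, and a literal constant yields vect_constant_def:

     gimple def_stmt;
     tree def;
     enum vect_def_type dt;

     if (!vect_is_simple_use (op, stmt, loop_vinfo, NULL,
                              &def_stmt, &def, &dt))
       return false;  */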
6186 bool
6187 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6188 bb_vec_info bb_vinfo, gimple *def_stmt,
6189 tree *def, enum vect_def_type *dt)
6191 basic_block bb;
6192 stmt_vec_info stmt_vinfo;
6193 struct loop *loop = NULL;
6195 if (loop_vinfo)
6196 loop = LOOP_VINFO_LOOP (loop_vinfo);
6198 *def_stmt = NULL;
6199 *def = NULL_TREE;
6201 if (dump_enabled_p ())
6203 dump_printf_loc (MSG_NOTE, vect_location,
6204 "vect_is_simple_use: operand ");
6205 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
6208 if (CONSTANT_CLASS_P (operand))
6210 *dt = vect_constant_def;
6211 return true;
6214 if (is_gimple_min_invariant (operand))
6216 *def = operand;
6217 *dt = vect_external_def;
6218 return true;
6221 if (TREE_CODE (operand) == PAREN_EXPR)
6223 if (dump_enabled_p ())
6224 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.");
6225 operand = TREE_OPERAND (operand, 0);
6228 if (TREE_CODE (operand) != SSA_NAME)
6230 if (dump_enabled_p ())
6231 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6232 "not ssa-name.");
6233 return false;
6236 *def_stmt = SSA_NAME_DEF_STMT (operand);
6237 if (*def_stmt == NULL)
6239 if (dump_enabled_p ())
6240 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6241 "no def_stmt.");
6242 return false;
6245 if (dump_enabled_p ())
6247 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6248 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
6251 /* An empty stmt is expected only in the case of a function argument
6252 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
6253 if (gimple_nop_p (*def_stmt))
6255 *def = operand;
6256 *dt = vect_external_def;
6257 return true;
6260 bb = gimple_bb (*def_stmt);
6262 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6263 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
6264 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
6265 *dt = vect_external_def;
6266 else
6268 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6269 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6272 if (*dt == vect_unknown_def_type
6273 || (stmt
6274 && *dt == vect_double_reduction_def
6275 && gimple_code (stmt) != GIMPLE_PHI))
6277 if (dump_enabled_p ())
6278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6279 "Unsupported pattern.");
6280 return false;
6283 if (dump_enabled_p ())
6284 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.", *dt);
6286 switch (gimple_code (*def_stmt))
6288 case GIMPLE_PHI:
6289 *def = gimple_phi_result (*def_stmt);
6290 break;
6292 case GIMPLE_ASSIGN:
6293 *def = gimple_assign_lhs (*def_stmt);
6294 break;
6296 case GIMPLE_CALL:
6297 *def = gimple_call_lhs (*def_stmt);
6298 if (*def != NULL)
6299 break;
6300 /* FALLTHRU */
6301 default:
6302 if (dump_enabled_p ())
6303 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6304 "unsupported defining stmt: ");
6305 return false;
6308 return true;
6311 /* Function vect_is_simple_use_1.
6313 Same as vect_is_simple_use but also determines the vector operand
6314 type of OPERAND and stores it to *VECTYPE. If the definition of
6315 OPERAND is vect_uninitialized_def, vect_constant_def or
6316 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6317 is responsible for computing the best suited vector type for the
6318 scalar operand. */
6320 bool
6321 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6322 bb_vec_info bb_vinfo, gimple *def_stmt,
6323 tree *def, enum vect_def_type *dt, tree *vectype)
6325 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6326 def, dt))
6327 return false;
6329 /* Now get a vector type if the def is internal, otherwise supply
6330 NULL_TREE and leave it up to the caller to figure out a proper
6331 type for the use stmt. */
6332 if (*dt == vect_internal_def
6333 || *dt == vect_induction_def
6334 || *dt == vect_reduction_def
6335 || *dt == vect_double_reduction_def
6336 || *dt == vect_nested_cycle)
6338 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6340 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6341 && !STMT_VINFO_RELEVANT (stmt_info)
6342 && !STMT_VINFO_LIVE_P (stmt_info))
6343 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6345 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6346 gcc_assert (*vectype != NULL_TREE);
6348 else if (*dt == vect_uninitialized_def
6349 || *dt == vect_constant_def
6350 || *dt == vect_external_def)
6351 *vectype = NULL_TREE;
6352 else
6353 gcc_unreachable ();
6355 return true;
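/* A sketch of a typical call to vect_is_simple_use_1 (hypothetical
   variable names): when the operand is constant or external, *VECTYPE
   comes back NULL_TREE and the caller chooses a vector type itself.

     gimple def_stmt;
     tree def, vectype_in;
     enum vect_def_type dt;

     if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
                                &def_stmt, &def, &dt, &vectype_in))
       return false;
     if (!vectype_in)
       vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));  */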
6359 /* Function supportable_widening_operation
6361 Check whether an operation represented by the code CODE is a
6362 widening operation that is supported by the target platform in
6363 vector form (i.e., when operating on arguments of type VECTYPE_IN
6364 producing a result of type VECTYPE_OUT).
6366 Widening operations we currently support are NOP (CONVERT), FLOAT
6367 and WIDEN_MULT. This function checks if these operations are supported
6368 by the target platform either directly (via vector tree-codes), or via
6369 target builtins.
6371 Output:
6372 - CODE1 and CODE2 are codes of vector operations to be used when
6373 vectorizing the operation, if available.
6374 - MULTI_STEP_CVT determines the number of required intermediate steps in
6375 case of multi-step conversion (like char->short->int - in that case
6376 MULTI_STEP_CVT will be 1).
6377 - INTERM_TYPES contains the intermediate type required to perform the
6378 widening operation (short in the above example). */
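/* Expanding the char->short->int example above into a sketch (the exact
   outcome depends on which optabs the target provides): for

     int a[N]; char b[N];
     for (i = 0; i < N; i++)
       a[i] = (int) b[i];

   with VECTYPE_IN a vector of chars and VECTYPE_OUT a vector of ints, a
   successful call would return CODE1/CODE2 == VEC_UNPACK_LO_EXPR /
   VEC_UNPACK_HI_EXPR, *MULTI_STEP_CVT == 1, and INTERM_TYPES holding the
   vector-of-shorts intermediate type.  */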
6380 bool
6381 supportable_widening_operation (enum tree_code code, gimple stmt,
6382 tree vectype_out, tree vectype_in,
6383 enum tree_code *code1, enum tree_code *code2,
6384 int *multi_step_cvt,
6385 VEC (tree, heap) **interm_types)
6387 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6388 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6389 struct loop *vect_loop = NULL;
6390 enum machine_mode vec_mode;
6391 enum insn_code icode1, icode2;
6392 optab optab1, optab2;
6393 tree vectype = vectype_in;
6394 tree wide_vectype = vectype_out;
6395 enum tree_code c1, c2;
6396 int i;
6397 tree prev_type, intermediate_type;
6398 enum machine_mode intermediate_mode, prev_mode;
6399 optab optab3, optab4;
6401 *multi_step_cvt = 0;
6402 if (loop_info)
6403 vect_loop = LOOP_VINFO_LOOP (loop_info);
6405 switch (code)
6407 case WIDEN_MULT_EXPR:
6408 /* The result of a vectorized widening operation usually requires
6409 two vectors (because the widened results do not fit into one vector).
6410 The generated vector results would normally be expected to be
6411 generated in the same order as in the original scalar computation,
6412 i.e. if 8 results are generated in each vector iteration, they are
6413 to be organized as follows:
6414 vect1: [res1,res2,res3,res4],
6415 vect2: [res5,res6,res7,res8].
6417 However, in the special case that the result of the widening
6418 operation is used in a reduction computation only, the order doesn't
6419 matter (because when vectorizing a reduction we change the order of
6420 the computation). Some targets can take advantage of this and
6421 generate more efficient code. For example, targets like Altivec,
6422 that support widen_mult using a sequence of {mult_even,mult_odd}
6423 generate the following vectors:
6424 vect1: [res1,res3,res5,res7],
6425 vect2: [res2,res4,res6,res8].
6427 When vectorizing outer-loops, we execute the inner-loop sequentially
6428 (each vectorized inner-loop iteration contributes to VF outer-loop
6429 iterations in parallel). We therefore don't allow changing the
6430 order of the computation in the inner-loop during outer-loop
6431 vectorization. */
6432 /* TODO: Another case in which order doesn't *really* matter is when we
6433 widen and then contract again, e.g. (short)((int)x * y >> 8).
6434 Normally, pack_trunc performs an even/odd permute, whereas the
6435 repack from an even/odd expansion would be an interleave, which
6436 would be significantly simpler for e.g. AVX2. */
6437 /* In any case, in order to avoid duplicating the code below, recurse
6438 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6439 are properly set up for the caller. If we fail, we'll continue with
6440 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6441 if (vect_loop
6442 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6443 && !nested_in_vect_loop_p (vect_loop, stmt)
6444 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6445 stmt, vectype_out, vectype_in,
6446 code1, code2, multi_step_cvt,
6447 interm_types))
6448 return true;
6449 c1 = VEC_WIDEN_MULT_LO_EXPR;
6450 c2 = VEC_WIDEN_MULT_HI_EXPR;
6451 break;
6453 case VEC_WIDEN_MULT_EVEN_EXPR:
6454 /* Support the recursion induced just above. */
6455 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6456 c2 = VEC_WIDEN_MULT_ODD_EXPR;
6457 break;
6459 case WIDEN_LSHIFT_EXPR:
6460 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6461 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6462 break;
6464 CASE_CONVERT:
6465 c1 = VEC_UNPACK_LO_EXPR;
6466 c2 = VEC_UNPACK_HI_EXPR;
6467 break;
6469 case FLOAT_EXPR:
6470 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6471 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6472 break;
6474 case FIX_TRUNC_EXPR:
6475 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6476 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6477 computing the operation. */
6478 return false;
6480 default:
6481 gcc_unreachable ();
6484 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6486 enum tree_code ctmp = c1;
6487 c1 = c2;
6488 c2 = ctmp;
6491 if (code == FIX_TRUNC_EXPR)
6493 /* The signedness is determined from output operand. */
6494 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6495 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6497 else
6499 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6500 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6503 if (!optab1 || !optab2)
6504 return false;
6506 vec_mode = TYPE_MODE (vectype);
6507 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6508 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6509 return false;
6511 *code1 = c1;
6512 *code2 = c2;
6514 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6515 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6516 return true;
6518 /* Check if it's a multi-step conversion that can be done using intermediate
6519 types. */
6521 prev_type = vectype;
6522 prev_mode = vec_mode;
6524 if (!CONVERT_EXPR_CODE_P (code))
6525 return false;
6527 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6528 intermediate steps in the promotion sequence. We try
6529 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6530 not. */
6531 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6532 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6534 intermediate_mode = insn_data[icode1].operand[0].mode;
6535 intermediate_type
6536 = lang_hooks.types.type_for_mode (intermediate_mode,
6537 TYPE_UNSIGNED (prev_type));
6538 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6539 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6541 if (!optab3 || !optab4
6542 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6543 || insn_data[icode1].operand[0].mode != intermediate_mode
6544 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6545 || insn_data[icode2].operand[0].mode != intermediate_mode
6546 || ((icode1 = optab_handler (optab3, intermediate_mode))
6547 == CODE_FOR_nothing)
6548 || ((icode2 = optab_handler (optab4, intermediate_mode))
6549 == CODE_FOR_nothing))
6550 break;
6552 VEC_quick_push (tree, *interm_types, intermediate_type);
6553 (*multi_step_cvt)++;
6555 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6556 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6557 return true;
6559 prev_type = intermediate_type;
6560 prev_mode = intermediate_mode;
6563 VEC_free (tree, heap, *interm_types);
6564 return false;
6568 /* Function supportable_narrowing_operation
6570 Check whether an operation represented by the code CODE is a
6571 narrowing operation that is supported by the target platform in
6572 vector form (i.e., when operating on arguments of type VECTYPE_IN
6573 and producing a result of type VECTYPE_OUT).
6575 Narrowing operations we currently support are NOP (CONVERT) and
6576 FIX_TRUNC. This function checks if these operations are supported by
6577 the target platform directly via vector tree-codes.
6579 Output:
6580 - CODE1 is the code of a vector operation to be used when
6581 vectorizing the operation, if available.
6582 - MULTI_STEP_CVT determines the number of required intermediate steps in
6583 case of multi-step conversion (like int->short->char - in that case
6584 MULTI_STEP_CVT will be 1).
6585 - INTERM_TYPES contains the intermediate type required to perform the
6586 narrowing operation (short in the above example). */
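/* Mirroring the int->short->char example above as a sketch (again, the
   outcome depends on the target's optabs): for

     char a[N]; int b[N];
     for (i = 0; i < N; i++)
       a[i] = (char) b[i];

   a successful call would return *CODE1 == VEC_PACK_TRUNC_EXPR,
   *MULTI_STEP_CVT == 1, and INTERM_TYPES holding the vector-of-shorts
   intermediate type.  */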
6588 bool
6589 supportable_narrowing_operation (enum tree_code code,
6590 tree vectype_out, tree vectype_in,
6591 enum tree_code *code1, int *multi_step_cvt,
6592 VEC (tree, heap) **interm_types)
6594 enum machine_mode vec_mode;
6595 enum insn_code icode1;
6596 optab optab1, interm_optab;
6597 tree vectype = vectype_in;
6598 tree narrow_vectype = vectype_out;
6599 enum tree_code c1;
6600 tree intermediate_type;
6601 enum machine_mode intermediate_mode, prev_mode;
6602 int i;
6603 bool uns;
6605 *multi_step_cvt = 0;
6606 switch (code)
6608 CASE_CONVERT:
6609 c1 = VEC_PACK_TRUNC_EXPR;
6610 break;
6612 case FIX_TRUNC_EXPR:
6613 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6614 break;
6616 case FLOAT_EXPR:
6617 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6618 tree code and optabs used for computing the operation. */
6619 return false;
6621 default:
6622 gcc_unreachable ();
6625 if (code == FIX_TRUNC_EXPR)
6626 /* The signedness is determined from output operand. */
6627 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6628 else
6629 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6631 if (!optab1)
6632 return false;
6634 vec_mode = TYPE_MODE (vectype);
6635 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6636 return false;
6638 *code1 = c1;
6640 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6641 return true;
6643 /* Check if it's a multi-step conversion that can be done using intermediate
6644 types. */
6645 prev_mode = vec_mode;
6646 if (code == FIX_TRUNC_EXPR)
6647 uns = TYPE_UNSIGNED (vectype_out);
6648 else
6649 uns = TYPE_UNSIGNED (vectype);
6651 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6652 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6653 costly than signed. */
6654 if (code == FIX_TRUNC_EXPR && uns)
6656 enum insn_code icode2;
6658 intermediate_type
6659 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6660 interm_optab
6661 = optab_for_tree_code (c1, intermediate_type, optab_default);
6662 if (interm_optab != unknown_optab
6663 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6664 && insn_data[icode1].operand[0].mode
6665 == insn_data[icode2].operand[0].mode)
6667 uns = false;
6668 optab1 = interm_optab;
6669 icode1 = icode2;
6673 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6674 intermediate steps in the demotion sequence. We try
6675 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6676 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
6677 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6679 intermediate_mode = insn_data[icode1].operand[0].mode;
6680 intermediate_type
6681 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6682 interm_optab
6683 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6684 optab_default);
6685 if (!interm_optab
6686 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6687 || insn_data[icode1].operand[0].mode != intermediate_mode
6688 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6689 == CODE_FOR_nothing))
6690 break;
6692 VEC_quick_push (tree, *interm_types, intermediate_type);
6693 (*multi_step_cvt)++;
6695 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6696 return true;
6698 prev_mode = intermediate_mode;
6699 optab1 = interm_optab;
6702 VEC_free (tree, heap, *interm_types);
6703 return false;