gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "gimple.h"
33 #include "gimplify.h"
34 #include "gimple-iterator.h"
35 #include "gimplify-me.h"
36 #include "gimple-ssa.h"
37 #include "tree-cfg.h"
38 #include "tree-phinodes.h"
39 #include "ssa-iterators.h"
40 #include "tree-ssanames.h"
41 #include "tree-ssa-loop-manip.h"
42 #include "cfgloop.h"
43 #include "expr.h"
44 #include "recog.h" /* FIXME: for insn_data */
45 #include "optabs.h"
46 #include "diagnostic-core.h"
47 #include "tree-vectorizer.h"
48 #include "dumpfile.h"
50 /* For lang_hooks.types.type_for_mode. */
51 #include "langhooks.h"
53 /* Return the vectorized type for the given statement. */
55 tree
56 stmt_vectype (struct _stmt_vec_info *stmt_info)
58 return STMT_VINFO_VECTYPE (stmt_info);
61 /* Return TRUE iff the given statement is in an inner loop relative to
62 the loop being vectorized. */
63 bool
64 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
66 gimple stmt = STMT_VINFO_STMT (stmt_info);
67 basic_block bb = gimple_bb (stmt);
68 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
69 struct loop* loop;
71 if (!loop_vinfo)
72 return false;
74 loop = LOOP_VINFO_LOOP (loop_vinfo);
76 return (bb->loop_father == loop->inner);
79 /* Record the cost of a statement, either by directly informing the
80 target model or by saving it in a vector for later processing.
81 Return a preliminary estimate of the statement's cost. */
83 unsigned
84 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
85 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
86 int misalign, enum vect_cost_model_location where)
88 if (body_cost_vec)
90 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
91 add_stmt_info_to_vec (body_cost_vec, count, kind,
92 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
93 misalign);
94 return (unsigned)
95 (builtin_vectorization_cost (kind, vectype, misalign) * count);
98 else
100 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
101 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
102 void *target_cost_data;
104 if (loop_vinfo)
105 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
106 else
107 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
109 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
110 misalign, where);
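/* For illustration only: when a cost vector is supplied, the preliminary
   estimate returned above is simply the per-statement cost times COUNT.
   The per-statement cost of 2 below is a made-up stand-in for what
   builtin_vectorization_cost would return.  */
static unsigned
example_record_stmt_cost (int count)
{
  unsigned per_stmt_cost = 2;  /* stand-in for builtin_vectorization_cost */
  return per_stmt_cost * (unsigned) count;
}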
114 /* Return a variable of type ELEM_TYPE[NELEMS]. */
116 static tree
117 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
119 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
120 "vect_array");
123 /* ARRAY is an array of vectors created by create_vector_array.
124 Return an SSA_NAME for the vector in index N. The reference
125 is part of the vectorization of STMT and the vector is associated
126 with scalar destination SCALAR_DEST. */
128 static tree
129 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
130 tree array, unsigned HOST_WIDE_INT n)
132 tree vect_type, vect, vect_name, array_ref;
133 gimple new_stmt;
135 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
136 vect_type = TREE_TYPE (TREE_TYPE (array));
137 vect = vect_create_destination_var (scalar_dest, vect_type);
138 array_ref = build4 (ARRAY_REF, vect_type, array,
139 build_int_cst (size_type_node, n),
140 NULL_TREE, NULL_TREE);
142 new_stmt = gimple_build_assign (vect, array_ref);
143 vect_name = make_ssa_name (vect, new_stmt);
144 gimple_assign_set_lhs (new_stmt, vect_name);
145 vect_finish_stmt_generation (stmt, new_stmt, gsi);
147 return vect_name;
150 /* ARRAY is an array of vectors created by create_vector_array.
151 Emit code to store SSA_NAME VECT in index N of the array.
152 The store is part of the vectorization of STMT. */
154 static void
155 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
156 tree array, unsigned HOST_WIDE_INT n)
158 tree array_ref;
159 gimple new_stmt;
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (stmt, new_stmt, gsi);
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
173 static tree
174 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
176 tree mem_ref, alias_ptr_type;
178 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
179 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
180 /* Arrays have the same alignment as their type. */
181 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
182 return mem_ref;
185 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
187 /* Function vect_mark_relevant.
189 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
191 static void
192 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
193 enum vect_relevant relevant, bool live_p,
194 bool used_in_pattern)
196 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
197 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
198 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
199 gimple pattern_stmt;
201 if (dump_enabled_p ())
202 dump_printf_loc (MSG_NOTE, vect_location,
203 "mark relevant %d, live %d.\n", relevant, live_p);
205 /* If this stmt is an original stmt in a pattern, we might need to mark its
206 related pattern stmt instead of the original stmt. However, such stmts
207 may have their own uses that are not in any pattern; in such cases the
208 stmt itself should be marked. */
209 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
211 bool found = false;
212 if (!used_in_pattern)
214 imm_use_iterator imm_iter;
215 use_operand_p use_p;
216 gimple use_stmt;
217 tree lhs;
218 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
219 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
221 if (is_gimple_assign (stmt))
222 lhs = gimple_assign_lhs (stmt);
223 else
224 lhs = gimple_call_lhs (stmt);
226 /* This use is outside the pattern; if LHS has other uses that are
227 pattern uses, we should mark the stmt itself, and not the pattern
228 stmt. */
229 if (TREE_CODE (lhs) == SSA_NAME)
230 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
232 if (is_gimple_debug (USE_STMT (use_p)))
233 continue;
234 use_stmt = USE_STMT (use_p);
236 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
237 continue;
239 if (vinfo_for_stmt (use_stmt)
240 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
242 found = true;
243 break;
248 if (!found)
250 /* This is the last stmt in a sequence that was detected as a
251 pattern that can potentially be vectorized. Don't mark the stmt
252 as relevant/live because it's not going to be vectorized.
253 Instead mark the pattern-stmt that replaces it. */
255 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
257 if (dump_enabled_p ())
258 dump_printf_loc (MSG_NOTE, vect_location,
259 "last stmt in pattern. don't mark"
260 " relevant/live.\n");
261 stmt_info = vinfo_for_stmt (pattern_stmt);
262 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
263 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
264 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
265 stmt = pattern_stmt;
269 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
270 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
271 STMT_VINFO_RELEVANT (stmt_info) = relevant;
273 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
274 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
276 if (dump_enabled_p ())
277 dump_printf_loc (MSG_NOTE, vect_location,
278 "already marked relevant/live.\n");
279 return;
282 worklist->safe_push (stmt);
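/* For illustration only: the monotone update performed above.  Liveness and
   relevance only ever increase, and the statement is (re)pushed on the
   worklist only when one of them actually changed, which is what makes the
   propagation in vect_mark_stmts_to_be_vectorized terminate.  The tiny
   lattice below is a stand-in for enum vect_relevant.  */
#include <stdbool.h>

enum example_relevant { EX_UNUSED = 0, EX_USED_BY_REDUCTION = 1, EX_USED = 2 };

/* Update *CUR_RELEVANT and *CUR_LIVE; return true iff the caller should
   push the statement on the worklist again.  */
static bool
example_mark_relevant (enum example_relevant *cur_relevant, bool *cur_live,
                       enum example_relevant relevant, bool live_p)
{
  enum example_relevant save_relevant = *cur_relevant;
  bool save_live = *cur_live;
  *cur_live |= live_p;
  if (relevant > *cur_relevant)
    *cur_relevant = relevant;
  return *cur_relevant != save_relevant || *cur_live != save_live;
}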
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT in loop that is represented by LOOP_VINFO is
289 "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - it is a control stmt in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
298 static bool
299 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
302 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt)
313 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
314 != loop_exit_ctrl_vec_info_type)
315 *relevant = vect_used_in_scope;
317 /* changing memory. */
318 if (gimple_code (stmt) != GIMPLE_PHI)
319 if (gimple_vdef (stmt))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
335 if (dump_enabled_p ())
336 dump_printf_loc (MSG_NOTE, vect_location,
337 "vec_stmt_relevant_p: used out of loop.\n");
339 if (is_gimple_debug (USE_STMT (use_p)))
340 continue;
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form)  */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
347 *live_p = true;
352 return (*live_p || *relevant);
356 /* Function exist_non_indexing_operands_for_use_p
358 USE is one of the uses attached to STMT. Check if USE is
359 used in STMT for anything other than indexing an array. */
361 static bool
362 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
364 tree operand;
365 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
367 /* USE corresponds to some operand in STMT. If there is no data
368 reference in STMT, then any operand that corresponds to USE
369 is not indexing an array. */
370 if (!STMT_VINFO_DATA_REF (stmt_info))
371 return true;
373 /* STMT has a data_ref. FORNOW this means that it is one of
374 the following forms:
375 -1- ARRAY_REF = var
376 -2- var = ARRAY_REF
377 (This should have been verified in analyze_data_refs).
379 'var' in the second case corresponds to a def, not a use,
380 so USE cannot correspond to any operands that are not used
381 for array indexing.
383 Therefore, all we need to check is if STMT falls into the
384 first case, and whether var corresponds to USE. */
386 if (!gimple_assign_copy_p (stmt))
387 return false;
388 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
389 return false;
390 operand = gimple_assign_rhs1 (stmt);
391 if (TREE_CODE (operand) != SSA_NAME)
392 return false;
394 if (operand == use)
395 return true;
397 return false;
402 Function process_use.
404 Inputs:
405 - a USE in STMT in a loop represented by LOOP_VINFO
406 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
407 that defined USE. This is done by calling mark_relevant and passing it
408 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
409 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
410 be performed.
412 Outputs:
413 Generally, LIVE_P and RELEVANT are used to define the liveness and
414 relevance info of the DEF_STMT of this USE:
415 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
416 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
417 Exceptions:
418 - case 1: If USE is used only for address computations (e.g. array indexing),
419 which does not need to be directly vectorized, then the liveness/relevance
420 of the respective DEF_STMT is left unchanged.
421 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
422 skip DEF_STMT because it has already been processed.
423 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
424 be modified accordingly.
426 Return true if everything is as expected. Return false otherwise. */
428 static bool
429 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
430 enum vect_relevant relevant, vec<gimple> *worklist,
431 bool force)
433 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
434 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
435 stmt_vec_info dstmt_vinfo;
436 basic_block bb, def_bb;
437 tree def;
438 gimple def_stmt;
439 enum vect_def_type dt;
441 /* case 1: we are only interested in uses that need to be vectorized. Uses
442 that are used for address computation are not considered relevant. */
443 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
444 return true;
446 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
448 if (dump_enabled_p ())
449 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
450 "not vectorized: unsupported use in stmt.\n");
451 return false;
454 if (!def_stmt || gimple_nop_p (def_stmt))
455 return true;
457 def_bb = gimple_bb (def_stmt);
458 if (!flow_bb_inside_loop_p (loop, def_bb))
460 if (dump_enabled_p ())
461 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
462 return true;
465 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
466 DEF_STMT must have already been processed, because this should be the
467 only way that STMT, which is a reduction-phi, was put in the worklist,
468 as there should be no other uses for DEF_STMT in the loop. So we just
469 check that everything is as expected, and we are done. */
470 dstmt_vinfo = vinfo_for_stmt (def_stmt);
471 bb = gimple_bb (stmt);
472 if (gimple_code (stmt) == GIMPLE_PHI
473 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
474 && gimple_code (def_stmt) != GIMPLE_PHI
475 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
476 && bb->loop_father == def_bb->loop_father)
478 if (dump_enabled_p ())
479 dump_printf_loc (MSG_NOTE, vect_location,
480 "reduc-stmt defining reduc-phi in the same nest.\n");
481 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
482 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
483 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
484 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
485 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
486 return true;
489 /* case 3a: outer-loop stmt defining an inner-loop stmt:
490 outer-loop-header-bb:
491 d = def_stmt
492 inner-loop:
493 stmt # use (d)
494 outer-loop-tail-bb:
495 ... */
496 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
498 if (dump_enabled_p ())
499 dump_printf_loc (MSG_NOTE, vect_location,
500 "outer-loop def-stmt defining inner-loop stmt.\n");
502 switch (relevant)
504 case vect_unused_in_scope:
505 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
506 vect_used_in_scope : vect_unused_in_scope;
507 break;
509 case vect_used_in_outer_by_reduction:
510 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
511 relevant = vect_used_by_reduction;
512 break;
514 case vect_used_in_outer:
515 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
516 relevant = vect_used_in_scope;
517 break;
519 case vect_used_in_scope:
520 break;
522 default:
523 gcc_unreachable ();
527 /* case 3b: inner-loop stmt defining an outer-loop stmt:
528 outer-loop-header-bb:
530 inner-loop:
531 d = def_stmt
532 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
533 stmt # use (d) */
534 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
536 if (dump_enabled_p ())
537 dump_printf_loc (MSG_NOTE, vect_location,
538 "inner-loop def-stmt defining outer-loop stmt.\n");
540 switch (relevant)
542 case vect_unused_in_scope:
543 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
544 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
545 vect_used_in_outer_by_reduction : vect_unused_in_scope;
546 break;
548 case vect_used_by_reduction:
549 relevant = vect_used_in_outer_by_reduction;
550 break;
552 case vect_used_in_scope:
553 relevant = vect_used_in_outer;
554 break;
556 default:
557 gcc_unreachable ();
561 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
562 is_pattern_stmt_p (stmt_vinfo));
563 return true;
567 /* Function vect_mark_stmts_to_be_vectorized.
569 Not all stmts in the loop need to be vectorized. For example:
571 for i...
572 for j...
573 1. T0 = i + j
574 2. T1 = a[T0]
576 3. j = j + 1
578 Stmts 1 and 3 do not need to be vectorized, because loop control and
579 addressing of vectorized data-refs are handled differently.
581 This pass detects such stmts. */
583 bool
584 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
586 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
587 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
588 unsigned int nbbs = loop->num_nodes;
589 gimple_stmt_iterator si;
590 gimple stmt;
591 unsigned int i;
592 stmt_vec_info stmt_vinfo;
593 basic_block bb;
594 gimple phi;
595 bool live_p;
596 enum vect_relevant relevant, tmp_relevant;
597 enum vect_def_type def_type;
599 if (dump_enabled_p ())
600 dump_printf_loc (MSG_NOTE, vect_location,
601 "=== vect_mark_stmts_to_be_vectorized ===\n");
603 stack_vec<gimple, 64> worklist;
605 /* 1. Init worklist. */
606 for (i = 0; i < nbbs; i++)
608 bb = bbs[i];
609 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
611 phi = gsi_stmt (si);
612 if (dump_enabled_p ())
614 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
615 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
616 dump_printf (MSG_NOTE, "\n");
619 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
620 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
622 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
624 stmt = gsi_stmt (si);
625 if (dump_enabled_p ())
627 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
628 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
629 dump_printf (MSG_NOTE, "\n");
632 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
633 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
637 /* 2. Process_worklist */
638 while (worklist.length () > 0)
640 use_operand_p use_p;
641 ssa_op_iter iter;
643 stmt = worklist.pop ();
644 if (dump_enabled_p ())
646 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
647 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
648 dump_printf (MSG_NOTE, "\n");
651 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
652 (DEF_STMT) as relevant/irrelevant and live/dead according to the
653 liveness and relevance properties of STMT. */
654 stmt_vinfo = vinfo_for_stmt (stmt);
655 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
656 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
658 /* Generally, the liveness and relevance properties of STMT are
659 propagated as is to the DEF_STMTs of its USEs:
660 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
661 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
663 One exception is when STMT has been identified as defining a reduction
664 variable; in this case we set the liveness/relevance as follows:
665 live_p = false
666 relevant = vect_used_by_reduction
667 This is because we distinguish between two kinds of relevant stmts -
668 those that are used by a reduction computation, and those that are
669 (also) used by a regular computation. This allows us later on to
670 identify stmts that are used solely by a reduction, and therefore the
671 order of the results that they produce does not have to be kept. */
673 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
674 tmp_relevant = relevant;
675 switch (def_type)
677 case vect_reduction_def:
678 switch (tmp_relevant)
680 case vect_unused_in_scope:
681 relevant = vect_used_by_reduction;
682 break;
684 case vect_used_by_reduction:
685 if (gimple_code (stmt) == GIMPLE_PHI)
686 break;
687 /* fall through */
689 default:
690 if (dump_enabled_p ())
691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
692 "unsupported use of reduction.\n");
693 return false;
696 live_p = false;
697 break;
699 case vect_nested_cycle:
700 if (tmp_relevant != vect_unused_in_scope
701 && tmp_relevant != vect_used_in_outer_by_reduction
702 && tmp_relevant != vect_used_in_outer)
704 if (dump_enabled_p ())
705 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
706 "unsupported use of nested cycle.\n");
708 return false;
711 live_p = false;
712 break;
714 case vect_double_reduction_def:
715 if (tmp_relevant != vect_unused_in_scope
716 && tmp_relevant != vect_used_by_reduction)
718 if (dump_enabled_p ())
719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
720 "unsupported use of double reduction.\n");
722 return false;
725 live_p = false;
726 break;
728 default:
729 break;
732 if (is_pattern_stmt_p (stmt_vinfo))
734 /* Pattern statements are not inserted into the code, so
735 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
736 have to scan the RHS or function arguments instead. */
737 if (is_gimple_assign (stmt))
739 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
740 tree op = gimple_assign_rhs1 (stmt);
742 i = 1;
743 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
745 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
746 live_p, relevant, &worklist, false)
747 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
748 live_p, relevant, &worklist, false))
749 return false;
750 i = 2;
752 for (; i < gimple_num_ops (stmt); i++)
754 op = gimple_op (stmt, i);
755 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
756 &worklist, false))
757 return false;
760 else if (is_gimple_call (stmt))
762 for (i = 0; i < gimple_call_num_args (stmt); i++)
764 tree arg = gimple_call_arg (stmt, i);
765 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
766 &worklist, false))
767 return false;
771 else
772 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
774 tree op = USE_FROM_PTR (use_p);
775 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
776 &worklist, false))
777 return false;
780 if (STMT_VINFO_GATHER_P (stmt_vinfo))
782 tree off;
783 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
784 gcc_assert (decl);
785 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
786 &worklist, true))
787 return false;
789 } /* while worklist */
791 return true;
795 /* Function vect_model_simple_cost.
797 Models cost for simple operations, i.e. those that only emit ncopies of a
798 single op. Right now, this does not account for multiple insns that could
799 be generated for the single vector op. We will handle that shortly. */
801 void
802 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
803 enum vect_def_type *dt,
804 stmt_vector_for_cost *prologue_cost_vec,
805 stmt_vector_for_cost *body_cost_vec)
807 int i;
808 int inside_cost = 0, prologue_cost = 0;
810 /* The SLP costs were already calculated during SLP tree build. */
811 if (PURE_SLP_STMT (stmt_info))
812 return;
814 /* FORNOW: Assuming maximum 2 args per stmt. */
815 for (i = 0; i < 2; i++)
816 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
817 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
818 stmt_info, 0, vect_prologue);
820 /* Pass the inside-of-loop statements to the target-specific cost model. */
821 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
822 stmt_info, 0, vect_body);
824 if (dump_enabled_p ())
825 dump_printf_loc (MSG_NOTE, vect_location,
826 "vect_model_simple_cost: inside_cost = %d, "
827 "prologue_cost = %d .\n", inside_cost, prologue_cost);
831 /* Model cost for type demotion and promotion operations. PWR is normally
832 zero for single-step promotions and demotions. It will be one if
833 two-step promotion/demotion is required, and so on. Each additional
834 step doubles the number of instructions required. */
836 static void
837 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
838 enum vect_def_type *dt, int pwr)
840 int i, tmp;
841 int inside_cost = 0, prologue_cost = 0;
842 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
843 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
844 void *target_cost_data;
846 /* The SLP costs were already calculated during SLP tree build. */
847 if (PURE_SLP_STMT (stmt_info))
848 return;
850 if (loop_vinfo)
851 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
852 else
853 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
855 for (i = 0; i < pwr + 1; i++)
857 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
858 (i + 1) : i;
859 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
860 vec_promote_demote, stmt_info, 0,
861 vect_body);
864 /* FORNOW: Assuming maximum 2 args per stmt. */
865 for (i = 0; i < 2; i++)
866 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
867 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
868 stmt_info, 0, vect_prologue);
870 if (dump_enabled_p ())
871 dump_printf_loc (MSG_NOTE, vect_location,
872 "vect_model_promotion_demotion_cost: inside_cost = %d, "
873 "prologue_cost = %d .\n", inside_cost, prologue_cost);
876 /* Function vect_cost_group_size
878 For grouped load or store, return the group_size only if it is the first
879 load or store of a group, else return 1. This ensures that group size is
880 only returned once per group. */
882 static int
883 vect_cost_group_size (stmt_vec_info stmt_info)
885 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
887 if (first_stmt == STMT_VINFO_STMT (stmt_info))
888 return GROUP_SIZE (stmt_info);
890 return 1;
894 /* Function vect_model_store_cost
896 Models cost for stores. In the case of grouped accesses, one access
897 has the overhead of the grouped access attributed to it. */
899 void
900 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
901 bool store_lanes_p, enum vect_def_type dt,
902 slp_tree slp_node,
903 stmt_vector_for_cost *prologue_cost_vec,
904 stmt_vector_for_cost *body_cost_vec)
906 int group_size;
907 unsigned int inside_cost = 0, prologue_cost = 0;
908 struct data_reference *first_dr;
909 gimple first_stmt;
911 /* The SLP costs were already calculated during SLP tree build. */
912 if (PURE_SLP_STMT (stmt_info))
913 return;
915 if (dt == vect_constant_def || dt == vect_external_def)
916 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
917 stmt_info, 0, vect_prologue);
919 /* Grouped access? */
920 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
922 if (slp_node)
924 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
925 group_size = 1;
927 else
929 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
930 group_size = vect_cost_group_size (stmt_info);
933 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
935 /* Not a grouped access. */
936 else
938 group_size = 1;
939 first_dr = STMT_VINFO_DATA_REF (stmt_info);
942 /* We assume that the cost of a single store-lanes instruction is
943 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
944 access is instead being provided by a permute-and-store operation,
945 include the cost of the permutes. */
946 if (!store_lanes_p && group_size > 1)
948 /* Uses a high and low interleave operation for each needed permute. */
950 int nstmts = ncopies * exact_log2 (group_size) * group_size;
951 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
952 stmt_info, 0, vect_body);
954 if (dump_enabled_p ())
955 dump_printf_loc (MSG_NOTE, vect_location,
956 "vect_model_store_cost: strided group_size = %d .\n",
957 group_size);
960 /* Costs of the stores. */
961 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
963 if (dump_enabled_p ())
964 dump_printf_loc (MSG_NOTE, vect_location,
965 "vect_model_store_cost: inside_cost = %d, "
966 "prologue_cost = %d .\n", inside_cost, prologue_cost);
970 /* Calculate cost of DR's memory access. */
971 void
972 vect_get_store_cost (struct data_reference *dr, int ncopies,
973 unsigned int *inside_cost,
974 stmt_vector_for_cost *body_cost_vec)
976 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
977 gimple stmt = DR_STMT (dr);
978 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
980 switch (alignment_support_scheme)
982 case dr_aligned:
984 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
985 vector_store, stmt_info, 0,
986 vect_body);
988 if (dump_enabled_p ())
989 dump_printf_loc (MSG_NOTE, vect_location,
990 "vect_model_store_cost: aligned.\n");
991 break;
994 case dr_unaligned_supported:
996 /* Here, we assign an additional cost for the unaligned store. */
997 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
998 unaligned_store, stmt_info,
999 DR_MISALIGNMENT (dr), vect_body);
1000 if (dump_enabled_p ())
1001 dump_printf_loc (MSG_NOTE, vect_location,
1002 "vect_model_store_cost: unaligned supported by "
1003 "hardware.\n");
1004 break;
1007 case dr_unaligned_unsupported:
1009 *inside_cost = VECT_MAX_COST;
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1013 "vect_model_store_cost: unsupported access.\n");
1014 break;
1017 default:
1018 gcc_unreachable ();
1023 /* Function vect_model_load_cost
1025 Models cost for loads. In the case of grouped accesses, the last access
1026 has the overhead of the grouped access attributed to it. Since unaligned
1027 accesses are supported for loads, we also account for the costs of the
1028 access scheme chosen. */
1030 void
1031 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1032 bool load_lanes_p, slp_tree slp_node,
1033 stmt_vector_for_cost *prologue_cost_vec,
1034 stmt_vector_for_cost *body_cost_vec)
1036 int group_size;
1037 gimple first_stmt;
1038 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1039 unsigned int inside_cost = 0, prologue_cost = 0;
1041 /* The SLP costs were already calculated during SLP tree build. */
1042 if (PURE_SLP_STMT (stmt_info))
1043 return;
1045 /* Grouped accesses? */
1046 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1047 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1049 group_size = vect_cost_group_size (stmt_info);
1050 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1052 /* Not a grouped access. */
1053 else
1055 group_size = 1;
1056 first_dr = dr;
1059 /* We assume that the cost of a single load-lanes instruction is
1060 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1061 access is instead being provided by a load-and-permute operation,
1062 include the cost of the permutes. */
1063 if (!load_lanes_p && group_size > 1)
1065 /* Uses even and odd extract operations for each needed permute. */
1066 int nstmts = ncopies * exact_log2 (group_size) * group_size;
1067 inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1068 stmt_info, 0, vect_body);
1070 if (dump_enabled_p ())
1071 dump_printf_loc (MSG_NOTE, vect_location,
1072 "vect_model_load_cost: strided group_size = %d .\n",
1073 group_size);
1076 /* The loads themselves. */
1077 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1079 /* N scalar loads plus gathering them into a vector. */
1080 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1081 inside_cost += record_stmt_cost (body_cost_vec,
1082 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1083 scalar_load, stmt_info, 0, vect_body);
1084 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1085 stmt_info, 0, vect_body);
1087 else
1088 vect_get_load_cost (first_dr, ncopies,
1089 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1090 || group_size > 1 || slp_node),
1091 &inside_cost, &prologue_cost,
1092 prologue_cost_vec, body_cost_vec, true);
1094 if (dump_enabled_p ())
1095 dump_printf_loc (MSG_NOTE, vect_location,
1096 "vect_model_load_cost: inside_cost = %d, "
1097 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1101 /* Calculate cost of DR's memory access. */
1102 void
1103 vect_get_load_cost (struct data_reference *dr, int ncopies,
1104 bool add_realign_cost, unsigned int *inside_cost,
1105 unsigned int *prologue_cost,
1106 stmt_vector_for_cost *prologue_cost_vec,
1107 stmt_vector_for_cost *body_cost_vec,
1108 bool record_prologue_costs)
1110 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1111 gimple stmt = DR_STMT (dr);
1112 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1114 switch (alignment_support_scheme)
1116 case dr_aligned:
1118 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1119 stmt_info, 0, vect_body);
1121 if (dump_enabled_p ())
1122 dump_printf_loc (MSG_NOTE, vect_location,
1123 "vect_model_load_cost: aligned.\n");
1125 break;
1127 case dr_unaligned_supported:
1129 /* Here, we assign an additional cost for the unaligned load. */
1130 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1131 unaligned_load, stmt_info,
1132 DR_MISALIGNMENT (dr), vect_body);
1134 if (dump_enabled_p ())
1135 dump_printf_loc (MSG_NOTE, vect_location,
1136 "vect_model_load_cost: unaligned supported by "
1137 "hardware.\n");
1139 break;
1141 case dr_explicit_realign:
1143 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1144 vector_load, stmt_info, 0, vect_body);
1145 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1146 vec_perm, stmt_info, 0, vect_body);
1148 /* FIXME: If the misalignment remains fixed across the iterations of
1149 the containing loop, the following cost should be added to the
1150 prologue costs. */
1151 if (targetm.vectorize.builtin_mask_for_load)
1152 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1153 stmt_info, 0, vect_body);
1155 if (dump_enabled_p ())
1156 dump_printf_loc (MSG_NOTE, vect_location,
1157 "vect_model_load_cost: explicit realign\n");
1159 break;
1161 case dr_explicit_realign_optimized:
1163 if (dump_enabled_p ())
1164 dump_printf_loc (MSG_NOTE, vect_location,
1165 "vect_model_load_cost: unaligned software "
1166 "pipelined.\n");
1168 /* Unaligned software pipeline has a load of an address, an initial
1169 load, and possibly a mask operation to "prime" the loop. However,
1170 if this is an access in a group of loads, which provide grouped
1171 access, then the above cost should only be considered for one
1172 access in the group. Inside the loop, there is a load op
1173 and a realignment op. */
1175 if (add_realign_cost && record_prologue_costs)
1177 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1178 vector_stmt, stmt_info,
1179 0, vect_prologue);
1180 if (targetm.vectorize.builtin_mask_for_load)
1181 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1182 vector_stmt, stmt_info,
1183 0, vect_prologue);
1186 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1187 stmt_info, 0, vect_body);
1188 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1189 stmt_info, 0, vect_body);
1191 if (dump_enabled_p ())
1192 dump_printf_loc (MSG_NOTE, vect_location,
1193 "vect_model_load_cost: explicit realign optimized"
1194 "\n");
1196 break;
1199 case dr_unaligned_unsupported:
1201 *inside_cost = VECT_MAX_COST;
1203 if (dump_enabled_p ())
1204 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1205 "vect_model_load_cost: unsupported access.\n");
1206 break;
1209 default:
1210 gcc_unreachable ();
1214 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1215 the loop preheader for the vectorized stmt STMT. */
1217 static void
1218 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1220 if (gsi)
1221 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1222 else
1224 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1225 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1227 if (loop_vinfo)
1229 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1230 basic_block new_bb;
1231 edge pe;
1233 if (nested_in_vect_loop_p (loop, stmt))
1234 loop = loop->inner;
1236 pe = loop_preheader_edge (loop);
1237 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1238 gcc_assert (!new_bb);
1240 else
1242 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1243 basic_block bb;
1244 gimple_stmt_iterator gsi_bb_start;
1246 gcc_assert (bb_vinfo);
1247 bb = BB_VINFO_BB (bb_vinfo);
1248 gsi_bb_start = gsi_after_labels (bb);
1249 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1253 if (dump_enabled_p ())
1255 dump_printf_loc (MSG_NOTE, vect_location,
1256 "created new init_stmt: ");
1257 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1258 dump_printf (MSG_NOTE, "\n");
1262 /* Function vect_init_vector.
1264 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1265 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1266 vector type, a vector with all elements equal to VAL is created first.
1267 Place the initialization at BSI if it is not NULL. Otherwise, place the
1268 initialization at the loop preheader.
1269 Return the DEF of INIT_STMT.
1270 It will be used in the vectorization of STMT. */
1272 tree
1273 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1275 tree new_var;
1276 gimple init_stmt;
1277 tree vec_oprnd;
1278 tree new_temp;
1280 if (TREE_CODE (type) == VECTOR_TYPE
1281 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1283 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1285 if (CONSTANT_CLASS_P (val))
1286 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1287 else
1289 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1290 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1291 new_temp, val,
1292 NULL_TREE);
1293 vect_init_vector_1 (stmt, init_stmt, gsi);
1294 val = new_temp;
1297 val = build_vector_from_val (type, val);
1300 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1301 init_stmt = gimple_build_assign (new_var, val);
1302 new_temp = make_ssa_name (new_var, init_stmt);
1303 gimple_assign_set_lhs (init_stmt, new_temp);
1304 vect_init_vector_1 (stmt, init_stmt, gsi);
1305 vec_oprnd = gimple_assign_lhs (init_stmt);
1306 return vec_oprnd;
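/* For illustration only: the splat performed by vect_init_vector when VAL
   is a scalar and TYPE is a vector type, sketched with a plain C array
   standing in for a vector of NUNITS elements.  */
static void
example_splat (int val, int *vec, int nunits)
{
  for (int i = 0; i < nunits; i++)
    vec[i] = val;  /* vect_cst_ = {val, val, ..., val} */
}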
1310 /* Function vect_get_vec_def_for_operand.
1312 OP is an operand in STMT. This function returns a (vector) def that will be
1313 used in the vectorized stmt for STMT.
1315 In the case that OP is an SSA_NAME which is defined in the loop, then
1316 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1318 In case OP is an invariant or constant, a new stmt that creates a vector def
1319 needs to be introduced. */
1321 tree
1322 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1324 tree vec_oprnd;
1325 gimple vec_stmt;
1326 gimple def_stmt;
1327 stmt_vec_info def_stmt_info = NULL;
1328 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1329 unsigned int nunits;
1330 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1331 tree def;
1332 enum vect_def_type dt;
1333 bool is_simple_use;
1334 tree vector_type;
1336 if (dump_enabled_p ())
1338 dump_printf_loc (MSG_NOTE, vect_location,
1339 "vect_get_vec_def_for_operand: ");
1340 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1341 dump_printf (MSG_NOTE, "\n");
1344 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1345 &def_stmt, &def, &dt);
1346 gcc_assert (is_simple_use);
1347 if (dump_enabled_p ())
1349 int loc_printed = 0;
1350 if (def)
1352 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1353 loc_printed = 1;
1354 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1355 dump_printf (MSG_NOTE, "\n");
1357 if (def_stmt)
1359 if (loc_printed)
1360 dump_printf (MSG_NOTE, " def_stmt = ");
1361 else
1362 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1363 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1364 dump_printf (MSG_NOTE, "\n");
1368 switch (dt)
1370 /* Case 1: operand is a constant. */
1371 case vect_constant_def:
1373 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1374 gcc_assert (vector_type);
1375 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1377 if (scalar_def)
1378 *scalar_def = op;
1380 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1381 if (dump_enabled_p ())
1382 dump_printf_loc (MSG_NOTE, vect_location,
1383 "Create vector_cst. nunits = %d\n", nunits);
1385 return vect_init_vector (stmt, op, vector_type, NULL);
1388 /* Case 2: operand is defined outside the loop - loop invariant. */
1389 case vect_external_def:
1391 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1392 gcc_assert (vector_type);
1394 if (scalar_def)
1395 *scalar_def = def;
1397 /* Create 'vec_inv = {inv,inv,..,inv}' */
1398 if (dump_enabled_p ())
1399 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1401 return vect_init_vector (stmt, def, vector_type, NULL);
1404 /* Case 3: operand is defined inside the loop. */
1405 case vect_internal_def:
1407 if (scalar_def)
1408 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1410 /* Get the def from the vectorized stmt. */
1411 def_stmt_info = vinfo_for_stmt (def_stmt);
1413 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1414 /* Get vectorized pattern statement. */
1415 if (!vec_stmt
1416 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1417 && !STMT_VINFO_RELEVANT (def_stmt_info))
1418 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1419 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1420 gcc_assert (vec_stmt);
1421 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1422 vec_oprnd = PHI_RESULT (vec_stmt);
1423 else if (is_gimple_call (vec_stmt))
1424 vec_oprnd = gimple_call_lhs (vec_stmt);
1425 else
1426 vec_oprnd = gimple_assign_lhs (vec_stmt);
1427 return vec_oprnd;
1430 /* Case 4: operand is defined by a loop header phi - reduction */
1431 case vect_reduction_def:
1432 case vect_double_reduction_def:
1433 case vect_nested_cycle:
1435 struct loop *loop;
1437 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1438 loop = (gimple_bb (def_stmt))->loop_father;
1440 /* Get the def before the loop */
1441 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1442 return get_initial_def_for_reduction (stmt, op, scalar_def);
1445 /* Case 5: operand is defined by loop-header phi - induction. */
1446 case vect_induction_def:
1448 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1450 /* Get the def from the vectorized stmt. */
1451 def_stmt_info = vinfo_for_stmt (def_stmt);
1452 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1453 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1454 vec_oprnd = PHI_RESULT (vec_stmt);
1455 else
1456 vec_oprnd = gimple_get_lhs (vec_stmt);
1457 return vec_oprnd;
1460 default:
1461 gcc_unreachable ();
1466 /* Function vect_get_vec_def_for_stmt_copy
1468 Return a vector-def for an operand. This function is used when the
1469 vectorized stmt to be created (by the caller to this function) is a "copy"
1470 created in case the vectorized result cannot fit in one vector, and several
1471 copies of the vector-stmt are required. In this case the vector-def is
1472 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1473 of the stmt that defines VEC_OPRND.
1474 DT is the type of the vector def VEC_OPRND.
1476 Context:
1477 In case the vectorization factor (VF) is bigger than the number
1478 of elements that can fit in a vectype (nunits), we have to generate
1479 more than one vector stmt to vectorize the scalar stmt. This situation
1480 arises when there are multiple data-types operated upon in the loop; the
1481 smallest data-type determines the VF, and as a result, when vectorizing
1482 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1483 vector stmt (each computing a vector of 'nunits' results, and together
1484 computing 'VF' results in each iteration). This function is called when
1485 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1486 which VF=16 and nunits=4, so the number of copies required is 4):
1488 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1490 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1491 VS1.1: vx.1 = memref1 VS1.2
1492 VS1.2: vx.2 = memref2 VS1.3
1493 VS1.3: vx.3 = memref3
1495 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1496 VSnew.1: vz1 = vx.1 + ... VSnew.2
1497 VSnew.2: vz2 = vx.2 + ... VSnew.3
1498 VSnew.3: vz3 = vx.3 + ...
1500 The vectorization of S1 is explained in vectorizable_load.
1501 The vectorization of S2:
1502 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1503 the function 'vect_get_vec_def_for_operand' is called to
1504 get the relevant vector-def for each operand of S2. For operand x it
1505 returns the vector-def 'vx.0'.
1507 To create the remaining copies of the vector-stmt (VSnew.j), this
1508 function is called to get the relevant vector-def for each operand. It is
1509 obtained from the respective VS1.j stmt, which is recorded in the
1510 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1512 For example, to obtain the vector-def 'vx.1' in order to create the
1513 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1514 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1515 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1516 and return its def ('vx.1').
1517 Overall, to create the above sequence this function will be called 3 times:
1518 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1519 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1520 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1522 tree
1523 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1525 gimple vec_stmt_for_operand;
1526 stmt_vec_info def_stmt_info;
1528 /* Do nothing; can reuse same def. */
1529 if (dt == vect_external_def || dt == vect_constant_def )
1530 return vec_oprnd;
1532 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1533 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1534 gcc_assert (def_stmt_info);
1535 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1536 gcc_assert (vec_stmt_for_operand);
1537 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1538 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1539 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1540 else
1541 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1542 return vec_oprnd;
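/* For illustration only: how many copies of a vector stmt are needed and
   which elements each one covers.  With VF = 16 and nunits = 4 (the
   figures used in the comment above), VF / nunits = 4 copies are created,
   and copy J is reached from copy J - 1 through its related-stmt link.  */
#include <stdio.h>

static void
example_copy_chain (int vf, int nunits)
{
  int ncopies = vf / nunits;
  for (int j = 0; j < ncopies; j++)
    printf ("copy %d computes elements [%d, %d)\n",
            j, j * nunits, (j + 1) * nunits);
}
/* example_copy_chain (16, 4) prints four copies covering elements 0..15.  */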
1546 /* Get vectorized definitions for the operands to create a copy of an original
1547 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1549 static void
1550 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1551 vec<tree> *vec_oprnds0,
1552 vec<tree> *vec_oprnds1)
1554 tree vec_oprnd = vec_oprnds0->pop ();
1556 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1557 vec_oprnds0->quick_push (vec_oprnd);
1559 if (vec_oprnds1 && vec_oprnds1->length ())
1561 vec_oprnd = vec_oprnds1->pop ();
1562 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1563 vec_oprnds1->quick_push (vec_oprnd);
1568 /* Get vectorized definitions for OP0 and OP1.
1569 REDUC_INDEX is the index of reduction operand in case of reduction,
1570 and -1 otherwise. */
1572 void
1573 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1574 vec<tree> *vec_oprnds0,
1575 vec<tree> *vec_oprnds1,
1576 slp_tree slp_node, int reduc_index)
1578 if (slp_node)
1580 int nops = (op1 == NULL_TREE) ? 1 : 2;
1581 vec<tree> ops;
1582 ops.create (nops);
1583 vec<vec<tree> > vec_defs;
1584 vec_defs.create (nops);
1586 ops.quick_push (op0);
1587 if (op1)
1588 ops.quick_push (op1);
1590 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1592 *vec_oprnds0 = vec_defs[0];
1593 if (op1)
1594 *vec_oprnds1 = vec_defs[1];
1596 ops.release ();
1597 vec_defs.release ();
1599 else
1601 tree vec_oprnd;
1603 vec_oprnds0->create (1);
1604 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1605 vec_oprnds0->quick_push (vec_oprnd);
1607 if (op1)
1609 vec_oprnds1->create (1);
1610 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1611 vec_oprnds1->quick_push (vec_oprnd);
1617 /* Function vect_finish_stmt_generation.
1619 Insert a new stmt. */
1621 void
1622 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1623 gimple_stmt_iterator *gsi)
1625 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1626 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1627 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1629 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1631 if (!gsi_end_p (*gsi)
1632 && gimple_has_mem_ops (vec_stmt))
1634 gimple at_stmt = gsi_stmt (*gsi);
1635 tree vuse = gimple_vuse (at_stmt);
1636 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1638 tree vdef = gimple_vdef (at_stmt);
1639 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1640 /* If we have an SSA vuse and insert a store, update virtual
1641 SSA form to avoid triggering the renamer. Do so only
1642 if we can easily see all uses - which is what almost always
1643 happens with the way vectorized stmts are inserted. */
1644 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1645 && ((is_gimple_assign (vec_stmt)
1646 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1647 || (is_gimple_call (vec_stmt)
1648 && !(gimple_call_flags (vec_stmt)
1649 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1651 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1652 gimple_set_vdef (vec_stmt, new_vdef);
1653 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1657 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1659 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1660 bb_vinfo));
1662 if (dump_enabled_p ())
1664 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1665 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1666 dump_printf (MSG_NOTE, "\n");
1669 gimple_set_location (vec_stmt, gimple_location (stmt));
1672 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1673 a function declaration if the target has a vectorized version
1674 of the function, or NULL_TREE if the function cannot be vectorized. */
1676 tree
1677 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1679 tree fndecl = gimple_call_fndecl (call);
1681 /* We only handle functions that do not read or clobber memory -- i.e.
1682 const or novops ones. */
1683 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1684 return NULL_TREE;
1686 if (!fndecl
1687 || TREE_CODE (fndecl) != FUNCTION_DECL
1688 || !DECL_BUILT_IN (fndecl))
1689 return NULL_TREE;
1691 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1692 vectype_in);
1695 /* Function vectorizable_call.
1697 Check if STMT performs a function call that can be vectorized.
1698 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1699 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1700 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1702 static bool
1703 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1704 slp_tree slp_node)
1706 tree vec_dest;
1707 tree scalar_dest;
1708 tree op, type;
1709 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1710 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1711 tree vectype_out, vectype_in;
1712 int nunits_in;
1713 int nunits_out;
1714 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1715 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1716 tree fndecl, new_temp, def, rhs_type;
1717 gimple def_stmt;
1718 enum vect_def_type dt[3]
1719 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1720 gimple new_stmt = NULL;
1721 int ncopies, j;
1722 vec<tree> vargs = vNULL;
1723 enum { NARROW, NONE, WIDEN } modifier;
1724 size_t i, nargs;
1725 tree lhs;
1727 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1728 return false;
1730 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1731 return false;
1733 /* Is STMT a vectorizable call? */
1734 if (!is_gimple_call (stmt))
1735 return false;
1737 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1738 return false;
1740 if (stmt_can_throw_internal (stmt))
1741 return false;
1743 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1745 /* Process function arguments. */
1746 rhs_type = NULL_TREE;
1747 vectype_in = NULL_TREE;
1748 nargs = gimple_call_num_args (stmt);
1750 /* Bail out if the function has more than three arguments; we do not have
1751 interesting builtin functions to vectorize with more than two arguments
1752 except for fma.  A call with no arguments is not handled either. */
1753 if (nargs == 0 || nargs > 3)
1754 return false;
1756 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
1757 if (gimple_call_internal_p (stmt)
1758 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
1760 nargs = 0;
1761 rhs_type = unsigned_type_node;
1764 for (i = 0; i < nargs; i++)
1766 tree opvectype;
1768 op = gimple_call_arg (stmt, i);
1770 /* We can only handle calls with arguments of the same type. */
1771 if (rhs_type
1772 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1774 if (dump_enabled_p ())
1775 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1776 "argument types differ.\n");
1777 return false;
1779 if (!rhs_type)
1780 rhs_type = TREE_TYPE (op);
1782 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1783 &def_stmt, &def, &dt[i], &opvectype))
1785 if (dump_enabled_p ())
1786 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1787 "use not simple.\n");
1788 return false;
1791 if (!vectype_in)
1792 vectype_in = opvectype;
1793 else if (opvectype
1794 && opvectype != vectype_in)
1796 if (dump_enabled_p ())
1797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1798 "argument vector types differ.\n");
1799 return false;
1802 /* If all arguments are external or constant defs use a vector type with
1803 the same size as the output vector type. */
1804 if (!vectype_in)
1805 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1806 if (vec_stmt)
1807 gcc_assert (vectype_in);
1808 if (!vectype_in)
1810 if (dump_enabled_p ())
1812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1813 "no vectype for scalar type ");
1814 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
1815 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1818 return false;
1821 /* FORNOW */
1822 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1823 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1824 if (nunits_in == nunits_out / 2)
1825 modifier = NARROW;
1826 else if (nunits_out == nunits_in)
1827 modifier = NONE;
1828 else if (nunits_out == nunits_in / 2)
1829 modifier = WIDEN;
1830 else
1831 return false;
1833 /* For now, we only vectorize functions if a target specific builtin
1834 is available. TODO -- in some cases, it might be profitable to
1835 insert the calls for pieces of the vector, in order to be able
1836 to vectorize other operations in the loop. */
1837 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1838 if (fndecl == NULL_TREE)
1840 if (gimple_call_internal_p (stmt)
1841 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
1842 && !slp_node
1843 && loop_vinfo
1844 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
1845 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
1846 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
1847 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
1849 /* We can handle IFN_GOMP_SIMD_LANE by returning a
1850 { 0, 1, 2, ... vf - 1 } vector. */
1851 gcc_assert (nargs == 0);
1853 else
1855 if (dump_enabled_p ())
1856 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1857 "function is not vectorizable.\n");
1858 return false;
1862 gcc_assert (!gimple_vuse (stmt));
1864 if (slp_node || PURE_SLP_STMT (stmt_info))
1865 ncopies = 1;
1866 else if (modifier == NARROW)
1867 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1868 else
1869 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1871 /* Sanity check: make sure that at least one copy of the vectorized stmt
1872 needs to be generated. */
1873 gcc_assert (ncopies >= 1);
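  /* E.g. assuming modifier == NONE, a vectorization factor of 8 and four
     elements per vector, two copies of the vectorized call statement are
     generated below.  */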
1875 if (!vec_stmt) /* transformation not required. */
1877 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1878 if (dump_enabled_p ())
1879 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
1880 "\n");
1881 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
1882 return true;
1885 /** Transform. **/
1887 if (dump_enabled_p ())
1888 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
1890 /* Handle def. */
1891 scalar_dest = gimple_call_lhs (stmt);
1892 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1894 prev_stmt_info = NULL;
1895 switch (modifier)
1897 case NONE:
1898 for (j = 0; j < ncopies; ++j)
1900 /* Build argument list for the vectorized call. */
1901 if (j == 0)
1902 vargs.create (nargs);
1903 else
1904 vargs.truncate (0);
1906 if (slp_node)
1908 vec<vec<tree> > vec_defs;
1909 vec_defs.create (nargs);
1910 vec<tree> vec_oprnds0;
1912 for (i = 0; i < nargs; i++)
1913 vargs.quick_push (gimple_call_arg (stmt, i));
1914 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1915 vec_oprnds0 = vec_defs[0];
1917 /* Arguments are ready. Create the new vector stmt. */
1918 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
1920 size_t k;
1921 for (k = 0; k < nargs; k++)
1923 vec<tree> vec_oprndsk = vec_defs[k];
1924 vargs[k] = vec_oprndsk[i];
1926 new_stmt = gimple_build_call_vec (fndecl, vargs);
1927 new_temp = make_ssa_name (vec_dest, new_stmt);
1928 gimple_call_set_lhs (new_stmt, new_temp);
1929 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1930 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
1933 for (i = 0; i < nargs; i++)
1935 vec<tree> vec_oprndsi = vec_defs[i];
1936 vec_oprndsi.release ();
1938 vec_defs.release ();
1939 continue;
1942 for (i = 0; i < nargs; i++)
1944 op = gimple_call_arg (stmt, i);
1945 if (j == 0)
1946 vec_oprnd0
1947 = vect_get_vec_def_for_operand (op, stmt, NULL);
1948 else
1950 vec_oprnd0 = gimple_call_arg (new_stmt, i);
1951 vec_oprnd0
1952 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1955 vargs.quick_push (vec_oprnd0);
1958 if (gimple_call_internal_p (stmt)
1959 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
1961 tree *v = XALLOCAVEC (tree, nunits_out);
1962 int k;
1963 for (k = 0; k < nunits_out; ++k)
1964 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
1965 tree cst = build_vector (vectype_out, v);
1966 tree new_var
1967 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
1968 gimple init_stmt = gimple_build_assign (new_var, cst);
1969 new_temp = make_ssa_name (new_var, init_stmt);
1970 gimple_assign_set_lhs (init_stmt, new_temp);
1971 vect_init_vector_1 (stmt, init_stmt, NULL);
1972 new_temp = make_ssa_name (vec_dest, NULL);
1973 new_stmt = gimple_build_assign (new_temp,
1974 gimple_assign_lhs (init_stmt));
1976 else
1978 new_stmt = gimple_build_call_vec (fndecl, vargs);
1979 new_temp = make_ssa_name (vec_dest, new_stmt);
1980 gimple_call_set_lhs (new_stmt, new_temp);
1982 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1984 if (j == 0)
1985 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1986 else
1987 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1989 prev_stmt_info = vinfo_for_stmt (new_stmt);
1992 break;
1994 case NARROW:
1995 for (j = 0; j < ncopies; ++j)
1997 /* Build argument list for the vectorized call. */
1998 if (j == 0)
1999 vargs.create (nargs * 2);
2000 else
2001 vargs.truncate (0);
2003 if (slp_node)
2005 vec<vec<tree> > vec_defs;
2006 vec_defs.create (nargs);
2007 vec<tree> vec_oprnds0;
2009 for (i = 0; i < nargs; i++)
2010 vargs.quick_push (gimple_call_arg (stmt, i));
2011 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2012 vec_oprnds0 = vec_defs[0];
2014 /* Arguments are ready. Create the new vector stmt. */
2015 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2017 size_t k;
2018 vargs.truncate (0);
2019 for (k = 0; k < nargs; k++)
2021 vec<tree> vec_oprndsk = vec_defs[k];
2022 vargs.quick_push (vec_oprndsk[i]);
2023 vargs.quick_push (vec_oprndsk[i + 1]);
2025 new_stmt = gimple_build_call_vec (fndecl, vargs);
2026 new_temp = make_ssa_name (vec_dest, new_stmt);
2027 gimple_call_set_lhs (new_stmt, new_temp);
2028 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2029 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2032 for (i = 0; i < nargs; i++)
2034 vec<tree> vec_oprndsi = vec_defs[i];
2035 vec_oprndsi.release ();
2037 vec_defs.release ();
2038 continue;
2041 for (i = 0; i < nargs; i++)
2043 op = gimple_call_arg (stmt, i);
2044 if (j == 0)
2046 vec_oprnd0
2047 = vect_get_vec_def_for_operand (op, stmt, NULL);
2048 vec_oprnd1
2049 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2051 else
2053 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2054 vec_oprnd0
2055 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2056 vec_oprnd1
2057 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2060 vargs.quick_push (vec_oprnd0);
2061 vargs.quick_push (vec_oprnd1);
2064 new_stmt = gimple_build_call_vec (fndecl, vargs);
2065 new_temp = make_ssa_name (vec_dest, new_stmt);
2066 gimple_call_set_lhs (new_stmt, new_temp);
2067 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2069 if (j == 0)
2070 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2071 else
2072 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2074 prev_stmt_info = vinfo_for_stmt (new_stmt);
2077 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2079 break;
2081 case WIDEN:
2082 /* No current target implements this case. */
2083 return false;
2086 vargs.release ();
2088 /* Update the exception handling table with the vector stmt if necessary. */
2089 if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2090 gimple_purge_dead_eh_edges (gimple_bb (stmt));
2092   /* The call in STMT might prevent it from being removed in dce.
2093      We however cannot remove it here, due to the way the ssa name
2094      it defines is mapped to the new definition.  So just replace the
2095      rhs of the statement with something harmless.  */
2097 if (slp_node)
2098 return true;
2100 type = TREE_TYPE (scalar_dest);
2101 if (is_pattern_stmt_p (stmt_info))
2102 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2103 else
2104 lhs = gimple_call_lhs (stmt);
2105 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2106 set_vinfo_for_stmt (new_stmt, stmt_info);
2107 set_vinfo_for_stmt (stmt, NULL);
2108 STMT_VINFO_STMT (stmt_info) = new_stmt;
2109 gsi_replace (gsi, new_stmt, false);
2111 return true;
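/* As a sketch of the NONE case above (the vectorized decl name is purely
   illustrative; the real one is whatever vectorizable_function obtained
   from the target), a scalar statement

       x = sqrtf (y);

   in a loop vectorized with V4SF becomes

       vect_x = VECT_SQRT_V4SF (vect_y);

   after which the original call is replaced by the harmless assignment
   described above.  */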
2115 /* Function vect_gen_widened_results_half
2117    Create a vector stmt whose code, number of arguments, and result
2118    variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
2119    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
2120 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2121 needs to be created (DECL is a function-decl of a target-builtin).
2122 STMT is the original scalar stmt that we are vectorizing. */
2124 static gimple
2125 vect_gen_widened_results_half (enum tree_code code,
2126 tree decl,
2127 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2128 tree vec_dest, gimple_stmt_iterator *gsi,
2129 gimple stmt)
2131 gimple new_stmt;
2132 tree new_temp;
2134 /* Generate half of the widened result: */
2135 if (code == CALL_EXPR)
2137 /* Target specific support */
2138 if (op_type == binary_op)
2139 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2140 else
2141 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2142 new_temp = make_ssa_name (vec_dest, new_stmt);
2143 gimple_call_set_lhs (new_stmt, new_temp);
2145 else
2147 /* Generic support */
2148 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2149 if (op_type != binary_op)
2150 vec_oprnd1 = NULL;
2151 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2152 vec_oprnd1);
2153 new_temp = make_ssa_name (vec_dest, new_stmt);
2154 gimple_assign_set_lhs (new_stmt, new_temp);
2156 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2158 return new_stmt;
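/* For example, when a V8HI multiplication is widened to V4SI results, this
   helper is invoked twice for each input vector: once with
   VEC_WIDEN_MULT_LO_EXPR and once with VEC_WIDEN_MULT_HI_EXPR (or, on
   targets that only provide builtins, with CODE == CALL_EXPR and the two
   corresponding DECLs), producing the low and high halves of the widened
   result.  */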
2162 /* Get vectorized definitions for loop-based vectorization. For the first
2163 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2164 scalar operand), and for the rest we get a copy with
2165 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2166 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2167 The vectors are collected into VEC_OPRNDS. */
2169 static void
2170 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2171 vec<tree> *vec_oprnds, int multi_step_cvt)
2173 tree vec_oprnd;
2175 /* Get first vector operand. */
2176   /* All the vector operands except the very first one (which is the scalar
2177      oprnd) are stmt copies.  */
2178 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2179 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2180 else
2181 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2183 vec_oprnds->quick_push (vec_oprnd);
2185 /* Get second vector operand. */
2186 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2187 vec_oprnds->quick_push (vec_oprnd);
2189 *oprnd = vec_oprnd;
2191 /* For conversion in multiple steps, continue to get operands
2192 recursively. */
2193 if (multi_step_cvt)
2194 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
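/* For instance, for a two-step narrowing (MULTI_STEP_CVT passed as
   vect_pow2 (multi_step_cvt) - 1 == 1 by vectorizable_conversion) the
   recursion above collects four vector defs in VEC_OPRNDS, which is the
   number of input vectors needed to produce one final narrowed vector.  */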
2198 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2199 For multi-step conversions store the resulting vectors and call the function
2200 recursively. */
2202 static void
2203 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
2204 int multi_step_cvt, gimple stmt,
2205 vec<tree> vec_dsts,
2206 gimple_stmt_iterator *gsi,
2207 slp_tree slp_node, enum tree_code code,
2208 stmt_vec_info *prev_stmt_info)
2210 unsigned int i;
2211 tree vop0, vop1, new_tmp, vec_dest;
2212 gimple new_stmt;
2213 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2215 vec_dest = vec_dsts.pop ();
2217 for (i = 0; i < vec_oprnds->length (); i += 2)
2219 /* Create demotion operation. */
2220 vop0 = (*vec_oprnds)[i];
2221 vop1 = (*vec_oprnds)[i + 1];
2222 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2223 new_tmp = make_ssa_name (vec_dest, new_stmt);
2224 gimple_assign_set_lhs (new_stmt, new_tmp);
2225 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2227 if (multi_step_cvt)
2228 /* Store the resulting vector for next recursive call. */
2229 (*vec_oprnds)[i/2] = new_tmp;
2230 else
2232	  /* This is the last step of the conversion sequence.  Store the
2233	     vectors in SLP_NODE or in the vector info of the scalar statement
2234	     (or in the STMT_VINFO_RELATED_STMT chain).  */
2235 if (slp_node)
2236 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2237 else
2239 if (!*prev_stmt_info)
2240 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2241 else
2242 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2244 *prev_stmt_info = vinfo_for_stmt (new_stmt);
2249 /* For multi-step demotion operations we first generate demotion operations
2250 from the source type to the intermediate types, and then combine the
2251 results (stored in VEC_OPRNDS) in demotion operation to the destination
2252 type. */
2253 if (multi_step_cvt)
2255 /* At each level of recursion we have half of the operands we had at the
2256 previous level. */
2257 vec_oprnds->truncate ((i+1)/2);
2258 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2259 stmt, vec_dsts, gsi, slp_node,
2260 VEC_PACK_TRUNC_EXPR,
2261 prev_stmt_info);
2264 vec_dsts.quick_push (vec_dest);
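/* As an example (vector modes are illustrative), demoting int to char with
   128-bit vectors is a two-step conversion: four V4SI operands are first
   packed pairwise into two V8HI vectors, which the recursive call then
   packs into the final V16QI vector, each step being a pairwise
   VEC_PACK_TRUNC_EXPR.  */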
2268 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2269 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2270 the resulting vectors and call the function recursively. */
2272 static void
2273 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
2274 vec<tree> *vec_oprnds1,
2275 gimple stmt, tree vec_dest,
2276 gimple_stmt_iterator *gsi,
2277 enum tree_code code1,
2278 enum tree_code code2, tree decl1,
2279 tree decl2, int op_type)
2281 int i;
2282 tree vop0, vop1, new_tmp1, new_tmp2;
2283 gimple new_stmt1, new_stmt2;
2284 vec<tree> vec_tmp = vNULL;
2286 vec_tmp.create (vec_oprnds0->length () * 2);
2287 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
2289 if (op_type == binary_op)
2290 vop1 = (*vec_oprnds1)[i];
2291 else
2292 vop1 = NULL_TREE;
2294 /* Generate the two halves of promotion operation. */
2295 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2296 op_type, vec_dest, gsi, stmt);
2297 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2298 op_type, vec_dest, gsi, stmt);
2299 if (is_gimple_call (new_stmt1))
2301 new_tmp1 = gimple_call_lhs (new_stmt1);
2302 new_tmp2 = gimple_call_lhs (new_stmt2);
2304 else
2306 new_tmp1 = gimple_assign_lhs (new_stmt1);
2307 new_tmp2 = gimple_assign_lhs (new_stmt2);
2310 /* Store the results for the next step. */
2311 vec_tmp.quick_push (new_tmp1);
2312 vec_tmp.quick_push (new_tmp2);
2315 vec_oprnds0->release ();
2316 *vec_oprnds0 = vec_tmp;
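/* E.g. for a V16QI -> V8HI promotion, every operand in VEC_OPRNDS0 yields
   a "low" and a "high" half, so on return VEC_OPRNDS0 holds twice as many
   vectors as before; a multi-step promotion simply feeds the result back
   into this function with the codes of the next step.  */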
2320 /* Check if STMT performs a conversion operation, that can be vectorized.
2321 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2322 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2323 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2325 static bool
2326 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2327 gimple *vec_stmt, slp_tree slp_node)
2329 tree vec_dest;
2330 tree scalar_dest;
2331 tree op0, op1 = NULL_TREE;
2332 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2333 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2334 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2335 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2336 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2337 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2338 tree new_temp;
2339 tree def;
2340 gimple def_stmt;
2341 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2342 gimple new_stmt = NULL;
2343 stmt_vec_info prev_stmt_info;
2344 int nunits_in;
2345 int nunits_out;
2346 tree vectype_out, vectype_in;
2347 int ncopies, i, j;
2348 tree lhs_type, rhs_type;
2349 enum { NARROW, NONE, WIDEN } modifier;
2350 vec<tree> vec_oprnds0 = vNULL;
2351 vec<tree> vec_oprnds1 = vNULL;
2352 tree vop0;
2353 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2354 int multi_step_cvt = 0;
2355 vec<tree> vec_dsts = vNULL;
2356 vec<tree> interm_types = vNULL;
2357 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2358 int op_type;
2359 enum machine_mode rhs_mode;
2360 unsigned short fltsz;
2362 /* Is STMT a vectorizable conversion? */
2364 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2365 return false;
2367 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2368 return false;
2370 if (!is_gimple_assign (stmt))
2371 return false;
2373 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2374 return false;
2376 code = gimple_assign_rhs_code (stmt);
2377 if (!CONVERT_EXPR_CODE_P (code)
2378 && code != FIX_TRUNC_EXPR
2379 && code != FLOAT_EXPR
2380 && code != WIDEN_MULT_EXPR
2381 && code != WIDEN_LSHIFT_EXPR)
2382 return false;
2384 op_type = TREE_CODE_LENGTH (code);
2386 /* Check types of lhs and rhs. */
2387 scalar_dest = gimple_assign_lhs (stmt);
2388 lhs_type = TREE_TYPE (scalar_dest);
2389 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2391 op0 = gimple_assign_rhs1 (stmt);
2392 rhs_type = TREE_TYPE (op0);
2394 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2395 && !((INTEGRAL_TYPE_P (lhs_type)
2396 && INTEGRAL_TYPE_P (rhs_type))
2397 || (SCALAR_FLOAT_TYPE_P (lhs_type)
2398 && SCALAR_FLOAT_TYPE_P (rhs_type))))
2399 return false;
2401 if ((INTEGRAL_TYPE_P (lhs_type)
2402 && (TYPE_PRECISION (lhs_type)
2403 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2404 || (INTEGRAL_TYPE_P (rhs_type)
2405 && (TYPE_PRECISION (rhs_type)
2406 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2408 if (dump_enabled_p ())
2409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2410 "type conversion to/from bit-precision unsupported."
2411 "\n");
2412 return false;
2415 /* Check the operands of the operation. */
2416 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2417 &def_stmt, &def, &dt[0], &vectype_in))
2419 if (dump_enabled_p ())
2420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2421 "use not simple.\n");
2422 return false;
2424 if (op_type == binary_op)
2426 bool ok;
2428 op1 = gimple_assign_rhs2 (stmt);
2429 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2430 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2431 OP1. */
2432 if (CONSTANT_CLASS_P (op0))
2433 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
2434 &def_stmt, &def, &dt[1], &vectype_in);
2435 else
2436 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
2437 &def, &dt[1]);
2439 if (!ok)
2441 if (dump_enabled_p ())
2442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2443 "use not simple.\n");
2444 return false;
2448   /* If op0 is an external or constant def, use a vector type of
2449      the same size as the output vector type.  */
2450 if (!vectype_in)
2451 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2452 if (vec_stmt)
2453 gcc_assert (vectype_in);
2454 if (!vectype_in)
2456 if (dump_enabled_p ())
2458 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2459 "no vectype for scalar type ");
2460 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2461 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2464 return false;
2467 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2468 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2469 if (nunits_in < nunits_out)
2470 modifier = NARROW;
2471 else if (nunits_out == nunits_in)
2472 modifier = NONE;
2473 else
2474 modifier = WIDEN;
2476 /* Multiple types in SLP are handled by creating the appropriate number of
2477 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2478 case of SLP. */
2479 if (slp_node || PURE_SLP_STMT (stmt_info))
2480 ncopies = 1;
2481 else if (modifier == NARROW)
2482 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2483 else
2484 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2486 /* Sanity check: make sure that at least one copy of the vectorized stmt
2487 needs to be generated. */
2488 gcc_assert (ncopies >= 1);
2490 /* Supportable by target? */
2491 switch (modifier)
2493 case NONE:
2494 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2495 return false;
2496 if (supportable_convert_operation (code, vectype_out, vectype_in,
2497 &decl1, &code1))
2498 break;
2499 /* FALLTHRU */
2500 unsupported:
2501 if (dump_enabled_p ())
2502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2503 "conversion not supported by target.\n");
2504 return false;
2506 case WIDEN:
2507 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2508 &code1, &code2, &multi_step_cvt,
2509 &interm_types))
2511 /* Binary widening operation can only be supported directly by the
2512 architecture. */
2513 gcc_assert (!(multi_step_cvt && op_type == binary_op));
2514 break;
2517 if (code != FLOAT_EXPR
2518 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2519 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2520 goto unsupported;
2522 rhs_mode = TYPE_MODE (rhs_type);
2523 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2524 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2525 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2526 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2528 cvt_type
2529 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2530 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2531 if (cvt_type == NULL_TREE)
2532 goto unsupported;
2534 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2536 if (!supportable_convert_operation (code, vectype_out,
2537 cvt_type, &decl1, &codecvt1))
2538 goto unsupported;
2540 else if (!supportable_widening_operation (code, stmt, vectype_out,
2541 cvt_type, &codecvt1,
2542 &codecvt2, &multi_step_cvt,
2543 &interm_types))
2544 continue;
2545 else
2546 gcc_assert (multi_step_cvt == 0);
2548 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2549 vectype_in, &code1, &code2,
2550 &multi_step_cvt, &interm_types))
2551 break;
2554 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2555 goto unsupported;
2557 if (GET_MODE_SIZE (rhs_mode) == fltsz)
2558 codecvt2 = ERROR_MARK;
2559 else
2561 multi_step_cvt++;
2562 interm_types.safe_push (cvt_type);
2563 cvt_type = NULL_TREE;
2565 break;
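      /* For instance (the scalar types are examples only), a FLOAT_EXPR from
	 short to double can be handled here in two stages: the shorts are
	 first widened to an intermediate integer type via NOP_EXPR
	 (CODE1/CODE2, with any extra steps recorded in INTERM_TYPES), and the
	 intermediate integers are then converted to double via
	 CODECVT1/CODECVT2.  */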
2567 case NARROW:
2568 gcc_assert (op_type == unary_op);
2569 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2570 &code1, &multi_step_cvt,
2571 &interm_types))
2572 break;
2574 if (code != FIX_TRUNC_EXPR
2575 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2576 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2577 goto unsupported;
2579 rhs_mode = TYPE_MODE (rhs_type);
2580 cvt_type
2581 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2582 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2583 if (cvt_type == NULL_TREE)
2584 goto unsupported;
2585 if (!supportable_convert_operation (code, cvt_type, vectype_in,
2586 &decl1, &codecvt1))
2587 goto unsupported;
2588 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2589 &code1, &multi_step_cvt,
2590 &interm_types))
2591 break;
2592 goto unsupported;
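      /* Likewise for NARROW: e.g. a FIX_TRUNC_EXPR from double to short (the
	 types are examples only) first converts the doubles to the same-width
	 integer type CVT_TYPE via CODECVT1, and then narrows that integer
	 vector down to short using the VEC_PACK_TRUNC_EXPR steps found by
	 supportable_narrowing_operation above.  */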
2594 default:
2595 gcc_unreachable ();
2598 if (!vec_stmt) /* transformation not required. */
2600 if (dump_enabled_p ())
2601 dump_printf_loc (MSG_NOTE, vect_location,
2602 "=== vectorizable_conversion ===\n");
2603 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2605 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2606 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2608 else if (modifier == NARROW)
2610 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2611 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2613 else
2615 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2616 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2618 interm_types.release ();
2619 return true;
2622 /** Transform. **/
2623 if (dump_enabled_p ())
2624 dump_printf_loc (MSG_NOTE, vect_location,
2625 "transform conversion. ncopies = %d.\n", ncopies);
2627 if (op_type == binary_op)
2629 if (CONSTANT_CLASS_P (op0))
2630 op0 = fold_convert (TREE_TYPE (op1), op0);
2631 else if (CONSTANT_CLASS_P (op1))
2632 op1 = fold_convert (TREE_TYPE (op0), op1);
2635 /* In case of multi-step conversion, we first generate conversion operations
2636      to the intermediate types, and then from those types to the final one.
2637      We create vector destinations for the intermediate types (TYPES) received
2638 from supportable_*_operation, and store them in the correct order
2639 for future use in vect_create_vectorized_*_stmts (). */
2640 vec_dsts.create (multi_step_cvt + 1);
2641 vec_dest = vect_create_destination_var (scalar_dest,
2642 (cvt_type && modifier == WIDEN)
2643 ? cvt_type : vectype_out);
2644 vec_dsts.quick_push (vec_dest);
2646 if (multi_step_cvt)
2648 for (i = interm_types.length () - 1;
2649 interm_types.iterate (i, &intermediate_type); i--)
2651 vec_dest = vect_create_destination_var (scalar_dest,
2652 intermediate_type);
2653 vec_dsts.quick_push (vec_dest);
2657 if (cvt_type)
2658 vec_dest = vect_create_destination_var (scalar_dest,
2659 modifier == WIDEN
2660 ? vectype_out : cvt_type);
2662 if (!slp_node)
2664 if (modifier == WIDEN)
2666 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
2667 if (op_type == binary_op)
2668 vec_oprnds1.create (1);
2670 else if (modifier == NARROW)
2671 vec_oprnds0.create (
2672 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2674 else if (code == WIDEN_LSHIFT_EXPR)
2675 vec_oprnds1.create (slp_node->vec_stmts_size);
2677 last_oprnd = op0;
2678 prev_stmt_info = NULL;
2679 switch (modifier)
2681 case NONE:
2682 for (j = 0; j < ncopies; j++)
2684 if (j == 0)
2685 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2686 -1);
2687 else
2688 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2690 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2692 /* Arguments are ready, create the new vector stmt. */
2693 if (code1 == CALL_EXPR)
2695 new_stmt = gimple_build_call (decl1, 1, vop0);
2696 new_temp = make_ssa_name (vec_dest, new_stmt);
2697 gimple_call_set_lhs (new_stmt, new_temp);
2699 else
2701 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2702 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2703 vop0, NULL);
2704 new_temp = make_ssa_name (vec_dest, new_stmt);
2705 gimple_assign_set_lhs (new_stmt, new_temp);
2708 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2709 if (slp_node)
2710 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2713 if (j == 0)
2714 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2715 else
2716 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2717 prev_stmt_info = vinfo_for_stmt (new_stmt);
2719 break;
2721 case WIDEN:
2722 /* In case the vectorization factor (VF) is bigger than the number
2723 of elements that we can fit in a vectype (nunits), we have to
2724	 generate more than one vector stmt - i.e. we need to "unroll"
2725 the vector stmt by a factor VF/nunits. */
2726 for (j = 0; j < ncopies; j++)
2728 /* Handle uses. */
2729 if (j == 0)
2731 if (slp_node)
2733 if (code == WIDEN_LSHIFT_EXPR)
2735 unsigned int k;
2737 vec_oprnd1 = op1;
2738 /* Store vec_oprnd1 for every vector stmt to be created
2739 for SLP_NODE. We check during the analysis that all
2740 the shift arguments are the same. */
2741 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2742 vec_oprnds1.quick_push (vec_oprnd1);
2744 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2745 slp_node, -1);
2747 else
2748 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2749 &vec_oprnds1, slp_node, -1);
2751 else
2753 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2754 vec_oprnds0.quick_push (vec_oprnd0);
2755 if (op_type == binary_op)
2757 if (code == WIDEN_LSHIFT_EXPR)
2758 vec_oprnd1 = op1;
2759 else
2760 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2761 NULL);
2762 vec_oprnds1.quick_push (vec_oprnd1);
2766 else
2768 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2769 vec_oprnds0.truncate (0);
2770 vec_oprnds0.quick_push (vec_oprnd0);
2771 if (op_type == binary_op)
2773 if (code == WIDEN_LSHIFT_EXPR)
2774 vec_oprnd1 = op1;
2775 else
2776 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2777 vec_oprnd1);
2778 vec_oprnds1.truncate (0);
2779 vec_oprnds1.quick_push (vec_oprnd1);
2783 /* Arguments are ready. Create the new vector stmts. */
2784 for (i = multi_step_cvt; i >= 0; i--)
2786 tree this_dest = vec_dsts[i];
2787 enum tree_code c1 = code1, c2 = code2;
2788 if (i == 0 && codecvt2 != ERROR_MARK)
2790 c1 = codecvt1;
2791 c2 = codecvt2;
2793 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2794 &vec_oprnds1,
2795 stmt, this_dest, gsi,
2796 c1, c2, decl1, decl2,
2797 op_type);
2800 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2802 if (cvt_type)
2804 if (codecvt1 == CALL_EXPR)
2806 new_stmt = gimple_build_call (decl1, 1, vop0);
2807 new_temp = make_ssa_name (vec_dest, new_stmt);
2808 gimple_call_set_lhs (new_stmt, new_temp);
2810 else
2812 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2813 new_temp = make_ssa_name (vec_dest, NULL);
2814 new_stmt = gimple_build_assign_with_ops (codecvt1,
2815 new_temp,
2816 vop0, NULL);
2819 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2821 else
2822 new_stmt = SSA_NAME_DEF_STMT (vop0);
2824 if (slp_node)
2825 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2826 else
2828 if (!prev_stmt_info)
2829 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2830 else
2831 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2832 prev_stmt_info = vinfo_for_stmt (new_stmt);
2837 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2838 break;
2840 case NARROW:
2841 /* In case the vectorization factor (VF) is bigger than the number
2842 of elements that we can fit in a vectype (nunits), we have to
2843	 generate more than one vector stmt - i.e. we need to "unroll"
2844 the vector stmt by a factor VF/nunits. */
2845 for (j = 0; j < ncopies; j++)
2847 /* Handle uses. */
2848 if (slp_node)
2849 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2850 slp_node, -1);
2851 else
2853 vec_oprnds0.truncate (0);
2854 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2855 vect_pow2 (multi_step_cvt) - 1);
2858 /* Arguments are ready. Create the new vector stmts. */
2859 if (cvt_type)
2860 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2862 if (codecvt1 == CALL_EXPR)
2864 new_stmt = gimple_build_call (decl1, 1, vop0);
2865 new_temp = make_ssa_name (vec_dest, new_stmt);
2866 gimple_call_set_lhs (new_stmt, new_temp);
2868 else
2870 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2871 new_temp = make_ssa_name (vec_dest, NULL);
2872 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2873 vop0, NULL);
2876 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2877 vec_oprnds0[i] = new_temp;
2880 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2881 stmt, vec_dsts, gsi,
2882 slp_node, code1,
2883 &prev_stmt_info);
2886 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2887 break;
2890 vec_oprnds0.release ();
2891 vec_oprnds1.release ();
2892 vec_dsts.release ();
2893 interm_types.release ();
2895 return true;
2899 /* Function vectorizable_assignment.
2901 Check if STMT performs an assignment (copy) that can be vectorized.
2902 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2903 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2904 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2906 static bool
2907 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2908 gimple *vec_stmt, slp_tree slp_node)
2910 tree vec_dest;
2911 tree scalar_dest;
2912 tree op;
2913 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2914 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2915 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2916 tree new_temp;
2917 tree def;
2918 gimple def_stmt;
2919 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2920 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2921 int ncopies;
2922 int i, j;
2923 vec<tree> vec_oprnds = vNULL;
2924 tree vop;
2925 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2926 gimple new_stmt = NULL;
2927 stmt_vec_info prev_stmt_info = NULL;
2928 enum tree_code code;
2929 tree vectype_in;
2931 /* Multiple types in SLP are handled by creating the appropriate number of
2932 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2933 case of SLP. */
2934 if (slp_node || PURE_SLP_STMT (stmt_info))
2935 ncopies = 1;
2936 else
2937 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2939 gcc_assert (ncopies >= 1);
2941 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2942 return false;
2944 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2945 return false;
2947 /* Is vectorizable assignment? */
2948 if (!is_gimple_assign (stmt))
2949 return false;
2951 scalar_dest = gimple_assign_lhs (stmt);
2952 if (TREE_CODE (scalar_dest) != SSA_NAME)
2953 return false;
2955 code = gimple_assign_rhs_code (stmt);
2956 if (gimple_assign_single_p (stmt)
2957 || code == PAREN_EXPR
2958 || CONVERT_EXPR_CODE_P (code))
2959 op = gimple_assign_rhs1 (stmt);
2960 else
2961 return false;
2963 if (code == VIEW_CONVERT_EXPR)
2964 op = TREE_OPERAND (op, 0);
2966 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2967 &def_stmt, &def, &dt[0], &vectype_in))
2969 if (dump_enabled_p ())
2970 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2971 "use not simple.\n");
2972 return false;
2975 /* We can handle NOP_EXPR conversions that do not change the number
2976 of elements or the vector size. */
2977 if ((CONVERT_EXPR_CODE_P (code)
2978 || code == VIEW_CONVERT_EXPR)
2979 && (!vectype_in
2980 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2981 || (GET_MODE_SIZE (TYPE_MODE (vectype))
2982 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2983 return false;
2985 /* We do not handle bit-precision changes. */
2986 if ((CONVERT_EXPR_CODE_P (code)
2987 || code == VIEW_CONVERT_EXPR)
2988 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2989 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2990 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2991 || ((TYPE_PRECISION (TREE_TYPE (op))
2992 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2993 /* But a conversion that does not change the bit-pattern is ok. */
2994 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2995 > TYPE_PRECISION (TREE_TYPE (op)))
2996 && TYPE_UNSIGNED (TREE_TYPE (op))))
2998 if (dump_enabled_p ())
2999 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3000 "type conversion to/from bit-precision "
3001 "unsupported.\n");
3002 return false;
3005 if (!vec_stmt) /* transformation not required. */
3007 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
3008 if (dump_enabled_p ())
3009 dump_printf_loc (MSG_NOTE, vect_location,
3010 "=== vectorizable_assignment ===\n");
3011 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3012 return true;
3015 /** Transform. **/
3016 if (dump_enabled_p ())
3017 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
3019 /* Handle def. */
3020 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3022 /* Handle use. */
3023 for (j = 0; j < ncopies; j++)
3025 /* Handle uses. */
3026 if (j == 0)
3027 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
3028 else
3029 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
3031      /* Arguments are ready.  Create the new vector stmt.  */
3032 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3034 if (CONVERT_EXPR_CODE_P (code)
3035 || code == VIEW_CONVERT_EXPR)
3036 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
3037 new_stmt = gimple_build_assign (vec_dest, vop);
3038 new_temp = make_ssa_name (vec_dest, new_stmt);
3039 gimple_assign_set_lhs (new_stmt, new_temp);
3040 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3041 if (slp_node)
3042 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3045 if (slp_node)
3046 continue;
3048 if (j == 0)
3049 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3050 else
3051 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3053 prev_stmt_info = vinfo_for_stmt (new_stmt);
3056 vec_oprnds.release ();
3057 return true;
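/* For example, a sign-changing conversion such as u = (unsigned int) s is
   vectorized by the code above as a plain copy: each vector operand is
   wrapped in a VIEW_CONVERT_EXPR to the destination vector type, so no
   instruction beyond the vector assignment itself is generated.  */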
3061 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
3062 either as shift by a scalar or by a vector. */
3064 bool
3065 vect_supportable_shift (enum tree_code code, tree scalar_type)
3068 enum machine_mode vec_mode;
3069 optab optab;
3070 int icode;
3071 tree vectype;
3073 vectype = get_vectype_for_scalar_type (scalar_type);
3074 if (!vectype)
3075 return false;
3077 optab = optab_for_tree_code (code, vectype, optab_scalar);
3078 if (!optab
3079 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
3081 optab = optab_for_tree_code (code, vectype, optab_vector);
3082 if (!optab
3083 || (optab_handler (optab, TYPE_MODE (vectype))
3084 == CODE_FOR_nothing))
3085 return false;
3088 vec_mode = TYPE_MODE (vectype);
3089 icode = (int) optab_handler (optab, vec_mode);
3090 if (icode == CODE_FOR_nothing)
3091 return false;
3093 return true;
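/* A caller (e.g. the pattern recognizer) might use this as

       if (!vect_supportable_shift (RSHIFT_EXPR, itype))
	 return NULL;

   before introducing a new shift (ITYPE being whatever scalar type the
   caller works with); the result is true whenever the target implements
   either the shift-by-scalar or the shift-by-vector form for the
   corresponding vector mode.  */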
3097 /* Function vectorizable_shift.
3099 Check if STMT performs a shift operation that can be vectorized.
3100 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3101 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3102 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3104 static bool
3105 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3106 gimple *vec_stmt, slp_tree slp_node)
3108 tree vec_dest;
3109 tree scalar_dest;
3110 tree op0, op1 = NULL;
3111 tree vec_oprnd1 = NULL_TREE;
3112 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3113 tree vectype;
3114 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3115 enum tree_code code;
3116 enum machine_mode vec_mode;
3117 tree new_temp;
3118 optab optab;
3119 int icode;
3120 enum machine_mode optab_op2_mode;
3121 tree def;
3122 gimple def_stmt;
3123 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3124 gimple new_stmt = NULL;
3125 stmt_vec_info prev_stmt_info;
3126 int nunits_in;
3127 int nunits_out;
3128 tree vectype_out;
3129 tree op1_vectype;
3130 int ncopies;
3131 int j, i;
3132 vec<tree> vec_oprnds0 = vNULL;
3133 vec<tree> vec_oprnds1 = vNULL;
3134 tree vop0, vop1;
3135 unsigned int k;
3136 bool scalar_shift_arg = true;
3137 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3138 int vf;
3140 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3141 return false;
3143 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3144 return false;
3146 /* Is STMT a vectorizable binary/unary operation? */
3147 if (!is_gimple_assign (stmt))
3148 return false;
3150 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3151 return false;
3153 code = gimple_assign_rhs_code (stmt);
3155 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3156 || code == RROTATE_EXPR))
3157 return false;
3159 scalar_dest = gimple_assign_lhs (stmt);
3160 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3161 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3162 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3164 if (dump_enabled_p ())
3165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3166 "bit-precision shifts not supported.\n");
3167 return false;
3170 op0 = gimple_assign_rhs1 (stmt);
3171 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3172 &def_stmt, &def, &dt[0], &vectype))
3174 if (dump_enabled_p ())
3175 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3176 "use not simple.\n");
3177 return false;
3179   /* If op0 is an external or constant def, use a vector type with
3180      the same size as the output vector type.  */
3181 if (!vectype)
3182 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3183 if (vec_stmt)
3184 gcc_assert (vectype);
3185 if (!vectype)
3187 if (dump_enabled_p ())
3188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3189 "no vectype for scalar type\n");
3190 return false;
3193 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3194 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3195 if (nunits_out != nunits_in)
3196 return false;
3198 op1 = gimple_assign_rhs2 (stmt);
3199 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3200 &def, &dt[1], &op1_vectype))
3202 if (dump_enabled_p ())
3203 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3204 "use not simple.\n");
3205 return false;
3208 if (loop_vinfo)
3209 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3210 else
3211 vf = 1;
3213 /* Multiple types in SLP are handled by creating the appropriate number of
3214 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3215 case of SLP. */
3216 if (slp_node || PURE_SLP_STMT (stmt_info))
3217 ncopies = 1;
3218 else
3219 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3221 gcc_assert (ncopies >= 1);
3223 /* Determine whether the shift amount is a vector, or scalar. If the
3224 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3226 if (dt[1] == vect_internal_def && !slp_node)
3227 scalar_shift_arg = false;
3228 else if (dt[1] == vect_constant_def
3229 || dt[1] == vect_external_def
3230 || dt[1] == vect_internal_def)
3232	/* In SLP, we need to check whether the shift count is the same for
3233	   all statements; in loops, if it is a constant or invariant, it is
3234	   always a scalar shift.  */
3235 if (slp_node)
3237 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3238 gimple slpstmt;
3240 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
3241 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3242 scalar_shift_arg = false;
3245 else
3247 if (dump_enabled_p ())
3248 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3249 "operand mode requires invariant argument.\n");
3250 return false;
3253 /* Vector shifted by vector. */
3254 if (!scalar_shift_arg)
3256 optab = optab_for_tree_code (code, vectype, optab_vector);
3257 if (dump_enabled_p ())
3258 dump_printf_loc (MSG_NOTE, vect_location,
3259 "vector/vector shift/rotate found.\n");
3261 if (!op1_vectype)
3262 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3263 if (op1_vectype == NULL_TREE
3264 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3266 if (dump_enabled_p ())
3267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3268 "unusable type for last operand in"
3269 " vector/vector shift/rotate.\n");
3270 return false;
3273   /* See if the machine has a vector-shifted-by-scalar insn, and if not,
3274      then see if it has a vector-shifted-by-vector insn.  */
3275 else
3277 optab = optab_for_tree_code (code, vectype, optab_scalar);
3278 if (optab
3279 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3281 if (dump_enabled_p ())
3282 dump_printf_loc (MSG_NOTE, vect_location,
3283 "vector/scalar shift/rotate found.\n");
3285 else
3287 optab = optab_for_tree_code (code, vectype, optab_vector);
3288 if (optab
3289 && (optab_handler (optab, TYPE_MODE (vectype))
3290 != CODE_FOR_nothing))
3292 scalar_shift_arg = false;
3294 if (dump_enabled_p ())
3295 dump_printf_loc (MSG_NOTE, vect_location,
3296 "vector/vector shift/rotate found.\n");
3298	      /* Unlike the other binary operators, shifts/rotates have
3299		 an int rhs rather than one of the same type as the lhs,
3300		 so make sure the scalar has the right type if we are
3301		 dealing with vectors of long long/long/short/char.  */
3302 if (dt[1] == vect_constant_def)
3303 op1 = fold_convert (TREE_TYPE (vectype), op1);
3304 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3305 TREE_TYPE (op1)))
3307 if (slp_node
3308 && TYPE_MODE (TREE_TYPE (vectype))
3309 != TYPE_MODE (TREE_TYPE (op1)))
3311 if (dump_enabled_p ())
3312 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3313 "unusable type for last operand in"
3314 " vector/vector shift/rotate.\n");
3315 return false;
3317 if (vec_stmt && !slp_node)
3319 op1 = fold_convert (TREE_TYPE (vectype), op1);
3320 op1 = vect_init_vector (stmt, op1,
3321 TREE_TYPE (vectype), NULL);
3328 /* Supportable by target? */
3329 if (!optab)
3331 if (dump_enabled_p ())
3332 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3333 "no optab.\n");
3334 return false;
3336 vec_mode = TYPE_MODE (vectype);
3337 icode = (int) optab_handler (optab, vec_mode);
3338 if (icode == CODE_FOR_nothing)
3340 if (dump_enabled_p ())
3341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3342 "op not supported by target.\n");
3343 /* Check only during analysis. */
3344 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3345 || (vf < vect_min_worthwhile_factor (code)
3346 && !vec_stmt))
3347 return false;
3348 if (dump_enabled_p ())
3349 dump_printf_loc (MSG_NOTE, vect_location,
3350 "proceeding using word mode.\n");
3353 /* Worthwhile without SIMD support? Check only during analysis. */
3354 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3355 && vf < vect_min_worthwhile_factor (code)
3356 && !vec_stmt)
3358 if (dump_enabled_p ())
3359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3360 "not worthwhile without SIMD support.\n");
3361 return false;
3364 if (!vec_stmt) /* transformation not required. */
3366 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3367 if (dump_enabled_p ())
3368 dump_printf_loc (MSG_NOTE, vect_location,
3369 "=== vectorizable_shift ===\n");
3370 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3371 return true;
3374 /** Transform. **/
3376 if (dump_enabled_p ())
3377 dump_printf_loc (MSG_NOTE, vect_location,
3378 "transform binary/unary operation.\n");
3380 /* Handle def. */
3381 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3383 prev_stmt_info = NULL;
3384 for (j = 0; j < ncopies; j++)
3386 /* Handle uses. */
3387 if (j == 0)
3389 if (scalar_shift_arg)
3391 /* Vector shl and shr insn patterns can be defined with scalar
3392 operand 2 (shift operand). In this case, use constant or loop
3393 invariant op1 directly, without extending it to vector mode
3394 first. */
3395 optab_op2_mode = insn_data[icode].operand[2].mode;
3396 if (!VECTOR_MODE_P (optab_op2_mode))
3398 if (dump_enabled_p ())
3399 dump_printf_loc (MSG_NOTE, vect_location,
3400 "operand 1 using scalar mode.\n");
3401 vec_oprnd1 = op1;
3402 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
3403 vec_oprnds1.quick_push (vec_oprnd1);
3404 if (slp_node)
3406 /* Store vec_oprnd1 for every vector stmt to be created
3407 for SLP_NODE. We check during the analysis that all
3408 the shift arguments are the same.
3409 TODO: Allow different constants for different vector
3410 stmts generated for an SLP instance. */
3411 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3412 vec_oprnds1.quick_push (vec_oprnd1);
3417	  /* vec_oprnd1 is available if operand 1 should be of a scalar type
3418	     (a special case for certain kinds of vector shifts); otherwise,
3419	     operand 1 should be of a vector type (the usual case).  */
3420 if (vec_oprnd1)
3421 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3422 slp_node, -1);
3423 else
3424 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3425 slp_node, -1);
3427 else
3428 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3430 /* Arguments are ready. Create the new vector stmt. */
3431 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3433 vop1 = vec_oprnds1[i];
3434 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3435 new_temp = make_ssa_name (vec_dest, new_stmt);
3436 gimple_assign_set_lhs (new_stmt, new_temp);
3437 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3438 if (slp_node)
3439 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3442 if (slp_node)
3443 continue;
3445 if (j == 0)
3446 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3447 else
3448 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3449 prev_stmt_info = vinfo_for_stmt (new_stmt);
3452 vec_oprnds0.release ();
3453 vec_oprnds1.release ();
3455 return true;
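/* As a concrete (target-dependent) illustration: for a loop computing
   a[i] = b[i] << 3 with V8HI vectors, the shift amount is a loop-invariant
   scalar, so when the target provides a shift-by-scalar pattern the
   constant 3 is used directly as operand 1 of each vector shift; otherwise
   it is first broadcast into a vector by vect_init_vector and the
   shift-by-vector form is used instead.  */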
3459 static tree permute_vec_elements (tree, tree, tree, gimple,
3460 gimple_stmt_iterator *);
3463 /* Function vectorizable_operation.
3465 Check if STMT performs a binary, unary or ternary operation that can
3466 be vectorized.
3467 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3468 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3469 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3471 static bool
3472 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3473 gimple *vec_stmt, slp_tree slp_node)
3475 tree vec_dest;
3476 tree scalar_dest;
3477 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3478 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3479 tree vectype;
3480 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3481 enum tree_code code;
3482 enum machine_mode vec_mode;
3483 tree new_temp;
3484 int op_type;
3485 optab optab;
3486 int icode;
3487 tree def;
3488 gimple def_stmt;
3489 enum vect_def_type dt[3]
3490 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3491 gimple new_stmt = NULL;
3492 stmt_vec_info prev_stmt_info;
3493 int nunits_in;
3494 int nunits_out;
3495 tree vectype_out;
3496 int ncopies;
3497 int j, i;
3498 vec<tree> vec_oprnds0 = vNULL;
3499 vec<tree> vec_oprnds1 = vNULL;
3500 vec<tree> vec_oprnds2 = vNULL;
3501 tree vop0, vop1, vop2;
3502 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3503 int vf;
3505 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3506 return false;
3508 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3509 return false;
3511 /* Is STMT a vectorizable binary/unary operation? */
3512 if (!is_gimple_assign (stmt))
3513 return false;
3515 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3516 return false;
3518 code = gimple_assign_rhs_code (stmt);
3520 /* For pointer addition, we should use the normal plus for
3521 the vector addition. */
3522 if (code == POINTER_PLUS_EXPR)
3523 code = PLUS_EXPR;
3525   /* Support only unary, binary, or ternary operations.  */
3526 op_type = TREE_CODE_LENGTH (code);
3527 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3529 if (dump_enabled_p ())
3530 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3531 "num. args = %d (not unary/binary/ternary op).\n",
3532 op_type);
3533 return false;
3536 scalar_dest = gimple_assign_lhs (stmt);
3537 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3539 /* Most operations cannot handle bit-precision types without extra
3540 truncations. */
3541 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3542 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3543      /* Exceptions are bitwise binary operations.  */
3544 && code != BIT_IOR_EXPR
3545 && code != BIT_XOR_EXPR
3546 && code != BIT_AND_EXPR)
3548 if (dump_enabled_p ())
3549 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3550 "bit-precision arithmetic not supported.\n");
3551 return false;
3554 op0 = gimple_assign_rhs1 (stmt);
3555 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3556 &def_stmt, &def, &dt[0], &vectype))
3558 if (dump_enabled_p ())
3559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3560 "use not simple.\n");
3561 return false;
3563   /* If op0 is an external or constant def, use a vector type with
3564      the same size as the output vector type.  */
3565 if (!vectype)
3566 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3567 if (vec_stmt)
3568 gcc_assert (vectype);
3569 if (!vectype)
3571 if (dump_enabled_p ())
3573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3574 "no vectype for scalar type ");
3575 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3576 TREE_TYPE (op0));
3577 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3580 return false;
3583 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3584 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3585 if (nunits_out != nunits_in)
3586 return false;
3588 if (op_type == binary_op || op_type == ternary_op)
3590 op1 = gimple_assign_rhs2 (stmt);
3591 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3592 &def, &dt[1]))
3594 if (dump_enabled_p ())
3595 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3596 "use not simple.\n");
3597 return false;
3600 if (op_type == ternary_op)
3602 op2 = gimple_assign_rhs3 (stmt);
3603 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3604 &def, &dt[2]))
3606 if (dump_enabled_p ())
3607 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3608 "use not simple.\n");
3609 return false;
3613 if (loop_vinfo)
3614 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3615 else
3616 vf = 1;
3618 /* Multiple types in SLP are handled by creating the appropriate number of
3619 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3620 case of SLP. */
3621 if (slp_node || PURE_SLP_STMT (stmt_info))
3622 ncopies = 1;
3623 else
3624 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3626 gcc_assert (ncopies >= 1);
3628 /* Shifts are handled in vectorizable_shift (). */
3629 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3630 || code == RROTATE_EXPR)
3631 return false;
3633 /* Supportable by target? */
3635 vec_mode = TYPE_MODE (vectype);
3636 if (code == MULT_HIGHPART_EXPR)
3638 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
3639 icode = LAST_INSN_CODE;
3640 else
3641 icode = CODE_FOR_nothing;
3643 else
3645 optab = optab_for_tree_code (code, vectype, optab_default);
3646 if (!optab)
3648 if (dump_enabled_p ())
3649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3650 "no optab.\n");
3651 return false;
3653 icode = (int) optab_handler (optab, vec_mode);
3656 if (icode == CODE_FOR_nothing)
3658 if (dump_enabled_p ())
3659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3660 "op not supported by target.\n");
3661 /* Check only during analysis. */
3662 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3663 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
3664 return false;
3665 if (dump_enabled_p ())
3666 dump_printf_loc (MSG_NOTE, vect_location,
3667 "proceeding using word mode.\n");
3670 /* Worthwhile without SIMD support? Check only during analysis. */
3671 if (!VECTOR_MODE_P (vec_mode)
3672 && !vec_stmt
3673 && vf < vect_min_worthwhile_factor (code))
3675 if (dump_enabled_p ())
3676 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3677 "not worthwhile without SIMD support.\n");
3678 return false;
3681 if (!vec_stmt) /* transformation not required. */
3683 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3684 if (dump_enabled_p ())
3685 dump_printf_loc (MSG_NOTE, vect_location,
3686 "=== vectorizable_operation ===\n");
3687 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3688 return true;
3691 /** Transform. **/
3693 if (dump_enabled_p ())
3694 dump_printf_loc (MSG_NOTE, vect_location,
3695 "transform binary/unary operation.\n");
3697 /* Handle def. */
3698 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3700 /* In case the vectorization factor (VF) is bigger than the number
3701 of elements that we can fit in a vectype (nunits), we have to generate
3702      more than one vector stmt - i.e. we need to "unroll" the
3703 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3704 from one copy of the vector stmt to the next, in the field
3705 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3706 stages to find the correct vector defs to be used when vectorizing
3707 stmts that use the defs of the current stmt. The example below
3708 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3709 we need to create 4 vectorized stmts):
3711 before vectorization:
3712 RELATED_STMT VEC_STMT
3713 S1: x = memref - -
3714 S2: z = x + 1 - -
3716 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3717 there):
3718 RELATED_STMT VEC_STMT
3719 VS1_0: vx0 = memref0 VS1_1 -
3720 VS1_1: vx1 = memref1 VS1_2 -
3721 VS1_2: vx2 = memref2 VS1_3 -
3722 VS1_3: vx3 = memref3 - -
3723 S1: x = load - VS1_0
3724 S2: z = x + 1 - -
3726 step2: vectorize stmt S2 (done here):
3727 To vectorize stmt S2 we first need to find the relevant vector
3728 def for the first operand 'x'. This is, as usual, obtained from
3729 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3730 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3731 relevant vector def 'vx0'. Having found 'vx0' we can generate
3732 the vector stmt VS2_0, and as usual, record it in the
3733 STMT_VINFO_VEC_STMT of stmt S2.
3734 When creating the second copy (VS2_1), we obtain the relevant vector
3735 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3736 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3737 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3738 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3739 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3740 chain of stmts and pointers:
3741 RELATED_STMT VEC_STMT
3742 VS1_0: vx0 = memref0 VS1_1 -
3743 VS1_1: vx1 = memref1 VS1_2 -
3744 VS1_2: vx2 = memref2 VS1_3 -
3745 VS1_3: vx3 = memref3 - -
3746 S1: x = load - VS1_0
3747 VS2_0: vz0 = vx0 + v1 VS2_1 -
3748 VS2_1: vz1 = vx1 + v1 VS2_2 -
3749 VS2_2: vz2 = vx2 + v1 VS2_3 -
3750 VS2_3: vz3 = vx3 + v1 - -
3751 S2: z = x + 1 - VS2_0 */
3753 prev_stmt_info = NULL;
3754 for (j = 0; j < ncopies; j++)
3756 /* Handle uses. */
3757 if (j == 0)
3759 if (op_type == binary_op || op_type == ternary_op)
3760 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3761 slp_node, -1);
3762 else
3763 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3764 slp_node, -1);
3765 if (op_type == ternary_op)
3767 vec_oprnds2.create (1);
3768 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
3769 stmt,
3770 NULL));
3773 else
3775 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3776 if (op_type == ternary_op)
3778 tree vec_oprnd = vec_oprnds2.pop ();
3779 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
3780 vec_oprnd));
3784 /* Arguments are ready. Create the new vector stmt. */
3785 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3787 vop1 = ((op_type == binary_op || op_type == ternary_op)
3788 ? vec_oprnds1[i] : NULL_TREE);
3789 vop2 = ((op_type == ternary_op)
3790 ? vec_oprnds2[i] : NULL_TREE);
3791 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
3792 vop0, vop1, vop2);
3793 new_temp = make_ssa_name (vec_dest, new_stmt);
3794 gimple_assign_set_lhs (new_stmt, new_temp);
3795 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3796 if (slp_node)
3797 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3800 if (slp_node)
3801 continue;
3803 if (j == 0)
3804 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3805 else
3806 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3807 prev_stmt_info = vinfo_for_stmt (new_stmt);
3810 vec_oprnds0.release ();
3811 vec_oprnds1.release ();
3812 vec_oprnds2.release ();
3814 return true;
3817 /* A helper function to ensure data reference DR's base alignment
3818 for STMT_INFO. */
3820 static void
3821 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
3823 if (!dr->aux)
3824 return;
3826 if (((dataref_aux *)dr->aux)->base_misaligned)
3828 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3829 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
3831 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
3832 DECL_USER_ALIGN (base_decl) = 1;
3833 ((dataref_aux *)dr->aux)->base_misaligned = false;
3838 /* Function vectorizable_store.
3840 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3841 can be vectorized.
3842 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3843 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3844 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3846 static bool
3847 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3848 slp_tree slp_node)
3850 tree scalar_dest;
3851 tree data_ref;
3852 tree op;
3853 tree vec_oprnd = NULL_TREE;
3854 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3855 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3856 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3857 tree elem_type;
3858 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3859 struct loop *loop = NULL;
3860 enum machine_mode vec_mode;
3861 tree dummy;
3862 enum dr_alignment_support alignment_support_scheme;
3863 tree def;
3864 gimple def_stmt;
3865 enum vect_def_type dt;
3866 stmt_vec_info prev_stmt_info = NULL;
3867 tree dataref_ptr = NULL_TREE;
3868 tree dataref_offset = NULL_TREE;
3869 gimple ptr_incr = NULL;
3870 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3871 int ncopies;
3872 int j;
3873 gimple next_stmt, first_stmt = NULL;
3874 bool grouped_store = false;
3875 bool store_lanes_p = false;
3876 unsigned int group_size, i;
3877 vec<tree> dr_chain = vNULL;
3878 vec<tree> oprnds = vNULL;
3879 vec<tree> result_chain = vNULL;
3880 bool inv_p;
3881 vec<tree> vec_oprnds = vNULL;
3882 bool slp = (slp_node != NULL);
3883 unsigned int vec_num;
3884 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3885 tree aggr_type;
3887 if (loop_vinfo)
3888 loop = LOOP_VINFO_LOOP (loop_vinfo);
3890 /* Multiple types in SLP are handled by creating the appropriate number of
3891 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3892 case of SLP. */
3893 if (slp || PURE_SLP_STMT (stmt_info))
3894 ncopies = 1;
3895 else
3896 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3898 gcc_assert (ncopies >= 1);
3900 /* FORNOW. This restriction should be relaxed. */
3901 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3903 if (dump_enabled_p ())
3904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3905 "multiple types in nested loop.\n");
3906 return false;
3909 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3910 return false;
3912 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3913 return false;
3915 /* Is vectorizable store? */
3917 if (!is_gimple_assign (stmt))
3918 return false;
3920 scalar_dest = gimple_assign_lhs (stmt);
3921 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3922 && is_pattern_stmt_p (stmt_info))
3923 scalar_dest = TREE_OPERAND (scalar_dest, 0);
3924 if (TREE_CODE (scalar_dest) != ARRAY_REF
3925 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
3926 && TREE_CODE (scalar_dest) != INDIRECT_REF
3927 && TREE_CODE (scalar_dest) != COMPONENT_REF
3928 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3929 && TREE_CODE (scalar_dest) != REALPART_EXPR
3930 && TREE_CODE (scalar_dest) != MEM_REF)
3931 return false;
3933 gcc_assert (gimple_assign_single_p (stmt));
3934 op = gimple_assign_rhs1 (stmt);
3935 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3936 &def, &dt))
3938 if (dump_enabled_p ())
3939 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3940 "use not simple.\n");
3941 return false;
3944 elem_type = TREE_TYPE (vectype);
3945 vec_mode = TYPE_MODE (vectype);
3947 /* FORNOW. In some cases we can vectorize even if the data-type is not
3948 supported (e.g. array initialization with 0). */
3949 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3950 return false;
3952 if (!STMT_VINFO_DATA_REF (stmt_info))
3953 return false;
3955 if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3956 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3957 size_zero_node) < 0)
3959 if (dump_enabled_p ())
3960 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3961 "negative step for store.\n");
3962 return false;
3965 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
3967 grouped_store = true;
3968 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3969 if (!slp && !PURE_SLP_STMT (stmt_info))
3971 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3972 if (vect_store_lanes_supported (vectype, group_size))
3973 store_lanes_p = true;
3974 else if (!vect_grouped_store_supported (vectype, group_size))
3975 return false;
3978 if (first_stmt == stmt)
3980 /* STMT is the leader of the group. Check the operands of all the
3981 stmts of the group. */
3982 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3983 while (next_stmt)
3985 gcc_assert (gimple_assign_single_p (next_stmt));
3986 op = gimple_assign_rhs1 (next_stmt);
3987 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3988 &def_stmt, &def, &dt))
3990 if (dump_enabled_p ())
3991 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3992 "use not simple.\n");
3993 return false;
3995 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4000 if (!vec_stmt) /* transformation not required. */
4002 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
4003 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
4004 NULL, NULL, NULL);
4005 return true;
4008 /** Transform. **/
4010 ensure_base_align (stmt_info, dr);
4012 if (grouped_store)
4014 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4015 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4017 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
4019 /* FORNOW */
4020 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
4022 /* We vectorize all the stmts of the interleaving group when we
4023 reach the last stmt in the group. */
4024 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
4025 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
4026 && !slp)
4028 *vec_stmt = NULL;
4029 return true;
4032 if (slp)
4034 grouped_store = false;
4035 /* VEC_NUM is the number of vect stmts to be created for this
4036 group. */
4037 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4038 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4039 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4040 op = gimple_assign_rhs1 (first_stmt);
4042 else
4043 /* VEC_NUM is the number of vect stmts to be created for this
4044 group. */
4045 vec_num = group_size;
4047 else
4049 first_stmt = stmt;
4050 first_dr = dr;
4051 group_size = vec_num = 1;
4054 if (dump_enabled_p ())
4055 dump_printf_loc (MSG_NOTE, vect_location,
4056 "transform store. ncopies = %d\n", ncopies);
4058 dr_chain.create (group_size);
4059 oprnds.create (group_size);
4061 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4062 gcc_assert (alignment_support_scheme);
4063 /* Targets with store-lane instructions must not require explicit
4064 realignment. */
4065 gcc_assert (!store_lanes_p
4066 || alignment_support_scheme == dr_aligned
4067 || alignment_support_scheme == dr_unaligned_supported);
4069 if (store_lanes_p)
4070 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4071 else
4072 aggr_type = vectype;
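/* For illustration: with ELEM_TYPE int, NUNITS 4 and VEC_NUM 2 the aggregate built here is int[8], and the single IFN_STORE_LANES call emitted below then writes the whole interleaved group at once; without store-lanes each copy stores one VECTYPE-sized piece instead. */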
4074 /* In case the vectorization factor (VF) is bigger than the number
4075 of elements that we can fit in a vectype (nunits), we have to generate
4076 more than one vector stmt - i.e - we need to "unroll" the
4077 vector stmt by a factor VF/nunits. For more details see documentation in
4078 vect_get_vec_def_for_stmt_copy. */
4080 /* In case of interleaving (non-unit grouped access):
4082 S1: &base + 2 = x2
4083 S2: &base = x0
4084 S3: &base + 1 = x1
4085 S4: &base + 3 = x3
4087 We create vectorized stores starting from base address (the access of the
4088 first stmt in the chain (S2 in the above example), when the last store stmt
4089 of the chain (S4) is reached:
4091 VS1: &base = vx2
4092 VS2: &base + vec_size*1 = vx0
4093 VS3: &base + vec_size*2 = vx1
4094 VS4: &base + vec_size*3 = vx3
4096 Then permutation statements are generated:
4098 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4099 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
4102 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4103 (the order of the data-refs in the output of vect_permute_store_chain
4104 corresponds to the order of scalar stmts in the interleaving chain - see
4105 the documentation of vect_permute_store_chain()).
4107 In case of both multiple types and interleaving, above vector stores and
4108 permutation stmts are created for every copy. The result vector stmts are
4109 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4110 STMT_VINFO_RELATED_STMT for the next copies.
4113 prev_stmt_info = NULL;
4114 for (j = 0; j < ncopies; j++)
4116 gimple new_stmt;
4118 if (j == 0)
4120 if (slp)
4122 /* Get vectorized arguments for SLP_NODE. */
4123 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4124 NULL, slp_node, -1);
4126 vec_oprnd = vec_oprnds[0];
4128 else
4130 /* For interleaved stores we collect vectorized defs for all the
4131 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4132 used as an input to vect_permute_store_chain(), and OPRNDS as
4133 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4135 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4136 OPRNDS are of size 1. */
4137 next_stmt = first_stmt;
4138 for (i = 0; i < group_size; i++)
4140 /* Since gaps are not supported for interleaved stores,
4141 GROUP_SIZE is the exact number of stmts in the chain.
4142 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4143 there is no interleaving, GROUP_SIZE is 1, and only one
4144 iteration of the loop will be executed. */
4145 gcc_assert (next_stmt
4146 && gimple_assign_single_p (next_stmt));
4147 op = gimple_assign_rhs1 (next_stmt);
4149 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4150 NULL);
4151 dr_chain.quick_push (vec_oprnd);
4152 oprnds.quick_push (vec_oprnd);
4153 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4157 /* We should have caught mismatched types earlier. */
4158 gcc_assert (useless_type_conversion_p (vectype,
4159 TREE_TYPE (vec_oprnd)));
4160 bool simd_lane_access_p
4161 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
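/* For SIMD-lane accesses whose base is directly the address of a local variable with zero offset and init, address the array through a constant DATAREF_OFFSET instead of creating a data-ref pointer IV; later copies simply add TYPE_SIZE_UNIT (aggr_type) to that offset (see below). */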
4162 if (simd_lane_access_p
4163 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
4164 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
4165 && integer_zerop (DR_OFFSET (first_dr))
4166 && integer_zerop (DR_INIT (first_dr))
4167 && alias_sets_conflict_p (get_alias_set (aggr_type),
4168 get_alias_set (DR_REF (first_dr))))
4170 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
4171 dataref_offset = build_int_cst (reference_alias_ptr_type
4172 (DR_REF (first_dr)), 0);
4173 inv_p = false;
4175 else
4176 dataref_ptr
4177 = vect_create_data_ref_ptr (first_stmt, aggr_type,
4178 simd_lane_access_p ? loop : NULL,
4179 NULL_TREE, &dummy, gsi, &ptr_incr,
4180 simd_lane_access_p, &inv_p);
4181 gcc_assert (bb_vinfo || !inv_p);
4183 else
4185 /* For interleaved stores we created vectorized defs for all the
4186 defs stored in OPRNDS in the previous iteration (previous copy).
4187 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4188 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4189 next copy.
4190 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4191 OPRNDS are of size 1. */
4192 for (i = 0; i < group_size; i++)
4194 op = oprnds[i];
4195 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4196 &def, &dt);
4197 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4198 dr_chain[i] = vec_oprnd;
4199 oprnds[i] = vec_oprnd;
4201 if (dataref_offset)
4202 dataref_offset
4203 = int_const_binop (PLUS_EXPR, dataref_offset,
4204 TYPE_SIZE_UNIT (aggr_type));
4205 else
4206 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4207 TYPE_SIZE_UNIT (aggr_type));
4210 if (store_lanes_p)
4212 tree vec_array;
4214 /* Combine all the vectors into an array. */
4215 vec_array = create_vector_array (vectype, vec_num);
4216 for (i = 0; i < vec_num; i++)
4218 vec_oprnd = dr_chain[i];
4219 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4222 /* Emit:
4223 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4224 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4225 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4226 gimple_call_set_lhs (new_stmt, data_ref);
4227 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4229 else
4231 new_stmt = NULL;
4232 if (grouped_store)
4234 if (j == 0)
4235 result_chain.create (group_size);
4236 /* Permute. */
4237 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4238 &result_chain);
4241 next_stmt = first_stmt;
4242 for (i = 0; i < vec_num; i++)
4244 unsigned align, misalign;
4246 if (i > 0)
4247 /* Bump the vector pointer. */
4248 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4249 stmt, NULL_TREE);
4251 if (slp)
4252 vec_oprnd = vec_oprnds[i];
4253 else if (grouped_store)
4254 /* For grouped stores vectorized defs are interleaved in
4255 vect_permute_store_chain(). */
4256 vec_oprnd = result_chain[i];
4258 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4259 dataref_offset
4260 ? dataref_offset
4261 : build_int_cst (reference_alias_ptr_type
4262 (DR_REF (first_dr)), 0));
4263 align = TYPE_ALIGN_UNIT (vectype);
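/* Three cases follow: the access is known to be aligned; the misalignment is unknown (DR_MISALIGNMENT == -1), so only element alignment can be assumed; or the misalignment is a known constant. */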
4264 if (aligned_access_p (first_dr))
4265 misalign = 0;
4266 else if (DR_MISALIGNMENT (first_dr) == -1)
4268 TREE_TYPE (data_ref)
4269 = build_aligned_type (TREE_TYPE (data_ref),
4270 TYPE_ALIGN (elem_type));
4271 align = TYPE_ALIGN_UNIT (elem_type);
4272 misalign = 0;
4274 else
4276 TREE_TYPE (data_ref)
4277 = build_aligned_type (TREE_TYPE (data_ref),
4278 TYPE_ALIGN (elem_type));
4279 misalign = DR_MISALIGNMENT (first_dr);
4281 if (dataref_offset == NULL_TREE)
4282 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4283 misalign);
4285 /* Arguments are ready. Create the new vector stmt. */
4286 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4287 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4289 if (slp)
4290 continue;
4292 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4293 if (!next_stmt)
4294 break;
4297 if (!slp)
4299 if (j == 0)
4300 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4301 else
4302 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4303 prev_stmt_info = vinfo_for_stmt (new_stmt);
4307 dr_chain.release ();
4308 oprnds.release ();
4309 result_chain.release ();
4310 vec_oprnds.release ();
4312 return true;
4315 /* Given a vector type VECTYPE and permutation SEL returns
4316 the VECTOR_CST mask that implements the permutation of the
4317 vector elements. If that is impossible to do, returns NULL. */
4319 tree
4320 vect_gen_perm_mask (tree vectype, unsigned char *sel)
4322 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
4323 int i, nunits;
4325 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4327 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4328 return NULL;
4330 mask_elt_type = lang_hooks.types.type_for_mode
4331 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
4332 mask_type = get_vectype_for_scalar_type (mask_elt_type);
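/* The mask elements are unsigned integers of the same width as the data elements (e.g. a vector of floats gets a mask vector of same-width unsigned ints), so each SEL value indexes the concatenation of the two permuted inputs. */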
4334 mask_elts = XALLOCAVEC (tree, nunits);
4335 for (i = nunits - 1; i >= 0; i--)
4336 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4337 mask_vec = build_vector (mask_type, mask_elts);
4339 return mask_vec;
4342 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4343 reversal of the vector elements. If that is impossible to do,
4344 returns NULL. */
4346 static tree
4347 perm_mask_for_reverse (tree vectype)
4349 int i, nunits;
4350 unsigned char *sel;
4352 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4353 sel = XALLOCAVEC (unsigned char, nunits);
4355 for (i = 0; i < nunits; ++i)
4356 sel[i] = nunits - 1 - i;
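/* E.g. for NUNITS == 4 the selector built here is { 3, 2, 1, 0 }. */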
4358 return vect_gen_perm_mask (vectype, sel);
4361 /* Given vector variables X and Y that were generated for the scalar
4362 STMT, generate instructions to permute the vector elements of X and Y
4363 using permutation mask MASK_VEC, insert them at *GSI and return the
4364 permuted vector variable. */
4366 static tree
4367 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4368 gimple_stmt_iterator *gsi)
4370 tree vectype = TREE_TYPE (x);
4371 tree perm_dest, data_ref;
4372 gimple perm_stmt;
4374 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4375 data_ref = make_ssa_name (perm_dest, NULL);
4377 /* Generate the permute statement. */
4378 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4379 x, y, mask_vec);
4380 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4382 return data_ref;
4385 /* vectorizable_load.
4387 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4388 can be vectorized.
4389 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4390 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4391 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4393 static bool
4394 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4395 slp_tree slp_node, slp_instance slp_node_instance)
4397 tree scalar_dest;
4398 tree vec_dest = NULL;
4399 tree data_ref = NULL;
4400 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4401 stmt_vec_info prev_stmt_info;
4402 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4403 struct loop *loop = NULL;
4404 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4405 bool nested_in_vect_loop = false;
4406 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4407 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4408 tree elem_type;
4409 tree new_temp;
4410 enum machine_mode mode;
4411 gimple new_stmt = NULL;
4412 tree dummy;
4413 enum dr_alignment_support alignment_support_scheme;
4414 tree dataref_ptr = NULL_TREE;
4415 tree dataref_offset = NULL_TREE;
4416 gimple ptr_incr = NULL;
4417 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4418 int ncopies;
4419 int i, j, group_size, group_gap;
4420 tree msq = NULL_TREE, lsq;
4421 tree offset = NULL_TREE;
4422 tree realignment_token = NULL_TREE;
4423 gimple phi = NULL;
4424 vec<tree> dr_chain = vNULL;
4425 bool grouped_load = false;
4426 bool load_lanes_p = false;
4427 gimple first_stmt;
4428 bool inv_p;
4429 bool negative = false;
4430 bool compute_in_loop = false;
4431 struct loop *at_loop;
4432 int vec_num;
4433 bool slp = (slp_node != NULL);
4434 bool slp_perm = false;
4435 enum tree_code code;
4436 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4437 int vf;
4438 tree aggr_type;
4439 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4440 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4441 int gather_scale = 1;
4442 enum vect_def_type gather_dt = vect_unknown_def_type;
4444 if (loop_vinfo)
4446 loop = LOOP_VINFO_LOOP (loop_vinfo);
4447 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4448 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4450 else
4451 vf = 1;
4453 /* Multiple types in SLP are handled by creating the appropriate number of
4454 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4455 case of SLP. */
4456 if (slp || PURE_SLP_STMT (stmt_info))
4457 ncopies = 1;
4458 else
4459 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4461 gcc_assert (ncopies >= 1);
4463 /* FORNOW. This restriction should be relaxed. */
4464 if (nested_in_vect_loop && ncopies > 1)
4466 if (dump_enabled_p ())
4467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4468 "multiple types in nested loop.\n");
4469 return false;
4472 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4473 return false;
4475 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4476 return false;
4478 /* Is vectorizable load? */
4479 if (!is_gimple_assign (stmt))
4480 return false;
4482 scalar_dest = gimple_assign_lhs (stmt);
4483 if (TREE_CODE (scalar_dest) != SSA_NAME)
4484 return false;
4486 code = gimple_assign_rhs_code (stmt);
4487 if (code != ARRAY_REF
4488 && code != BIT_FIELD_REF
4489 && code != INDIRECT_REF
4490 && code != COMPONENT_REF
4491 && code != IMAGPART_EXPR
4492 && code != REALPART_EXPR
4493 && code != MEM_REF
4494 && TREE_CODE_CLASS (code) != tcc_declaration)
4495 return false;
4497 if (!STMT_VINFO_DATA_REF (stmt_info))
4498 return false;
4500 elem_type = TREE_TYPE (vectype);
4501 mode = TYPE_MODE (vectype);
4503 /* FORNOW. In some cases we can vectorize even if the data-type is not
4504 supported (e.g. data copies). */
4505 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4507 if (dump_enabled_p ())
4508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4509 "Aligned load, but unsupported type.\n");
4510 return false;
4513 /* Check if the load is a part of an interleaving chain. */
4514 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
4516 grouped_load = true;
4517 /* FORNOW */
4518 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4520 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4521 if (!slp && !PURE_SLP_STMT (stmt_info))
4523 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4524 if (vect_load_lanes_supported (vectype, group_size))
4525 load_lanes_p = true;
4526 else if (!vect_grouped_load_supported (vectype, group_size))
4527 return false;
4532 if (STMT_VINFO_GATHER_P (stmt_info))
4534 gimple def_stmt;
4535 tree def;
4536 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4537 &gather_off, &gather_scale);
4538 gcc_assert (gather_decl);
4539 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4540 &def_stmt, &def, &gather_dt,
4541 &gather_off_vectype))
4543 if (dump_enabled_p ())
4544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4545 "gather index use not simple.\n");
4546 return false;
4549 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4551 else
4553 negative = tree_int_cst_compare (nested_in_vect_loop
4554 ? STMT_VINFO_DR_STEP (stmt_info)
4555 : DR_STEP (dr),
4556 size_zero_node) < 0;
4557 if (negative && ncopies > 1)
4559 if (dump_enabled_p ())
4560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4561 "multiple types with negative step.\n");
4562 return false;
4565 if (negative)
4567 if (grouped_load)
4569 if (dump_enabled_p ())
4570 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4571 "negative step for group load not supported"
4572 "\n");
4573 return false;
4575 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4576 if (alignment_support_scheme != dr_aligned
4577 && alignment_support_scheme != dr_unaligned_supported)
4579 if (dump_enabled_p ())
4580 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4581 "negative step but alignment required.\n");
4582 return false;
4584 if (!perm_mask_for_reverse (vectype))
4586 if (dump_enabled_p ())
4587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4588 "negative step and reversing not supported."
4589 "\n");
4590 return false;
4595 if (!vec_stmt) /* transformation not required. */
4597 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4598 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
4599 return true;
4602 if (dump_enabled_p ())
4603 dump_printf_loc (MSG_NOTE, vect_location,
4604 "transform load. ncopies = %d\n", ncopies);
4606 /** Transform. **/
4608 ensure_base_align (stmt_info, dr);
4610 if (STMT_VINFO_GATHER_P (stmt_info))
4612 tree vec_oprnd0 = NULL_TREE, op;
4613 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4614 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4615 tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4616 edge pe = loop_preheader_edge (loop);
4617 gimple_seq seq;
4618 basic_block new_bb;
4619 enum { NARROW, NONE, WIDEN } modifier;
4620 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4622 if (nunits == gather_off_nunits)
4623 modifier = NONE;
4624 else if (nunits == gather_off_nunits / 2)
4626 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4627 modifier = WIDEN;
4629 for (i = 0; i < gather_off_nunits; ++i)
4630 sel[i] = i | nunits;
4632 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4633 gcc_assert (perm_mask != NULL_TREE);
4635 else if (nunits == gather_off_nunits * 2)
4637 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4638 modifier = NARROW;
4640 for (i = 0; i < nunits; ++i)
4641 sel[i] = i < gather_off_nunits
4642 ? i : i + nunits - gather_off_nunits;
4644 perm_mask = vect_gen_perm_mask (vectype, sel);
4645 gcc_assert (perm_mask != NULL_TREE);
4646 ncopies *= 2;
4648 else
4649 gcc_unreachable ();
4651 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4652 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4653 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4654 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4655 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4656 scaletype = TREE_VALUE (arglist);
4657 gcc_checking_assert (types_compatible_p (srctype, rettype)
4658 && types_compatible_p (srctype, masktype));
4660 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4662 ptr = fold_convert (ptrtype, gather_base);
4663 if (!is_gimple_min_invariant (ptr))
4665 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4666 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4667 gcc_assert (!new_bb);
4670 /* Currently we support only unconditional gather loads,
4671 so mask should be all ones. */
4672 if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4673 mask = build_int_cst (TREE_TYPE (masktype), -1);
4674 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4676 REAL_VALUE_TYPE r;
4677 long tmp[6];
4678 for (j = 0; j < 6; ++j)
4679 tmp[j] = -1;
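/* real_from_target reconstructs the floating-point value whose target representation is the all-ones bit pattern in TMP, so the broadcast mask built below has every bit set. */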
4680 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4681 mask = build_real (TREE_TYPE (masktype), r);
4683 else
4684 gcc_unreachable ();
4685 mask = build_vector_from_val (masktype, mask);
4686 mask = vect_init_vector (stmt, mask, masktype, NULL);
4688 scale = build_int_cst (scaletype, gather_scale);
4690 prev_stmt_info = NULL;
4691 for (j = 0; j < ncopies; ++j)
4693 if (modifier == WIDEN && (j & 1))
4694 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4695 perm_mask, stmt, gsi);
4696 else if (j == 0)
4697 op = vec_oprnd0
4698 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4699 else
4700 op = vec_oprnd0
4701 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4703 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4705 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4706 == TYPE_VECTOR_SUBPARTS (idxtype));
4707 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4708 var = make_ssa_name (var, NULL);
4709 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4710 new_stmt
4711 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4712 op, NULL_TREE);
4713 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4714 op = var;
4717 new_stmt
4718 = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4720 if (!useless_type_conversion_p (vectype, rettype))
4722 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4723 == TYPE_VECTOR_SUBPARTS (rettype));
4724 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4725 op = make_ssa_name (var, new_stmt);
4726 gimple_call_set_lhs (new_stmt, op);
4727 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4728 var = make_ssa_name (vec_dest, NULL);
4729 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4730 new_stmt
4731 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4732 NULL_TREE);
4734 else
4736 var = make_ssa_name (vec_dest, new_stmt);
4737 gimple_call_set_lhs (new_stmt, var);
4740 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4742 if (modifier == NARROW)
4744 if ((j & 1) == 0)
4746 prev_res = var;
4747 continue;
4749 var = permute_vec_elements (prev_res, var,
4750 perm_mask, stmt, gsi);
4751 new_stmt = SSA_NAME_DEF_STMT (var);
4754 if (prev_stmt_info == NULL)
4755 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4756 else
4757 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4758 prev_stmt_info = vinfo_for_stmt (new_stmt);
4760 return true;
4762 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4764 gimple_stmt_iterator incr_gsi;
4765 bool insert_after;
4766 gimple incr;
4767 tree offvar;
4768 tree ivstep;
4769 tree running_off;
4770 vec<constructor_elt, va_gc> *v = NULL;
4771 gimple_seq stmts = NULL;
4772 tree stride_base, stride_step, alias_off;
4774 gcc_assert (!nested_in_vect_loop);
4776 stride_base
4777 = fold_build_pointer_plus
4778 (unshare_expr (DR_BASE_ADDRESS (dr)),
4779 size_binop (PLUS_EXPR,
4780 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
4781 convert_to_ptrofftype (DR_INIT (dr))));
4782 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
4784 /* For a load with loop-invariant (but other than power-of-2)
4785 stride (i.e. not a grouped access) like so:
4787 for (i = 0; i < n; i += stride)
4788 ... = array[i];
4790 we generate a new induction variable and new accesses to
4791 form a new vector (or vectors, depending on ncopies):
4793 for (j = 0; ; j += VF*stride)
4794 tmp1 = array[j];
4795 tmp2 = array[j + stride];
4797 vectemp = {tmp1, tmp2, ...}
4800 ivstep = stride_step;
4801 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4802 build_int_cst (TREE_TYPE (ivstep), vf));
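/* IVSTEP is the byte step of the scalar access times the vectorization factor, so one iteration of the vectorized loop advances the induction variable past the VF scalar accesses it replaces. */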
4804 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4806 create_iv (stride_base, ivstep, NULL,
4807 loop, &incr_gsi, insert_after,
4808 &offvar, NULL);
4809 incr = gsi_stmt (incr_gsi);
4810 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4812 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4813 if (stmts)
4814 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4816 prev_stmt_info = NULL;
4817 running_off = offvar;
4818 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
4819 for (j = 0; j < ncopies; j++)
4821 tree vec_inv;
4823 vec_alloc (v, nunits);
4824 for (i = 0; i < nunits; i++)
4826 tree newref, newoff;
4827 gimple incr;
4828 newref = build2 (MEM_REF, TREE_TYPE (vectype),
4829 running_off, alias_off);
4831 newref = force_gimple_operand_gsi (gsi, newref, true,
4832 NULL_TREE, true,
4833 GSI_SAME_STMT);
4834 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
4835 newoff = copy_ssa_name (running_off, NULL);
4836 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4837 running_off, stride_step);
4838 vect_finish_stmt_generation (stmt, incr, gsi);
4840 running_off = newoff;
4843 vec_inv = build_constructor (vectype, v);
4844 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4845 new_stmt = SSA_NAME_DEF_STMT (new_temp);
4847 if (j == 0)
4848 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4849 else
4850 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4851 prev_stmt_info = vinfo_for_stmt (new_stmt);
4853 return true;
4856 if (grouped_load)
4858 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4859 if (slp
4860 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
4861 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
4862 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4864 /* Check if the chain of loads is already vectorized. */
4865 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
4866 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
4867 ??? But we can only do so if there is exactly one
4868 as we have no way to get at the rest. Leave the CSE
4869 opportunity alone.
4870 ??? With the group load eventually participating
4871 in multiple different permutations (having multiple
4872 slp nodes which refer to the same group) the CSE
4873 is even wrong code. See PR56270. */
4874 && !slp)
4876 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4877 return true;
4879 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4880 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4882 /* VEC_NUM is the number of vect stmts to be created for this group. */
4883 if (slp)
4885 grouped_load = false;
4886 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4887 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
4888 slp_perm = true;
4889 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
4891 else
4893 vec_num = group_size;
4894 group_gap = 0;
4897 else
4899 first_stmt = stmt;
4900 first_dr = dr;
4901 group_size = vec_num = 1;
4902 group_gap = 0;
4905 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4906 gcc_assert (alignment_support_scheme);
4907 /* Targets with load-lane instructions must not require explicit
4908 realignment. */
4909 gcc_assert (!load_lanes_p
4910 || alignment_support_scheme == dr_aligned
4911 || alignment_support_scheme == dr_unaligned_supported);
4913 /* In case the vectorization factor (VF) is bigger than the number
4914 of elements that we can fit in a vectype (nunits), we have to generate
4915 more than one vector stmt - i.e - we need to "unroll" the
4916 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4917 from one copy of the vector stmt to the next, in the field
4918 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4919 stages to find the correct vector defs to be used when vectorizing
4920 stmts that use the defs of the current stmt. The example below
4921 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4922 need to create 4 vectorized stmts):
4924 before vectorization:
4925 RELATED_STMT VEC_STMT
4926 S1: x = memref - -
4927 S2: z = x + 1 - -
4929 step 1: vectorize stmt S1:
4930 We first create the vector stmt VS1_0, and, as usual, record a
4931 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4932 Next, we create the vector stmt VS1_1, and record a pointer to
4933 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4934 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4935 stmts and pointers:
4936 RELATED_STMT VEC_STMT
4937 VS1_0: vx0 = memref0 VS1_1 -
4938 VS1_1: vx1 = memref1 VS1_2 -
4939 VS1_2: vx2 = memref2 VS1_3 -
4940 VS1_3: vx3 = memref3 - -
4941 S1: x = load - VS1_0
4942 S2: z = x + 1 - -
4944 See the documentation of vect_get_vec_def_for_stmt_copy for how the
4945 information we recorded in the RELATED_STMT field is used to vectorize
4946 stmt S2. */
4948 /* In case of interleaving (non-unit grouped access):
4950 S1: x2 = &base + 2
4951 S2: x0 = &base
4952 S3: x1 = &base + 1
4953 S4: x3 = &base + 3
4955 Vectorized loads are created in the order of memory accesses
4956 starting from the access of the first stmt of the chain:
4958 VS1: vx0 = &base
4959 VS2: vx1 = &base + vec_size*1
4960 VS3: vx3 = &base + vec_size*2
4961 VS4: vx4 = &base + vec_size*3
4963 Then permutation statements are generated:
4965 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4966 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4969 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4970 (the order of the data-refs in the output of vect_permute_load_chain
4971 corresponds to the order of scalar stmts in the interleaving chain - see
4972 the documentation of vect_permute_load_chain()).
4973 The generation of permutation stmts and recording them in
4974 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4976 In case of both multiple types and interleaving, the vector loads and
4977 permutation stmts above are created for every copy. The result vector
4978 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4979 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4981 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4982 on a target that supports unaligned accesses (dr_unaligned_supported)
4983 we generate the following code:
4984 p = initial_addr;
4985 indx = 0;
4986 loop {
4987 p = p + indx * vectype_size;
4988 vec_dest = *(p);
4989 indx = indx + 1;
4992 Otherwise, the data reference is potentially unaligned on a target that
4993 does not support unaligned accesses (dr_explicit_realign_optimized) -
4994 then generate the following code, in which the data in each iteration is
4995 obtained by two vector loads, one from the previous iteration, and one
4996 from the current iteration:
4997 p1 = initial_addr;
4998 msq_init = *(floor(p1))
4999 p2 = initial_addr + VS - 1;
5000 realignment_token = call target_builtin;
5001 indx = 0;
5002 loop {
5003 p2 = p2 + indx * vectype_size
5004 lsq = *(floor(p2))
5005 vec_dest = realign_load (msq, lsq, realignment_token)
5006 indx = indx + 1;
5007 msq = lsq;
5008 } */
5010 /* If the misalignment remains the same throughout the execution of the
5011 loop, we can create the init_addr and permutation mask at the loop
5012 preheader. Otherwise, it needs to be created inside the loop.
5013 This can only occur when vectorizing memory accesses in the inner-loop
5014 nested within an outer-loop that is being vectorized. */
5016 if (nested_in_vect_loop
5017 && (TREE_INT_CST_LOW (DR_STEP (dr))
5018 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
5020 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
5021 compute_in_loop = true;
5024 if ((alignment_support_scheme == dr_explicit_realign_optimized
5025 || alignment_support_scheme == dr_explicit_realign)
5026 && !compute_in_loop)
5028 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
5029 alignment_support_scheme, NULL_TREE,
5030 &at_loop);
5031 if (alignment_support_scheme == dr_explicit_realign_optimized)
5033 phi = SSA_NAME_DEF_STMT (msq);
5034 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5037 else
5038 at_loop = loop;
5040 if (negative)
5041 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5043 if (load_lanes_p)
5044 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5045 else
5046 aggr_type = vectype;
5048 prev_stmt_info = NULL;
5049 for (j = 0; j < ncopies; j++)
5051 /* 1. Create the vector or array pointer update chain. */
5052 if (j == 0)
5054 bool simd_lane_access_p
5055 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5056 if (simd_lane_access_p
5057 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5058 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5059 && integer_zerop (DR_OFFSET (first_dr))
5060 && integer_zerop (DR_INIT (first_dr))
5061 && alias_sets_conflict_p (get_alias_set (aggr_type),
5062 get_alias_set (DR_REF (first_dr)))
5063 && (alignment_support_scheme == dr_aligned
5064 || alignment_support_scheme == dr_unaligned_supported))
5066 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5067 dataref_offset = build_int_cst (reference_alias_ptr_type
5068 (DR_REF (first_dr)), 0);
5069 inv_p = false;
5071 else
5072 dataref_ptr
5073 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
5074 offset, &dummy, gsi, &ptr_incr,
5075 simd_lane_access_p, &inv_p);
5077 else if (dataref_offset)
5078 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
5079 TYPE_SIZE_UNIT (aggr_type));
5080 else
5081 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5082 TYPE_SIZE_UNIT (aggr_type));
5084 if (grouped_load || slp_perm)
5085 dr_chain.create (vec_num);
5087 if (load_lanes_p)
5089 tree vec_array;
5091 vec_array = create_vector_array (vectype, vec_num);
5093 /* Emit:
5094 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
5095 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5096 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
5097 gimple_call_set_lhs (new_stmt, vec_array);
5098 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5100 /* Extract each vector into an SSA_NAME. */
5101 for (i = 0; i < vec_num; i++)
5103 new_temp = read_vector_array (stmt, gsi, scalar_dest,
5104 vec_array, i);
5105 dr_chain.quick_push (new_temp);
5108 /* Record the mapping between SSA_NAMEs and statements. */
5109 vect_record_grouped_load_vectors (stmt, dr_chain);
5111 else
5113 for (i = 0; i < vec_num; i++)
5115 if (i > 0)
5116 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5117 stmt, NULL_TREE);
5119 /* 2. Create the vector-load in the loop. */
5120 switch (alignment_support_scheme)
5122 case dr_aligned:
5123 case dr_unaligned_supported:
5125 unsigned int align, misalign;
5127 data_ref
5128 = build2 (MEM_REF, vectype, dataref_ptr,
5129 dataref_offset
5130 ? dataref_offset
5131 : build_int_cst (reference_alias_ptr_type
5132 (DR_REF (first_dr)), 0));
5133 align = TYPE_ALIGN_UNIT (vectype);
5134 if (alignment_support_scheme == dr_aligned)
5136 gcc_assert (aligned_access_p (first_dr));
5137 misalign = 0;
5139 else if (DR_MISALIGNMENT (first_dr) == -1)
5141 TREE_TYPE (data_ref)
5142 = build_aligned_type (TREE_TYPE (data_ref),
5143 TYPE_ALIGN (elem_type));
5144 align = TYPE_ALIGN_UNIT (elem_type);
5145 misalign = 0;
5147 else
5149 TREE_TYPE (data_ref)
5150 = build_aligned_type (TREE_TYPE (data_ref),
5151 TYPE_ALIGN (elem_type));
5152 misalign = DR_MISALIGNMENT (first_dr);
5154 if (dataref_offset == NULL_TREE)
5155 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5156 align, misalign);
5157 break;
5159 case dr_explicit_realign:
5161 tree ptr, bump;
5162 tree vs_minus_1;
5164 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5166 if (compute_in_loop)
5167 msq = vect_setup_realignment (first_stmt, gsi,
5168 &realignment_token,
5169 dr_explicit_realign,
5170 dataref_ptr, NULL);
5172 ptr = copy_ssa_name (dataref_ptr, NULL);
5173 new_stmt = gimple_build_assign_with_ops
5174 (BIT_AND_EXPR, ptr, dataref_ptr,
5175 build_int_cst
5176 (TREE_TYPE (dataref_ptr),
5177 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5178 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5179 data_ref
5180 = build2 (MEM_REF, vectype, ptr,
5181 build_int_cst (reference_alias_ptr_type
5182 (DR_REF (first_dr)), 0));
5183 vec_dest = vect_create_destination_var (scalar_dest,
5184 vectype);
5185 new_stmt = gimple_build_assign (vec_dest, data_ref);
5186 new_temp = make_ssa_name (vec_dest, new_stmt);
5187 gimple_assign_set_lhs (new_stmt, new_temp);
5188 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5189 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5190 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5191 msq = new_temp;
5193 bump = size_binop (MULT_EXPR, vs_minus_1,
5194 TYPE_SIZE_UNIT (elem_type));
5195 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5196 new_stmt = gimple_build_assign_with_ops
5197 (BIT_AND_EXPR, NULL_TREE, ptr,
5198 build_int_cst
5199 (TREE_TYPE (ptr),
5200 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5201 ptr = copy_ssa_name (dataref_ptr, new_stmt);
5202 gimple_assign_set_lhs (new_stmt, ptr);
5203 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5204 data_ref
5205 = build2 (MEM_REF, vectype, ptr,
5206 build_int_cst (reference_alias_ptr_type
5207 (DR_REF (first_dr)), 0));
5208 break;
5210 case dr_explicit_realign_optimized:
5211 new_temp = copy_ssa_name (dataref_ptr, NULL);
5212 new_stmt = gimple_build_assign_with_ops
5213 (BIT_AND_EXPR, new_temp, dataref_ptr,
5214 build_int_cst
5215 (TREE_TYPE (dataref_ptr),
5216 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5217 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5218 data_ref
5219 = build2 (MEM_REF, vectype, new_temp,
5220 build_int_cst (reference_alias_ptr_type
5221 (DR_REF (first_dr)), 0));
5222 break;
5223 default:
5224 gcc_unreachable ();
5226 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5227 new_stmt = gimple_build_assign (vec_dest, data_ref);
5228 new_temp = make_ssa_name (vec_dest, new_stmt);
5229 gimple_assign_set_lhs (new_stmt, new_temp);
5230 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5232 /* 3. Handle explicit realignment if necessary/supported.
5233 Create in loop:
5234 vec_dest = realign_load (msq, lsq, realignment_token) */
5235 if (alignment_support_scheme == dr_explicit_realign_optimized
5236 || alignment_support_scheme == dr_explicit_realign)
5238 lsq = gimple_assign_lhs (new_stmt);
5239 if (!realignment_token)
5240 realignment_token = dataref_ptr;
5241 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5242 new_stmt
5243 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5244 vec_dest, msq, lsq,
5245 realignment_token);
5246 new_temp = make_ssa_name (vec_dest, new_stmt);
5247 gimple_assign_set_lhs (new_stmt, new_temp);
5248 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5250 if (alignment_support_scheme == dr_explicit_realign_optimized)
5252 gcc_assert (phi);
5253 if (i == vec_num - 1 && j == ncopies - 1)
5254 add_phi_arg (phi, lsq,
5255 loop_latch_edge (containing_loop),
5256 UNKNOWN_LOCATION);
5257 msq = lsq;
5261 /* 4. Handle invariant-load. */
5262 if (inv_p && !bb_vinfo)
5264 gimple_stmt_iterator gsi2 = *gsi;
5265 gcc_assert (!grouped_load);
5266 gsi_next (&gsi2);
5267 new_temp = vect_init_vector (stmt, scalar_dest,
5268 vectype, &gsi2);
5269 new_stmt = SSA_NAME_DEF_STMT (new_temp);
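/* For a negative step the data-ref pointer was biased by -(nunits-1) elements (see OFFSET above), so the elements were loaded in ascending address order; reverse them to recover the original access order. */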
5272 if (negative)
5274 tree perm_mask = perm_mask_for_reverse (vectype);
5275 new_temp = permute_vec_elements (new_temp, new_temp,
5276 perm_mask, stmt, gsi);
5277 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5280 /* Collect vector loads and later create their permutation in
5281 vect_transform_grouped_load (). */
5282 if (grouped_load || slp_perm)
5283 dr_chain.quick_push (new_temp);
5285 /* Store vector loads in the corresponding SLP_NODE. */
5286 if (slp && !slp_perm)
5287 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5289 /* Bump the vector pointer to account for a gap. */
5290 if (slp && group_gap != 0)
5292 tree bump = size_binop (MULT_EXPR,
5293 TYPE_SIZE_UNIT (elem_type),
5294 size_int (group_gap));
5295 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5296 stmt, bump);
5300 if (slp && !slp_perm)
5301 continue;
5303 if (slp_perm)
5305 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
5306 slp_node_instance, false))
5308 dr_chain.release ();
5309 return false;
5312 else
5314 if (grouped_load)
5316 if (!load_lanes_p)
5317 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
5318 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5320 else
5322 if (j == 0)
5323 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5324 else
5325 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5326 prev_stmt_info = vinfo_for_stmt (new_stmt);
5329 dr_chain.release ();
5332 return true;
5335 /* Function vect_is_simple_cond.
5337 Input:
5338 LOOP - the loop that is being vectorized.
5339 COND - Condition that is checked for simple use.
5341 Output:
5342 *COMP_VECTYPE - the vector type for the comparison.
5344 Returns whether a COND can be vectorized. Checks whether
5345 condition operands are supportable using vect_is_simple_use. */
5347 static bool
5348 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5349 bb_vec_info bb_vinfo, tree *comp_vectype)
5351 tree lhs, rhs;
5352 tree def;
5353 enum vect_def_type dt;
5354 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5356 if (!COMPARISON_CLASS_P (cond))
5357 return false;
5359 lhs = TREE_OPERAND (cond, 0);
5360 rhs = TREE_OPERAND (cond, 1);
5362 if (TREE_CODE (lhs) == SSA_NAME)
5364 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5365 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5366 &lhs_def_stmt, &def, &dt, &vectype1))
5367 return false;
5369 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5370 && TREE_CODE (lhs) != FIXED_CST)
5371 return false;
5373 if (TREE_CODE (rhs) == SSA_NAME)
5375 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5376 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5377 &rhs_def_stmt, &def, &dt, &vectype2))
5378 return false;
5380 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5381 && TREE_CODE (rhs) != FIXED_CST)
5382 return false;
5384 *comp_vectype = vectype1 ? vectype1 : vectype2;
5385 return true;
5388 /* vectorizable_condition.
5390 Check if STMT is a conditional modify expression that can be vectorized.
5391 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5392 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5393 at GSI.
5395 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
5396 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
5397 the else clause if it is 2).
5399 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5401 bool
5402 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5403 gimple *vec_stmt, tree reduc_def, int reduc_index,
5404 slp_tree slp_node)
5406 tree scalar_dest = NULL_TREE;
5407 tree vec_dest = NULL_TREE;
5408 tree cond_expr, then_clause, else_clause;
5409 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5410 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5411 tree comp_vectype = NULL_TREE;
5412 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5413 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5414 tree vec_compare, vec_cond_expr;
5415 tree new_temp;
5416 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5417 tree def;
5418 enum vect_def_type dt, dts[4];
5419 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5420 int ncopies;
5421 enum tree_code code;
5422 stmt_vec_info prev_stmt_info = NULL;
5423 int i, j;
5424 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5425 vec<tree> vec_oprnds0 = vNULL;
5426 vec<tree> vec_oprnds1 = vNULL;
5427 vec<tree> vec_oprnds2 = vNULL;
5428 vec<tree> vec_oprnds3 = vNULL;
5429 tree vec_cmp_type;
5431 if (slp_node || PURE_SLP_STMT (stmt_info))
5432 ncopies = 1;
5433 else
5434 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5436 gcc_assert (ncopies >= 1);
5437 if (reduc_index && ncopies > 1)
5438 return false; /* FORNOW */
5440 if (reduc_index && STMT_SLP_TYPE (stmt_info))
5441 return false;
5443 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5444 return false;
5446 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5447 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5448 && reduc_def))
5449 return false;
5451 /* FORNOW: not yet supported. */
5452 if (STMT_VINFO_LIVE_P (stmt_info))
5454 if (dump_enabled_p ())
5455 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5456 "value used after loop.\n");
5457 return false;
5460 /* Is vectorizable conditional operation? */
5461 if (!is_gimple_assign (stmt))
5462 return false;
5464 code = gimple_assign_rhs_code (stmt);
5466 if (code != COND_EXPR)
5467 return false;
5469 cond_expr = gimple_assign_rhs1 (stmt);
5470 then_clause = gimple_assign_rhs2 (stmt);
5471 else_clause = gimple_assign_rhs3 (stmt);
5473 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5474 &comp_vectype)
5475 || !comp_vectype)
5476 return false;
5478 if (TREE_CODE (then_clause) == SSA_NAME)
5480 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5481 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5482 &then_def_stmt, &def, &dt))
5483 return false;
5485 else if (TREE_CODE (then_clause) != INTEGER_CST
5486 && TREE_CODE (then_clause) != REAL_CST
5487 && TREE_CODE (then_clause) != FIXED_CST)
5488 return false;
5490 if (TREE_CODE (else_clause) == SSA_NAME)
5492 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5493 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5494 &else_def_stmt, &def, &dt))
5495 return false;
5497 else if (TREE_CODE (else_clause) != INTEGER_CST
5498 && TREE_CODE (else_clause) != REAL_CST
5499 && TREE_CODE (else_clause) != FIXED_CST)
5500 return false;
5502 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
5503 /* The result of a vector comparison should be of signed integer type. */
5504 tree cmp_type = build_nonstandard_integer_type (prec, 0);
5505 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
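/* E.g. with a V4SF VECTYPE (and 128-bit vectors) CMP_TYPE is a signed 32-bit integer and VEC_CMP_TYPE is the corresponding V4SI-like vector type used as the VEC_COND_EXPR mask. */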
5506 if (vec_cmp_type == NULL_TREE)
5507 return false;
5509 if (!vec_stmt)
5511 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5512 return expand_vec_cond_expr_p (vectype, comp_vectype);
5515 /* Transform. */
5517 if (!slp_node)
5519 vec_oprnds0.create (1);
5520 vec_oprnds1.create (1);
5521 vec_oprnds2.create (1);
5522 vec_oprnds3.create (1);
5525 /* Handle def. */
5526 scalar_dest = gimple_assign_lhs (stmt);
5527 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5529 /* Handle cond expr. */
5530 for (j = 0; j < ncopies; j++)
5532 gimple new_stmt = NULL;
5533 if (j == 0)
5535 if (slp_node)
5537 stack_vec<tree, 4> ops;
5538 stack_vec<vec<tree>, 4> vec_defs;
5540 ops.safe_push (TREE_OPERAND (cond_expr, 0));
5541 ops.safe_push (TREE_OPERAND (cond_expr, 1));
5542 ops.safe_push (then_clause);
5543 ops.safe_push (else_clause);
5544 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5545 vec_oprnds3 = vec_defs.pop ();
5546 vec_oprnds2 = vec_defs.pop ();
5547 vec_oprnds1 = vec_defs.pop ();
5548 vec_oprnds0 = vec_defs.pop ();
5550 ops.release ();
5551 vec_defs.release ();
5553 else
5555 gimple gtemp;
5556 vec_cond_lhs =
5557 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5558 stmt, NULL);
5559 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5560 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5562 vec_cond_rhs =
5563 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5564 stmt, NULL);
5565 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5566 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5567 if (reduc_index == 1)
5568 vec_then_clause = reduc_def;
5569 else
5571 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5572 stmt, NULL);
5573 vect_is_simple_use (then_clause, stmt, loop_vinfo,
5574 NULL, &gtemp, &def, &dts[2]);
5576 if (reduc_index == 2)
5577 vec_else_clause = reduc_def;
5578 else
5580 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5581 stmt, NULL);
5582 vect_is_simple_use (else_clause, stmt, loop_vinfo,
5583 NULL, &gtemp, &def, &dts[3]);
5587 else
5589 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5590 vec_oprnds0.pop ());
5591 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5592 vec_oprnds1.pop ());
5593 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5594 vec_oprnds2.pop ());
5595 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5596 vec_oprnds3.pop ());
5599 if (!slp_node)
5601 vec_oprnds0.quick_push (vec_cond_lhs);
5602 vec_oprnds1.quick_push (vec_cond_rhs);
5603 vec_oprnds2.quick_push (vec_then_clause);
5604 vec_oprnds3.quick_push (vec_else_clause);
5607 /* Arguments are ready. Create the new vector stmt. */
5608 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
5610 vec_cond_rhs = vec_oprnds1[i];
5611 vec_then_clause = vec_oprnds2[i];
5612 vec_else_clause = vec_oprnds3[i];
5614 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
5615 vec_cond_lhs, vec_cond_rhs);
5616 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5617 vec_compare, vec_then_clause, vec_else_clause);
5619 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5620 new_temp = make_ssa_name (vec_dest, new_stmt);
5621 gimple_assign_set_lhs (new_stmt, new_temp);
5622 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5623 if (slp_node)
5624 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5627 if (slp_node)
5628 continue;
5630 if (j == 0)
5631 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5632 else
5633 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5635 prev_stmt_info = vinfo_for_stmt (new_stmt);
5638 vec_oprnds0.release ();
5639 vec_oprnds1.release ();
5640 vec_oprnds2.release ();
5641 vec_oprnds3.release ();
5643 return true;
5647 /* Make sure the statement is vectorizable. */
5649 bool
5650 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5652 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5653 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5654 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5655 bool ok;
5656 tree scalar_type, vectype;
5657 gimple pattern_stmt;
5658 gimple_seq pattern_def_seq;
5660 if (dump_enabled_p ())
5662 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5663 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5664 dump_printf (MSG_NOTE, "\n");
5667 if (gimple_has_volatile_ops (stmt))
5669 if (dump_enabled_p ())
5670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5671 "not vectorized: stmt has volatile operands\n");
5673 return false;
5676 /* Skip stmts that do not need to be vectorized. In loops this is expected
5677 to include:
5678 - the COND_EXPR which is the loop exit condition
5679 - any LABEL_EXPRs in the loop
5680 - computations that are used only for array indexing or loop control.
5681 In basic blocks we only analyze statements that are a part of some SLP
5682 instance; therefore, all the statements are relevant.
5684 The pattern statement needs to be analyzed instead of the original statement
5685 if the original statement is not relevant. Otherwise, we analyze both
5686 statements. In basic blocks we are called from some SLP instance
5687 traversal; don't analyze pattern stmts instead, because the pattern stmts
5688 will already be part of the SLP instance. */
5690 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5691 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5692 && !STMT_VINFO_LIVE_P (stmt_info))
5694 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5695 && pattern_stmt
5696 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5697 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5699 /* Analyze PATTERN_STMT instead of the original stmt. */
5700 stmt = pattern_stmt;
5701 stmt_info = vinfo_for_stmt (pattern_stmt);
5702 if (dump_enabled_p ())
5704 dump_printf_loc (MSG_NOTE, vect_location,
5705 "==> examining pattern statement: ");
5706 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5707 dump_printf (MSG_NOTE, "\n");
5710 else
5712 if (dump_enabled_p ())
5713 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
5715 return true;
5718 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5719 && node == NULL
5720 && pattern_stmt
5721 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5722 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5724 /* Analyze PATTERN_STMT too. */
5725 if (dump_enabled_p ())
5727 dump_printf_loc (MSG_NOTE, vect_location,
5728 "==> examining pattern statement: ");
5729 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5730 dump_printf (MSG_NOTE, "\n");
5733 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5734 return false;
5737 if (is_pattern_stmt_p (stmt_info)
5738 && node == NULL
5739 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5741 gimple_stmt_iterator si;
5743 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5745 gimple pattern_def_stmt = gsi_stmt (si);
5746 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5747 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5749 /* Analyze def stmt of STMT if it's a pattern stmt. */
5750 if (dump_enabled_p ())
5752 dump_printf_loc (MSG_NOTE, vect_location,
5753 "==> examining pattern def statement: ");
5754 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
5755 dump_printf (MSG_NOTE, "\n");
5758 if (!vect_analyze_stmt (pattern_def_stmt,
5759 need_to_vectorize, node))
5760 return false;
5765 switch (STMT_VINFO_DEF_TYPE (stmt_info))
5767 case vect_internal_def:
5768 break;
5770 case vect_reduction_def:
5771 case vect_nested_cycle:
5772 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5773 || relevance == vect_used_in_outer_by_reduction
5774 || relevance == vect_unused_in_scope));
5775 break;
5777 case vect_induction_def:
5778 case vect_constant_def:
5779 case vect_external_def:
5780 case vect_unknown_def_type:
5781 default:
5782 gcc_unreachable ();
5785 if (bb_vinfo)
5787 gcc_assert (PURE_SLP_STMT (stmt_info));
5789 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5790 if (dump_enabled_p ())
5792 dump_printf_loc (MSG_NOTE, vect_location,
5793 "get vectype for scalar type: ");
5794 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
5795 dump_printf (MSG_NOTE, "\n");
5798 vectype = get_vectype_for_scalar_type (scalar_type);
5799 if (!vectype)
5801 if (dump_enabled_p ())
5803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5804 "not SLPed: unsupported data-type ");
5805 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5806 scalar_type);
5807 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5809 return false;
5812 if (dump_enabled_p ())
5814 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
5815 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
5816 dump_printf (MSG_NOTE, "\n");
5819 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5822 if (STMT_VINFO_RELEVANT_P (stmt_info))
5824 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5825 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5826 *need_to_vectorize = true;
5829 ok = true;
5830 if (!bb_vinfo
5831 && (STMT_VINFO_RELEVANT_P (stmt_info)
5832 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5833 ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5834 || vectorizable_shift (stmt, NULL, NULL, NULL)
5835 || vectorizable_operation (stmt, NULL, NULL, NULL)
5836 || vectorizable_assignment (stmt, NULL, NULL, NULL)
5837 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5838 || vectorizable_call (stmt, NULL, NULL, NULL)
5839 || vectorizable_store (stmt, NULL, NULL, NULL)
5840 || vectorizable_reduction (stmt, NULL, NULL, NULL)
5841 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5842 else
5844 if (bb_vinfo)
5845 ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5846 || vectorizable_shift (stmt, NULL, NULL, node)
5847 || vectorizable_operation (stmt, NULL, NULL, node)
5848 || vectorizable_assignment (stmt, NULL, NULL, node)
5849 || vectorizable_load (stmt, NULL, NULL, node, NULL)
5850 || vectorizable_call (stmt, NULL, NULL, node)
5851 || vectorizable_store (stmt, NULL, NULL, node)
5852 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5855 if (!ok)
5857 if (dump_enabled_p ())
5859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5860 "not vectorized: relevant stmt not ");
5861 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5862 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5863 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5866 return false;
5869 if (bb_vinfo)
5870 return true;
5872 /* Stmts that are (also) "live" (i.e. that are used outside of the loop)
5873 need extra handling, except for vectorizable reductions. */
5874 if (STMT_VINFO_LIVE_P (stmt_info)
5875 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5876 ok = vectorizable_live_operation (stmt, NULL, NULL);
5878 if (!ok)
5880 if (dump_enabled_p ())
5882 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5883 "not vectorized: live stmt not ");
5884 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5885 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5886 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5889 return false;
5892 return true;
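/* Note that the vectorizable_* checks above are called with NULL for the
   GSI and VEC_STMT arguments, i.e. in analysis-only mode; the routine
   that recognizes the statement records a STMT_VINFO_TYPE, which
   vect_transform_stmt below dispatches on when the actual vector
   statements are generated.  */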
5896 /* Function vect_transform_stmt.
5898 Create a vectorized stmt to replace STMT, and insert it at GSI. */
5900 bool
5901 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5902 bool *grouped_store, slp_tree slp_node,
5903 slp_instance slp_node_instance)
5905 bool is_store = false;
5906 gimple vec_stmt = NULL;
5907 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5908 bool done;
5910 switch (STMT_VINFO_TYPE (stmt_info))
5912 case type_demotion_vec_info_type:
5913 case type_promotion_vec_info_type:
5914 case type_conversion_vec_info_type:
5915 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5916 gcc_assert (done);
5917 break;
5919 case induc_vec_info_type:
5920 gcc_assert (!slp_node);
5921 done = vectorizable_induction (stmt, gsi, &vec_stmt);
5922 gcc_assert (done);
5923 break;
5925 case shift_vec_info_type:
5926 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5927 gcc_assert (done);
5928 break;
5930 case op_vec_info_type:
5931 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5932 gcc_assert (done);
5933 break;
5935 case assignment_vec_info_type:
5936 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5937 gcc_assert (done);
5938 break;
5940 case load_vec_info_type:
5941 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5942 slp_node_instance);
5943 gcc_assert (done);
5944 break;
5946 case store_vec_info_type:
5947 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5948 gcc_assert (done);
5949 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5951 /* In case of interleaving, the whole chain is vectorized when the
5952 last store in the chain is reached. Store stmts before the last
5953 one are skipped, and their vec_stmt_info shouldn't be freed
5954 meanwhile. */
5955 *grouped_store = true;
5956 if (STMT_VINFO_VEC_STMT (stmt_info))
5957 is_store = true;
5959 else
5960 is_store = true;
5961 break;
5963 case condition_vec_info_type:
5964 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5965 gcc_assert (done);
5966 break;
5968 case call_vec_info_type:
5969 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5970 stmt = gsi_stmt (*gsi);
5971 break;
5973 case reduc_vec_info_type:
5974 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5975 gcc_assert (done);
5976 break;
5978 default:
5979 if (!STMT_VINFO_LIVE_P (stmt_info))
5981 if (dump_enabled_p ())
5982 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5983 "stmt not supported.\n");
5984 gcc_unreachable ();
5988 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5989 is being vectorized, but outside the immediately enclosing loop. */
5990 if (vec_stmt
5991 && STMT_VINFO_LOOP_VINFO (stmt_info)
5992 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5993 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5994 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5995 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5996 || STMT_VINFO_RELEVANT (stmt_info) ==
5997 vect_used_in_outer_by_reduction))
5999 struct loop *innerloop = LOOP_VINFO_LOOP (
6000 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
6001 imm_use_iterator imm_iter;
6002 use_operand_p use_p;
6003 tree scalar_dest;
6004 gimple exit_phi;
6006 if (dump_enabled_p ())
6007 dump_printf_loc (MSG_NOTE, vect_location,
6008 "Record the vdef for outer-loop vectorization.\n");
6010 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
6011 (to be used when vectorizing outer-loop stmts that use the DEF of
6012 STMT). */
6013 if (gimple_code (stmt) == GIMPLE_PHI)
6014 scalar_dest = PHI_RESULT (stmt);
6015 else
6016 scalar_dest = gimple_assign_lhs (stmt);
6018 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
6020 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
6022 exit_phi = USE_STMT (use_p);
6023 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
6028 /* Handle stmts whose DEF is used outside the loop-nest that is
6029 being vectorized. */
6030 if (STMT_VINFO_LIVE_P (stmt_info)
6031 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
6033 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
6034 gcc_assert (done);
6037 if (vec_stmt)
6038 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
6040 return is_store;
6044 /* Remove a group of stores (for SLP or interleaving) and free their
6045 stmt_vec_info. */
6047 void
6048 vect_remove_stores (gimple first_stmt)
6050 gimple next = first_stmt;
6051 gimple tmp;
6052 gimple_stmt_iterator next_si;
6054 while (next)
6056 stmt_vec_info stmt_info = vinfo_for_stmt (next);
6058 tmp = GROUP_NEXT_ELEMENT (stmt_info);
6059 if (is_pattern_stmt_p (stmt_info))
6060 next = STMT_VINFO_RELATED_STMT (stmt_info);
6061 /* Free the attached stmt_vec_info and remove the stmt. */
6062 next_si = gsi_for_stmt (next);
6063 unlink_stmt_vdef (next);
6064 gsi_remove (&next_si, true);
6065 release_defs (next);
6066 free_stmt_vec_info (next);
6067 next = tmp;
6072 /* Function new_stmt_vec_info.
6074 Create and initialize a new stmt_vec_info struct for STMT. */
6076 stmt_vec_info
6077 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
6078 bb_vec_info bb_vinfo)
6080 stmt_vec_info res;
6081 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
6083 STMT_VINFO_TYPE (res) = undef_vec_info_type;
6084 STMT_VINFO_STMT (res) = stmt;
6085 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
6086 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
6087 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
6088 STMT_VINFO_LIVE_P (res) = false;
6089 STMT_VINFO_VECTYPE (res) = NULL;
6090 STMT_VINFO_VEC_STMT (res) = NULL;
6091 STMT_VINFO_VECTORIZABLE (res) = true;
6092 STMT_VINFO_IN_PATTERN_P (res) = false;
6093 STMT_VINFO_RELATED_STMT (res) = NULL;
6094 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
6095 STMT_VINFO_DATA_REF (res) = NULL;
6097 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
6098 STMT_VINFO_DR_OFFSET (res) = NULL;
6099 STMT_VINFO_DR_INIT (res) = NULL;
6100 STMT_VINFO_DR_STEP (res) = NULL;
6101 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
6103 if (gimple_code (stmt) == GIMPLE_PHI
6104 && is_loop_header_bb_p (gimple_bb (stmt)))
6105 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
6106 else
6107 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
6109 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
6110 STMT_SLP_TYPE (res) = loop_vect;
6111 GROUP_FIRST_ELEMENT (res) = NULL;
6112 GROUP_NEXT_ELEMENT (res) = NULL;
6113 GROUP_SIZE (res) = 0;
6114 GROUP_STORE_COUNT (res) = 0;
6115 GROUP_GAP (res) = 0;
6116 GROUP_SAME_DR_STMT (res) = NULL;
6118 return res;
6122 /* Create the stmt_vec_info vector. */
6124 void
6125 init_stmt_vec_info_vec (void)
6127 gcc_assert (!stmt_vec_info_vec.exists ());
6128 stmt_vec_info_vec.create (50);
6132 /* Free the stmt_vec_info vector. */
6134 void
6135 free_stmt_vec_info_vec (void)
6137 unsigned int i;
6138 vec_void_p info;
6139 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
6140 if (info != NULL)
6141 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
6142 gcc_assert (stmt_vec_info_vec.exists ());
6143 stmt_vec_info_vec.release ();
6147 /* Free stmt vectorization related info. */
6149 void
6150 free_stmt_vec_info (gimple stmt)
6152 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6154 if (!stmt_info)
6155 return;
6157 /* Check if this statement has a related "pattern stmt"
6158 (introduced by the vectorizer during the pattern recognition
6159 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
6160 too. */
6161 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6163 stmt_vec_info patt_info
6164 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6165 if (patt_info)
6167 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6168 if (seq)
6170 gimple_stmt_iterator si;
6171 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6172 free_stmt_vec_info (gsi_stmt (si));
6174 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6178 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6179 set_vinfo_for_stmt (stmt, NULL);
6180 free (stmt_info);
6184 /* Function get_vectype_for_scalar_type_and_size.
6186 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
6187 by the target. */
6189 static tree
6190 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
6192 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
6193 enum machine_mode simd_mode;
6194 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
6195 int nunits;
6196 tree vectype;
6198 if (nbytes == 0)
6199 return NULL_TREE;
6201 if (GET_MODE_CLASS (inner_mode) != MODE_INT
6202 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6203 return NULL_TREE;
6205 /* For vector types of elements whose mode precision doesn't
6206 match their type's precision, we use an element type of mode
6207 precision. The vectorization routines will have to make sure
6208 they support the proper result truncation/extension.
6209 We also make sure to build vector types with INTEGER_TYPE
6210 component type only. */
6211 if (INTEGRAL_TYPE_P (scalar_type)
6212 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6213 || TREE_CODE (scalar_type) != INTEGER_TYPE))
6214 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6215 TYPE_UNSIGNED (scalar_type));
6217 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6218 When the component mode passes the above test simply use a type
6219 corresponding to that mode. The theory is that any use that
6220 would cause problems with this will disable vectorization anyway. */
6221 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
6222 && !INTEGRAL_TYPE_P (scalar_type))
6223 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6225 /* We can't build a vector type of elements with alignment bigger than
6226 their size. */
6227 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
6228 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
6229 TYPE_UNSIGNED (scalar_type));
6231 /* If we fell back to using the mode, fail if there was
6232 no scalar type for it. */
6233 if (scalar_type == NULL_TREE)
6234 return NULL_TREE;
6236 /* If no size was supplied use the mode the target prefers. Otherwise
6237 look up a vector mode of the specified size. */
6238 if (size == 0)
6239 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6240 else
6241 simd_mode = mode_for_vector (inner_mode, size / nbytes);
6242 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6243 if (nunits <= 1)
6244 return NULL_TREE;
6246 vectype = build_vector_type (scalar_type, nunits);
6248 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6249 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6250 return NULL_TREE;
6252 return vectype;
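/* A worked example for the function above (modes are illustrative and
   target-dependent): for SCALAR_TYPE 'int' with a 4-byte SImode and
   SIZE == 16, mode_for_vector yields a 16-byte vector mode, so
   nunits == 16 / 4 == 4 and the result is a V4SI-style vector of four
   ints.  With SIZE == 0 the target's preferred SIMD mode for SImode
   determines the width instead.  */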
6255 unsigned int current_vector_size;
6257 /* Function get_vectype_for_scalar_type.
6259 Returns the vector type corresponding to SCALAR_TYPE as supported
6260 by the target. */
6262 tree
6263 get_vectype_for_scalar_type (tree scalar_type)
6265 tree vectype;
6266 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6267 current_vector_size);
6268 if (vectype
6269 && current_vector_size == 0)
6270 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6271 return vectype;
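/* Note that the first successful call above latches current_vector_size
   to the size of the vector type that was chosen; later calls then ask
   get_vectype_for_scalar_type_and_size for vectors of that same size,
   so subsequent queries within the same vectorization attempt get
   vectors of a single size.  */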
6274 /* Function get_same_sized_vectype
6276 Returns a vector type corresponding to SCALAR_TYPE of size
6277 VECTOR_TYPE if supported by the target. */
6279 tree
6280 get_same_sized_vectype (tree scalar_type, tree vector_type)
6282 return get_vectype_for_scalar_type_and_size
6283 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6286 /* Function vect_is_simple_use.
6288 Input:
6289 LOOP_VINFO - the vect info of the loop that is being vectorized.
6290 BB_VINFO - the vect info of the basic block that is being vectorized.
6291 OPERAND - operand of STMT in the loop or bb.
6292 DEF_STMT, DEF - the defining stmt and its defined value in case OPERAND is an SSA_NAME.
6294 Returns whether a stmt with OPERAND can be vectorized.
6295 For loops, supportable operands are constants, loop invariants, and operands
6296 that are defined by the current iteration of the loop. Unsupportable
6297 operands are those that are defined by a previous iteration of the loop (as
6298 is the case in reduction/induction computations).
6299 For basic blocks, supportable operands are constants and bb invariants.
6300 For now, operands defined outside the basic block are not supported. */
6302 bool
6303 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6304 bb_vec_info bb_vinfo, gimple *def_stmt,
6305 tree *def, enum vect_def_type *dt)
6307 basic_block bb;
6308 stmt_vec_info stmt_vinfo;
6309 struct loop *loop = NULL;
6311 if (loop_vinfo)
6312 loop = LOOP_VINFO_LOOP (loop_vinfo);
6314 *def_stmt = NULL;
6315 *def = NULL_TREE;
6317 if (dump_enabled_p ())
6319 dump_printf_loc (MSG_NOTE, vect_location,
6320 "vect_is_simple_use: operand ");
6321 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
6322 dump_printf (MSG_NOTE, "\n");
6325 if (CONSTANT_CLASS_P (operand))
6327 *dt = vect_constant_def;
6328 return true;
6331 if (is_gimple_min_invariant (operand))
6333 *def = operand;
6334 *dt = vect_external_def;
6335 return true;
6338 if (TREE_CODE (operand) == PAREN_EXPR)
6340 if (dump_enabled_p ())
6341 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
6342 operand = TREE_OPERAND (operand, 0);
6345 if (TREE_CODE (operand) != SSA_NAME)
6347 if (dump_enabled_p ())
6348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6349 "not ssa-name.\n");
6350 return false;
6353 *def_stmt = SSA_NAME_DEF_STMT (operand);
6354 if (*def_stmt == NULL)
6356 if (dump_enabled_p ())
6357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6358 "no def_stmt.\n");
6359 return false;
6362 if (dump_enabled_p ())
6364 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6365 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
6366 dump_printf (MSG_NOTE, "\n");
6369 /* An empty stmt is expected only in the case of a function argument
6370 (otherwise we expect a phi node or a GIMPLE_ASSIGN). */
6371 if (gimple_nop_p (*def_stmt))
6373 *def = operand;
6374 *dt = vect_external_def;
6375 return true;
6378 bb = gimple_bb (*def_stmt);
6380 if ((loop && !flow_bb_inside_loop_p (loop, bb))
6381 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
6382 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
6383 *dt = vect_external_def;
6384 else
6386 stmt_vinfo = vinfo_for_stmt (*def_stmt);
6387 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6390 if (*dt == vect_unknown_def_type
6391 || (stmt
6392 && *dt == vect_double_reduction_def
6393 && gimple_code (stmt) != GIMPLE_PHI))
6395 if (dump_enabled_p ())
6396 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6397 "Unsupported pattern.\n");
6398 return false;
6401 if (dump_enabled_p ())
6402 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
6404 switch (gimple_code (*def_stmt))
6406 case GIMPLE_PHI:
6407 *def = gimple_phi_result (*def_stmt);
6408 break;
6410 case GIMPLE_ASSIGN:
6411 *def = gimple_assign_lhs (*def_stmt);
6412 break;
6414 case GIMPLE_CALL:
6415 *def = gimple_call_lhs (*def_stmt);
6416 if (*def != NULL)
6417 break;
6418 /* FALLTHRU */
6419 default:
6420 if (dump_enabled_p ())
6421 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6422 "unsupported defining stmt:\n");
6423 return false;
6426 return true;
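/* To illustrate the classification above: a constant operand such as 5
   yields vect_constant_def; an SSA name defined outside the loop (or
   outside the basic block being SLP-vectorized) yields
   vect_external_def; and a name defined by a statement inside the
   region gets whatever def type its stmt_vec_info records
   (vect_internal_def, vect_reduction_def, etc.).  */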
6429 /* Function vect_is_simple_use_1.
6431 Same as vect_is_simple_use but also determines the vector operand
6432 type of OPERAND and stores it to *VECTYPE. If the definition of
6433 OPERAND is vect_uninitialized_def, vect_constant_def or
6434 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
6435 is responsible for computing the best-suited vector type for the
6436 scalar operand. */
6438 bool
6439 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6440 bb_vec_info bb_vinfo, gimple *def_stmt,
6441 tree *def, enum vect_def_type *dt, tree *vectype)
6443 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6444 def, dt))
6445 return false;
6447 /* Now get a vector type if the def is internal, otherwise supply
6448 NULL_TREE and leave it up to the caller to figure out a proper
6449 type for the use stmt. */
6450 if (*dt == vect_internal_def
6451 || *dt == vect_induction_def
6452 || *dt == vect_reduction_def
6453 || *dt == vect_double_reduction_def
6454 || *dt == vect_nested_cycle)
6456 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6458 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6459 && !STMT_VINFO_RELEVANT (stmt_info)
6460 && !STMT_VINFO_LIVE_P (stmt_info))
6461 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6463 *vectype = STMT_VINFO_VECTYPE (stmt_info);
6464 gcc_assert (*vectype != NULL_TREE);
6466 else if (*dt == vect_uninitialized_def
6467 || *dt == vect_constant_def
6468 || *dt == vect_external_def)
6469 *vectype = NULL_TREE;
6470 else
6471 gcc_unreachable ();
6473 return true;
6477 /* Function supportable_widening_operation
6479 Check whether an operation represented by the code CODE is a
6480 widening operation that is supported by the target platform in
6481 vector form (i.e., when operating on arguments of type VECTYPE_IN
6482 producing a result of type VECTYPE_OUT).
6484 Widening operations we currently support are NOP (CONVERT), FLOAT
6485 and WIDEN_MULT. This function checks if these operations are supported
6486 by the target platform either directly (via vector tree-codes), or via
6487 target builtins.
6489 Output:
6490 - CODE1 and CODE2 are codes of vector operations to be used when
6491 vectorizing the operation, if available.
6492 - MULTI_STEP_CVT determines the number of required intermediate steps in
6493 case of multi-step conversion (like char->short->int - in that case
6494 MULTI_STEP_CVT will be 1).
6495 - INTERM_TYPES contains the intermediate type required to perform the
6496 widening operation (short in the above example). */
6498 bool
6499 supportable_widening_operation (enum tree_code code, gimple stmt,
6500 tree vectype_out, tree vectype_in,
6501 enum tree_code *code1, enum tree_code *code2,
6502 int *multi_step_cvt,
6503 vec<tree> *interm_types)
6505 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6506 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6507 struct loop *vect_loop = NULL;
6508 enum machine_mode vec_mode;
6509 enum insn_code icode1, icode2;
6510 optab optab1, optab2;
6511 tree vectype = vectype_in;
6512 tree wide_vectype = vectype_out;
6513 enum tree_code c1, c2;
6514 int i;
6515 tree prev_type, intermediate_type;
6516 enum machine_mode intermediate_mode, prev_mode;
6517 optab optab3, optab4;
6519 *multi_step_cvt = 0;
6520 if (loop_info)
6521 vect_loop = LOOP_VINFO_LOOP (loop_info);
6523 switch (code)
6525 case WIDEN_MULT_EXPR:
6526 /* The result of a vectorized widening operation usually requires
6527 two vectors (because the widened results do not fit into one vector).
6528 The generated vector results would normally be expected to be
6529 generated in the same order as in the original scalar computation,
6530 i.e. if 8 results are generated in each vector iteration, they are
6531 to be organized as follows:
6532 vect1: [res1,res2,res3,res4],
6533 vect2: [res5,res6,res7,res8].
6535 However, in the special case that the result of the widening
6536 operation is used in a reduction computation only, the order doesn't
6537 matter (because when vectorizing a reduction we change the order of
6538 the computation). Some targets can take advantage of this and
6539 generate more efficient code. For example, targets like Altivec,
6540 that support widen_mult using a sequence of {mult_even,mult_odd}
6541 generate the following vectors:
6542 vect1: [res1,res3,res5,res7],
6543 vect2: [res2,res4,res6,res8].
6545 When vectorizing outer-loops, we execute the inner-loop sequentially
6546 (each vectorized inner-loop iteration contributes to VF outer-loop
6547 iterations in parallel). We therefore don't allow changing the
6548 order of the computation in the inner-loop during outer-loop
6549 vectorization. */
6550 /* TODO: Another case in which order doesn't *really* matter is when we
6551 widen and then contract again, e.g. (short)((int)x * y >> 8).
6552 Normally, pack_trunc performs an even/odd permute, whereas the
6553 repack from an even/odd expansion would be an interleave, which
6554 would be significantly simpler for e.g. AVX2. */
6555 /* In any case, in order to avoid duplicating the code below, recurse
6556 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6557 are properly set up for the caller. If we fail, we'll continue with
6558 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6559 if (vect_loop
6560 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6561 && !nested_in_vect_loop_p (vect_loop, stmt)
6562 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6563 stmt, vectype_out, vectype_in,
6564 code1, code2, multi_step_cvt,
6565 interm_types))
6566 return true;
6567 c1 = VEC_WIDEN_MULT_LO_EXPR;
6568 c2 = VEC_WIDEN_MULT_HI_EXPR;
6569 break;
6571 case VEC_WIDEN_MULT_EVEN_EXPR:
6572 /* Support the recursion induced just above. */
6573 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6574 c2 = VEC_WIDEN_MULT_ODD_EXPR;
6575 break;
6577 case WIDEN_LSHIFT_EXPR:
6578 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6579 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6580 break;
6582 CASE_CONVERT:
6583 c1 = VEC_UNPACK_LO_EXPR;
6584 c2 = VEC_UNPACK_HI_EXPR;
6585 break;
6587 case FLOAT_EXPR:
6588 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6589 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6590 break;
6592 case FIX_TRUNC_EXPR:
6593 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6594 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6595 computing the operation. */
6596 return false;
6598 default:
6599 gcc_unreachable ();
6602 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6604 enum tree_code ctmp = c1;
6605 c1 = c2;
6606 c2 = ctmp;
6609 if (code == FIX_TRUNC_EXPR)
6611 /* The signedness is determined from output operand. */
6612 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6613 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6615 else
6617 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6618 optab2 = optab_for_tree_code (c2, vectype, optab_default);
6621 if (!optab1 || !optab2)
6622 return false;
6624 vec_mode = TYPE_MODE (vectype);
6625 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6626 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6627 return false;
6629 *code1 = c1;
6630 *code2 = c2;
6632 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6633 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6634 return true;
6636 /* Check if it's a multi-step conversion that can be done using intermediate
6637 types. */
6639 prev_type = vectype;
6640 prev_mode = vec_mode;
6642 if (!CONVERT_EXPR_CODE_P (code))
6643 return false;
6645 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6646 intermediate steps in the promotion sequence. We try
6647 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6648 not. */
6649 interm_types->create (MAX_INTERM_CVT_STEPS);
6650 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6652 intermediate_mode = insn_data[icode1].operand[0].mode;
6653 intermediate_type
6654 = lang_hooks.types.type_for_mode (intermediate_mode,
6655 TYPE_UNSIGNED (prev_type));
6656 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6657 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6659 if (!optab3 || !optab4
6660 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6661 || insn_data[icode1].operand[0].mode != intermediate_mode
6662 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6663 || insn_data[icode2].operand[0].mode != intermediate_mode
6664 || ((icode1 = optab_handler (optab3, intermediate_mode))
6665 == CODE_FOR_nothing)
6666 || ((icode2 = optab_handler (optab4, intermediate_mode))
6667 == CODE_FOR_nothing))
6668 break;
6670 interm_types->quick_push (intermediate_type);
6671 (*multi_step_cvt)++;
6673 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6674 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6675 return true;
6677 prev_type = intermediate_type;
6678 prev_mode = intermediate_mode;
6681 interm_types->release ();
6682 return false;
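/* A concrete, purely illustrative (and target-permitting) case of the
   multi-step path above: widening chars to ints cannot be done with a
   single VEC_UNPACK_LO/HI_EXPR pair, since unpacking only doubles the
   element width.  The loop then records one intermediate short vector
   type, sets *MULTI_STEP_CVT to 1 and returns the unpack codes,
   matching the char->short->int example in the function comment.  */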
6686 /* Function supportable_narrowing_operation
6688 Check whether an operation represented by the code CODE is a
6689 narrowing operation that is supported by the target platform in
6690 vector form (i.e., when operating on arguments of type VECTYPE_IN
6691 and producing a result of type VECTYPE_OUT).
6693 Narrowing operations we currently support are NOP (CONVERT) and
6694 FIX_TRUNC. This function checks if these operations are supported by
6695 the target platform directly via vector tree-codes.
6697 Output:
6698 - CODE1 is the code of a vector operation to be used when
6699 vectorizing the operation, if available.
6700 - MULTI_STEP_CVT determines the number of required intermediate steps in
6701 case of multi-step conversion (like int->short->char - in that case
6702 MULTI_STEP_CVT will be 1).
6703 - INTERM_TYPES contains the intermediate type required to perform the
6704 narrowing operation (short in the above example). */
6706 bool
6707 supportable_narrowing_operation (enum tree_code code,
6708 tree vectype_out, tree vectype_in,
6709 enum tree_code *code1, int *multi_step_cvt,
6710 vec<tree> *interm_types)
6712 enum machine_mode vec_mode;
6713 enum insn_code icode1;
6714 optab optab1, interm_optab;
6715 tree vectype = vectype_in;
6716 tree narrow_vectype = vectype_out;
6717 enum tree_code c1;
6718 tree intermediate_type;
6719 enum machine_mode intermediate_mode, prev_mode;
6720 int i;
6721 bool uns;
6723 *multi_step_cvt = 0;
6724 switch (code)
6726 CASE_CONVERT:
6727 c1 = VEC_PACK_TRUNC_EXPR;
6728 break;
6730 case FIX_TRUNC_EXPR:
6731 c1 = VEC_PACK_FIX_TRUNC_EXPR;
6732 break;
6734 case FLOAT_EXPR:
6735 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6736 tree code and optabs used for computing the operation. */
6737 return false;
6739 default:
6740 gcc_unreachable ();
6743 if (code == FIX_TRUNC_EXPR)
6744 /* The signedness is determined from output operand. */
6745 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6746 else
6747 optab1 = optab_for_tree_code (c1, vectype, optab_default);
6749 if (!optab1)
6750 return false;
6752 vec_mode = TYPE_MODE (vectype);
6753 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6754 return false;
6756 *code1 = c1;
6758 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6759 return true;
6761 /* Check if it's a multi-step conversion that can be done using intermediate
6762 types. */
6763 prev_mode = vec_mode;
6764 if (code == FIX_TRUNC_EXPR)
6765 uns = TYPE_UNSIGNED (vectype_out);
6766 else
6767 uns = TYPE_UNSIGNED (vectype);
6769 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6770 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6771 costly than signed. */
6772 if (code == FIX_TRUNC_EXPR && uns)
6774 enum insn_code icode2;
6776 intermediate_type
6777 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6778 interm_optab
6779 = optab_for_tree_code (c1, intermediate_type, optab_default);
6780 if (interm_optab != unknown_optab
6781 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
6782 && insn_data[icode1].operand[0].mode
6783 == insn_data[icode2].operand[0].mode)
6785 uns = false;
6786 optab1 = interm_optab;
6787 icode1 = icode2;
6791 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6792 intermediate steps in the narrowing sequence. We try
6793 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6794 interm_types->create (MAX_INTERM_CVT_STEPS);
6795 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6797 intermediate_mode = insn_data[icode1].operand[0].mode;
6798 intermediate_type
6799 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
6800 interm_optab
6801 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6802 optab_default);
6803 if (!interm_optab
6804 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6805 || insn_data[icode1].operand[0].mode != intermediate_mode
6806 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6807 == CODE_FOR_nothing))
6808 break;
6810 interm_types->quick_push (intermediate_type);
6811 (*multi_step_cvt)++;
6813 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6814 return true;
6816 prev_mode = intermediate_mode;
6817 optab1 = interm_optab;
6820 interm_types->release ();
6821 return false;
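/* Analogously for the narrowing case (again only an illustration):
   narrowing ints to chars typically needs two VEC_PACK_TRUNC_EXPR
   steps, int->short->char, so one intermediate short vector type is
   pushed onto INTERM_TYPES and *MULTI_STEP_CVT ends up as 1, as in
   the int->short->char example in the function comment above.  */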