gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "target.h"
30 #include "predict.h"
31 #include "vec.h"
32 #include "hashtab.h"
33 #include "hash-set.h"
34 #include "machmode.h"
35 #include "hard-reg-set.h"
36 #include "input.h"
37 #include "function.h"
38 #include "dominance.h"
39 #include "cfg.h"
40 #include "basic-block.h"
41 #include "gimple-pretty-print.h"
42 #include "tree-ssa-alias.h"
43 #include "internal-fn.h"
44 #include "tree-eh.h"
45 #include "gimple-expr.h"
46 #include "is-a.h"
47 #include "gimple.h"
48 #include "gimplify.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "gimple-ssa.h"
52 #include "tree-cfg.h"
53 #include "tree-phinodes.h"
54 #include "ssa-iterators.h"
55 #include "stringpool.h"
56 #include "tree-ssanames.h"
57 #include "tree-ssa-loop-manip.h"
58 #include "cfgloop.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-scalar-evolution.h"
61 #include "expr.h"
62 #include "recog.h" /* FIXME: for insn_data */
63 #include "insn-codes.h"
64 #include "optabs.h"
65 #include "diagnostic-core.h"
66 #include "tree-vectorizer.h"
67 #include "dumpfile.h"
68 #include "hash-map.h"
69 #include "plugin-api.h"
70 #include "ipa-ref.h"
71 #include "cgraph.h"
72 #include "builtins.h"
74 /* For lang_hooks.types.type_for_mode. */
75 #include "langhooks.h"
77 /* Return the vectorized type for the given statement. */
79 tree
80 stmt_vectype (struct _stmt_vec_info *stmt_info)
82 return STMT_VINFO_VECTYPE (stmt_info);
85 /* Return TRUE iff the given statement is in an inner loop relative to
86 the loop being vectorized. */
87 bool
88 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
90 gimple stmt = STMT_VINFO_STMT (stmt_info);
91 basic_block bb = gimple_bb (stmt);
92 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
93 struct loop* loop;
95 if (!loop_vinfo)
96 return false;
98 loop = LOOP_VINFO_LOOP (loop_vinfo);
100 return (bb->loop_father == loop->inner);
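/* Illustrative aside (not part of the original source): for outer-loop
   vectorization of

     for (i = 0; i < n; i++)        <- loop being vectorized
       for (j = 0; j < m; j++)      <- loop->inner
         a[i] += b[i][j];

   stmt_in_inner_loop_p returns true for the statement updating a[i],
   because its basic block belongs to the inner j-loop.  */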
103 /* Record the cost of a statement, either by directly informing the
104 target model or by saving it in a vector for later processing.
105 Return a preliminary estimate of the statement's cost. */
107 unsigned
108 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
109 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
110 int misalign, enum vect_cost_model_location where)
112 if (body_cost_vec)
114 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
115 add_stmt_info_to_vec (body_cost_vec, count, kind,
116 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
117 misalign);
118 return (unsigned)
119 (builtin_vectorization_cost (kind, vectype, misalign) * count);
122 else
124 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
125 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
126 void *target_cost_data;
128 if (loop_vinfo)
129 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
130 else
131 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
133 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
134 misalign, where);
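/* Illustrative usage sketch (hypothetical caller, not part of the original
   source).  With a non-NULL cost vector the cost is merely recorded for
   later processing; with a NULL vector it is passed straight to the
   target's cost model:

     inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                      stmt_info, 0, vect_body);
     prologue_cost += record_stmt_cost (NULL, 1, scalar_to_vec,
                                        stmt_info, 0, vect_prologue);

   The first call returns the builtin_vectorization_cost-based estimate,
   the second returns whatever add_stmt_cost reports for the target.  */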
138 /* Return a variable of type ELEM_TYPE[NELEMS]. */
140 static tree
141 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
143 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
144 "vect_array");
147 /* ARRAY is an array of vectors created by create_vector_array.
148 Return an SSA_NAME for the vector in index N. The reference
149 is part of the vectorization of STMT and the vector is associated
150 with scalar destination SCALAR_DEST. */
152 static tree
153 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
154 tree array, unsigned HOST_WIDE_INT n)
156 tree vect_type, vect, vect_name, array_ref;
157 gimple new_stmt;
159 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
160 vect_type = TREE_TYPE (TREE_TYPE (array));
161 vect = vect_create_destination_var (scalar_dest, vect_type);
162 array_ref = build4 (ARRAY_REF, vect_type, array,
163 build_int_cst (size_type_node, n),
164 NULL_TREE, NULL_TREE);
166 new_stmt = gimple_build_assign (vect, array_ref);
167 vect_name = make_ssa_name (vect, new_stmt);
168 gimple_assign_set_lhs (new_stmt, vect_name);
169 vect_finish_stmt_generation (stmt, new_stmt, gsi);
171 return vect_name;
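/* Illustrative aside (hypothetical SSA names, not part of the original
   source): for an array created by create_vector_array and N == 2,
   read_vector_array emits roughly

     vect_x_3.7 = vect_array[2];

   an ARRAY_REF load whose lhs is a fresh SSA name derived from
   SCALAR_DEST and whose type is the vector element type of the array.  */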
174 /* ARRAY is an array of vectors created by create_vector_array.
175 Emit code to store SSA_NAME VECT in index N of the array.
176 The store is part of the vectorization of STMT. */
178 static void
179 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
180 tree array, unsigned HOST_WIDE_INT n)
182 tree array_ref;
183 gimple new_stmt;
185 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
186 build_int_cst (size_type_node, n),
187 NULL_TREE, NULL_TREE);
189 new_stmt = gimple_build_assign (array_ref, vect);
190 vect_finish_stmt_generation (stmt, new_stmt, gsi);
193 /* PTR is a pointer to an array of type TYPE. Return a representation
194 of *PTR. The memory reference replaces those in FIRST_DR
195 (and its group). */
197 static tree
198 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
200 tree mem_ref, alias_ptr_type;
202 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
203 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
204 /* Arrays have the same alignment as their type. */
205 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
206 return mem_ref;
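/* Illustrative aside (hypothetical types, not part of the original
   source): for TYPE == v4si[2] and a pointer PTR to such an array,
   create_array_ref builds the equivalent of

     MEM[(v4si[2] *)ptr]

   i.e. a MEM_REF with zero offset whose alias pointer type is taken from
   FIRST_DR, so the new reference keeps the aliasing of the accesses it
   replaces.  */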
209 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
211 /* Function vect_mark_relevant.
213 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
215 static void
216 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
217 enum vect_relevant relevant, bool live_p,
218 bool used_in_pattern)
220 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
221 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
222 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
223 gimple pattern_stmt;
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "mark relevant %d, live %d.\n", relevant, live_p);
229 /* If this stmt is an original stmt in a pattern, we might need to mark its
230 related pattern stmt instead of the original stmt. However, such stmts
 231    may have their own uses that are not in any pattern; in such cases the
232 stmt itself should be marked. */
233 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
235 bool found = false;
236 if (!used_in_pattern)
238 imm_use_iterator imm_iter;
239 use_operand_p use_p;
240 gimple use_stmt;
241 tree lhs;
242 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
243 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
245 if (is_gimple_assign (stmt))
246 lhs = gimple_assign_lhs (stmt);
247 else
248 lhs = gimple_call_lhs (stmt);
 250              /* This use is outside the pattern; if LHS has other uses that are
251 pattern uses, we should mark the stmt itself, and not the pattern
252 stmt. */
253 if (lhs && TREE_CODE (lhs) == SSA_NAME)
254 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
256 if (is_gimple_debug (USE_STMT (use_p)))
257 continue;
258 use_stmt = USE_STMT (use_p);
260 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
261 continue;
263 if (vinfo_for_stmt (use_stmt)
264 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
266 found = true;
267 break;
272 if (!found)
274 /* This is the last stmt in a sequence that was detected as a
275 pattern that can potentially be vectorized. Don't mark the stmt
276 as relevant/live because it's not going to be vectorized.
277 Instead mark the pattern-stmt that replaces it. */
279 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
281 if (dump_enabled_p ())
282 dump_printf_loc (MSG_NOTE, vect_location,
283 "last stmt in pattern. don't mark"
284 " relevant/live.\n");
285 stmt_info = vinfo_for_stmt (pattern_stmt);
286 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
287 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
288 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
289 stmt = pattern_stmt;
293 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
294 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
295 STMT_VINFO_RELEVANT (stmt_info) = relevant;
297 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
298 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
300 if (dump_enabled_p ())
301 dump_printf_loc (MSG_NOTE, vect_location,
302 "already marked relevant/live.\n");
303 return;
306 worklist->safe_push (stmt);
310 /* Function vect_stmt_relevant_p.
312 Return true if STMT in loop that is represented by LOOP_VINFO is
313 "relevant for vectorization".
315 A stmt is considered "relevant for vectorization" if:
316 - it has uses outside the loop.
317 - it has vdefs (it alters memory).
 318    - it is a control stmt in the loop (other than the loop exit condition).
320 CHECKME: what other side effects would the vectorizer allow? */
322 static bool
323 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
324 enum vect_relevant *relevant, bool *live_p)
326 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
327 ssa_op_iter op_iter;
328 imm_use_iterator imm_iter;
329 use_operand_p use_p;
330 def_operand_p def_p;
332 *relevant = vect_unused_in_scope;
333 *live_p = false;
335 /* cond stmt other than loop exit cond. */
336 if (is_ctrl_stmt (stmt)
337 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
338 != loop_exit_ctrl_vec_info_type)
339 *relevant = vect_used_in_scope;
341 /* changing memory. */
342 if (gimple_code (stmt) != GIMPLE_PHI)
343 if (gimple_vdef (stmt))
345 if (dump_enabled_p ())
346 dump_printf_loc (MSG_NOTE, vect_location,
347 "vec_stmt_relevant_p: stmt has vdefs.\n");
348 *relevant = vect_used_in_scope;
351 /* uses outside the loop. */
352 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
354 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
356 basic_block bb = gimple_bb (USE_STMT (use_p));
357 if (!flow_bb_inside_loop_p (loop, bb))
359 if (dump_enabled_p ())
360 dump_printf_loc (MSG_NOTE, vect_location,
361 "vec_stmt_relevant_p: used out of loop.\n");
363 if (is_gimple_debug (USE_STMT (use_p)))
364 continue;
366 /* We expect all such uses to be in the loop exit phis
 367              (because of loop-closed SSA form) */
368 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
369 gcc_assert (bb == single_exit (loop)->dest);
371 *live_p = true;
376 return (*live_p || *relevant);
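/* Illustrative aside (not part of the original source): in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;
         s = s + b[i];
       }
     use (s);

   the store to a[i] has a vdef and is therefore marked
   vect_used_in_scope, while the statement defining s inside the loop is
   used by the loop-exit phi and so gets *live_p set, matching the
   criteria listed in the function comment above.  */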
380 /* Function exist_non_indexing_operands_for_use_p
382 USE is one of the uses attached to STMT. Check if USE is
383 used in STMT for anything other than indexing an array. */
385 static bool
386 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
388 tree operand;
389 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
391 /* USE corresponds to some operand in STMT. If there is no data
392 reference in STMT, then any operand that corresponds to USE
393 is not indexing an array. */
394 if (!STMT_VINFO_DATA_REF (stmt_info))
395 return true;
 397   /* STMT has a data_ref. FORNOW this means that it is of one of
398 the following forms:
399 -1- ARRAY_REF = var
400 -2- var = ARRAY_REF
401 (This should have been verified in analyze_data_refs).
403 'var' in the second case corresponds to a def, not a use,
404 so USE cannot correspond to any operands that are not used
405 for array indexing.
407 Therefore, all we need to check is if STMT falls into the
408 first case, and whether var corresponds to USE. */
410 if (!gimple_assign_copy_p (stmt))
412 if (is_gimple_call (stmt)
413 && gimple_call_internal_p (stmt))
414 switch (gimple_call_internal_fn (stmt))
416 case IFN_MASK_STORE:
417 operand = gimple_call_arg (stmt, 3);
418 if (operand == use)
419 return true;
420 /* FALLTHRU */
421 case IFN_MASK_LOAD:
422 operand = gimple_call_arg (stmt, 2);
423 if (operand == use)
424 return true;
425 break;
426 default:
427 break;
429 return false;
432 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
433 return false;
434 operand = gimple_assign_rhs1 (stmt);
435 if (TREE_CODE (operand) != SSA_NAME)
436 return false;
438 if (operand == use)
439 return true;
441 return false;
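/* Illustrative aside (hypothetical SSA names, not part of the original
   source): for the store

     a[i_7] = x_5;

   this function returns true for USE == x_5 (it is the stored value) but
   false for USE == i_7, which only indexes the array; process_use then
   skips i_7's definition, so address computations are not marked
   relevant.  */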
446 Function process_use.
448 Inputs:
449 - a USE in STMT in a loop represented by LOOP_VINFO
450 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
451 that defined USE. This is done by calling mark_relevant and passing it
452 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
453 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
454 be performed.
456 Outputs:
457 Generally, LIVE_P and RELEVANT are used to define the liveness and
458 relevance info of the DEF_STMT of this USE:
459 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
460 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
461 Exceptions:
462 - case 1: If USE is used only for address computations (e.g. array indexing),
463 which does not need to be directly vectorized, then the liveness/relevance
464 of the respective DEF_STMT is left unchanged.
465 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 466      skip DEF_STMT because it has already been processed.
467 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
468 be modified accordingly.
470 Return true if everything is as expected. Return false otherwise. */
472 static bool
473 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
474 enum vect_relevant relevant, vec<gimple> *worklist,
475 bool force)
477 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
478 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
479 stmt_vec_info dstmt_vinfo;
480 basic_block bb, def_bb;
481 tree def;
482 gimple def_stmt;
483 enum vect_def_type dt;
485 /* case 1: we are only interested in uses that need to be vectorized. Uses
486 that are used for address computation are not considered relevant. */
487 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
488 return true;
490 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
492 if (dump_enabled_p ())
493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
494 "not vectorized: unsupported use in stmt.\n");
495 return false;
498 if (!def_stmt || gimple_nop_p (def_stmt))
499 return true;
501 def_bb = gimple_bb (def_stmt);
502 if (!flow_bb_inside_loop_p (loop, def_bb))
504 if (dump_enabled_p ())
505 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
506 return true;
509 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
510 DEF_STMT must have already been processed, because this should be the
511 only way that STMT, which is a reduction-phi, was put in the worklist,
512 as there should be no other uses for DEF_STMT in the loop. So we just
513 check that everything is as expected, and we are done. */
514 dstmt_vinfo = vinfo_for_stmt (def_stmt);
515 bb = gimple_bb (stmt);
516 if (gimple_code (stmt) == GIMPLE_PHI
517 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
518 && gimple_code (def_stmt) != GIMPLE_PHI
519 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
520 && bb->loop_father == def_bb->loop_father)
522 if (dump_enabled_p ())
523 dump_printf_loc (MSG_NOTE, vect_location,
524 "reduc-stmt defining reduc-phi in the same nest.\n");
525 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
526 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
527 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
528 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
529 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
530 return true;
533 /* case 3a: outer-loop stmt defining an inner-loop stmt:
534 outer-loop-header-bb:
535 d = def_stmt
536 inner-loop:
537 stmt # use (d)
538 outer-loop-tail-bb:
539 ... */
540 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
542 if (dump_enabled_p ())
543 dump_printf_loc (MSG_NOTE, vect_location,
544 "outer-loop def-stmt defining inner-loop stmt.\n");
546 switch (relevant)
548 case vect_unused_in_scope:
549 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
550 vect_used_in_scope : vect_unused_in_scope;
551 break;
553 case vect_used_in_outer_by_reduction:
554 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
555 relevant = vect_used_by_reduction;
556 break;
558 case vect_used_in_outer:
559 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
560 relevant = vect_used_in_scope;
561 break;
563 case vect_used_in_scope:
564 break;
566 default:
567 gcc_unreachable ();
571 /* case 3b: inner-loop stmt defining an outer-loop stmt:
572 outer-loop-header-bb:
574 inner-loop:
575 d = def_stmt
576 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
577 stmt # use (d) */
578 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
580 if (dump_enabled_p ())
581 dump_printf_loc (MSG_NOTE, vect_location,
582 "inner-loop def-stmt defining outer-loop stmt.\n");
584 switch (relevant)
586 case vect_unused_in_scope:
587 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
588 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
589 vect_used_in_outer_by_reduction : vect_unused_in_scope;
590 break;
592 case vect_used_by_reduction:
593 relevant = vect_used_in_outer_by_reduction;
594 break;
596 case vect_used_in_scope:
597 relevant = vect_used_in_outer;
598 break;
600 default:
601 gcc_unreachable ();
605 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
606 is_pattern_stmt_p (stmt_vinfo));
607 return true;
611 /* Function vect_mark_stmts_to_be_vectorized.
613 Not all stmts in the loop need to be vectorized. For example:
615 for i...
616 for j...
617 1. T0 = i + j
618 2. T1 = a[T0]
620 3. j = j + 1
622 Stmt 1 and 3 do not need to be vectorized, because loop control and
623 addressing of vectorized data-refs are handled differently.
625 This pass detects such stmts. */
627 bool
628 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
630 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
631 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
632 unsigned int nbbs = loop->num_nodes;
633 gimple_stmt_iterator si;
634 gimple stmt;
635 unsigned int i;
636 stmt_vec_info stmt_vinfo;
637 basic_block bb;
638 gimple phi;
639 bool live_p;
640 enum vect_relevant relevant, tmp_relevant;
641 enum vect_def_type def_type;
643 if (dump_enabled_p ())
644 dump_printf_loc (MSG_NOTE, vect_location,
645 "=== vect_mark_stmts_to_be_vectorized ===\n");
647 auto_vec<gimple, 64> worklist;
649 /* 1. Init worklist. */
650 for (i = 0; i < nbbs; i++)
652 bb = bbs[i];
653 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
655 phi = gsi_stmt (si);
656 if (dump_enabled_p ())
658 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
660 dump_printf (MSG_NOTE, "\n");
663 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
664 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
666 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
668 stmt = gsi_stmt (si);
669 if (dump_enabled_p ())
671 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
672 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
673 dump_printf (MSG_NOTE, "\n");
676 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
677 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
681 /* 2. Process_worklist */
682 while (worklist.length () > 0)
684 use_operand_p use_p;
685 ssa_op_iter iter;
687 stmt = worklist.pop ();
688 if (dump_enabled_p ())
690 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
691 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
692 dump_printf (MSG_NOTE, "\n");
695 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
696 (DEF_STMT) as relevant/irrelevant and live/dead according to the
697 liveness and relevance properties of STMT. */
698 stmt_vinfo = vinfo_for_stmt (stmt);
699 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
700 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
702 /* Generally, the liveness and relevance properties of STMT are
703 propagated as is to the DEF_STMTs of its USEs:
704 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
705 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
707 One exception is when STMT has been identified as defining a reduction
708 variable; in this case we set the liveness/relevance as follows:
709 live_p = false
710 relevant = vect_used_by_reduction
711 This is because we distinguish between two kinds of relevant stmts -
712 those that are used by a reduction computation, and those that are
713 (also) used by a regular computation. This allows us later on to
714 identify stmts that are used solely by a reduction, and therefore the
715 order of the results that they produce does not have to be kept. */
717 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
718 tmp_relevant = relevant;
719 switch (def_type)
721 case vect_reduction_def:
722 switch (tmp_relevant)
724 case vect_unused_in_scope:
725 relevant = vect_used_by_reduction;
726 break;
728 case vect_used_by_reduction:
729 if (gimple_code (stmt) == GIMPLE_PHI)
730 break;
731 /* fall through */
733 default:
734 if (dump_enabled_p ())
735 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
736 "unsupported use of reduction.\n");
737 return false;
740 live_p = false;
741 break;
743 case vect_nested_cycle:
744 if (tmp_relevant != vect_unused_in_scope
745 && tmp_relevant != vect_used_in_outer_by_reduction
746 && tmp_relevant != vect_used_in_outer)
748 if (dump_enabled_p ())
749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
750 "unsupported use of nested cycle.\n");
752 return false;
755 live_p = false;
756 break;
758 case vect_double_reduction_def:
759 if (tmp_relevant != vect_unused_in_scope
760 && tmp_relevant != vect_used_by_reduction)
762 if (dump_enabled_p ())
763 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
764 "unsupported use of double reduction.\n");
766 return false;
769 live_p = false;
770 break;
772 default:
773 break;
776 if (is_pattern_stmt_p (stmt_vinfo))
778 /* Pattern statements are not inserted into the code, so
779 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
780 have to scan the RHS or function arguments instead. */
781 if (is_gimple_assign (stmt))
783 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
784 tree op = gimple_assign_rhs1 (stmt);
786 i = 1;
787 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
789 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
790 live_p, relevant, &worklist, false)
791 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
792 live_p, relevant, &worklist, false))
793 return false;
794 i = 2;
796 for (; i < gimple_num_ops (stmt); i++)
798 op = gimple_op (stmt, i);
799 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
800 &worklist, false))
801 return false;
804 else if (is_gimple_call (stmt))
806 for (i = 0; i < gimple_call_num_args (stmt); i++)
808 tree arg = gimple_call_arg (stmt, i);
809 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
810 &worklist, false))
811 return false;
815 else
816 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
818 tree op = USE_FROM_PTR (use_p);
819 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
820 &worklist, false))
821 return false;
824 if (STMT_VINFO_GATHER_P (stmt_vinfo))
826 tree off;
827 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
828 gcc_assert (decl);
829 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
830 &worklist, true))
831 return false;
833 } /* while worklist */
835 return true;
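/* Illustrative aside (not part of the original source): referring to the
   example in the function comment above, once a relevant statement using
   T1 is on the worklist, process_use marks the definition of T1 (stmt 2)
   as relevant too, but the propagation stops there: T0 is only used for
   array indexing in stmt 2 and j only feeds the loop control, so stmts 1
   and 3 are never marked and are left to the addressing and loop-control
   machinery.  */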
839 /* Function vect_model_simple_cost.
841 Models cost for simple operations, i.e. those that only emit ncopies of a
842 single op. Right now, this does not account for multiple insns that could
843 be generated for the single vector op. We will handle that shortly. */
845 void
846 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
847 enum vect_def_type *dt,
848 stmt_vector_for_cost *prologue_cost_vec,
849 stmt_vector_for_cost *body_cost_vec)
851 int i;
852 int inside_cost = 0, prologue_cost = 0;
854 /* The SLP costs were already calculated during SLP tree build. */
855 if (PURE_SLP_STMT (stmt_info))
856 return;
858 /* FORNOW: Assuming maximum 2 args per stmts. */
859 for (i = 0; i < 2; i++)
860 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
861 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
862 stmt_info, 0, vect_prologue);
864 /* Pass the inside-of-loop statements to the target-specific cost model. */
865 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
866 stmt_info, 0, vect_body);
868 if (dump_enabled_p ())
869 dump_printf_loc (MSG_NOTE, vect_location,
870 "vect_model_simple_cost: inside_cost = %d, "
871 "prologue_cost = %d .\n", inside_cost, prologue_cost);
875 /* Model cost for type demotion and promotion operations. PWR is normally
876 zero for single-step promotions and demotions. It will be one if
877 two-step promotion/demotion is required, and so on. Each additional
878 step doubles the number of instructions required. */
880 static void
881 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
882 enum vect_def_type *dt, int pwr)
884 int i, tmp;
885 int inside_cost = 0, prologue_cost = 0;
886 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
887 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
888 void *target_cost_data;
890 /* The SLP costs were already calculated during SLP tree build. */
891 if (PURE_SLP_STMT (stmt_info))
892 return;
894 if (loop_vinfo)
895 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
896 else
897 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
899 for (i = 0; i < pwr + 1; i++)
901 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
902 (i + 1) : i;
903 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
904 vec_promote_demote, stmt_info, 0,
905 vect_body);
908 /* FORNOW: Assuming maximum 2 args per stmts. */
909 for (i = 0; i < 2; i++)
910 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
911 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
912 stmt_info, 0, vect_prologue);
914 if (dump_enabled_p ())
915 dump_printf_loc (MSG_NOTE, vect_location,
916 "vect_model_promotion_demotion_cost: inside_cost = %d, "
917 "prologue_cost = %d .\n", inside_cost, prologue_cost);
920 /* Function vect_cost_group_size
922 For grouped load or store, return the group_size only if it is the first
923 load or store of a group, else return 1. This ensures that group size is
924 only returned once per group. */
926 static int
927 vect_cost_group_size (stmt_vec_info stmt_info)
929 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
931 if (first_stmt == STMT_VINFO_STMT (stmt_info))
932 return GROUP_SIZE (stmt_info);
934 return 1;
938 /* Function vect_model_store_cost
940 Models cost for stores. In the case of grouped accesses, one access
941 has the overhead of the grouped access attributed to it. */
943 void
944 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
945 bool store_lanes_p, enum vect_def_type dt,
946 slp_tree slp_node,
947 stmt_vector_for_cost *prologue_cost_vec,
948 stmt_vector_for_cost *body_cost_vec)
950 int group_size;
951 unsigned int inside_cost = 0, prologue_cost = 0;
952 struct data_reference *first_dr;
953 gimple first_stmt;
955 /* The SLP costs were already calculated during SLP tree build. */
956 if (PURE_SLP_STMT (stmt_info))
957 return;
959 if (dt == vect_constant_def || dt == vect_external_def)
960 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
961 stmt_info, 0, vect_prologue);
963 /* Grouped access? */
964 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
966 if (slp_node)
968 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
969 group_size = 1;
971 else
973 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
974 group_size = vect_cost_group_size (stmt_info);
977 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
979 /* Not a grouped access. */
980 else
982 group_size = 1;
983 first_dr = STMT_VINFO_DATA_REF (stmt_info);
986 /* We assume that the cost of a single store-lanes instruction is
987 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
988 access is instead being provided by a permute-and-store operation,
989 include the cost of the permutes. */
990 if (!store_lanes_p && group_size > 1)
 992       /* Uses high and low interleave or shuffle operations for each
993 needed permute. */
994 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
995 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
996 stmt_info, 0, vect_body);
998 if (dump_enabled_p ())
999 dump_printf_loc (MSG_NOTE, vect_location,
1000 "vect_model_store_cost: strided group_size = %d .\n",
1001 group_size);
1004 /* Costs of the stores. */
1005 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1007 if (dump_enabled_p ())
1008 dump_printf_loc (MSG_NOTE, vect_location,
1009 "vect_model_store_cost: inside_cost = %d, "
1010 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1014 /* Calculate cost of DR's memory access. */
1015 void
1016 vect_get_store_cost (struct data_reference *dr, int ncopies,
1017 unsigned int *inside_cost,
1018 stmt_vector_for_cost *body_cost_vec)
1020 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1021 gimple stmt = DR_STMT (dr);
1022 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1024 switch (alignment_support_scheme)
1026 case dr_aligned:
1028 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1029 vector_store, stmt_info, 0,
1030 vect_body);
1032 if (dump_enabled_p ())
1033 dump_printf_loc (MSG_NOTE, vect_location,
1034 "vect_model_store_cost: aligned.\n");
1035 break;
1038 case dr_unaligned_supported:
1040 /* Here, we assign an additional cost for the unaligned store. */
1041 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1042 unaligned_store, stmt_info,
1043 DR_MISALIGNMENT (dr), vect_body);
1044 if (dump_enabled_p ())
1045 dump_printf_loc (MSG_NOTE, vect_location,
1046 "vect_model_store_cost: unaligned supported by "
1047 "hardware.\n");
1048 break;
1051 case dr_unaligned_unsupported:
1053 *inside_cost = VECT_MAX_COST;
1055 if (dump_enabled_p ())
1056 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1057 "vect_model_store_cost: unsupported access.\n");
1058 break;
1061 default:
1062 gcc_unreachable ();
1067 /* Function vect_model_load_cost
1069 Models cost for loads. In the case of grouped accesses, the last access
1070 has the overhead of the grouped access attributed to it. Since unaligned
1071 accesses are supported for loads, we also account for the costs of the
1072 access scheme chosen. */
1074 void
1075 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1076 bool load_lanes_p, slp_tree slp_node,
1077 stmt_vector_for_cost *prologue_cost_vec,
1078 stmt_vector_for_cost *body_cost_vec)
1080 int group_size;
1081 gimple first_stmt;
1082 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1083 unsigned int inside_cost = 0, prologue_cost = 0;
1085 /* The SLP costs were already calculated during SLP tree build. */
1086 if (PURE_SLP_STMT (stmt_info))
1087 return;
1089 /* Grouped accesses? */
1090 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1091 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1093 group_size = vect_cost_group_size (stmt_info);
1094 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1096 /* Not a grouped access. */
1097 else
1099 group_size = 1;
1100 first_dr = dr;
1103 /* We assume that the cost of a single load-lanes instruction is
1104 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1105 access is instead being provided by a load-and-permute operation,
1106 include the cost of the permutes. */
1107 if (!load_lanes_p && group_size > 1)
1109       /* Uses even and odd extract operations or shuffle operations
1110 for each needed permute. */
1111 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1112 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1113 stmt_info, 0, vect_body);
1115 if (dump_enabled_p ())
1116 dump_printf_loc (MSG_NOTE, vect_location,
1117 "vect_model_load_cost: strided group_size = %d .\n",
1118 group_size);
1121 /* The loads themselves. */
1122 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1124 /* N scalar loads plus gathering them into a vector. */
1125 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1126 inside_cost += record_stmt_cost (body_cost_vec,
1127 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1128 scalar_load, stmt_info, 0, vect_body);
1129 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1130 stmt_info, 0, vect_body);
1132 else
1133 vect_get_load_cost (first_dr, ncopies,
1134 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1135 || group_size > 1 || slp_node),
1136 &inside_cost, &prologue_cost,
1137 prologue_cost_vec, body_cost_vec, true);
1139 if (dump_enabled_p ())
1140 dump_printf_loc (MSG_NOTE, vect_location,
1141 "vect_model_load_cost: inside_cost = %d, "
1142 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1146 /* Calculate cost of DR's memory access. */
1147 void
1148 vect_get_load_cost (struct data_reference *dr, int ncopies,
1149 bool add_realign_cost, unsigned int *inside_cost,
1150 unsigned int *prologue_cost,
1151 stmt_vector_for_cost *prologue_cost_vec,
1152 stmt_vector_for_cost *body_cost_vec,
1153 bool record_prologue_costs)
1155 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1156 gimple stmt = DR_STMT (dr);
1157 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1159 switch (alignment_support_scheme)
1161 case dr_aligned:
1163 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1164 stmt_info, 0, vect_body);
1166 if (dump_enabled_p ())
1167 dump_printf_loc (MSG_NOTE, vect_location,
1168 "vect_model_load_cost: aligned.\n");
1170 break;
1172 case dr_unaligned_supported:
1174 /* Here, we assign an additional cost for the unaligned load. */
1175 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1176 unaligned_load, stmt_info,
1177 DR_MISALIGNMENT (dr), vect_body);
1179 if (dump_enabled_p ())
1180 dump_printf_loc (MSG_NOTE, vect_location,
1181 "vect_model_load_cost: unaligned supported by "
1182 "hardware.\n");
1184 break;
1186 case dr_explicit_realign:
1188 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1189 vector_load, stmt_info, 0, vect_body);
1190 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1191 vec_perm, stmt_info, 0, vect_body);
1193 /* FIXME: If the misalignment remains fixed across the iterations of
1194 the containing loop, the following cost should be added to the
1195 prologue costs. */
1196 if (targetm.vectorize.builtin_mask_for_load)
1197 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1198 stmt_info, 0, vect_body);
1200 if (dump_enabled_p ())
1201 dump_printf_loc (MSG_NOTE, vect_location,
1202 "vect_model_load_cost: explicit realign\n");
1204 break;
1206 case dr_explicit_realign_optimized:
1208 if (dump_enabled_p ())
1209 dump_printf_loc (MSG_NOTE, vect_location,
1210 "vect_model_load_cost: unaligned software "
1211 "pipelined.\n");
1213 /* Unaligned software pipeline has a load of an address, an initial
1214 load, and possibly a mask operation to "prime" the loop. However,
1215 if this is an access in a group of loads, which provide grouped
1216 access, then the above cost should only be considered for one
1217 access in the group. Inside the loop, there is a load op
1218 and a realignment op. */
1220 if (add_realign_cost && record_prologue_costs)
1222 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1223 vector_stmt, stmt_info,
1224 0, vect_prologue);
1225 if (targetm.vectorize.builtin_mask_for_load)
1226 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1227 vector_stmt, stmt_info,
1228 0, vect_prologue);
1231 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1232 stmt_info, 0, vect_body);
1233 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1234 stmt_info, 0, vect_body);
1236 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_NOTE, vect_location,
1238 "vect_model_load_cost: explicit realign optimized"
1239 "\n");
1241 break;
1244 case dr_unaligned_unsupported:
1246 *inside_cost = VECT_MAX_COST;
1248 if (dump_enabled_p ())
1249 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1250 "vect_model_load_cost: unsupported access.\n");
1251 break;
1254 default:
1255 gcc_unreachable ();
1259 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1260 the loop preheader for the vectorized stmt STMT. */
1262 static void
1263 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1265 if (gsi)
1266 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1267 else
1269 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1270 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1272 if (loop_vinfo)
1274 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1275 basic_block new_bb;
1276 edge pe;
1278 if (nested_in_vect_loop_p (loop, stmt))
1279 loop = loop->inner;
1281 pe = loop_preheader_edge (loop);
1282 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1283 gcc_assert (!new_bb);
1285 else
1287 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1288 basic_block bb;
1289 gimple_stmt_iterator gsi_bb_start;
1291 gcc_assert (bb_vinfo);
1292 bb = BB_VINFO_BB (bb_vinfo);
1293 gsi_bb_start = gsi_after_labels (bb);
1294 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1298 if (dump_enabled_p ())
1300 dump_printf_loc (MSG_NOTE, vect_location,
1301 "created new init_stmt: ");
1302 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1303 dump_printf (MSG_NOTE, "\n");
1307 /* Function vect_init_vector.
1309 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1310 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1311    a vector type, a vector with all elements equal to VAL is created first.
1312 Place the initialization at BSI if it is not NULL. Otherwise, place the
1313 initialization at the loop preheader.
1314 Return the DEF of INIT_STMT.
1315 It will be used in the vectorization of STMT. */
1317 tree
1318 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1320 tree new_var;
1321 gimple init_stmt;
1322 tree vec_oprnd;
1323 tree new_temp;
1325 if (TREE_CODE (type) == VECTOR_TYPE
1326 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1328 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1330 if (CONSTANT_CLASS_P (val))
1331 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1332 else
1334 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1335 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1336 new_temp, val,
1337 NULL_TREE);
1338 vect_init_vector_1 (stmt, init_stmt, gsi);
1339 val = new_temp;
1342 val = build_vector_from_val (type, val);
1345 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1346 init_stmt = gimple_build_assign (new_var, val);
1347 new_temp = make_ssa_name (new_var, init_stmt);
1348 gimple_assign_set_lhs (init_stmt, new_temp);
1349 vect_init_vector_1 (stmt, init_stmt, gsi);
1350 vec_oprnd = gimple_assign_lhs (init_stmt);
1351 return vec_oprnd;
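/* Illustrative usage sketch (hypothetical names, not part of the original
   source): to materialize the scalar constant 5 as a vector of four ints
   in the loop preheader, a caller can do

     tree vec_cst
       = vect_init_vector (stmt, build_int_cst (integer_type_node, 5),
                           v4si_type, NULL);

   which emits roughly "cst_1 = { 5, 5, 5, 5 }" before the loop and
   returns the SSA name of that vector; v4si_type stands for the desired
   vector type here.  */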
1355 /* Function vect_get_vec_def_for_operand.
1357 OP is an operand in STMT. This function returns a (vector) def that will be
1358 used in the vectorized stmt for STMT.
1360 In the case that OP is an SSA_NAME which is defined in the loop, then
1361 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1363 In case OP is an invariant or constant, a new stmt that creates a vector def
1364 needs to be introduced. */
1366 tree
1367 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1369 tree vec_oprnd;
1370 gimple vec_stmt;
1371 gimple def_stmt;
1372 stmt_vec_info def_stmt_info = NULL;
1373 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1374 unsigned int nunits;
1375 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1376 tree def;
1377 enum vect_def_type dt;
1378 bool is_simple_use;
1379 tree vector_type;
1381 if (dump_enabled_p ())
1383 dump_printf_loc (MSG_NOTE, vect_location,
1384 "vect_get_vec_def_for_operand: ");
1385 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1386 dump_printf (MSG_NOTE, "\n");
1389 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1390 &def_stmt, &def, &dt);
1391 gcc_assert (is_simple_use);
1392 if (dump_enabled_p ())
1394 int loc_printed = 0;
1395 if (def)
1397 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1398 loc_printed = 1;
1399 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1400 dump_printf (MSG_NOTE, "\n");
1402 if (def_stmt)
1404 if (loc_printed)
1405 dump_printf (MSG_NOTE, " def_stmt = ");
1406 else
1407 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1408 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1409 dump_printf (MSG_NOTE, "\n");
1413 switch (dt)
1415 /* Case 1: operand is a constant. */
1416 case vect_constant_def:
1418 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1419 gcc_assert (vector_type);
1420 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1422 if (scalar_def)
1423 *scalar_def = op;
1425 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1426 if (dump_enabled_p ())
1427 dump_printf_loc (MSG_NOTE, vect_location,
1428 "Create vector_cst. nunits = %d\n", nunits);
1430 return vect_init_vector (stmt, op, vector_type, NULL);
1433 /* Case 2: operand is defined outside the loop - loop invariant. */
1434 case vect_external_def:
1436 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1437 gcc_assert (vector_type);
1439 if (scalar_def)
1440 *scalar_def = def;
1442 /* Create 'vec_inv = {inv,inv,..,inv}' */
1443 if (dump_enabled_p ())
1444 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1446 return vect_init_vector (stmt, def, vector_type, NULL);
1449 /* Case 3: operand is defined inside the loop. */
1450 case vect_internal_def:
1452 if (scalar_def)
1453 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1455 /* Get the def from the vectorized stmt. */
1456 def_stmt_info = vinfo_for_stmt (def_stmt);
1458 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1459 /* Get vectorized pattern statement. */
1460 if (!vec_stmt
1461 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1462 && !STMT_VINFO_RELEVANT (def_stmt_info))
1463 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1464 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1465 gcc_assert (vec_stmt);
1466 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1467 vec_oprnd = PHI_RESULT (vec_stmt);
1468 else if (is_gimple_call (vec_stmt))
1469 vec_oprnd = gimple_call_lhs (vec_stmt);
1470 else
1471 vec_oprnd = gimple_assign_lhs (vec_stmt);
1472 return vec_oprnd;
1475 /* Case 4: operand is defined by a loop header phi - reduction */
1476 case vect_reduction_def:
1477 case vect_double_reduction_def:
1478 case vect_nested_cycle:
1480 struct loop *loop;
1482 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1483 loop = (gimple_bb (def_stmt))->loop_father;
1485 /* Get the def before the loop */
1486 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1487 return get_initial_def_for_reduction (stmt, op, scalar_def);
1490 /* Case 5: operand is defined by loop-header phi - induction. */
1491 case vect_induction_def:
1493 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1495 /* Get the def from the vectorized stmt. */
1496 def_stmt_info = vinfo_for_stmt (def_stmt);
1497 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1498 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1499 vec_oprnd = PHI_RESULT (vec_stmt);
1500 else
1501 vec_oprnd = gimple_get_lhs (vec_stmt);
1502 return vec_oprnd;
1505 default:
1506 gcc_unreachable ();
1511 /* Function vect_get_vec_def_for_stmt_copy
1513 Return a vector-def for an operand. This function is used when the
1514 vectorized stmt to be created (by the caller to this function) is a "copy"
1515 created in case the vectorized result cannot fit in one vector, and several
1516 copies of the vector-stmt are required. In this case the vector-def is
1517 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1518 of the stmt that defines VEC_OPRND.
1519 DT is the type of the vector def VEC_OPRND.
1521 Context:
1522 In case the vectorization factor (VF) is bigger than the number
1523 of elements that can fit in a vectype (nunits), we have to generate
1524 more than one vector stmt to vectorize the scalar stmt. This situation
1525 arises when there are multiple data-types operated upon in the loop; the
1526 smallest data-type determines the VF, and as a result, when vectorizing
1527 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1528 vector stmt (each computing a vector of 'nunits' results, and together
1529 computing 'VF' results in each iteration). This function is called when
1530 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1531 which VF=16 and nunits=4, so the number of copies required is 4):
1533 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1535 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1536 VS1.1: vx.1 = memref1 VS1.2
1537 VS1.2: vx.2 = memref2 VS1.3
1538 VS1.3: vx.3 = memref3
1540 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1541 VSnew.1: vz1 = vx.1 + ... VSnew.2
1542 VSnew.2: vz2 = vx.2 + ... VSnew.3
1543 VSnew.3: vz3 = vx.3 + ...
1545 The vectorization of S1 is explained in vectorizable_load.
1546 The vectorization of S2:
1547 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1548 the function 'vect_get_vec_def_for_operand' is called to
1549 get the relevant vector-def for each operand of S2. For operand x it
1550 returns the vector-def 'vx.0'.
1552 To create the remaining copies of the vector-stmt (VSnew.j), this
1553 function is called to get the relevant vector-def for each operand. It is
1554 obtained from the respective VS1.j stmt, which is recorded in the
1555 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1557 For example, to obtain the vector-def 'vx.1' in order to create the
1558 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1559 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1560 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1561 and return its def ('vx.1').
1562 Overall, to create the above sequence this function will be called 3 times:
1563 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1564 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1565 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1567 tree
1568 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1570 gimple vec_stmt_for_operand;
1571 stmt_vec_info def_stmt_info;
1573 /* Do nothing; can reuse same def. */
1574 if (dt == vect_external_def || dt == vect_constant_def )
1575 return vec_oprnd;
1577 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1578 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1579 gcc_assert (def_stmt_info);
1580 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1581 gcc_assert (vec_stmt_for_operand);
1582 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1583 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1584 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1585 else
1586 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1587 return vec_oprnd;
1591 /* Get vectorized definitions for the operands to create a copy of an original
1592 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1594 static void
1595 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1596 vec<tree> *vec_oprnds0,
1597 vec<tree> *vec_oprnds1)
1599 tree vec_oprnd = vec_oprnds0->pop ();
1601 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1602 vec_oprnds0->quick_push (vec_oprnd);
1604 if (vec_oprnds1 && vec_oprnds1->length ())
1606 vec_oprnd = vec_oprnds1->pop ();
1607 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1608 vec_oprnds1->quick_push (vec_oprnd);
1613 /* Get vectorized definitions for OP0 and OP1.
1614 REDUC_INDEX is the index of reduction operand in case of reduction,
1615 and -1 otherwise. */
1617 void
1618 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1619 vec<tree> *vec_oprnds0,
1620 vec<tree> *vec_oprnds1,
1621 slp_tree slp_node, int reduc_index)
1623 if (slp_node)
1625 int nops = (op1 == NULL_TREE) ? 1 : 2;
1626 auto_vec<tree> ops (nops);
1627 auto_vec<vec<tree> > vec_defs (nops);
1629 ops.quick_push (op0);
1630 if (op1)
1631 ops.quick_push (op1);
1633 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1635 *vec_oprnds0 = vec_defs[0];
1636 if (op1)
1637 *vec_oprnds1 = vec_defs[1];
1639 else
1641 tree vec_oprnd;
1643 vec_oprnds0->create (1);
1644 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1645 vec_oprnds0->quick_push (vec_oprnd);
1647 if (op1)
1649 vec_oprnds1->create (1);
1650 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1651 vec_oprnds1->quick_push (vec_oprnd);
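/* Illustrative usage sketch (hypothetical caller, not part of the original
   source): for a two-operand statement in the non-SLP case,

     vec<tree> vec_oprnds0 = vNULL, vec_oprnds1 = vNULL;
     vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                        NULL, -1);

   fills each vector with a single vector def obtained via
   vect_get_vec_def_for_operand; defs for further copies are then produced
   with vect_get_vec_defs_for_stmt_copy.  */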
1657 /* Function vect_finish_stmt_generation.
1659 Insert a new stmt. */
1661 void
1662 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1663 gimple_stmt_iterator *gsi)
1665 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1666 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1667 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1669 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1671 if (!gsi_end_p (*gsi)
1672 && gimple_has_mem_ops (vec_stmt))
1674 gimple at_stmt = gsi_stmt (*gsi);
1675 tree vuse = gimple_vuse (at_stmt);
1676 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1678 tree vdef = gimple_vdef (at_stmt);
1679 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1680 /* If we have an SSA vuse and insert a store, update virtual
1681 SSA form to avoid triggering the renamer. Do so only
1682 if we can easily see all uses - which is what almost always
1683 happens with the way vectorized stmts are inserted. */
1684 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1685 && ((is_gimple_assign (vec_stmt)
1686 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1687 || (is_gimple_call (vec_stmt)
1688 && !(gimple_call_flags (vec_stmt)
1689 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1691 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1692 gimple_set_vdef (vec_stmt, new_vdef);
1693 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1697 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1699 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1700 bb_vinfo));
1702 if (dump_enabled_p ())
1704 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1705 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1706 dump_printf (MSG_NOTE, "\n");
1709 gimple_set_location (vec_stmt, gimple_location (stmt));
1711 /* While EH edges will generally prevent vectorization, stmt might
1712 e.g. be in a must-not-throw region. Ensure newly created stmts
1713 that could throw are part of the same region. */
1714 int lp_nr = lookup_stmt_eh_lp (stmt);
1715 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1716 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1719 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1720 a function declaration if the target has a vectorized version
1721 of the function, or NULL_TREE if the function cannot be vectorized. */
1723 tree
1724 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1726 tree fndecl = gimple_call_fndecl (call);
1728 /* We only handle functions that do not read or clobber memory -- i.e.
1729 const or novops ones. */
1730 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1731 return NULL_TREE;
1733 if (!fndecl
1734 || TREE_CODE (fndecl) != FUNCTION_DECL
1735 || !DECL_BUILT_IN (fndecl))
1736 return NULL_TREE;
1738 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1739 vectype_in);
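/* Illustrative aside (not part of the original source): for a call to the
   sqrt built-in (const, e.g. with -fno-math-errno) with V2DF input and
   output vector types, this returns the declaration of the target's
   vectorized sqrt via targetm.vectorize.builtin_vectorized_function, or
   NULL_TREE if the target provides no such version.  */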
1743 static tree permute_vec_elements (tree, tree, tree, gimple,
1744 gimple_stmt_iterator *);
1747 /* Function vectorizable_mask_load_store.
1749 Check if STMT performs a conditional load or store that can be vectorized.
1750 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1751 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1752 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1754 static bool
1755 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1756 gimple *vec_stmt, slp_tree slp_node)
1758 tree vec_dest = NULL;
1759 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1760 stmt_vec_info prev_stmt_info;
1761 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1762 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1763 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1764 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1765 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1766 tree elem_type;
1767 gimple new_stmt;
1768 tree dummy;
1769 tree dataref_ptr = NULL_TREE;
1770 gimple ptr_incr;
1771 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1772 int ncopies;
1773 int i, j;
1774 bool inv_p;
1775 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1776 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1777 int gather_scale = 1;
1778 enum vect_def_type gather_dt = vect_unknown_def_type;
1779 bool is_store;
1780 tree mask;
1781 gimple def_stmt;
1782 tree def;
1783 enum vect_def_type dt;
1785 if (slp_node != NULL)
1786 return false;
1788 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1789 gcc_assert (ncopies >= 1);
1791 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1792 mask = gimple_call_arg (stmt, 2);
1793 if (TYPE_PRECISION (TREE_TYPE (mask))
1794 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1795 return false;
1797 /* FORNOW. This restriction should be relaxed. */
1798 if (nested_in_vect_loop && ncopies > 1)
1800 if (dump_enabled_p ())
1801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1802 "multiple types in nested loop.");
1803 return false;
1806 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1807 return false;
1809 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1810 return false;
1812 if (!STMT_VINFO_DATA_REF (stmt_info))
1813 return false;
1815 elem_type = TREE_TYPE (vectype);
1817 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1818 return false;
1820 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1821 return false;
1823 if (STMT_VINFO_GATHER_P (stmt_info))
1825 gimple def_stmt;
1826 tree def;
1827 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1828 &gather_off, &gather_scale);
1829 gcc_assert (gather_decl);
1830 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1831 &def_stmt, &def, &gather_dt,
1832 &gather_off_vectype))
1834 if (dump_enabled_p ())
1835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1836 "gather index use not simple.");
1837 return false;
1840 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1841 tree masktype
1842 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1843 if (TREE_CODE (masktype) == INTEGER_TYPE)
1845 if (dump_enabled_p ())
1846 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1847 "masked gather with integer mask not supported.");
1848 return false;
1851 else if (tree_int_cst_compare (nested_in_vect_loop
1852 ? STMT_VINFO_DR_STEP (stmt_info)
1853 : DR_STEP (dr), size_zero_node) <= 0)
1854 return false;
1855 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1856 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1857 return false;
1859 if (TREE_CODE (mask) != SSA_NAME)
1860 return false;
1862 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1863 &def_stmt, &def, &dt))
1864 return false;
1866 if (is_store)
1868 tree rhs = gimple_call_arg (stmt, 3);
1869 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1870 &def_stmt, &def, &dt))
1871 return false;
1874 if (!vec_stmt) /* transformation not required. */
1876 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1877 if (is_store)
1878 vect_model_store_cost (stmt_info, ncopies, false, dt,
1879 NULL, NULL, NULL);
1880 else
1881 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1882 return true;
1885 /** Transform. **/
1887 if (STMT_VINFO_GATHER_P (stmt_info))
1889 tree vec_oprnd0 = NULL_TREE, op;
1890 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1891 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1892 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1893 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1894 tree mask_perm_mask = NULL_TREE;
1895 edge pe = loop_preheader_edge (loop);
1896 gimple_seq seq;
1897 basic_block new_bb;
1898 enum { NARROW, NONE, WIDEN } modifier;
1899 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1901 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1902 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1903 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1904 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1905 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1906 scaletype = TREE_VALUE (arglist);
1907 gcc_checking_assert (types_compatible_p (srctype, rettype)
1908 && types_compatible_p (srctype, masktype));
1910 if (nunits == gather_off_nunits)
1911 modifier = NONE;
1912 else if (nunits == gather_off_nunits / 2)
1914 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1915 modifier = WIDEN;
1917 for (i = 0; i < gather_off_nunits; ++i)
1918 sel[i] = i | nunits;
1920 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
1921 gcc_assert (perm_mask != NULL_TREE);
1923 else if (nunits == gather_off_nunits * 2)
1925 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1926 modifier = NARROW;
1928 for (i = 0; i < nunits; ++i)
1929 sel[i] = i < gather_off_nunits
1930 ? i : i + nunits - gather_off_nunits;
1932 perm_mask = vect_gen_perm_mask (vectype, sel);
1933 gcc_assert (perm_mask != NULL_TREE);
1934 ncopies *= 2;
1935 for (i = 0; i < nunits; ++i)
1936 sel[i] = i | gather_off_nunits;
1937 mask_perm_mask = vect_gen_perm_mask (masktype, sel);
1938 gcc_assert (mask_perm_mask != NULL_TREE);
1940 else
1941 gcc_unreachable ();
1943 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1945 ptr = fold_convert (ptrtype, gather_base);
1946 if (!is_gimple_min_invariant (ptr))
1948 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1949 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1950 gcc_assert (!new_bb);
1953 scale = build_int_cst (scaletype, gather_scale);
1955 prev_stmt_info = NULL;
1956 for (j = 0; j < ncopies; ++j)
1958 if (modifier == WIDEN && (j & 1))
1959 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1960 perm_mask, stmt, gsi);
1961 else if (j == 0)
1962 op = vec_oprnd0
1963 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1964 else
1965 op = vec_oprnd0
1966 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1968 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1970 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1971 == TYPE_VECTOR_SUBPARTS (idxtype));
1972 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1973 var = make_ssa_name (var, NULL);
1974 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1975 new_stmt
1976 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
1977 op, NULL_TREE);
1978 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1979 op = var;
1982 if (mask_perm_mask && (j & 1))
1983 mask_op = permute_vec_elements (mask_op, mask_op,
1984 mask_perm_mask, stmt, gsi);
1985 else
1987 if (j == 0)
1988 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1989 else
1991 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1992 &def_stmt, &def, &dt);
1993 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1996 mask_op = vec_mask;
1997 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1999 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2000 == TYPE_VECTOR_SUBPARTS (masktype));
2001 var = vect_get_new_vect_var (masktype, vect_simple_var,
2002 NULL);
2003 var = make_ssa_name (var, NULL);
2004 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2005 new_stmt
2006 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
2007 mask_op, NULL_TREE);
2008 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2009 mask_op = var;
2013 new_stmt
2014 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2015 scale);
2017 if (!useless_type_conversion_p (vectype, rettype))
2019 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2020 == TYPE_VECTOR_SUBPARTS (rettype));
2021 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2022 op = make_ssa_name (var, new_stmt);
2023 gimple_call_set_lhs (new_stmt, op);
2024 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2025 var = make_ssa_name (vec_dest, NULL);
2026 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2027 new_stmt
2028 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
2029 NULL_TREE);
2031 else
2033 var = make_ssa_name (vec_dest, new_stmt);
2034 gimple_call_set_lhs (new_stmt, var);
2037 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2039 if (modifier == NARROW)
2041 if ((j & 1) == 0)
2043 prev_res = var;
2044 continue;
2046 var = permute_vec_elements (prev_res, var,
2047 perm_mask, stmt, gsi);
2048 new_stmt = SSA_NAME_DEF_STMT (var);
2051 if (prev_stmt_info == NULL)
2052 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2053 else
2054 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2055 prev_stmt_info = vinfo_for_stmt (new_stmt);
2058 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2059 from the IL. */
2060 tree lhs = gimple_call_lhs (stmt);
2061 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2062 set_vinfo_for_stmt (new_stmt, stmt_info);
2063 set_vinfo_for_stmt (stmt, NULL);
2064 STMT_VINFO_STMT (stmt_info) = new_stmt;
2065 gsi_replace (gsi, new_stmt, true);
2066 return true;
2068 else if (is_store)
2070 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2071 prev_stmt_info = NULL;
2072 for (i = 0; i < ncopies; i++)
2074 unsigned align, misalign;
2076 if (i == 0)
2078 tree rhs = gimple_call_arg (stmt, 3);
2079 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2080 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2081 /* We should have caught mismatched types earlier. */
2082 gcc_assert (useless_type_conversion_p (vectype,
2083 TREE_TYPE (vec_rhs)));
2084 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2085 NULL_TREE, &dummy, gsi,
2086 &ptr_incr, false, &inv_p);
2087 gcc_assert (!inv_p);
2089 else
2091 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2092 &def, &dt);
2093 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2094 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2095 &def, &dt);
2096 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2097 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2098 TYPE_SIZE_UNIT (vectype));
2101 align = TYPE_ALIGN_UNIT (vectype);
2102 if (aligned_access_p (dr))
2103 misalign = 0;
2104 else if (DR_MISALIGNMENT (dr) == -1)
2106 align = TYPE_ALIGN_UNIT (elem_type);
2107 misalign = 0;
2109 else
2110 misalign = DR_MISALIGNMENT (dr);
2111 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2112 misalign);
2113 new_stmt
2114 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2115 gimple_call_arg (stmt, 1),
2116 vec_mask, vec_rhs);
2117 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2118 if (i == 0)
2119 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2120 else
2121 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2122 prev_stmt_info = vinfo_for_stmt (new_stmt);
2125 else
2127 tree vec_mask = NULL_TREE;
2128 prev_stmt_info = NULL;
2129 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2130 for (i = 0; i < ncopies; i++)
2132 unsigned align, misalign;
2134 if (i == 0)
2136 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2137 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2138 NULL_TREE, &dummy, gsi,
2139 &ptr_incr, false, &inv_p);
2140 gcc_assert (!inv_p);
2142 else
2144 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2145 &def, &dt);
2146 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2147 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2148 TYPE_SIZE_UNIT (vectype));
2151 align = TYPE_ALIGN_UNIT (vectype);
2152 if (aligned_access_p (dr))
2153 misalign = 0;
2154 else if (DR_MISALIGNMENT (dr) == -1)
2156 align = TYPE_ALIGN_UNIT (elem_type);
2157 misalign = 0;
2159 else
2160 misalign = DR_MISALIGNMENT (dr);
2161 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2162 misalign);
2163 new_stmt
2164 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2165 gimple_call_arg (stmt, 1),
2166 vec_mask);
2167 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest, NULL));
2168 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2169 if (i == 0)
2170 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2171 else
2172 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2173 prev_stmt_info = vinfo_for_stmt (new_stmt);
2177 if (!is_store)
2179 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2180 from the IL. */
2181 tree lhs = gimple_call_lhs (stmt);
2182 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2183 set_vinfo_for_stmt (new_stmt, stmt_info);
2184 set_vinfo_for_stmt (stmt, NULL);
2185 STMT_VINFO_STMT (stmt_info) = new_stmt;
2186 gsi_replace (gsi, new_stmt, true);
2189 return true;
2193 /* Function vectorizable_call.
2195 Check if STMT performs a function call that can be vectorized.
2196 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2197 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2198 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
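/* A rough sketch of the NONE-modifier path (the builtin name below is
   hypothetical): if vectorizable_function finds a target builtin for the
   scalar call on the chosen vector types, a statement such as

     a[i] = copysignf (b[i], c[i]);

   is replaced, per copy, by

     vect_a = __builtin_vect_copysign (vect_b, vect_c);

   For the NARROW modifier two input vectors are passed per argument
   instead, and the WIDEN modifier is currently rejected below.  */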
2200 static bool
2201 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2202 slp_tree slp_node)
2204 tree vec_dest;
2205 tree scalar_dest;
2206 tree op, type;
2207 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2208 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2209 tree vectype_out, vectype_in;
2210 int nunits_in;
2211 int nunits_out;
2212 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2213 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2214 tree fndecl, new_temp, def, rhs_type;
2215 gimple def_stmt;
2216 enum vect_def_type dt[3]
2217 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2218 gimple new_stmt = NULL;
2219 int ncopies, j;
2220 vec<tree> vargs = vNULL;
2221 enum { NARROW, NONE, WIDEN } modifier;
2222 size_t i, nargs;
2223 tree lhs;
2225 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2226 return false;
2228 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2229 return false;
2231 /* Is STMT a vectorizable call? */
2232 if (!is_gimple_call (stmt))
2233 return false;
2235 if (gimple_call_internal_p (stmt)
2236 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2237 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2238 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2239 slp_node);
2241 if (gimple_call_lhs (stmt) == NULL_TREE
2242 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2243 return false;
2245 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2247 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2249 /* Process function arguments. */
2250 rhs_type = NULL_TREE;
2251 vectype_in = NULL_TREE;
2252 nargs = gimple_call_num_args (stmt);
2254 /* Bail out if the function has more than three arguments; we do not have
2255 interesting builtin functions to vectorize with more than two arguments
2256 except for fma.  Calls with no arguments are not handled either. */
2257 if (nargs == 0 || nargs > 3)
2258 return false;
2260 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2261 if (gimple_call_internal_p (stmt)
2262 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2264 nargs = 0;
2265 rhs_type = unsigned_type_node;
2268 for (i = 0; i < nargs; i++)
2270 tree opvectype;
2272 op = gimple_call_arg (stmt, i);
2274 /* We can only handle calls with arguments of the same type. */
2275 if (rhs_type
2276 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2278 if (dump_enabled_p ())
2279 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2280 "argument types differ.\n");
2281 return false;
2283 if (!rhs_type)
2284 rhs_type = TREE_TYPE (op);
2286 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2287 &def_stmt, &def, &dt[i], &opvectype))
2289 if (dump_enabled_p ())
2290 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2291 "use not simple.\n");
2292 return false;
2295 if (!vectype_in)
2296 vectype_in = opvectype;
2297 else if (opvectype
2298 && opvectype != vectype_in)
2300 if (dump_enabled_p ())
2301 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2302 "argument vector types differ.\n");
2303 return false;
2306 /* If all arguments are external or constant defs, use a vector type with
2307 the same size as the output vector type. */
2308 if (!vectype_in)
2309 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2310 if (vec_stmt)
2311 gcc_assert (vectype_in);
2312 if (!vectype_in)
2314 if (dump_enabled_p ())
2316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2317 "no vectype for scalar type ");
2318 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2319 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2322 return false;
2325 /* FORNOW */
2326 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2327 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2328 if (nunits_in == nunits_out / 2)
2329 modifier = NARROW;
2330 else if (nunits_out == nunits_in)
2331 modifier = NONE;
2332 else if (nunits_out == nunits_in / 2)
2333 modifier = WIDEN;
2334 else
2335 return false;
2337 /* For now, we only vectorize functions if a target specific builtin
2338 is available. TODO -- in some cases, it might be profitable to
2339 insert the calls for pieces of the vector, in order to be able
2340 to vectorize other operations in the loop. */
2341 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2342 if (fndecl == NULL_TREE)
2344 if (gimple_call_internal_p (stmt)
2345 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2346 && !slp_node
2347 && loop_vinfo
2348 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2349 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2350 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2351 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2353 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2354 { 0, 1, 2, ... vf - 1 } vector. */
2355 gcc_assert (nargs == 0);
2357 else
2359 if (dump_enabled_p ())
2360 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2361 "function is not vectorizable.\n");
2362 return false;
2366 gcc_assert (!gimple_vuse (stmt));
2368 if (slp_node || PURE_SLP_STMT (stmt_info))
2369 ncopies = 1;
2370 else if (modifier == NARROW)
2371 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2372 else
2373 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2375 /* Sanity check: make sure that at least one copy of the vectorized stmt
2376 needs to be generated. */
2377 gcc_assert (ncopies >= 1);
2379 if (!vec_stmt) /* transformation not required. */
2381 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2382 if (dump_enabled_p ())
2383 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2384 "\n");
2385 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2386 return true;
2389 /** Transform. **/
2391 if (dump_enabled_p ())
2392 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2394 /* Handle def. */
2395 scalar_dest = gimple_call_lhs (stmt);
2396 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2398 prev_stmt_info = NULL;
2399 switch (modifier)
2401 case NONE:
2402 for (j = 0; j < ncopies; ++j)
2404 /* Build argument list for the vectorized call. */
2405 if (j == 0)
2406 vargs.create (nargs);
2407 else
2408 vargs.truncate (0);
2410 if (slp_node)
2412 auto_vec<vec<tree> > vec_defs (nargs);
2413 vec<tree> vec_oprnds0;
2415 for (i = 0; i < nargs; i++)
2416 vargs.quick_push (gimple_call_arg (stmt, i));
2417 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2418 vec_oprnds0 = vec_defs[0];
2420 /* Arguments are ready. Create the new vector stmt. */
2421 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2423 size_t k;
2424 for (k = 0; k < nargs; k++)
2426 vec<tree> vec_oprndsk = vec_defs[k];
2427 vargs[k] = vec_oprndsk[i];
2429 new_stmt = gimple_build_call_vec (fndecl, vargs);
2430 new_temp = make_ssa_name (vec_dest, new_stmt);
2431 gimple_call_set_lhs (new_stmt, new_temp);
2432 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2433 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2436 for (i = 0; i < nargs; i++)
2438 vec<tree> vec_oprndsi = vec_defs[i];
2439 vec_oprndsi.release ();
2441 continue;
2444 for (i = 0; i < nargs; i++)
2446 op = gimple_call_arg (stmt, i);
2447 if (j == 0)
2448 vec_oprnd0
2449 = vect_get_vec_def_for_operand (op, stmt, NULL);
2450 else
2452 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2453 vec_oprnd0
2454 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2457 vargs.quick_push (vec_oprnd0);
2460 if (gimple_call_internal_p (stmt)
2461 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2463 tree *v = XALLOCAVEC (tree, nunits_out);
2464 int k;
2465 for (k = 0; k < nunits_out; ++k)
2466 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2467 tree cst = build_vector (vectype_out, v);
2468 tree new_var
2469 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2470 gimple init_stmt = gimple_build_assign (new_var, cst);
2471 new_temp = make_ssa_name (new_var, init_stmt);
2472 gimple_assign_set_lhs (init_stmt, new_temp);
2473 vect_init_vector_1 (stmt, init_stmt, NULL);
2474 new_temp = make_ssa_name (vec_dest, NULL);
2475 new_stmt = gimple_build_assign (new_temp,
2476 gimple_assign_lhs (init_stmt));
2478 else
2480 new_stmt = gimple_build_call_vec (fndecl, vargs);
2481 new_temp = make_ssa_name (vec_dest, new_stmt);
2482 gimple_call_set_lhs (new_stmt, new_temp);
2484 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2486 if (j == 0)
2487 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2488 else
2489 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2491 prev_stmt_info = vinfo_for_stmt (new_stmt);
2494 break;
2496 case NARROW:
2497 for (j = 0; j < ncopies; ++j)
2499 /* Build argument list for the vectorized call. */
2500 if (j == 0)
2501 vargs.create (nargs * 2);
2502 else
2503 vargs.truncate (0);
2505 if (slp_node)
2507 auto_vec<vec<tree> > vec_defs (nargs);
2508 vec<tree> vec_oprnds0;
2510 for (i = 0; i < nargs; i++)
2511 vargs.quick_push (gimple_call_arg (stmt, i));
2512 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2513 vec_oprnds0 = vec_defs[0];
2515 /* Arguments are ready. Create the new vector stmt. */
2516 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2518 size_t k;
2519 vargs.truncate (0);
2520 for (k = 0; k < nargs; k++)
2522 vec<tree> vec_oprndsk = vec_defs[k];
2523 vargs.quick_push (vec_oprndsk[i]);
2524 vargs.quick_push (vec_oprndsk[i + 1]);
2526 new_stmt = gimple_build_call_vec (fndecl, vargs);
2527 new_temp = make_ssa_name (vec_dest, new_stmt);
2528 gimple_call_set_lhs (new_stmt, new_temp);
2529 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2530 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2533 for (i = 0; i < nargs; i++)
2535 vec<tree> vec_oprndsi = vec_defs[i];
2536 vec_oprndsi.release ();
2538 continue;
2541 for (i = 0; i < nargs; i++)
2543 op = gimple_call_arg (stmt, i);
2544 if (j == 0)
2546 vec_oprnd0
2547 = vect_get_vec_def_for_operand (op, stmt, NULL);
2548 vec_oprnd1
2549 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2551 else
2553 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2554 vec_oprnd0
2555 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2556 vec_oprnd1
2557 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2560 vargs.quick_push (vec_oprnd0);
2561 vargs.quick_push (vec_oprnd1);
2564 new_stmt = gimple_build_call_vec (fndecl, vargs);
2565 new_temp = make_ssa_name (vec_dest, new_stmt);
2566 gimple_call_set_lhs (new_stmt, new_temp);
2567 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2569 if (j == 0)
2570 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2571 else
2572 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2574 prev_stmt_info = vinfo_for_stmt (new_stmt);
2577 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2579 break;
2581 case WIDEN:
2582 /* No current target implements this case. */
2583 return false;
2586 vargs.release ();
2588 /* The call in STMT might prevent it from being removed in DCE.
2589 We cannot, however, remove it here, due to the way the SSA name
2590 it defines is mapped to the new definition. So just replace the
2591 rhs of the statement with something harmless. */
2593 if (slp_node)
2594 return true;
2596 type = TREE_TYPE (scalar_dest);
2597 if (is_pattern_stmt_p (stmt_info))
2598 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2599 else
2600 lhs = gimple_call_lhs (stmt);
2601 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2602 set_vinfo_for_stmt (new_stmt, stmt_info);
2603 set_vinfo_for_stmt (stmt, NULL);
2604 STMT_VINFO_STMT (stmt_info) = new_stmt;
2605 gsi_replace (gsi, new_stmt, false);
2607 return true;
2611 struct simd_call_arg_info
2613 tree vectype;
2614 tree op;
2615 enum vect_def_type dt;
2616 HOST_WIDE_INT linear_step;
2617 unsigned int align;
2620 /* Function vectorizable_simd_clone_call.
2622 Check if STMT performs a function call that can be vectorized
2623 by calling a simd clone of the function.
2624 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2625 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2626 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
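/* Rough example (the clone syntax below is only schematic): for a function
   with simd clones attached to its cgraph node, a call

     y[i] = foo (x[i]);

   in a vectorizable loop can be replaced by a call to the best-matching
   entry of NODE->simd_clones, conceptually

     vect_y = foo.simdclone (vect_x);

   where the clone is chosen by the badness computation below, based on its
   simdlen, its uniform/linear argument kinds and its alignment demands.  */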
2628 static bool
2629 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2630 gimple *vec_stmt, slp_tree slp_node)
2632 tree vec_dest;
2633 tree scalar_dest;
2634 tree op, type;
2635 tree vec_oprnd0 = NULL_TREE;
2636 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2637 tree vectype;
2638 unsigned int nunits;
2639 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2640 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2641 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2642 tree fndecl, new_temp, def;
2643 gimple def_stmt;
2644 gimple new_stmt = NULL;
2645 int ncopies, j;
2646 vec<simd_call_arg_info> arginfo = vNULL;
2647 vec<tree> vargs = vNULL;
2648 size_t i, nargs;
2649 tree lhs, rtype, ratype;
2650 vec<constructor_elt, va_gc> *ret_ctor_elts;
2652 /* Is STMT a vectorizable call? */
2653 if (!is_gimple_call (stmt))
2654 return false;
2656 fndecl = gimple_call_fndecl (stmt);
2657 if (fndecl == NULL_TREE)
2658 return false;
2660 struct cgraph_node *node = cgraph_node::get (fndecl);
2661 if (node == NULL || node->simd_clones == NULL)
2662 return false;
2664 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2665 return false;
2667 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2668 return false;
2670 if (gimple_call_lhs (stmt)
2671 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2672 return false;
2674 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2676 vectype = STMT_VINFO_VECTYPE (stmt_info);
2678 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2679 return false;
2681 /* FORNOW */
2682 if (slp_node || PURE_SLP_STMT (stmt_info))
2683 return false;
2685 /* Process function arguments. */
2686 nargs = gimple_call_num_args (stmt);
2688 /* Bail out if the function has zero arguments. */
2689 if (nargs == 0)
2690 return false;
2692 arginfo.create (nargs);
2694 for (i = 0; i < nargs; i++)
2696 simd_call_arg_info thisarginfo;
2697 affine_iv iv;
2699 thisarginfo.linear_step = 0;
2700 thisarginfo.align = 0;
2701 thisarginfo.op = NULL_TREE;
2703 op = gimple_call_arg (stmt, i);
2704 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2705 &def_stmt, &def, &thisarginfo.dt,
2706 &thisarginfo.vectype)
2707 || thisarginfo.dt == vect_uninitialized_def)
2709 if (dump_enabled_p ())
2710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2711 "use not simple.\n");
2712 arginfo.release ();
2713 return false;
2716 if (thisarginfo.dt == vect_constant_def
2717 || thisarginfo.dt == vect_external_def)
2718 gcc_assert (thisarginfo.vectype == NULL_TREE);
2719 else
2720 gcc_assert (thisarginfo.vectype != NULL_TREE);
2722 if (thisarginfo.dt != vect_constant_def
2723 && thisarginfo.dt != vect_external_def
2724 && loop_vinfo
2725 && TREE_CODE (op) == SSA_NAME
2726 && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
2727 && tree_fits_shwi_p (iv.step))
2729 thisarginfo.linear_step = tree_to_shwi (iv.step);
2730 thisarginfo.op = iv.base;
2732 else if ((thisarginfo.dt == vect_constant_def
2733 || thisarginfo.dt == vect_external_def)
2734 && POINTER_TYPE_P (TREE_TYPE (op)))
2735 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2737 arginfo.quick_push (thisarginfo);
2740 unsigned int badness = 0;
2741 struct cgraph_node *bestn = NULL;
2742 if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
2743 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
2744 else
2745 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2746 n = n->simdclone->next_clone)
2748 unsigned int this_badness = 0;
2749 if (n->simdclone->simdlen
2750 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2751 || n->simdclone->nargs != nargs)
2752 continue;
2753 if (n->simdclone->simdlen
2754 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2755 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2756 - exact_log2 (n->simdclone->simdlen)) * 1024;
2757 if (n->simdclone->inbranch)
2758 this_badness += 2048;
2759 int target_badness = targetm.simd_clone.usable (n);
2760 if (target_badness < 0)
2761 continue;
2762 this_badness += target_badness * 512;
2763 /* FORNOW: Have to add code to add the mask argument. */
2764 if (n->simdclone->inbranch)
2765 continue;
2766 for (i = 0; i < nargs; i++)
2768 switch (n->simdclone->args[i].arg_type)
2770 case SIMD_CLONE_ARG_TYPE_VECTOR:
2771 if (!useless_type_conversion_p
2772 (n->simdclone->args[i].orig_type,
2773 TREE_TYPE (gimple_call_arg (stmt, i))))
2774 i = -1;
2775 else if (arginfo[i].dt == vect_constant_def
2776 || arginfo[i].dt == vect_external_def
2777 || arginfo[i].linear_step)
2778 this_badness += 64;
2779 break;
2780 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2781 if (arginfo[i].dt != vect_constant_def
2782 && arginfo[i].dt != vect_external_def)
2783 i = -1;
2784 break;
2785 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2786 if (arginfo[i].dt == vect_constant_def
2787 || arginfo[i].dt == vect_external_def
2788 || (arginfo[i].linear_step
2789 != n->simdclone->args[i].linear_step))
2790 i = -1;
2791 break;
2792 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2793 /* FORNOW */
2794 i = -1;
2795 break;
2796 case SIMD_CLONE_ARG_TYPE_MASK:
2797 gcc_unreachable ();
2799 if (i == (size_t) -1)
2800 break;
2801 if (n->simdclone->args[i].alignment > arginfo[i].align)
2803 i = -1;
2804 break;
2806 if (arginfo[i].align)
2807 this_badness += (exact_log2 (arginfo[i].align)
2808 - exact_log2 (n->simdclone->args[i].alignment));
2810 if (i == (size_t) -1)
2811 continue;
2812 if (bestn == NULL || this_badness < badness)
2814 bestn = n;
2815 badness = this_badness;
2819 if (bestn == NULL)
2821 arginfo.release ();
2822 return false;
2825 for (i = 0; i < nargs; i++)
2826 if ((arginfo[i].dt == vect_constant_def
2827 || arginfo[i].dt == vect_external_def)
2828 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2830 arginfo[i].vectype
2831 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2832 i)));
2833 if (arginfo[i].vectype == NULL
2834 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2835 > bestn->simdclone->simdlen))
2837 arginfo.release ();
2838 return false;
2842 fndecl = bestn->decl;
2843 nunits = bestn->simdclone->simdlen;
2844 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2846 /* If the function isn't const, only allow it in simd loops where the user
2847 has asserted that at least nunits consecutive iterations can be
2848 performed using SIMD instructions. */
2849 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2850 && gimple_vuse (stmt))
2852 arginfo.release ();
2853 return false;
2856 /* Sanity check: make sure that at least one copy of the vectorized stmt
2857 needs to be generated. */
2858 gcc_assert (ncopies >= 1);
2860 if (!vec_stmt) /* transformation not required. */
2862 STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
2863 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2864 if (dump_enabled_p ())
2865 dump_printf_loc (MSG_NOTE, vect_location,
2866 "=== vectorizable_simd_clone_call ===\n");
2867 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2868 arginfo.release ();
2869 return true;
2872 /** Transform. **/
2874 if (dump_enabled_p ())
2875 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2877 /* Handle def. */
2878 scalar_dest = gimple_call_lhs (stmt);
2879 vec_dest = NULL_TREE;
2880 rtype = NULL_TREE;
2881 ratype = NULL_TREE;
2882 if (scalar_dest)
2884 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2885 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2886 if (TREE_CODE (rtype) == ARRAY_TYPE)
2888 ratype = rtype;
2889 rtype = TREE_TYPE (ratype);
2893 prev_stmt_info = NULL;
2894 for (j = 0; j < ncopies; ++j)
2896 /* Build argument list for the vectorized call. */
2897 if (j == 0)
2898 vargs.create (nargs);
2899 else
2900 vargs.truncate (0);
2902 for (i = 0; i < nargs; i++)
2904 unsigned int k, l, m, o;
2905 tree atype;
2906 op = gimple_call_arg (stmt, i);
2907 switch (bestn->simdclone->args[i].arg_type)
2909 case SIMD_CLONE_ARG_TYPE_VECTOR:
2910 atype = bestn->simdclone->args[i].vector_type;
2911 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2912 for (m = j * o; m < (j + 1) * o; m++)
2914 if (TYPE_VECTOR_SUBPARTS (atype)
2915 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2917 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2918 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2919 / TYPE_VECTOR_SUBPARTS (atype));
2920 gcc_assert ((k & (k - 1)) == 0);
2921 if (m == 0)
2922 vec_oprnd0
2923 = vect_get_vec_def_for_operand (op, stmt, NULL);
2924 else
2926 vec_oprnd0 = arginfo[i].op;
2927 if ((m & (k - 1)) == 0)
2928 vec_oprnd0
2929 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2930 vec_oprnd0);
2932 arginfo[i].op = vec_oprnd0;
2933 vec_oprnd0
2934 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2935 size_int (prec),
2936 bitsize_int ((m & (k - 1)) * prec));
2937 new_stmt
2938 = gimple_build_assign (make_ssa_name (atype, NULL),
2939 vec_oprnd0);
2940 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2941 vargs.safe_push (gimple_assign_lhs (new_stmt));
2943 else
2945 k = (TYPE_VECTOR_SUBPARTS (atype)
2946 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2947 gcc_assert ((k & (k - 1)) == 0);
2948 vec<constructor_elt, va_gc> *ctor_elts;
2949 if (k != 1)
2950 vec_alloc (ctor_elts, k);
2951 else
2952 ctor_elts = NULL;
2953 for (l = 0; l < k; l++)
2955 if (m == 0 && l == 0)
2956 vec_oprnd0
2957 = vect_get_vec_def_for_operand (op, stmt, NULL);
2958 else
2959 vec_oprnd0
2960 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2961 arginfo[i].op);
2962 arginfo[i].op = vec_oprnd0;
2963 if (k == 1)
2964 break;
2965 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
2966 vec_oprnd0);
2968 if (k == 1)
2969 vargs.safe_push (vec_oprnd0);
2970 else
2972 vec_oprnd0 = build_constructor (atype, ctor_elts);
2973 new_stmt
2974 = gimple_build_assign (make_ssa_name (atype, NULL),
2975 vec_oprnd0);
2976 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2977 vargs.safe_push (gimple_assign_lhs (new_stmt));
2981 break;
2982 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2983 vargs.safe_push (op);
2984 break;
2985 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2986 if (j == 0)
2988 gimple_seq stmts;
2989 arginfo[i].op
2990 = force_gimple_operand (arginfo[i].op, &stmts, true,
2991 NULL_TREE);
2992 if (stmts != NULL)
2994 basic_block new_bb;
2995 edge pe = loop_preheader_edge (loop);
2996 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2997 gcc_assert (!new_bb);
2999 tree phi_res = copy_ssa_name (op, NULL);
3000 gimple new_phi = create_phi_node (phi_res, loop->header);
3001 set_vinfo_for_stmt (new_phi,
3002 new_stmt_vec_info (new_phi, loop_vinfo,
3003 NULL));
3004 add_phi_arg (new_phi, arginfo[i].op,
3005 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3006 enum tree_code code
3007 = POINTER_TYPE_P (TREE_TYPE (op))
3008 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3009 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3010 ? sizetype : TREE_TYPE (op);
3011 widest_int cst
3012 = wi::mul (bestn->simdclone->args[i].linear_step,
3013 ncopies * nunits);
3014 tree tcst = wide_int_to_tree (type, cst);
3015 tree phi_arg = copy_ssa_name (op, NULL);
3016 new_stmt = gimple_build_assign_with_ops (code, phi_arg,
3017 phi_res, tcst);
3018 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3019 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3020 set_vinfo_for_stmt (new_stmt,
3021 new_stmt_vec_info (new_stmt, loop_vinfo,
3022 NULL));
3023 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3024 UNKNOWN_LOCATION);
3025 arginfo[i].op = phi_res;
3026 vargs.safe_push (phi_res);
3028 else
3030 enum tree_code code
3031 = POINTER_TYPE_P (TREE_TYPE (op))
3032 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3033 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3034 ? sizetype : TREE_TYPE (op);
3035 widest_int cst
3036 = wi::mul (bestn->simdclone->args[i].linear_step,
3037 j * nunits);
3038 tree tcst = wide_int_to_tree (type, cst);
3039 new_temp = make_ssa_name (TREE_TYPE (op), NULL);
3040 new_stmt
3041 = gimple_build_assign_with_ops (code, new_temp,
3042 arginfo[i].op, tcst);
3043 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3044 vargs.safe_push (new_temp);
3046 break;
3047 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3048 default:
3049 gcc_unreachable ();
3053 new_stmt = gimple_build_call_vec (fndecl, vargs);
3054 if (vec_dest)
3056 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3057 if (ratype)
3058 new_temp = create_tmp_var (ratype, NULL);
3059 else if (TYPE_VECTOR_SUBPARTS (vectype)
3060 == TYPE_VECTOR_SUBPARTS (rtype))
3061 new_temp = make_ssa_name (vec_dest, new_stmt);
3062 else
3063 new_temp = make_ssa_name (rtype, new_stmt);
3064 gimple_call_set_lhs (new_stmt, new_temp);
3066 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3068 if (vec_dest)
3070 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3072 unsigned int k, l;
3073 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3074 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3075 gcc_assert ((k & (k - 1)) == 0);
3076 for (l = 0; l < k; l++)
3078 tree t;
3079 if (ratype)
3081 t = build_fold_addr_expr (new_temp);
3082 t = build2 (MEM_REF, vectype, t,
3083 build_int_cst (TREE_TYPE (t),
3084 l * prec / BITS_PER_UNIT));
3086 else
3087 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3088 size_int (prec), bitsize_int (l * prec));
3089 new_stmt
3090 = gimple_build_assign (make_ssa_name (vectype, NULL), t);
3091 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3092 if (j == 0 && l == 0)
3093 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3094 else
3095 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3097 prev_stmt_info = vinfo_for_stmt (new_stmt);
3100 if (ratype)
3102 tree clobber = build_constructor (ratype, NULL);
3103 TREE_THIS_VOLATILE (clobber) = 1;
3104 new_stmt = gimple_build_assign (new_temp, clobber);
3105 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3107 continue;
3109 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3111 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3112 / TYPE_VECTOR_SUBPARTS (rtype));
3113 gcc_assert ((k & (k - 1)) == 0);
3114 if ((j & (k - 1)) == 0)
3115 vec_alloc (ret_ctor_elts, k);
3116 if (ratype)
3118 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3119 for (m = 0; m < o; m++)
3121 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3122 size_int (m), NULL_TREE, NULL_TREE);
3123 new_stmt
3124 = gimple_build_assign (make_ssa_name (rtype, NULL),
3125 tem);
3126 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3127 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3128 gimple_assign_lhs (new_stmt));
3130 tree clobber = build_constructor (ratype, NULL);
3131 TREE_THIS_VOLATILE (clobber) = 1;
3132 new_stmt = gimple_build_assign (new_temp, clobber);
3133 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3135 else
3136 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3137 if ((j & (k - 1)) != k - 1)
3138 continue;
3139 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3140 new_stmt
3141 = gimple_build_assign (make_ssa_name (vec_dest, NULL),
3142 vec_oprnd0);
3143 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3145 if ((unsigned) j == k - 1)
3146 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3147 else
3148 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3150 prev_stmt_info = vinfo_for_stmt (new_stmt);
3151 continue;
3153 else if (ratype)
3155 tree t = build_fold_addr_expr (new_temp);
3156 t = build2 (MEM_REF, vectype, t,
3157 build_int_cst (TREE_TYPE (t), 0));
3158 new_stmt
3159 = gimple_build_assign (make_ssa_name (vec_dest, NULL), t);
3160 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3161 tree clobber = build_constructor (ratype, NULL);
3162 TREE_THIS_VOLATILE (clobber) = 1;
3163 vect_finish_stmt_generation (stmt,
3164 gimple_build_assign (new_temp,
3165 clobber), gsi);
3169 if (j == 0)
3170 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3171 else
3172 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3174 prev_stmt_info = vinfo_for_stmt (new_stmt);
3177 vargs.release ();
3179 /* The call in STMT might prevent it from being removed in DCE.
3180 We cannot, however, remove it here, due to the way the SSA name
3181 it defines is mapped to the new definition. So just replace the
3182 rhs of the statement with something harmless. */
3184 if (slp_node)
3185 return true;
3187 if (scalar_dest)
3189 type = TREE_TYPE (scalar_dest);
3190 if (is_pattern_stmt_p (stmt_info))
3191 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3192 else
3193 lhs = gimple_call_lhs (stmt);
3194 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3196 else
3197 new_stmt = gimple_build_nop ();
3198 set_vinfo_for_stmt (new_stmt, stmt_info);
3199 set_vinfo_for_stmt (stmt, NULL);
3200 STMT_VINFO_STMT (stmt_info) = new_stmt;
3201 gsi_replace (gsi, new_stmt, false);
3202 unlink_stmt_vdef (stmt);
3204 return true;
3208 /* Function vect_gen_widened_results_half
3210 Create a vector stmt whose code, number of operands, and result
3211 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3212 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3213 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3214 needs to be created (DECL is a function-decl of a target-builtin).
3215 STMT is the original scalar stmt that we are vectorizing. */
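/* For instance (a sketch, not tied to any particular target): a widening
   multiplication is emitted as two statements,

     vect_lo = VEC_WIDEN_MULT_LO_EXPR <vect_b, vect_c>;
     vect_hi = VEC_WIDEN_MULT_HI_EXPR <vect_b, vect_c>;

   each half produced by one call to this helper, with CODE set to the LO
   or HI variant (or to CALL_EXPR when the target provides a builtin).  */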
3217 static gimple
3218 vect_gen_widened_results_half (enum tree_code code,
3219 tree decl,
3220 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3221 tree vec_dest, gimple_stmt_iterator *gsi,
3222 gimple stmt)
3224 gimple new_stmt;
3225 tree new_temp;
3227 /* Generate half of the widened result: */
3228 if (code == CALL_EXPR)
3230 /* Target specific support */
3231 if (op_type == binary_op)
3232 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3233 else
3234 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3235 new_temp = make_ssa_name (vec_dest, new_stmt);
3236 gimple_call_set_lhs (new_stmt, new_temp);
3238 else
3240 /* Generic support */
3241 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3242 if (op_type != binary_op)
3243 vec_oprnd1 = NULL;
3244 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
3245 vec_oprnd1);
3246 new_temp = make_ssa_name (vec_dest, new_stmt);
3247 gimple_assign_set_lhs (new_stmt, new_temp);
3249 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3251 return new_stmt;
3255 /* Get vectorized definitions for loop-based vectorization. For the first
3256 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3257 scalar operand), and for the rest we get a copy with
3258 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3259 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3260 The vectors are collected into VEC_OPRNDS. */
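/* Schematic example: with MULTI_STEP_CVT == 1 this pushes four defs onto
   VEC_OPRNDS -- the def of the scalar OPRND, its stmt copy, and two more
   stmt copies from the recursive call -- which is the number of input
   vectors a two-step narrowing sequence consumes.  */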
3262 static void
3263 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3264 vec<tree> *vec_oprnds, int multi_step_cvt)
3266 tree vec_oprnd;
3268 /* Get first vector operand. */
3269 /* All the vector operands except the very first one (that is scalar oprnd)
3270 are stmt copies. */
3271 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3272 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3273 else
3274 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3276 vec_oprnds->quick_push (vec_oprnd);
3278 /* Get second vector operand. */
3279 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3280 vec_oprnds->quick_push (vec_oprnd);
3282 *oprnd = vec_oprnd;
3284 /* For conversion in multiple steps, continue to get operands
3285 recursively. */
3286 if (multi_step_cvt)
3287 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3291 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3292 For multi-step conversions store the resulting vectors and call the function
3293 recursively. */
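/* Schematically, each iteration below packs one pair of input vectors into
   a narrower vector, e.g.

     vect_out = VEC_PACK_TRUNC_EXPR <vop0, vop1>;

   so every recursive call sees half as many operands; the last level stores
   the final vectors in SLP_NODE or in the STMT_VINFO_RELATED_STMT chain.  */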
3295 static void
3296 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3297 int multi_step_cvt, gimple stmt,
3298 vec<tree> vec_dsts,
3299 gimple_stmt_iterator *gsi,
3300 slp_tree slp_node, enum tree_code code,
3301 stmt_vec_info *prev_stmt_info)
3303 unsigned int i;
3304 tree vop0, vop1, new_tmp, vec_dest;
3305 gimple new_stmt;
3306 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3308 vec_dest = vec_dsts.pop ();
3310 for (i = 0; i < vec_oprnds->length (); i += 2)
3312 /* Create demotion operation. */
3313 vop0 = (*vec_oprnds)[i];
3314 vop1 = (*vec_oprnds)[i + 1];
3315 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3316 new_tmp = make_ssa_name (vec_dest, new_stmt);
3317 gimple_assign_set_lhs (new_stmt, new_tmp);
3318 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3320 if (multi_step_cvt)
3321 /* Store the resulting vector for next recursive call. */
3322 (*vec_oprnds)[i/2] = new_tmp;
3323 else
3325 /* This is the last step of the conversion sequence. Store the
3326 vectors in SLP_NODE or in vector info of the scalar statement
3327 (or in STMT_VINFO_RELATED_STMT chain). */
3328 if (slp_node)
3329 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3330 else
3332 if (!*prev_stmt_info)
3333 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3334 else
3335 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3337 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3342 /* For multi-step demotion operations we first generate demotion operations
3343 from the source type to the intermediate types, and then combine the
3344 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3345 type. */
3346 if (multi_step_cvt)
3348 /* At each level of recursion we have half of the operands we had at the
3349 previous level. */
3350 vec_oprnds->truncate ((i+1)/2);
3351 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3352 stmt, vec_dsts, gsi, slp_node,
3353 VEC_PACK_TRUNC_EXPR,
3354 prev_stmt_info);
3357 vec_dsts.quick_push (vec_dest);
3361 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3362 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3363 the resulting vectors and call the function recursively. */
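/* Schematically the inverse of the demotion helper above: for each input
   vector the two halves

     new_tmp1 = CODE1 <vop0, vop1>;
     new_tmp2 = CODE2 <vop0, vop1>;

   are generated (or calls to DECL1/DECL2 for target builtins), so
   VEC_OPRNDS0 comes back twice as long, ready for the next step.  */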
3365 static void
3366 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3367 vec<tree> *vec_oprnds1,
3368 gimple stmt, tree vec_dest,
3369 gimple_stmt_iterator *gsi,
3370 enum tree_code code1,
3371 enum tree_code code2, tree decl1,
3372 tree decl2, int op_type)
3374 int i;
3375 tree vop0, vop1, new_tmp1, new_tmp2;
3376 gimple new_stmt1, new_stmt2;
3377 vec<tree> vec_tmp = vNULL;
3379 vec_tmp.create (vec_oprnds0->length () * 2);
3380 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3382 if (op_type == binary_op)
3383 vop1 = (*vec_oprnds1)[i];
3384 else
3385 vop1 = NULL_TREE;
3387 /* Generate the two halves of promotion operation. */
3388 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3389 op_type, vec_dest, gsi, stmt);
3390 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3391 op_type, vec_dest, gsi, stmt);
3392 if (is_gimple_call (new_stmt1))
3394 new_tmp1 = gimple_call_lhs (new_stmt1);
3395 new_tmp2 = gimple_call_lhs (new_stmt2);
3397 else
3399 new_tmp1 = gimple_assign_lhs (new_stmt1);
3400 new_tmp2 = gimple_assign_lhs (new_stmt2);
3403 /* Store the results for the next step. */
3404 vec_tmp.quick_push (new_tmp1);
3405 vec_tmp.quick_push (new_tmp2);
3408 vec_oprnds0->release ();
3409 *vec_oprnds0 = vec_tmp;
3413 /* Check if STMT performs a conversion operation that can be vectorized.
3414 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3415 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3416 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
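/* Worked example (the scalar types are only illustrative): converting a
   short operand to double uses the WIDEN path.  If no single widening
   float conversion exists, the loop over GET_MODE_2XWIDER_MODE below may
   pick int as CVT_TYPE and split the conversion into

     tmp = (int) short_op;        <- widening NOP_EXPR step
     res = (double) tmp;          <- FLOAT_EXPR step

   both applied on vectors, possibly through further intermediate types
   when MULTI_STEP_CVT ends up non-zero.  */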
3418 static bool
3419 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3420 gimple *vec_stmt, slp_tree slp_node)
3422 tree vec_dest;
3423 tree scalar_dest;
3424 tree op0, op1 = NULL_TREE;
3425 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3426 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3427 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3428 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3429 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3430 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3431 tree new_temp;
3432 tree def;
3433 gimple def_stmt;
3434 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3435 gimple new_stmt = NULL;
3436 stmt_vec_info prev_stmt_info;
3437 int nunits_in;
3438 int nunits_out;
3439 tree vectype_out, vectype_in;
3440 int ncopies, i, j;
3441 tree lhs_type, rhs_type;
3442 enum { NARROW, NONE, WIDEN } modifier;
3443 vec<tree> vec_oprnds0 = vNULL;
3444 vec<tree> vec_oprnds1 = vNULL;
3445 tree vop0;
3446 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3447 int multi_step_cvt = 0;
3448 vec<tree> vec_dsts = vNULL;
3449 vec<tree> interm_types = vNULL;
3450 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3451 int op_type;
3452 machine_mode rhs_mode;
3453 unsigned short fltsz;
3455 /* Is STMT a vectorizable conversion? */
3457 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3458 return false;
3460 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3461 return false;
3463 if (!is_gimple_assign (stmt))
3464 return false;
3466 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3467 return false;
3469 code = gimple_assign_rhs_code (stmt);
3470 if (!CONVERT_EXPR_CODE_P (code)
3471 && code != FIX_TRUNC_EXPR
3472 && code != FLOAT_EXPR
3473 && code != WIDEN_MULT_EXPR
3474 && code != WIDEN_LSHIFT_EXPR)
3475 return false;
3477 op_type = TREE_CODE_LENGTH (code);
3479 /* Check types of lhs and rhs. */
3480 scalar_dest = gimple_assign_lhs (stmt);
3481 lhs_type = TREE_TYPE (scalar_dest);
3482 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3484 op0 = gimple_assign_rhs1 (stmt);
3485 rhs_type = TREE_TYPE (op0);
3487 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3488 && !((INTEGRAL_TYPE_P (lhs_type)
3489 && INTEGRAL_TYPE_P (rhs_type))
3490 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3491 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3492 return false;
3494 if ((INTEGRAL_TYPE_P (lhs_type)
3495 && (TYPE_PRECISION (lhs_type)
3496 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3497 || (INTEGRAL_TYPE_P (rhs_type)
3498 && (TYPE_PRECISION (rhs_type)
3499 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3501 if (dump_enabled_p ())
3502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3503 "type conversion to/from bit-precision unsupported."
3504 "\n");
3505 return false;
3508 /* Check the operands of the operation. */
3509 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3510 &def_stmt, &def, &dt[0], &vectype_in))
3512 if (dump_enabled_p ())
3513 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3514 "use not simple.\n");
3515 return false;
3517 if (op_type == binary_op)
3519 bool ok;
3521 op1 = gimple_assign_rhs2 (stmt);
3522 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3523 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3524 OP1. */
3525 if (CONSTANT_CLASS_P (op0))
3526 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3527 &def_stmt, &def, &dt[1], &vectype_in);
3528 else
3529 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3530 &def, &dt[1]);
3532 if (!ok)
3534 if (dump_enabled_p ())
3535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3536 "use not simple.\n");
3537 return false;
3541 /* If op0 is an external or constant def, use a vector type of
3542 the same size as the output vector type. */
3543 if (!vectype_in)
3544 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3545 if (vec_stmt)
3546 gcc_assert (vectype_in);
3547 if (!vectype_in)
3549 if (dump_enabled_p ())
3551 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3552 "no vectype for scalar type ");
3553 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3554 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3557 return false;
3560 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3561 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3562 if (nunits_in < nunits_out)
3563 modifier = NARROW;
3564 else if (nunits_out == nunits_in)
3565 modifier = NONE;
3566 else
3567 modifier = WIDEN;
3569 /* Multiple types in SLP are handled by creating the appropriate number of
3570 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3571 case of SLP. */
3572 if (slp_node || PURE_SLP_STMT (stmt_info))
3573 ncopies = 1;
3574 else if (modifier == NARROW)
3575 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3576 else
3577 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3579 /* Sanity check: make sure that at least one copy of the vectorized stmt
3580 needs to be generated. */
3581 gcc_assert (ncopies >= 1);
3583 /* Supportable by target? */
3584 switch (modifier)
3586 case NONE:
3587 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3588 return false;
3589 if (supportable_convert_operation (code, vectype_out, vectype_in,
3590 &decl1, &code1))
3591 break;
3592 /* FALLTHRU */
3593 unsupported:
3594 if (dump_enabled_p ())
3595 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3596 "conversion not supported by target.\n");
3597 return false;
3599 case WIDEN:
3600 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3601 &code1, &code2, &multi_step_cvt,
3602 &interm_types))
3604 /* Binary widening operation can only be supported directly by the
3605 architecture. */
3606 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3607 break;
3610 if (code != FLOAT_EXPR
3611 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3612 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3613 goto unsupported;
3615 rhs_mode = TYPE_MODE (rhs_type);
3616 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3617 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3618 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3619 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3621 cvt_type
3622 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3623 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3624 if (cvt_type == NULL_TREE)
3625 goto unsupported;
3627 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3629 if (!supportable_convert_operation (code, vectype_out,
3630 cvt_type, &decl1, &codecvt1))
3631 goto unsupported;
3633 else if (!supportable_widening_operation (code, stmt, vectype_out,
3634 cvt_type, &codecvt1,
3635 &codecvt2, &multi_step_cvt,
3636 &interm_types))
3637 continue;
3638 else
3639 gcc_assert (multi_step_cvt == 0);
3641 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3642 vectype_in, &code1, &code2,
3643 &multi_step_cvt, &interm_types))
3644 break;
3647 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3648 goto unsupported;
3650 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3651 codecvt2 = ERROR_MARK;
3652 else
3654 multi_step_cvt++;
3655 interm_types.safe_push (cvt_type);
3656 cvt_type = NULL_TREE;
3658 break;
3660 case NARROW:
3661 gcc_assert (op_type == unary_op);
3662 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3663 &code1, &multi_step_cvt,
3664 &interm_types))
3665 break;
3667 if (code != FIX_TRUNC_EXPR
3668 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3669 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3670 goto unsupported;
3672 rhs_mode = TYPE_MODE (rhs_type);
3673 cvt_type
3674 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3675 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3676 if (cvt_type == NULL_TREE)
3677 goto unsupported;
3678 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3679 &decl1, &codecvt1))
3680 goto unsupported;
3681 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3682 &code1, &multi_step_cvt,
3683 &interm_types))
3684 break;
3685 goto unsupported;
3687 default:
3688 gcc_unreachable ();
3691 if (!vec_stmt) /* transformation not required. */
3693 if (dump_enabled_p ())
3694 dump_printf_loc (MSG_NOTE, vect_location,
3695 "=== vectorizable_conversion ===\n");
3696 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3698 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3699 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3701 else if (modifier == NARROW)
3703 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3704 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3706 else
3708 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3709 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3711 interm_types.release ();
3712 return true;
3715 /** Transform. **/
3716 if (dump_enabled_p ())
3717 dump_printf_loc (MSG_NOTE, vect_location,
3718 "transform conversion. ncopies = %d.\n", ncopies);
3720 if (op_type == binary_op)
3722 if (CONSTANT_CLASS_P (op0))
3723 op0 = fold_convert (TREE_TYPE (op1), op0);
3724 else if (CONSTANT_CLASS_P (op1))
3725 op1 = fold_convert (TREE_TYPE (op0), op1);
3728 /* In case of multi-step conversion, we first generate conversion operations
3729 to the intermediate types, and then from those types to the final one.
3730 We create vector destinations for the intermediate type (TYPES) received
3731 from supportable_*_operation, and store them in the correct order
3732 for future use in vect_create_vectorized_*_stmts (). */
3733 vec_dsts.create (multi_step_cvt + 1);
3734 vec_dest = vect_create_destination_var (scalar_dest,
3735 (cvt_type && modifier == WIDEN)
3736 ? cvt_type : vectype_out);
3737 vec_dsts.quick_push (vec_dest);
3739 if (multi_step_cvt)
3741 for (i = interm_types.length () - 1;
3742 interm_types.iterate (i, &intermediate_type); i--)
3744 vec_dest = vect_create_destination_var (scalar_dest,
3745 intermediate_type);
3746 vec_dsts.quick_push (vec_dest);
3750 if (cvt_type)
3751 vec_dest = vect_create_destination_var (scalar_dest,
3752 modifier == WIDEN
3753 ? vectype_out : cvt_type);
3755 if (!slp_node)
3757 if (modifier == WIDEN)
3759 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3760 if (op_type == binary_op)
3761 vec_oprnds1.create (1);
3763 else if (modifier == NARROW)
3764 vec_oprnds0.create (
3765 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3767 else if (code == WIDEN_LSHIFT_EXPR)
3768 vec_oprnds1.create (slp_node->vec_stmts_size);
3770 last_oprnd = op0;
3771 prev_stmt_info = NULL;
3772 switch (modifier)
3774 case NONE:
3775 for (j = 0; j < ncopies; j++)
3777 if (j == 0)
3778 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3779 -1);
3780 else
3781 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3783 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3785 /* Arguments are ready, create the new vector stmt. */
3786 if (code1 == CALL_EXPR)
3788 new_stmt = gimple_build_call (decl1, 1, vop0);
3789 new_temp = make_ssa_name (vec_dest, new_stmt);
3790 gimple_call_set_lhs (new_stmt, new_temp);
3792 else
3794 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3795 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
3796 vop0, NULL);
3797 new_temp = make_ssa_name (vec_dest, new_stmt);
3798 gimple_assign_set_lhs (new_stmt, new_temp);
3801 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3802 if (slp_node)
3803 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3806 if (j == 0)
3807 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3808 else
3809 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3810 prev_stmt_info = vinfo_for_stmt (new_stmt);
3812 break;
3814 case WIDEN:
3815 /* In case the vectorization factor (VF) is bigger than the number
3816 of elements that we can fit in a vectype (nunits), we have to
3817 generate more than one vector stmt - i.e. - we need to "unroll"
3818 the vector stmt by a factor VF/nunits. */
3819 for (j = 0; j < ncopies; j++)
3821 /* Handle uses. */
3822 if (j == 0)
3824 if (slp_node)
3826 if (code == WIDEN_LSHIFT_EXPR)
3828 unsigned int k;
3830 vec_oprnd1 = op1;
3831 /* Store vec_oprnd1 for every vector stmt to be created
3832 for SLP_NODE. We check during the analysis that all
3833 the shift arguments are the same. */
3834 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3835 vec_oprnds1.quick_push (vec_oprnd1);
3837 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3838 slp_node, -1);
3840 else
3841 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3842 &vec_oprnds1, slp_node, -1);
3844 else
3846 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3847 vec_oprnds0.quick_push (vec_oprnd0);
3848 if (op_type == binary_op)
3850 if (code == WIDEN_LSHIFT_EXPR)
3851 vec_oprnd1 = op1;
3852 else
3853 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3854 NULL);
3855 vec_oprnds1.quick_push (vec_oprnd1);
3859 else
3861 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3862 vec_oprnds0.truncate (0);
3863 vec_oprnds0.quick_push (vec_oprnd0);
3864 if (op_type == binary_op)
3866 if (code == WIDEN_LSHIFT_EXPR)
3867 vec_oprnd1 = op1;
3868 else
3869 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3870 vec_oprnd1);
3871 vec_oprnds1.truncate (0);
3872 vec_oprnds1.quick_push (vec_oprnd1);
3876 /* Arguments are ready. Create the new vector stmts. */
3877 for (i = multi_step_cvt; i >= 0; i--)
3879 tree this_dest = vec_dsts[i];
3880 enum tree_code c1 = code1, c2 = code2;
3881 if (i == 0 && codecvt2 != ERROR_MARK)
3883 c1 = codecvt1;
3884 c2 = codecvt2;
3886 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3887 &vec_oprnds1,
3888 stmt, this_dest, gsi,
3889 c1, c2, decl1, decl2,
3890 op_type);
3893 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3895 if (cvt_type)
3897 if (codecvt1 == CALL_EXPR)
3899 new_stmt = gimple_build_call (decl1, 1, vop0);
3900 new_temp = make_ssa_name (vec_dest, new_stmt);
3901 gimple_call_set_lhs (new_stmt, new_temp);
3903 else
3905 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3906 new_temp = make_ssa_name (vec_dest, NULL);
3907 new_stmt = gimple_build_assign_with_ops (codecvt1,
3908 new_temp,
3909 vop0, NULL);
3912 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3914 else
3915 new_stmt = SSA_NAME_DEF_STMT (vop0);
3917 if (slp_node)
3918 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3919 else
3921 if (!prev_stmt_info)
3922 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3923 else
3924 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3925 prev_stmt_info = vinfo_for_stmt (new_stmt);
3930 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3931 break;
3933 case NARROW:
3934 /* In case the vectorization factor (VF) is bigger than the number
3935 of elements that we can fit in a vectype (nunits), we have to
3936 generate more than one vector stmt - i.e. - we need to "unroll"
3937 the vector stmt by a factor VF/nunits. */
3938 for (j = 0; j < ncopies; j++)
3940 /* Handle uses. */
3941 if (slp_node)
3942 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3943 slp_node, -1);
3944 else
3946 vec_oprnds0.truncate (0);
3947 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3948 vect_pow2 (multi_step_cvt) - 1);
3951 /* Arguments are ready. Create the new vector stmts. */
3952 if (cvt_type)
3953 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3955 if (codecvt1 == CALL_EXPR)
3957 new_stmt = gimple_build_call (decl1, 1, vop0);
3958 new_temp = make_ssa_name (vec_dest, new_stmt);
3959 gimple_call_set_lhs (new_stmt, new_temp);
3961 else
3963 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3964 new_temp = make_ssa_name (vec_dest, NULL);
3965 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
3966 vop0, NULL);
3969 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3970 vec_oprnds0[i] = new_temp;
3973 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
3974 stmt, vec_dsts, gsi,
3975 slp_node, code1,
3976 &prev_stmt_info);
3979 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3980 break;
3983 vec_oprnds0.release ();
3984 vec_oprnds1.release ();
3985 vec_dsts.release ();
3986 interm_types.release ();
3988 return true;
3992 /* Function vectorizable_assignment.
3994 Check if STMT performs an assignment (copy) that can be vectorized.
3995 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3996 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3997 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3999 static bool
4000 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4001 gimple *vec_stmt, slp_tree slp_node)
4003 tree vec_dest;
4004 tree scalar_dest;
4005 tree op;
4006 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4007 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4008 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4009 tree new_temp;
4010 tree def;
4011 gimple def_stmt;
4012 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4013 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4014 int ncopies;
4015 int i, j;
4016 vec<tree> vec_oprnds = vNULL;
4017 tree vop;
4018 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4019 gimple new_stmt = NULL;
4020 stmt_vec_info prev_stmt_info = NULL;
4021 enum tree_code code;
4022 tree vectype_in;
4024 /* Multiple types in SLP are handled by creating the appropriate number of
4025 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4026 case of SLP. */
4027 if (slp_node || PURE_SLP_STMT (stmt_info))
4028 ncopies = 1;
4029 else
4030 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4032 gcc_assert (ncopies >= 1);
4034 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4035 return false;
4037 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4038 return false;
4040 /* Is vectorizable assignment? */
4041 if (!is_gimple_assign (stmt))
4042 return false;
4044 scalar_dest = gimple_assign_lhs (stmt);
4045 if (TREE_CODE (scalar_dest) != SSA_NAME)
4046 return false;
4048 code = gimple_assign_rhs_code (stmt);
4049 if (gimple_assign_single_p (stmt)
4050 || code == PAREN_EXPR
4051 || CONVERT_EXPR_CODE_P (code))
4052 op = gimple_assign_rhs1 (stmt);
4053 else
4054 return false;
4056 if (code == VIEW_CONVERT_EXPR)
4057 op = TREE_OPERAND (op, 0);
4059 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4060 &def_stmt, &def, &dt[0], &vectype_in))
4062 if (dump_enabled_p ())
4063 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4064 "use not simple.\n");
4065 return false;
4068 /* We can handle NOP_EXPR conversions that do not change the number
4069 of elements or the vector size. */
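/* For example, a conversion between int and unsigned int vectors keeps
both the element count and the vector size and can be handled here,
whereas an int -> short conversion changes the vector size and is left
to vectorizable_conversion. */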
4070 if ((CONVERT_EXPR_CODE_P (code)
4071 || code == VIEW_CONVERT_EXPR)
4072 && (!vectype_in
4073 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4074 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4075 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4076 return false;
4078 /* We do not handle bit-precision changes. */
4079 if ((CONVERT_EXPR_CODE_P (code)
4080 || code == VIEW_CONVERT_EXPR)
4081 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4082 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4083 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4084 || ((TYPE_PRECISION (TREE_TYPE (op))
4085 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4086 /* But a conversion that does not change the bit-pattern is ok. */
4087 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4088 > TYPE_PRECISION (TREE_TYPE (op)))
4089 && TYPE_UNSIGNED (TREE_TYPE (op))))
4091 if (dump_enabled_p ())
4092 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4093 "type conversion to/from bit-precision "
4094 "unsupported.\n");
4095 return false;
4098 if (!vec_stmt) /* transformation not required. */
4100 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4101 if (dump_enabled_p ())
4102 dump_printf_loc (MSG_NOTE, vect_location,
4103 "=== vectorizable_assignment ===\n");
4104 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4105 return true;
4108 /** Transform. **/
4109 if (dump_enabled_p ())
4110 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4112 /* Handle def. */
4113 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4115 /* Handle use. */
4116 for (j = 0; j < ncopies; j++)
4118 /* Handle uses. */
4119 if (j == 0)
4120 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4121 else
4122 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4124 /* Arguments are ready. Create the new vector stmt. */
4125 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4127 if (CONVERT_EXPR_CODE_P (code)
4128 || code == VIEW_CONVERT_EXPR)
4129 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4130 new_stmt = gimple_build_assign (vec_dest, vop);
4131 new_temp = make_ssa_name (vec_dest, new_stmt);
4132 gimple_assign_set_lhs (new_stmt, new_temp);
4133 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4134 if (slp_node)
4135 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4138 if (slp_node)
4139 continue;
4141 if (j == 0)
4142 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4143 else
4144 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4146 prev_stmt_info = vinfo_for_stmt (new_stmt);
4149 vec_oprnds.release ();
4150 return true;
4154 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4155 either as shift by a scalar or by a vector. */
4157 bool
4158 vect_supportable_shift (enum tree_code code, tree scalar_type)
4161 machine_mode vec_mode;
4162 optab optab;
4163 int icode;
4164 tree vectype;
4166 vectype = get_vectype_for_scalar_type (scalar_type);
4167 if (!vectype)
4168 return false;
4170 optab = optab_for_tree_code (code, vectype, optab_scalar);
4171 if (!optab
4172 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4174 optab = optab_for_tree_code (code, vectype, optab_vector);
4175 if (!optab
4176 || (optab_handler (optab, TYPE_MODE (vectype))
4177 == CODE_FOR_nothing))
4178 return false;
4181 vec_mode = TYPE_MODE (vectype);
4182 icode = (int) optab_handler (optab, vec_mode);
4183 if (icode == CODE_FOR_nothing)
4184 return false;
4186 return true;
4190 /* Function vectorizable_shift.
4192 Check if STMT performs a shift operation that can be vectorized.
4193 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4194 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4195 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4197 static bool
4198 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4199 gimple *vec_stmt, slp_tree slp_node)
4201 tree vec_dest;
4202 tree scalar_dest;
4203 tree op0, op1 = NULL;
4204 tree vec_oprnd1 = NULL_TREE;
4205 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4206 tree vectype;
4207 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4208 enum tree_code code;
4209 machine_mode vec_mode;
4210 tree new_temp;
4211 optab optab;
4212 int icode;
4213 machine_mode optab_op2_mode;
4214 tree def;
4215 gimple def_stmt;
4216 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4217 gimple new_stmt = NULL;
4218 stmt_vec_info prev_stmt_info;
4219 int nunits_in;
4220 int nunits_out;
4221 tree vectype_out;
4222 tree op1_vectype;
4223 int ncopies;
4224 int j, i;
4225 vec<tree> vec_oprnds0 = vNULL;
4226 vec<tree> vec_oprnds1 = vNULL;
4227 tree vop0, vop1;
4228 unsigned int k;
4229 bool scalar_shift_arg = true;
4230 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4231 int vf;
4233 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4234 return false;
4236 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4237 return false;
4239 /* Is STMT a vectorizable binary/unary operation? */
4240 if (!is_gimple_assign (stmt))
4241 return false;
4243 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4244 return false;
4246 code = gimple_assign_rhs_code (stmt);
4248 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4249 || code == RROTATE_EXPR))
4250 return false;
4252 scalar_dest = gimple_assign_lhs (stmt);
4253 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4254 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4255 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4257 if (dump_enabled_p ())
4258 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4259 "bit-precision shifts not supported.\n");
4260 return false;
4263 op0 = gimple_assign_rhs1 (stmt);
4264 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4265 &def_stmt, &def, &dt[0], &vectype))
4267 if (dump_enabled_p ())
4268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4269 "use not simple.\n");
4270 return false;
4272 /* If op0 is an external or constant def, use a vector type with
4273 the same size as the output vector type. */
4274 if (!vectype)
4275 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4276 if (vec_stmt)
4277 gcc_assert (vectype);
4278 if (!vectype)
4280 if (dump_enabled_p ())
4281 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4282 "no vectype for scalar type\n");
4283 return false;
4286 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4287 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4288 if (nunits_out != nunits_in)
4289 return false;
4291 op1 = gimple_assign_rhs2 (stmt);
4292 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4293 &def, &dt[1], &op1_vectype))
4295 if (dump_enabled_p ())
4296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4297 "use not simple.\n");
4298 return false;
4301 if (loop_vinfo)
4302 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4303 else
4304 vf = 1;
4306 /* Multiple types in SLP are handled by creating the appropriate number of
4307 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4308 case of SLP. */
4309 if (slp_node || PURE_SLP_STMT (stmt_info))
4310 ncopies = 1;
4311 else
4312 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4314 gcc_assert (ncopies >= 1);
4316 /* Determine whether the shift amount is a vector, or scalar. If the
4317 shift/rotate amount is a vector, use the vector/vector shift optabs. */
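/* E.g. "a[i] = b[i] << c[i]" needs a vector shift count (one amount per
element), while "a[i] = b[i] << n" with loop-invariant N can use the
scalar-count form of the shift. */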
4319 if (dt[1] == vect_internal_def && !slp_node)
4320 scalar_shift_arg = false;
4321 else if (dt[1] == vect_constant_def
4322 || dt[1] == vect_external_def
4323 || dt[1] == vect_internal_def)
4325 /* In SLP, we need to check whether the shift count is the same;
4326 in loops, if it is a constant or invariant, it is always
4327 a scalar shift. */
4328 if (slp_node)
4330 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4331 gimple slpstmt;
4333 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4334 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4335 scalar_shift_arg = false;
4338 else
4340 if (dump_enabled_p ())
4341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4342 "operand mode requires invariant argument.\n");
4343 return false;
4346 /* Vector shifted by vector. */
4347 if (!scalar_shift_arg)
4349 optab = optab_for_tree_code (code, vectype, optab_vector);
4350 if (dump_enabled_p ())
4351 dump_printf_loc (MSG_NOTE, vect_location,
4352 "vector/vector shift/rotate found.\n");
4354 if (!op1_vectype)
4355 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4356 if (op1_vectype == NULL_TREE
4357 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4359 if (dump_enabled_p ())
4360 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4361 "unusable type for last operand in"
4362 " vector/vector shift/rotate.\n");
4363 return false;
4366 /* See if the machine has a vector shifted by scalar insn and if not
4367 then see if it has a vector shifted by vector insn. */
4368 else
4370 optab = optab_for_tree_code (code, vectype, optab_scalar);
4371 if (optab
4372 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4374 if (dump_enabled_p ())
4375 dump_printf_loc (MSG_NOTE, vect_location,
4376 "vector/scalar shift/rotate found.\n");
4378 else
4380 optab = optab_for_tree_code (code, vectype, optab_vector);
4381 if (optab
4382 && (optab_handler (optab, TYPE_MODE (vectype))
4383 != CODE_FOR_nothing))
4385 scalar_shift_arg = false;
4387 if (dump_enabled_p ())
4388 dump_printf_loc (MSG_NOTE, vect_location,
4389 "vector/vector shift/rotate found.\n");
4391 /* Unlike the other binary operators, shifts/rotates have
4392 the rhs being int, instead of the same type as the lhs,
4393 so make sure the scalar is the right type if we are
4394 dealing with vectors of long long/long/short/char. */
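/* E.g. for a long long vector shifted by an int count, the count is
first converted to the element type of VECTYPE below. */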
4395 if (dt[1] == vect_constant_def)
4396 op1 = fold_convert (TREE_TYPE (vectype), op1);
4397 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4398 TREE_TYPE (op1)))
4400 if (slp_node
4401 && TYPE_MODE (TREE_TYPE (vectype))
4402 != TYPE_MODE (TREE_TYPE (op1)))
4404 if (dump_enabled_p ())
4405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4406 "unusable type for last operand in"
4407 " vector/vector shift/rotate.\n");
4408 return false;
4410 if (vec_stmt && !slp_node)
4412 op1 = fold_convert (TREE_TYPE (vectype), op1);
4413 op1 = vect_init_vector (stmt, op1,
4414 TREE_TYPE (vectype), NULL);
4421 /* Supportable by target? */
4422 if (!optab)
4424 if (dump_enabled_p ())
4425 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4426 "no optab.\n");
4427 return false;
4429 vec_mode = TYPE_MODE (vectype);
4430 icode = (int) optab_handler (optab, vec_mode);
4431 if (icode == CODE_FOR_nothing)
4433 if (dump_enabled_p ())
4434 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4435 "op not supported by target.\n");
4436 /* Check only during analysis. */
4437 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4438 || (vf < vect_min_worthwhile_factor (code)
4439 && !vec_stmt))
4440 return false;
4441 if (dump_enabled_p ())
4442 dump_printf_loc (MSG_NOTE, vect_location,
4443 "proceeding using word mode.\n");
4446 /* Worthwhile without SIMD support? Check only during analysis. */
4447 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4448 && vf < vect_min_worthwhile_factor (code)
4449 && !vec_stmt)
4451 if (dump_enabled_p ())
4452 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4453 "not worthwhile without SIMD support.\n");
4454 return false;
4457 if (!vec_stmt) /* transformation not required. */
4459 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4460 if (dump_enabled_p ())
4461 dump_printf_loc (MSG_NOTE, vect_location,
4462 "=== vectorizable_shift ===\n");
4463 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4464 return true;
4467 /** Transform. **/
4469 if (dump_enabled_p ())
4470 dump_printf_loc (MSG_NOTE, vect_location,
4471 "transform binary/unary operation.\n");
4473 /* Handle def. */
4474 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4476 prev_stmt_info = NULL;
4477 for (j = 0; j < ncopies; j++)
4479 /* Handle uses. */
4480 if (j == 0)
4482 if (scalar_shift_arg)
4484 /* Vector shl and shr insn patterns can be defined with scalar
4485 operand 2 (shift operand). In this case, use constant or loop
4486 invariant op1 directly, without extending it to vector mode
4487 first. */
4488 optab_op2_mode = insn_data[icode].operand[2].mode;
4489 if (!VECTOR_MODE_P (optab_op2_mode))
4491 if (dump_enabled_p ())
4492 dump_printf_loc (MSG_NOTE, vect_location,
4493 "operand 1 using scalar mode.\n");
4494 vec_oprnd1 = op1;
4495 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4496 vec_oprnds1.quick_push (vec_oprnd1);
4497 if (slp_node)
4499 /* Store vec_oprnd1 for every vector stmt to be created
4500 for SLP_NODE. We check during the analysis that all
4501 the shift arguments are the same.
4502 TODO: Allow different constants for different vector
4503 stmts generated for an SLP instance. */
4504 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4505 vec_oprnds1.quick_push (vec_oprnd1);
4510 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4511 (a special case for certain kinds of vector shifts); otherwise,
4512 operand 1 should be of a vector type (the usual case). */
4513 if (vec_oprnd1)
4514 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4515 slp_node, -1);
4516 else
4517 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4518 slp_node, -1);
4520 else
4521 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4523 /* Arguments are ready. Create the new vector stmt. */
4524 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4526 vop1 = vec_oprnds1[i];
4527 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
4528 new_temp = make_ssa_name (vec_dest, new_stmt);
4529 gimple_assign_set_lhs (new_stmt, new_temp);
4530 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4531 if (slp_node)
4532 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4535 if (slp_node)
4536 continue;
4538 if (j == 0)
4539 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4540 else
4541 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4542 prev_stmt_info = vinfo_for_stmt (new_stmt);
4545 vec_oprnds0.release ();
4546 vec_oprnds1.release ();
4548 return true;
4552 /* Function vectorizable_operation.
4554 Check if STMT performs a binary, unary or ternary operation that can
4555 be vectorized.
4556 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4557 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4558 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4560 static bool
4561 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4562 gimple *vec_stmt, slp_tree slp_node)
4564 tree vec_dest;
4565 tree scalar_dest;
4566 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4567 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4568 tree vectype;
4569 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4570 enum tree_code code;
4571 machine_mode vec_mode;
4572 tree new_temp;
4573 int op_type;
4574 optab optab;
4575 int icode;
4576 tree def;
4577 gimple def_stmt;
4578 enum vect_def_type dt[3]
4579 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4580 gimple new_stmt = NULL;
4581 stmt_vec_info prev_stmt_info;
4582 int nunits_in;
4583 int nunits_out;
4584 tree vectype_out;
4585 int ncopies;
4586 int j, i;
4587 vec<tree> vec_oprnds0 = vNULL;
4588 vec<tree> vec_oprnds1 = vNULL;
4589 vec<tree> vec_oprnds2 = vNULL;
4590 tree vop0, vop1, vop2;
4591 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4592 int vf;
4594 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4595 return false;
4597 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4598 return false;
4600 /* Is STMT a vectorizable binary/unary operation? */
4601 if (!is_gimple_assign (stmt))
4602 return false;
4604 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4605 return false;
4607 code = gimple_assign_rhs_code (stmt);
4609 /* For pointer addition, we should use the normal plus for
4610 the vector addition. */
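/* E.g. "q = p + off" is POINTER_PLUS_EXPR in GIMPLE, but the vectorized
addition is emitted as an ordinary PLUS_EXPR. */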
4611 if (code == POINTER_PLUS_EXPR)
4612 code = PLUS_EXPR;
4614 /* Support only unary or binary operations. */
4615 op_type = TREE_CODE_LENGTH (code);
4616 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4618 if (dump_enabled_p ())
4619 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4620 "num. args = %d (not unary/binary/ternary op).\n",
4621 op_type);
4622 return false;
4625 scalar_dest = gimple_assign_lhs (stmt);
4626 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4628 /* Most operations cannot handle bit-precision types without extra
4629 truncations. */
4630 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4631 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4632 /* Exceptions are bitwise binary operations. */
4633 && code != BIT_IOR_EXPR
4634 && code != BIT_XOR_EXPR
4635 && code != BIT_AND_EXPR)
4637 if (dump_enabled_p ())
4638 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4639 "bit-precision arithmetic not supported.\n");
4640 return false;
4643 op0 = gimple_assign_rhs1 (stmt);
4644 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4645 &def_stmt, &def, &dt[0], &vectype))
4647 if (dump_enabled_p ())
4648 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4649 "use not simple.\n");
4650 return false;
4652 /* If op0 is an external or constant def, use a vector type with
4653 the same size as the output vector type. */
4654 if (!vectype)
4655 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4656 if (vec_stmt)
4657 gcc_assert (vectype);
4658 if (!vectype)
4660 if (dump_enabled_p ())
4662 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4663 "no vectype for scalar type ");
4664 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4665 TREE_TYPE (op0));
4666 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4669 return false;
4672 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4673 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4674 if (nunits_out != nunits_in)
4675 return false;
4677 if (op_type == binary_op || op_type == ternary_op)
4679 op1 = gimple_assign_rhs2 (stmt);
4680 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4681 &def, &dt[1]))
4683 if (dump_enabled_p ())
4684 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4685 "use not simple.\n");
4686 return false;
4689 if (op_type == ternary_op)
4691 op2 = gimple_assign_rhs3 (stmt);
4692 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4693 &def, &dt[2]))
4695 if (dump_enabled_p ())
4696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4697 "use not simple.\n");
4698 return false;
4702 if (loop_vinfo)
4703 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4704 else
4705 vf = 1;
4707 /* Multiple types in SLP are handled by creating the appropriate number of
4708 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4709 case of SLP. */
4710 if (slp_node || PURE_SLP_STMT (stmt_info))
4711 ncopies = 1;
4712 else
4713 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4715 gcc_assert (ncopies >= 1);
4717 /* Shifts are handled in vectorizable_shift (). */
4718 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4719 || code == RROTATE_EXPR)
4720 return false;
4722 /* Supportable by target? */
4724 vec_mode = TYPE_MODE (vectype);
4725 if (code == MULT_HIGHPART_EXPR)
4727 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4728 icode = LAST_INSN_CODE;
4729 else
4730 icode = CODE_FOR_nothing;
4732 else
4734 optab = optab_for_tree_code (code, vectype, optab_default);
4735 if (!optab)
4737 if (dump_enabled_p ())
4738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4739 "no optab.\n");
4740 return false;
4742 icode = (int) optab_handler (optab, vec_mode);
4745 if (icode == CODE_FOR_nothing)
4747 if (dump_enabled_p ())
4748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4749 "op not supported by target.\n");
4750 /* Check only during analysis. */
4751 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4752 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4753 return false;
4754 if (dump_enabled_p ())
4755 dump_printf_loc (MSG_NOTE, vect_location,
4756 "proceeding using word mode.\n");
4759 /* Worthwhile without SIMD support? Check only during analysis. */
4760 if (!VECTOR_MODE_P (vec_mode)
4761 && !vec_stmt
4762 && vf < vect_min_worthwhile_factor (code))
4764 if (dump_enabled_p ())
4765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4766 "not worthwhile without SIMD support.\n");
4767 return false;
4770 if (!vec_stmt) /* transformation not required. */
4772 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4773 if (dump_enabled_p ())
4774 dump_printf_loc (MSG_NOTE, vect_location,
4775 "=== vectorizable_operation ===\n");
4776 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4777 return true;
4780 /** Transform. **/
4782 if (dump_enabled_p ())
4783 dump_printf_loc (MSG_NOTE, vect_location,
4784 "transform binary/unary operation.\n");
4786 /* Handle def. */
4787 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4789 /* In case the vectorization factor (VF) is bigger than the number
4790 of elements that we can fit in a vectype (nunits), we have to generate
4791 more than one vector stmt - i.e. - we need to "unroll" the
4792 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4793 from one copy of the vector stmt to the next, in the field
4794 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4795 stages to find the correct vector defs to be used when vectorizing
4796 stmts that use the defs of the current stmt. The example below
4797 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4798 we need to create 4 vectorized stmts):
4800 before vectorization:
4801 RELATED_STMT VEC_STMT
4802 S1: x = memref - -
4803 S2: z = x + 1 - -
4805 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4806 there):
4807 RELATED_STMT VEC_STMT
4808 VS1_0: vx0 = memref0 VS1_1 -
4809 VS1_1: vx1 = memref1 VS1_2 -
4810 VS1_2: vx2 = memref2 VS1_3 -
4811 VS1_3: vx3 = memref3 - -
4812 S1: x = load - VS1_0
4813 S2: z = x + 1 - -
4815 step2: vectorize stmt S2 (done here):
4816 To vectorize stmt S2 we first need to find the relevant vector
4817 def for the first operand 'x'. This is, as usual, obtained from
4818 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4819 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4820 relevant vector def 'vx0'. Having found 'vx0' we can generate
4821 the vector stmt VS2_0, and as usual, record it in the
4822 STMT_VINFO_VEC_STMT of stmt S2.
4823 When creating the second copy (VS2_1), we obtain the relevant vector
4824 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4825 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4826 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4827 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4828 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4829 chain of stmts and pointers:
4830 RELATED_STMT VEC_STMT
4831 VS1_0: vx0 = memref0 VS1_1 -
4832 VS1_1: vx1 = memref1 VS1_2 -
4833 VS1_2: vx2 = memref2 VS1_3 -
4834 VS1_3: vx3 = memref3 - -
4835 S1: x = load - VS1_0
4836 VS2_0: vz0 = vx0 + v1 VS2_1 -
4837 VS2_1: vz1 = vx1 + v1 VS2_2 -
4838 VS2_2: vz2 = vx2 + v1 VS2_3 -
4839 VS2_3: vz3 = vx3 + v1 - -
4840 S2: z = x + 1 - VS2_0 */
4842 prev_stmt_info = NULL;
4843 for (j = 0; j < ncopies; j++)
4845 /* Handle uses. */
4846 if (j == 0)
4848 if (op_type == binary_op || op_type == ternary_op)
4849 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4850 slp_node, -1);
4851 else
4852 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4853 slp_node, -1);
4854 if (op_type == ternary_op)
4856 vec_oprnds2.create (1);
4857 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4858 stmt,
4859 NULL));
4862 else
4864 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4865 if (op_type == ternary_op)
4867 tree vec_oprnd = vec_oprnds2.pop ();
4868 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4869 vec_oprnd));
4873 /* Arguments are ready. Create the new vector stmt. */
4874 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4876 vop1 = ((op_type == binary_op || op_type == ternary_op)
4877 ? vec_oprnds1[i] : NULL_TREE);
4878 vop2 = ((op_type == ternary_op)
4879 ? vec_oprnds2[i] : NULL_TREE);
4880 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
4881 vop0, vop1, vop2);
4882 new_temp = make_ssa_name (vec_dest, new_stmt);
4883 gimple_assign_set_lhs (new_stmt, new_temp);
4884 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4885 if (slp_node)
4886 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4889 if (slp_node)
4890 continue;
4892 if (j == 0)
4893 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4894 else
4895 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4896 prev_stmt_info = vinfo_for_stmt (new_stmt);
4899 vec_oprnds0.release ();
4900 vec_oprnds1.release ();
4901 vec_oprnds2.release ();
4903 return true;
4906 /* A helper function to ensure data reference DR's base alignment
4907 for STMT_INFO. */
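/* When the data-ref's base declaration was found to be insufficiently
aligned (base_misaligned), its DECL_ALIGN is raised to the vector type's
alignment here so later accesses can rely on that alignment. */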
4909 static void
4910 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4912 if (!dr->aux)
4913 return;
4915 if (((dataref_aux *)dr->aux)->base_misaligned)
4917 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4918 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4920 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4921 DECL_USER_ALIGN (base_decl) = 1;
4922 ((dataref_aux *)dr->aux)->base_misaligned = false;
4927 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4928 reversal of the vector elements. If that is impossible to do,
4929 returns NULL. */
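/* For instance, for a 4-element vector the selector built below is
{3, 2, 1, 0}. */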
4931 static tree
4932 perm_mask_for_reverse (tree vectype)
4934 int i, nunits;
4935 unsigned char *sel;
4937 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4938 sel = XALLOCAVEC (unsigned char, nunits);
4940 for (i = 0; i < nunits; ++i)
4941 sel[i] = nunits - 1 - i;
4943 return vect_gen_perm_mask (vectype, sel);
4946 /* Function vectorizable_store.
4948 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4949 can be vectorized.
4950 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4951 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4952 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4954 static bool
4955 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4956 slp_tree slp_node)
4958 tree scalar_dest;
4959 tree data_ref;
4960 tree op;
4961 tree vec_oprnd = NULL_TREE;
4962 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4963 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4964 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4965 tree elem_type;
4966 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4967 struct loop *loop = NULL;
4968 machine_mode vec_mode;
4969 tree dummy;
4970 enum dr_alignment_support alignment_support_scheme;
4971 tree def;
4972 gimple def_stmt;
4973 enum vect_def_type dt;
4974 stmt_vec_info prev_stmt_info = NULL;
4975 tree dataref_ptr = NULL_TREE;
4976 tree dataref_offset = NULL_TREE;
4977 gimple ptr_incr = NULL;
4978 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4979 int ncopies;
4980 int j;
4981 gimple next_stmt, first_stmt = NULL;
4982 bool grouped_store = false;
4983 bool store_lanes_p = false;
4984 unsigned int group_size, i;
4985 vec<tree> dr_chain = vNULL;
4986 vec<tree> oprnds = vNULL;
4987 vec<tree> result_chain = vNULL;
4988 bool inv_p;
4989 bool negative = false;
4990 tree offset = NULL_TREE;
4991 vec<tree> vec_oprnds = vNULL;
4992 bool slp = (slp_node != NULL);
4993 unsigned int vec_num;
4994 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4995 tree aggr_type;
4997 if (loop_vinfo)
4998 loop = LOOP_VINFO_LOOP (loop_vinfo);
5000 /* Multiple types in SLP are handled by creating the appropriate number of
5001 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5002 case of SLP. */
5003 if (slp || PURE_SLP_STMT (stmt_info))
5004 ncopies = 1;
5005 else
5006 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5008 gcc_assert (ncopies >= 1);
5010 /* FORNOW. This restriction should be relaxed. */
5011 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5013 if (dump_enabled_p ())
5014 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5015 "multiple types in nested loop.\n");
5016 return false;
5019 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5020 return false;
5022 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5023 return false;
5025 /* Is vectorizable store? */
5027 if (!is_gimple_assign (stmt))
5028 return false;
5030 scalar_dest = gimple_assign_lhs (stmt);
5031 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5032 && is_pattern_stmt_p (stmt_info))
5033 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5034 if (TREE_CODE (scalar_dest) != ARRAY_REF
5035 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5036 && TREE_CODE (scalar_dest) != INDIRECT_REF
5037 && TREE_CODE (scalar_dest) != COMPONENT_REF
5038 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5039 && TREE_CODE (scalar_dest) != REALPART_EXPR
5040 && TREE_CODE (scalar_dest) != MEM_REF)
5041 return false;
5043 gcc_assert (gimple_assign_single_p (stmt));
5044 op = gimple_assign_rhs1 (stmt);
5045 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5046 &def, &dt))
5048 if (dump_enabled_p ())
5049 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5050 "use not simple.\n");
5051 return false;
5054 elem_type = TREE_TYPE (vectype);
5055 vec_mode = TYPE_MODE (vectype);
5057 /* FORNOW. In some cases we can vectorize even if the data-type is not
5058 supported (e.g. array initialization with 0). */
5059 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5060 return false;
5062 if (!STMT_VINFO_DATA_REF (stmt_info))
5063 return false;
5065 negative =
5066 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5067 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5068 size_zero_node) < 0;
5069 if (negative && ncopies > 1)
5071 if (dump_enabled_p ())
5072 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5073 "multiple types with negative step.\n");
5074 return false;
5077 if (negative)
5079 gcc_assert (!grouped_store);
5080 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5081 if (alignment_support_scheme != dr_aligned
5082 && alignment_support_scheme != dr_unaligned_supported)
5084 if (dump_enabled_p ())
5085 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5086 "negative step but alignment required.\n");
5087 return false;
5089 if (dt != vect_constant_def
5090 && dt != vect_external_def
5091 && !perm_mask_for_reverse (vectype))
5093 if (dump_enabled_p ())
5094 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5095 "negative step and reversing not supported.\n");
5096 return false;
5100 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5102 grouped_store = true;
5103 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5104 if (!slp && !PURE_SLP_STMT (stmt_info))
5106 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5107 if (vect_store_lanes_supported (vectype, group_size))
5108 store_lanes_p = true;
5109 else if (!vect_grouped_store_supported (vectype, group_size))
5110 return false;
5113 if (first_stmt == stmt)
5115 /* STMT is the leader of the group. Check the operands of all the
5116 stmts of the group. */
5117 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5118 while (next_stmt)
5120 gcc_assert (gimple_assign_single_p (next_stmt));
5121 op = gimple_assign_rhs1 (next_stmt);
5122 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5123 &def_stmt, &def, &dt))
5125 if (dump_enabled_p ())
5126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5127 "use not simple.\n");
5128 return false;
5130 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5135 if (!vec_stmt) /* transformation not required. */
5137 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5138 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5139 NULL, NULL, NULL);
5140 return true;
5143 /** Transform. **/
5145 ensure_base_align (stmt_info, dr);
5147 if (grouped_store)
5149 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5150 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5152 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5154 /* FORNOW */
5155 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5157 /* We vectorize all the stmts of the interleaving group when we
5158 reach the last stmt in the group. */
5159 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5160 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5161 && !slp)
5163 *vec_stmt = NULL;
5164 return true;
5167 if (slp)
5169 grouped_store = false;
5170 /* VEC_NUM is the number of vect stmts to be created for this
5171 group. */
5172 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5173 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5174 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5175 op = gimple_assign_rhs1 (first_stmt);
5177 else
5178 /* VEC_NUM is the number of vect stmts to be created for this
5179 group. */
5180 vec_num = group_size;
5182 else
5184 first_stmt = stmt;
5185 first_dr = dr;
5186 group_size = vec_num = 1;
5189 if (dump_enabled_p ())
5190 dump_printf_loc (MSG_NOTE, vect_location,
5191 "transform store. ncopies = %d\n", ncopies);
5193 dr_chain.create (group_size);
5194 oprnds.create (group_size);
5196 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5197 gcc_assert (alignment_support_scheme);
5198 /* Targets with store-lane instructions must not require explicit
5199 realignment. */
5200 gcc_assert (!store_lanes_p
5201 || alignment_support_scheme == dr_aligned
5202 || alignment_support_scheme == dr_unaligned_supported);
5204 if (negative)
5205 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5207 if (store_lanes_p)
5208 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5209 else
5210 aggr_type = vectype;
5212 /* In case the vectorization factor (VF) is bigger than the number
5213 of elements that we can fit in a vectype (nunits), we have to generate
5214 more than one vector stmt - i.e. - we need to "unroll" the
5215 vector stmt by a factor VF/nunits. For more details see documentation in
5216 vect_get_vec_def_for_copy_stmt. */
5218 /* In case of interleaving (non-unit grouped access):
5220 S1: &base + 2 = x2
5221 S2: &base = x0
5222 S3: &base + 1 = x1
5223 S4: &base + 3 = x3
5225 We create vectorized stores starting from base address (the access of the
5226 first stmt in the chain (S2 in the above example), when the last store stmt
5227 of the chain (S4) is reached:
5229 VS1: &base = vx2
5230 VS2: &base + vec_size*1 = vx0
5231 VS3: &base + vec_size*2 = vx1
5232 VS4: &base + vec_size*3 = vx3
5234 Then permutation statements are generated:
5236 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5237 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5240 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5241 (the order of the data-refs in the output of vect_permute_store_chain
5242 corresponds to the order of scalar stmts in the interleaving chain - see
5243 the documentation of vect_permute_store_chain()).
5245 In case of both multiple types and interleaving, above vector stores and
5246 permutation stmts are created for every copy. The result vector stmts are
5247 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5248 STMT_VINFO_RELATED_STMT for the next copies.
5251 prev_stmt_info = NULL;
5252 for (j = 0; j < ncopies; j++)
5254 gimple new_stmt;
5256 if (j == 0)
5258 if (slp)
5260 /* Get vectorized arguments for SLP_NODE. */
5261 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5262 NULL, slp_node, -1);
5264 vec_oprnd = vec_oprnds[0];
5266 else
5268 /* For interleaved stores we collect vectorized defs for all the
5269 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5270 used as an input to vect_permute_store_chain(), and OPRNDS as
5271 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5273 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5274 OPRNDS are of size 1. */
5275 next_stmt = first_stmt;
5276 for (i = 0; i < group_size; i++)
5278 /* Since gaps are not supported for interleaved stores,
5279 GROUP_SIZE is the exact number of stmts in the chain.
5280 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5281 there is no interleaving, GROUP_SIZE is 1, and only one
5282 iteration of the loop will be executed. */
5283 gcc_assert (next_stmt
5284 && gimple_assign_single_p (next_stmt));
5285 op = gimple_assign_rhs1 (next_stmt);
5287 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5288 NULL);
5289 dr_chain.quick_push (vec_oprnd);
5290 oprnds.quick_push (vec_oprnd);
5291 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5295 /* We should have caught mismatched types earlier. */
5296 gcc_assert (useless_type_conversion_p (vectype,
5297 TREE_TYPE (vec_oprnd)));
5298 bool simd_lane_access_p
5299 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5300 if (simd_lane_access_p
5301 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5302 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5303 && integer_zerop (DR_OFFSET (first_dr))
5304 && integer_zerop (DR_INIT (first_dr))
5305 && alias_sets_conflict_p (get_alias_set (aggr_type),
5306 get_alias_set (DR_REF (first_dr))))
5308 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5309 dataref_offset = build_int_cst (reference_alias_ptr_type
5310 (DR_REF (first_dr)), 0);
5311 inv_p = false;
5313 else
5314 dataref_ptr
5315 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5316 simd_lane_access_p ? loop : NULL,
5317 offset, &dummy, gsi, &ptr_incr,
5318 simd_lane_access_p, &inv_p);
5319 gcc_assert (bb_vinfo || !inv_p);
5321 else
5323 /* For interleaved stores we created vectorized defs for all the
5324 defs stored in OPRNDS in the previous iteration (previous copy).
5325 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5326 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5327 next copy.
5328 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5329 OPRNDS are of size 1. */
5330 for (i = 0; i < group_size; i++)
5332 op = oprnds[i];
5333 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5334 &def, &dt);
5335 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5336 dr_chain[i] = vec_oprnd;
5337 oprnds[i] = vec_oprnd;
5339 if (dataref_offset)
5340 dataref_offset
5341 = int_const_binop (PLUS_EXPR, dataref_offset,
5342 TYPE_SIZE_UNIT (aggr_type));
5343 else
5344 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5345 TYPE_SIZE_UNIT (aggr_type));
5348 if (store_lanes_p)
5350 tree vec_array;
5352 /* Combine all the vectors into an array. */
5353 vec_array = create_vector_array (vectype, vec_num);
5354 for (i = 0; i < vec_num; i++)
5356 vec_oprnd = dr_chain[i];
5357 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5360 /* Emit:
5361 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5362 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5363 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5364 gimple_call_set_lhs (new_stmt, data_ref);
5365 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5367 else
5369 new_stmt = NULL;
5370 if (grouped_store)
5372 if (j == 0)
5373 result_chain.create (group_size);
5374 /* Permute. */
5375 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5376 &result_chain);
5379 next_stmt = first_stmt;
5380 for (i = 0; i < vec_num; i++)
5382 unsigned align, misalign;
5384 if (i > 0)
5385 /* Bump the vector pointer. */
5386 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5387 stmt, NULL_TREE);
5389 if (slp)
5390 vec_oprnd = vec_oprnds[i];
5391 else if (grouped_store)
5392 /* For grouped stores vectorized defs are interleaved in
5393 vect_permute_store_chain(). */
5394 vec_oprnd = result_chain[i];
5396 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5397 dataref_offset
5398 ? dataref_offset
5399 : build_int_cst (reference_alias_ptr_type
5400 (DR_REF (first_dr)), 0));
5401 align = TYPE_ALIGN_UNIT (vectype);
5402 if (aligned_access_p (first_dr))
5403 misalign = 0;
5404 else if (DR_MISALIGNMENT (first_dr) == -1)
5406 TREE_TYPE (data_ref)
5407 = build_aligned_type (TREE_TYPE (data_ref),
5408 TYPE_ALIGN (elem_type));
5409 align = TYPE_ALIGN_UNIT (elem_type);
5410 misalign = 0;
5412 else
5414 TREE_TYPE (data_ref)
5415 = build_aligned_type (TREE_TYPE (data_ref),
5416 TYPE_ALIGN (elem_type));
5417 misalign = DR_MISALIGNMENT (first_dr);
5419 if (dataref_offset == NULL_TREE)
5420 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5421 misalign);
5423 if (negative
5424 && dt != vect_constant_def
5425 && dt != vect_external_def)
5427 tree perm_mask = perm_mask_for_reverse (vectype);
5428 tree perm_dest
5429 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5430 vectype);
5431 tree new_temp = make_ssa_name (perm_dest, NULL);
5433 /* Generate the permute statement. */
5434 gimple perm_stmt
5435 = gimple_build_assign_with_ops (VEC_PERM_EXPR, new_temp,
5436 vec_oprnd, vec_oprnd,
5437 perm_mask);
5438 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5440 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5441 vec_oprnd = new_temp;
5444 /* Arguments are ready. Create the new vector stmt. */
5445 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5446 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5448 if (slp)
5449 continue;
5451 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5452 if (!next_stmt)
5453 break;
5456 if (!slp)
5458 if (j == 0)
5459 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5460 else
5461 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5462 prev_stmt_info = vinfo_for_stmt (new_stmt);
5466 dr_chain.release ();
5467 oprnds.release ();
5468 result_chain.release ();
5469 vec_oprnds.release ();
5471 return true;
5474 /* Given a vector type VECTYPE and permutation SEL returns
5475 the VECTOR_CST mask that implements the permutation of the
5476 vector elements. If that is impossible to do, returns NULL. */
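/* E.g. for a 4-element VECTYPE and SEL = {1, 0, 3, 2} this builds the
integer VECTOR_CST {1, 0, 3, 2}, provided can_vec_perm_p says the target
can permute with that selector. */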
5478 tree
5479 vect_gen_perm_mask (tree vectype, unsigned char *sel)
5481 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5482 int i, nunits;
5484 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5486 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5487 return NULL;
5489 mask_elt_type = lang_hooks.types.type_for_mode
5490 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5491 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5493 mask_elts = XALLOCAVEC (tree, nunits);
5494 for (i = nunits - 1; i >= 0; i--)
5495 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5496 mask_vec = build_vector (mask_type, mask_elts);
5498 return mask_vec;
5501 /* Given vector variables X and Y that were generated for the scalar
5502 STMT, generate instructions to permute the vector elements of X and Y
5503 using permutation mask MASK_VEC, insert them at *GSI and return the
5504 permuted vector variable. */
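/* E.g. with X = {x0, x1, x2, x3}, Y = {y0, y1, y2, y3} and
MASK_VEC = {0, 4, 1, 5}, the emitted VEC_PERM_EXPR computes
{x0, y0, x1, y1}: selector values below the element count pick from X,
the rest pick from Y. */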
5506 static tree
5507 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5508 gimple_stmt_iterator *gsi)
5510 tree vectype = TREE_TYPE (x);
5511 tree perm_dest, data_ref;
5512 gimple perm_stmt;
5514 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5515 data_ref = make_ssa_name (perm_dest, NULL);
5517 /* Generate the permute statement. */
5518 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5519 x, y, mask_vec);
5520 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5522 return data_ref;
5525 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5526 inserting them on the loop's preheader edge. Returns true if we
5527 were successful in doing so (and thus STMT can be moved then),
5528 otherwise returns false. */
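/* E.g. if STMT is a load whose address is computed only from
loop-invariant values, the statements computing that address can be
moved to the preheader, after which STMT itself becomes hoistable. */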
5530 static bool
5531 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5533 ssa_op_iter i;
5534 tree op;
5535 bool any = false;
5537 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5539 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5540 if (!gimple_nop_p (def_stmt)
5541 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5543 /* Make sure we don't need to recurse. While we could do
5544 so in simple cases, when there are more complex use webs
5545 we don't have an easy way to preserve stmt order to fulfil
5546 dependencies within them. */
5547 tree op2;
5548 ssa_op_iter i2;
5549 if (gimple_code (def_stmt) == GIMPLE_PHI)
5550 return false;
5551 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5553 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5554 if (!gimple_nop_p (def_stmt2)
5555 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5556 return false;
5558 any = true;
5562 if (!any)
5563 return true;
5565 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5567 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5568 if (!gimple_nop_p (def_stmt)
5569 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5571 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5572 gsi_remove (&gsi, false);
5573 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5577 return true;
5580 /* vectorizable_load.
5582 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5583 can be vectorized.
5584 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5585 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5586 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5588 static bool
5589 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5590 slp_tree slp_node, slp_instance slp_node_instance)
5592 tree scalar_dest;
5593 tree vec_dest = NULL;
5594 tree data_ref = NULL;
5595 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5596 stmt_vec_info prev_stmt_info;
5597 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5598 struct loop *loop = NULL;
5599 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5600 bool nested_in_vect_loop = false;
5601 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5602 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5603 tree elem_type;
5604 tree new_temp;
5605 machine_mode mode;
5606 gimple new_stmt = NULL;
5607 tree dummy;
5608 enum dr_alignment_support alignment_support_scheme;
5609 tree dataref_ptr = NULL_TREE;
5610 tree dataref_offset = NULL_TREE;
5611 gimple ptr_incr = NULL;
5612 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5613 int ncopies;
5614 int i, j, group_size, group_gap;
5615 tree msq = NULL_TREE, lsq;
5616 tree offset = NULL_TREE;
5617 tree byte_offset = NULL_TREE;
5618 tree realignment_token = NULL_TREE;
5619 gimple phi = NULL;
5620 vec<tree> dr_chain = vNULL;
5621 bool grouped_load = false;
5622 bool load_lanes_p = false;
5623 gimple first_stmt;
5624 bool inv_p;
5625 bool negative = false;
5626 bool compute_in_loop = false;
5627 struct loop *at_loop;
5628 int vec_num;
5629 bool slp = (slp_node != NULL);
5630 bool slp_perm = false;
5631 enum tree_code code;
5632 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5633 int vf;
5634 tree aggr_type;
5635 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5636 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5637 int gather_scale = 1;
5638 enum vect_def_type gather_dt = vect_unknown_def_type;
5640 if (loop_vinfo)
5642 loop = LOOP_VINFO_LOOP (loop_vinfo);
5643 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5644 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5646 else
5647 vf = 1;
5649 /* Multiple types in SLP are handled by creating the appropriate number of
5650 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5651 case of SLP. */
5652 if (slp || PURE_SLP_STMT (stmt_info))
5653 ncopies = 1;
5654 else
5655 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5657 gcc_assert (ncopies >= 1);
5659 /* FORNOW. This restriction should be relaxed. */
5660 if (nested_in_vect_loop && ncopies > 1)
5662 if (dump_enabled_p ())
5663 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5664 "multiple types in nested loop.\n");
5665 return false;
5668 /* Invalidate assumptions made by dependence analysis when vectorization
5669 on the unrolled body effectively re-orders stmts. */
5670 if (ncopies > 1
5671 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5672 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5673 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5675 if (dump_enabled_p ())
5676 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5677 "cannot perform implicit CSE when unrolling "
5678 "with negative dependence distance\n");
5679 return false;
5682 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5683 return false;
5685 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5686 return false;
5688 /* Is vectorizable load? */
5689 if (!is_gimple_assign (stmt))
5690 return false;
5692 scalar_dest = gimple_assign_lhs (stmt);
5693 if (TREE_CODE (scalar_dest) != SSA_NAME)
5694 return false;
5696 code = gimple_assign_rhs_code (stmt);
5697 if (code != ARRAY_REF
5698 && code != BIT_FIELD_REF
5699 && code != INDIRECT_REF
5700 && code != COMPONENT_REF
5701 && code != IMAGPART_EXPR
5702 && code != REALPART_EXPR
5703 && code != MEM_REF
5704 && TREE_CODE_CLASS (code) != tcc_declaration)
5705 return false;
5707 if (!STMT_VINFO_DATA_REF (stmt_info))
5708 return false;
5710 elem_type = TREE_TYPE (vectype);
5711 mode = TYPE_MODE (vectype);
5713 /* FORNOW. In some cases we can vectorize even if the data-type is not
5714 supported (e.g. data copies). */
5715 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5717 if (dump_enabled_p ())
5718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5719 "Aligned load, but unsupported type.\n");
5720 return false;
5723 /* Check if the load is a part of an interleaving chain. */
5724 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5726 grouped_load = true;
5727 /* FORNOW */
5728 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5730 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5731 if (!slp && !PURE_SLP_STMT (stmt_info))
5733 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5734 if (vect_load_lanes_supported (vectype, group_size))
5735 load_lanes_p = true;
5736 else if (!vect_grouped_load_supported (vectype, group_size))
5737 return false;
5740 /* Invalidate assumptions made by dependence analysis when vectorization
5741 on the unrolled body effectively re-orders stmts. */
5742 if (!PURE_SLP_STMT (stmt_info)
5743 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5744 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5745 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5747 if (dump_enabled_p ())
5748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5749 "cannot perform implicit CSE when performing "
5750 "group loads with negative dependence distance\n");
5751 return false;
5756 if (STMT_VINFO_GATHER_P (stmt_info))
5758 gimple def_stmt;
5759 tree def;
5760 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5761 &gather_off, &gather_scale);
5762 gcc_assert (gather_decl);
5763 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5764 &def_stmt, &def, &gather_dt,
5765 &gather_off_vectype))
5767 if (dump_enabled_p ())
5768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5769 "gather index use not simple.\n");
5770 return false;
5773 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5775 else
5777 negative = tree_int_cst_compare (nested_in_vect_loop
5778 ? STMT_VINFO_DR_STEP (stmt_info)
5779 : DR_STEP (dr),
5780 size_zero_node) < 0;
5781 if (negative && ncopies > 1)
5783 if (dump_enabled_p ())
5784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5785 "multiple types with negative step.\n");
5786 return false;
5789 if (negative)
5791 if (grouped_load)
5793 if (dump_enabled_p ())
5794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5795 "negative step for group load not supported"
5796 "\n");
5797 return false;
5799 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5800 if (alignment_support_scheme != dr_aligned
5801 && alignment_support_scheme != dr_unaligned_supported)
5803 if (dump_enabled_p ())
5804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5805 "negative step but alignment required.\n");
5806 return false;
5808 if (!perm_mask_for_reverse (vectype))
5810 if (dump_enabled_p ())
5811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5812 "negative step and reversing not supported."
5813 "\n");
5814 return false;
5819 if (!vec_stmt) /* transformation not required. */
5821 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5822 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
5823 return true;
5826 if (dump_enabled_p ())
5827 dump_printf_loc (MSG_NOTE, vect_location,
5828 "transform load. ncopies = %d\n", ncopies);
5830 /** Transform. **/
5832 ensure_base_align (stmt_info, dr);
5834 if (STMT_VINFO_GATHER_P (stmt_info))
5836 tree vec_oprnd0 = NULL_TREE, op;
5837 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5838 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5839 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
5840 edge pe = loop_preheader_edge (loop);
5841 gimple_seq seq;
5842 basic_block new_bb;
5843 enum { NARROW, NONE, WIDEN } modifier;
5844 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
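/* The three cases below compare the number of elements in the data vector
   (NUNITS) with the number of elements in the offset vector
   (GATHER_OFF_NUNITS).  If they match, each gather consumes one offset
   vector as-is (NONE).  If the offset vector has twice as many elements,
   every odd copy permutes the high half of the offsets into place (WIDEN).
   If the data vector has twice as many elements, two gather results are
   combined by a permutation and NCOPIES is doubled (NARROW).  */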
5846 if (nunits == gather_off_nunits)
5847 modifier = NONE;
5848 else if (nunits == gather_off_nunits / 2)
5850 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5851 modifier = WIDEN;
5853 for (i = 0; i < gather_off_nunits; ++i)
5854 sel[i] = i | nunits;
5856 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
5857 gcc_assert (perm_mask != NULL_TREE);
5859 else if (nunits == gather_off_nunits * 2)
5861 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5862 modifier = NARROW;
5864 for (i = 0; i < nunits; ++i)
5865 sel[i] = i < gather_off_nunits
5866 ? i : i + nunits - gather_off_nunits;
5868 perm_mask = vect_gen_perm_mask (vectype, sel);
5869 gcc_assert (perm_mask != NULL_TREE);
5870 ncopies *= 2;
5872 else
5873 gcc_unreachable ();
5875 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5876 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5877 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5878 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5879 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5880 scaletype = TREE_VALUE (arglist);
5881 gcc_checking_assert (types_compatible_p (srctype, rettype));
5883 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5885 ptr = fold_convert (ptrtype, gather_base);
5886 if (!is_gimple_min_invariant (ptr))
5888 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5889 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5890 gcc_assert (!new_bb);
5893 /* Currently we support only unconditional gather loads,
5894 so mask should be all ones. */
5895 if (TREE_CODE (masktype) == INTEGER_TYPE)
5896 mask = build_int_cst (masktype, -1);
5897 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5899 mask = build_int_cst (TREE_TYPE (masktype), -1);
5900 mask = build_vector_from_val (masktype, mask);
5901 mask = vect_init_vector (stmt, mask, masktype, NULL);
5903 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
5905 REAL_VALUE_TYPE r;
5906 long tmp[6];
5907 for (j = 0; j < 6; ++j)
5908 tmp[j] = -1;
5909 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5910 mask = build_real (TREE_TYPE (masktype), r);
5911 mask = build_vector_from_val (masktype, mask);
5912 mask = vect_init_vector (stmt, mask, masktype, NULL);
5914 else
5915 gcc_unreachable ();
5917 scale = build_int_cst (scaletype, gather_scale);
5919 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
5920 merge = build_int_cst (TREE_TYPE (rettype), 0);
5921 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
5923 REAL_VALUE_TYPE r;
5924 long tmp[6];
5925 for (j = 0; j < 6; ++j)
5926 tmp[j] = 0;
5927 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
5928 merge = build_real (TREE_TYPE (rettype), r);
5930 else
5931 gcc_unreachable ();
5932 merge = build_vector_from_val (rettype, merge);
5933 merge = vect_init_vector (stmt, merge, rettype, NULL);
5935 prev_stmt_info = NULL;
5936 for (j = 0; j < ncopies; ++j)
5938 if (modifier == WIDEN && (j & 1))
5939 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
5940 perm_mask, stmt, gsi);
5941 else if (j == 0)
5942 op = vec_oprnd0
5943 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
5944 else
5945 op = vec_oprnd0
5946 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
5948 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5950 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5951 == TYPE_VECTOR_SUBPARTS (idxtype));
5952 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
5953 var = make_ssa_name (var, NULL);
5954 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5955 new_stmt
5956 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
5957 op, NULL_TREE);
5958 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5959 op = var;
5962 new_stmt
5963 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
5965 if (!useless_type_conversion_p (vectype, rettype))
5967 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
5968 == TYPE_VECTOR_SUBPARTS (rettype));
5969 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
5970 op = make_ssa_name (var, new_stmt);
5971 gimple_call_set_lhs (new_stmt, op);
5972 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5973 var = make_ssa_name (vec_dest, NULL);
5974 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
5975 new_stmt
5976 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
5977 NULL_TREE);
5979 else
5981 var = make_ssa_name (vec_dest, new_stmt);
5982 gimple_call_set_lhs (new_stmt, var);
5985 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5987 if (modifier == NARROW)
5989 if ((j & 1) == 0)
5991 prev_res = var;
5992 continue;
5994 var = permute_vec_elements (prev_res, var,
5995 perm_mask, stmt, gsi);
5996 new_stmt = SSA_NAME_DEF_STMT (var);
5999 if (prev_stmt_info == NULL)
6000 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6001 else
6002 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6003 prev_stmt_info = vinfo_for_stmt (new_stmt);
6005 return true;
6007 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
6009 gimple_stmt_iterator incr_gsi;
6010 bool insert_after;
6011 gimple incr;
6012 tree offvar;
6013 tree ivstep;
6014 tree running_off;
6015 vec<constructor_elt, va_gc> *v = NULL;
6016 gimple_seq stmts = NULL;
6017 tree stride_base, stride_step, alias_off;
6019 gcc_assert (!nested_in_vect_loop);
6021 stride_base
6022 = fold_build_pointer_plus
6023 (unshare_expr (DR_BASE_ADDRESS (dr)),
6024 size_binop (PLUS_EXPR,
6025 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
6026 convert_to_ptrofftype (DR_INIT (dr))));
6027 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
6029 /* For a load with loop-invariant (but other than power-of-2)
6030 stride (i.e. not a grouped access) like so:
6032 for (i = 0; i < n; i += stride)
6033 ... = array[i];
6035 we generate a new induction variable and new accesses to
6036 form a new vector (or vectors, depending on ncopies):
6038 for (j = 0; ; j += VF*stride)
6039 tmp1 = array[j];
6040 tmp2 = array[j + stride];
6042 vectemp = {tmp1, tmp2, ...}
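   For instance, with nunits == 4 and stride == 3 the loop below builds,
   for one copy,

     vectemp = { array[j], array[j+3], array[j+6], array[j+9] }

   and the new induction variable advances by VF * 3 elements per vector
   iteration.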
6045 ivstep = stride_step;
6046 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6047 build_int_cst (TREE_TYPE (ivstep), vf));
6049 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6051 create_iv (stride_base, ivstep, NULL,
6052 loop, &incr_gsi, insert_after,
6053 &offvar, NULL);
6054 incr = gsi_stmt (incr_gsi);
6055 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6057 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6058 if (stmts)
6059 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6061 prev_stmt_info = NULL;
6062 running_off = offvar;
6063 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
6064 for (j = 0; j < ncopies; j++)
6066 tree vec_inv;
6068 vec_alloc (v, nunits);
6069 for (i = 0; i < nunits; i++)
6071 tree newref, newoff;
6072 gimple incr;
6073 newref = build2 (MEM_REF, TREE_TYPE (vectype),
6074 running_off, alias_off);
6076 newref = force_gimple_operand_gsi (gsi, newref, true,
6077 NULL_TREE, true,
6078 GSI_SAME_STMT);
6079 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6080 newoff = copy_ssa_name (running_off, NULL);
6081 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
6082 running_off, stride_step);
6083 vect_finish_stmt_generation (stmt, incr, gsi);
6085 running_off = newoff;
6088 vec_inv = build_constructor (vectype, v);
6089 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6090 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6092 if (j == 0)
6093 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6094 else
6095 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6096 prev_stmt_info = vinfo_for_stmt (new_stmt);
6098 return true;
6101 if (grouped_load)
6103 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6104 if (slp
6105 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6106 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6107 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6109 /* Check if the chain of loads is already vectorized. */
6110 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6111 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6112 ??? But we can only do so if there is exactly one
6113 as we have no way to get at the rest. Leave the CSE
6114 opportunity alone.
6115 ??? With the group load eventually participating
6116 in multiple different permutations (having multiple
6117 slp nodes which refer to the same group) the CSE
6118 is even wrong code. See PR56270. */
6119 && !slp)
6121 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6122 return true;
6124 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6125 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6127 /* VEC_NUM is the number of vect stmts to be created for this group. */
6128 if (slp)
6130 grouped_load = false;
6131 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6132 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6133 slp_perm = true;
6134 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6136 else
6138 vec_num = group_size;
6139 group_gap = 0;
6142 else
6144 first_stmt = stmt;
6145 first_dr = dr;
6146 group_size = vec_num = 1;
6147 group_gap = 0;
6150 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6151 gcc_assert (alignment_support_scheme);
6152 /* Targets with load-lane instructions must not require explicit
6153 realignment. */
6154 gcc_assert (!load_lanes_p
6155 || alignment_support_scheme == dr_aligned
6156 || alignment_support_scheme == dr_unaligned_supported);
6158 /* In case the vectorization factor (VF) is bigger than the number
6159 of elements that we can fit in a vectype (nunits), we have to generate
6160 more than one vector stmt - i.e - we need to "unroll" the
6161 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6162 from one copy of the vector stmt to the next, in the field
6163 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6164 stages to find the correct vector defs to be used when vectorizing
6165 stmts that use the defs of the current stmt. The example below
6166 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6167 need to create 4 vectorized stmts):
6169 before vectorization:
6170 RELATED_STMT VEC_STMT
6171 S1: x = memref - -
6172 S2: z = x + 1 - -
6174 step 1: vectorize stmt S1:
6175 We first create the vector stmt VS1_0, and, as usual, record a
6176 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6177 Next, we create the vector stmt VS1_1, and record a pointer to
6178 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6179 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6180 stmts and pointers:
6181 RELATED_STMT VEC_STMT
6182 VS1_0: vx0 = memref0 VS1_1 -
6183 VS1_1: vx1 = memref1 VS1_2 -
6184 VS1_2: vx2 = memref2 VS1_3 -
6185 VS1_3: vx3 = memref3 - -
6186 S1: x = load - VS1_0
6187 S2: z = x + 1 - -
6189 See the documentation of vect_get_vec_def_for_stmt_copy for how the
6190 information recorded in the RELATED_STMT field is used to vectorize
6191 stmt S2. */
6193 /* In case of interleaving (non-unit grouped access):
6195 S1: x2 = &base + 2
6196 S2: x0 = &base
6197 S3: x1 = &base + 1
6198 S4: x3 = &base + 3
6200 Vectorized loads are created in the order of memory accesses
6201 starting from the access of the first stmt of the chain:
6203 VS1: vx0 = &base
6204 VS2: vx1 = &base + vec_size*1
6205 VS3: vx3 = &base + vec_size*2
6206 VS4: vx4 = &base + vec_size*3
6208 Then permutation statements are generated:
6210 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6211 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6214 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6215 (the order of the data-refs in the output of vect_permute_load_chain
6216 corresponds to the order of scalar stmts in the interleaving chain - see
6217 the documentation of vect_permute_load_chain()).
6218 The generation of permutation stmts and recording them in
6219 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6221 In case of both multiple types and interleaving, the vector loads and
6222 permutation stmts above are created for every copy. The result vector
6223 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6224 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6226 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6227 on a target that supports unaligned accesses (dr_unaligned_supported)
6228 we generate the following code:
6229 p = initial_addr;
6230 indx = 0;
6231 loop {
6232 p = p + indx * vectype_size;
6233 vec_dest = *(p);
6234 indx = indx + 1;
6237 Otherwise, the data reference is potentially unaligned on a target that
6238 does not support unaligned accesses (dr_explicit_realign_optimized) -
6239 then generate the following code, in which the data in each iteration is
6240 obtained by two vector loads, one from the previous iteration, and one
6241 from the current iteration:
6242 p1 = initial_addr;
6243 msq_init = *(floor(p1))
6244 p2 = initial_addr + VS - 1;
6245 realignment_token = call target_builtin;
6246 indx = 0;
6247 loop {
6248 p2 = p2 + indx * vectype_size
6249 lsq = *(floor(p2))
6250 vec_dest = realign_load (msq, lsq, realignment_token)
6251 indx = indx + 1;
6252 msq = lsq;
6253 } */
6255 /* If the misalignment remains the same throughout the execution of the
6256 loop, we can create the init_addr and permutation mask at the loop
6257 preheader. Otherwise, it needs to be created inside the loop.
6258 This can only occur when vectorizing memory accesses in the inner-loop
6259 nested within an outer-loop that is being vectorized. */
6261 if (nested_in_vect_loop
6262 && (TREE_INT_CST_LOW (DR_STEP (dr))
6263 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6265 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6266 compute_in_loop = true;
6269 if ((alignment_support_scheme == dr_explicit_realign_optimized
6270 || alignment_support_scheme == dr_explicit_realign)
6271 && !compute_in_loop)
6273 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6274 alignment_support_scheme, NULL_TREE,
6275 &at_loop);
6276 if (alignment_support_scheme == dr_explicit_realign_optimized)
6278 phi = SSA_NAME_DEF_STMT (msq);
6279 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6280 size_one_node);
6283 else
6284 at_loop = loop;
6286 if (negative)
6287 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6289 if (load_lanes_p)
6290 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6291 else
6292 aggr_type = vectype;
6294 prev_stmt_info = NULL;
6295 for (j = 0; j < ncopies; j++)
6297 /* 1. Create the vector or array pointer update chain. */
6298 if (j == 0)
6300 bool simd_lane_access_p
6301 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6302 if (simd_lane_access_p
6303 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6304 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6305 && integer_zerop (DR_OFFSET (first_dr))
6306 && integer_zerop (DR_INIT (first_dr))
6307 && alias_sets_conflict_p (get_alias_set (aggr_type),
6308 get_alias_set (DR_REF (first_dr)))
6309 && (alignment_support_scheme == dr_aligned
6310 || alignment_support_scheme == dr_unaligned_supported))
6312 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6313 dataref_offset = build_int_cst (reference_alias_ptr_type
6314 (DR_REF (first_dr)), 0);
6315 inv_p = false;
6317 else
6318 dataref_ptr
6319 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6320 offset, &dummy, gsi, &ptr_incr,
6321 simd_lane_access_p, &inv_p,
6322 byte_offset);
6324 else if (dataref_offset)
6325 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6326 TYPE_SIZE_UNIT (aggr_type));
6327 else
6328 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6329 TYPE_SIZE_UNIT (aggr_type));
6331 if (grouped_load || slp_perm)
6332 dr_chain.create (vec_num);
6334 if (load_lanes_p)
6336 tree vec_array;
6338 vec_array = create_vector_array (vectype, vec_num);
6340 /* Emit:
6341 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6342 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6343 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6344 gimple_call_set_lhs (new_stmt, vec_array);
6345 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6347 /* Extract each vector into an SSA_NAME. */
6348 for (i = 0; i < vec_num; i++)
6350 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6351 vec_array, i);
6352 dr_chain.quick_push (new_temp);
6355 /* Record the mapping between SSA_NAMEs and statements. */
6356 vect_record_grouped_load_vectors (stmt, dr_chain);
6358 else
6360 for (i = 0; i < vec_num; i++)
6362 if (i > 0)
6363 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6364 stmt, NULL_TREE);
6366 /* 2. Create the vector-load in the loop. */
6367 switch (alignment_support_scheme)
6369 case dr_aligned:
6370 case dr_unaligned_supported:
6372 unsigned int align, misalign;
6374 data_ref
6375 = build2 (MEM_REF, vectype, dataref_ptr,
6376 dataref_offset
6377 ? dataref_offset
6378 : build_int_cst (reference_alias_ptr_type
6379 (DR_REF (first_dr)), 0));
6380 align = TYPE_ALIGN_UNIT (vectype);
6381 if (alignment_support_scheme == dr_aligned)
6383 gcc_assert (aligned_access_p (first_dr));
6384 misalign = 0;
6386 else if (DR_MISALIGNMENT (first_dr) == -1)
6388 TREE_TYPE (data_ref)
6389 = build_aligned_type (TREE_TYPE (data_ref),
6390 TYPE_ALIGN (elem_type));
6391 align = TYPE_ALIGN_UNIT (elem_type);
6392 misalign = 0;
6394 else
6396 TREE_TYPE (data_ref)
6397 = build_aligned_type (TREE_TYPE (data_ref),
6398 TYPE_ALIGN (elem_type));
6399 misalign = DR_MISALIGNMENT (first_dr);
6401 if (dataref_offset == NULL_TREE)
6402 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6403 align, misalign);
6404 break;
6406 case dr_explicit_realign:
6408 tree ptr, bump;
6409 tree vs_minus_1;
6411 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6413 if (compute_in_loop)
6414 msq = vect_setup_realignment (first_stmt, gsi,
6415 &realignment_token,
6416 dr_explicit_realign,
6417 dataref_ptr, NULL);
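	      /* First load of the realignment scheme: mask off the low
		 address bits (ptr = dataref_ptr & -align) and load
		 *(floor (dataref_ptr)) into MSQ.  The address of the second
		 load, floor (dataref_ptr + VS - 1), is built further below;
		 that load and the REALIGN_LOAD combining MSQ and LSQ are
		 emitted after the switch.  */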
6419 ptr = copy_ssa_name (dataref_ptr, NULL);
6420 new_stmt = gimple_build_assign_with_ops
6421 (BIT_AND_EXPR, ptr, dataref_ptr,
6422 build_int_cst
6423 (TREE_TYPE (dataref_ptr),
6424 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6425 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6426 data_ref
6427 = build2 (MEM_REF, vectype, ptr,
6428 build_int_cst (reference_alias_ptr_type
6429 (DR_REF (first_dr)), 0));
6430 vec_dest = vect_create_destination_var (scalar_dest,
6431 vectype);
6432 new_stmt = gimple_build_assign (vec_dest, data_ref);
6433 new_temp = make_ssa_name (vec_dest, new_stmt);
6434 gimple_assign_set_lhs (new_stmt, new_temp);
6435 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6436 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6437 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6438 msq = new_temp;
6440 bump = size_binop (MULT_EXPR, vs_minus_1,
6441 TYPE_SIZE_UNIT (elem_type));
6442 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6443 new_stmt = gimple_build_assign_with_ops
6444 (BIT_AND_EXPR, NULL_TREE, ptr,
6445 build_int_cst
6446 (TREE_TYPE (ptr),
6447 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6448 ptr = copy_ssa_name (dataref_ptr, new_stmt);
6449 gimple_assign_set_lhs (new_stmt, ptr);
6450 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6451 data_ref
6452 = build2 (MEM_REF, vectype, ptr,
6453 build_int_cst (reference_alias_ptr_type
6454 (DR_REF (first_dr)), 0));
6455 break;
6457 case dr_explicit_realign_optimized:
6458 new_temp = copy_ssa_name (dataref_ptr, NULL);
6459 new_stmt = gimple_build_assign_with_ops
6460 (BIT_AND_EXPR, new_temp, dataref_ptr,
6461 build_int_cst
6462 (TREE_TYPE (dataref_ptr),
6463 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6464 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6465 data_ref
6466 = build2 (MEM_REF, vectype, new_temp,
6467 build_int_cst (reference_alias_ptr_type
6468 (DR_REF (first_dr)), 0));
6469 break;
6470 default:
6471 gcc_unreachable ();
6473 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6474 new_stmt = gimple_build_assign (vec_dest, data_ref);
6475 new_temp = make_ssa_name (vec_dest, new_stmt);
6476 gimple_assign_set_lhs (new_stmt, new_temp);
6477 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6479 /* 3. Handle explicit realignment if necessary/supported.
6480 Create in loop:
6481 vec_dest = realign_load (msq, lsq, realignment_token) */
6482 if (alignment_support_scheme == dr_explicit_realign_optimized
6483 || alignment_support_scheme == dr_explicit_realign)
6485 lsq = gimple_assign_lhs (new_stmt);
6486 if (!realignment_token)
6487 realignment_token = dataref_ptr;
6488 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6489 new_stmt
6490 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
6491 vec_dest, msq, lsq,
6492 realignment_token);
6493 new_temp = make_ssa_name (vec_dest, new_stmt);
6494 gimple_assign_set_lhs (new_stmt, new_temp);
6495 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6497 if (alignment_support_scheme == dr_explicit_realign_optimized)
6499 gcc_assert (phi);
6500 if (i == vec_num - 1 && j == ncopies - 1)
6501 add_phi_arg (phi, lsq,
6502 loop_latch_edge (containing_loop),
6503 UNKNOWN_LOCATION);
6504 msq = lsq;
6508 /* 4. Handle invariant-load. */
6509 if (inv_p && !bb_vinfo)
6511 gcc_assert (!grouped_load);
6512 /* If we have versioned for aliasing or the loop doesn't
6513 have any data dependencies that would preclude this,
6514 then we are sure this is a loop invariant load and
6515 thus we can insert it on the preheader edge. */
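	      /* For example (assuming *p cannot alias a[]), in

		   for (i = 0; i < n; ++i)
		     a[i] = *p;

		 the scalar load *p is re-emitted on the preheader edge and
		 its value broadcast into a vector there, instead of being
		 loaded in every vectorized iteration.  */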
6516 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6517 && !nested_in_vect_loop
6518 && hoist_defs_of_uses (stmt, loop))
6520 if (dump_enabled_p ())
6522 dump_printf_loc (MSG_NOTE, vect_location,
6523 "hoisting out of the vectorized "
6524 "loop: ");
6525 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6526 dump_printf (MSG_NOTE, "\n");
6528 tree tem = copy_ssa_name (scalar_dest, NULL);
6529 gsi_insert_on_edge_immediate
6530 (loop_preheader_edge (loop),
6531 gimple_build_assign (tem,
6532 unshare_expr
6533 (gimple_assign_rhs1 (stmt))));
6534 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6536 else
6538 gimple_stmt_iterator gsi2 = *gsi;
6539 gsi_next (&gsi2);
6540 new_temp = vect_init_vector (stmt, scalar_dest,
6541 vectype, &gsi2);
6543 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6544 set_vinfo_for_stmt (new_stmt,
6545 new_stmt_vec_info (new_stmt, loop_vinfo,
6546 bb_vinfo));
6549 if (negative)
6551 tree perm_mask = perm_mask_for_reverse (vectype);
6552 new_temp = permute_vec_elements (new_temp, new_temp,
6553 perm_mask, stmt, gsi);
6554 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6557 /* Collect vector loads and later create their permutation in
6558 vect_transform_grouped_load (). */
6559 if (grouped_load || slp_perm)
6560 dr_chain.quick_push (new_temp);
6562 /* Store vector loads in the corresponding SLP_NODE. */
6563 if (slp && !slp_perm)
6564 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6566 /* Bump the vector pointer to account for a gap. */
6567 if (slp && group_gap != 0)
6569 tree bump = size_binop (MULT_EXPR,
6570 TYPE_SIZE_UNIT (elem_type),
6571 size_int (group_gap));
6572 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6573 stmt, bump);
6577 if (slp && !slp_perm)
6578 continue;
6580 if (slp_perm)
6582 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6583 slp_node_instance, false))
6585 dr_chain.release ();
6586 return false;
6589 else
6591 if (grouped_load)
6593 if (!load_lanes_p)
6594 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6595 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6597 else
6599 if (j == 0)
6600 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6601 else
6602 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6603 prev_stmt_info = vinfo_for_stmt (new_stmt);
6606 dr_chain.release ();
6609 return true;
6612 /* Function vect_is_simple_cond.
6614 Input:
6615 LOOP - the loop that is being vectorized.
6616 COND - Condition that is checked for simple use.
6618 Output:
6619 *COMP_VECTYPE - the vector type for the comparison.
6621 Returns whether a COND can be vectorized. Checks whether
6622 condition operands are supportable using vect_is_simple_use. */
6624 static bool
6625 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6626 bb_vec_info bb_vinfo, tree *comp_vectype)
6628 tree lhs, rhs;
6629 tree def;
6630 enum vect_def_type dt;
6631 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6633 if (!COMPARISON_CLASS_P (cond))
6634 return false;
6636 lhs = TREE_OPERAND (cond, 0);
6637 rhs = TREE_OPERAND (cond, 1);
6639 if (TREE_CODE (lhs) == SSA_NAME)
6641 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6642 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6643 &lhs_def_stmt, &def, &dt, &vectype1))
6644 return false;
6646 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6647 && TREE_CODE (lhs) != FIXED_CST)
6648 return false;
6650 if (TREE_CODE (rhs) == SSA_NAME)
6652 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6653 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6654 &rhs_def_stmt, &def, &dt, &vectype2))
6655 return false;
6657 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6658 && TREE_CODE (rhs) != FIXED_CST)
6659 return false;
6661 *comp_vectype = vectype1 ? vectype1 : vectype2;
6662 return true;
6665 /* vectorizable_condition.
6667 Check if STMT is a conditional modify expression that can be vectorized.
6668 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6669 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6670 at GSI.
6672 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6673 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
6674 the else clause if it is 2).
6676 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
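/* For example (SSA names purely illustrative), a scalar statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   is replaced by a vector statement of the form

     vx_6 = VEC_COND_EXPR <va_7 < vb_8, vc_9, vd_10>;  */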
6678 bool
6679 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6680 gimple *vec_stmt, tree reduc_def, int reduc_index,
6681 slp_tree slp_node)
6683 tree scalar_dest = NULL_TREE;
6684 tree vec_dest = NULL_TREE;
6685 tree cond_expr, then_clause, else_clause;
6686 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6687 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6688 tree comp_vectype = NULL_TREE;
6689 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6690 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6691 tree vec_compare, vec_cond_expr;
6692 tree new_temp;
6693 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6694 tree def;
6695 enum vect_def_type dt, dts[4];
6696 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6697 int ncopies;
6698 enum tree_code code;
6699 stmt_vec_info prev_stmt_info = NULL;
6700 int i, j;
6701 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6702 vec<tree> vec_oprnds0 = vNULL;
6703 vec<tree> vec_oprnds1 = vNULL;
6704 vec<tree> vec_oprnds2 = vNULL;
6705 vec<tree> vec_oprnds3 = vNULL;
6706 tree vec_cmp_type;
6708 if (slp_node || PURE_SLP_STMT (stmt_info))
6709 ncopies = 1;
6710 else
6711 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6713 gcc_assert (ncopies >= 1);
6714 if (reduc_index && ncopies > 1)
6715 return false; /* FORNOW */
6717 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6718 return false;
6720 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6721 return false;
6723 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6724 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6725 && reduc_def))
6726 return false;
6728 /* FORNOW: not yet supported. */
6729 if (STMT_VINFO_LIVE_P (stmt_info))
6731 if (dump_enabled_p ())
6732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6733 "value used after loop.\n");
6734 return false;
6737 /* Is vectorizable conditional operation? */
6738 if (!is_gimple_assign (stmt))
6739 return false;
6741 code = gimple_assign_rhs_code (stmt);
6743 if (code != COND_EXPR)
6744 return false;
6746 cond_expr = gimple_assign_rhs1 (stmt);
6747 then_clause = gimple_assign_rhs2 (stmt);
6748 else_clause = gimple_assign_rhs3 (stmt);
6750 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6751 &comp_vectype)
6752 || !comp_vectype)
6753 return false;
6755 if (TREE_CODE (then_clause) == SSA_NAME)
6757 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6758 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
6759 &then_def_stmt, &def, &dt))
6760 return false;
6762 else if (TREE_CODE (then_clause) != INTEGER_CST
6763 && TREE_CODE (then_clause) != REAL_CST
6764 && TREE_CODE (then_clause) != FIXED_CST)
6765 return false;
6767 if (TREE_CODE (else_clause) == SSA_NAME)
6769 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6770 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
6771 &else_def_stmt, &def, &dt))
6772 return false;
6774 else if (TREE_CODE (else_clause) != INTEGER_CST
6775 && TREE_CODE (else_clause) != REAL_CST
6776 && TREE_CODE (else_clause) != FIXED_CST)
6777 return false;
6779 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
6780 /* The result of a vector comparison should be of signed type. */
6781 tree cmp_type = build_nonstandard_integer_type (prec, 0);
6782 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
6783 if (vec_cmp_type == NULL_TREE)
6784 return false;
6786 if (!vec_stmt)
6788 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6789 return expand_vec_cond_expr_p (vectype, comp_vectype);
6792 /* Transform. */
6794 if (!slp_node)
6796 vec_oprnds0.create (1);
6797 vec_oprnds1.create (1);
6798 vec_oprnds2.create (1);
6799 vec_oprnds3.create (1);
6802 /* Handle def. */
6803 scalar_dest = gimple_assign_lhs (stmt);
6804 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6806 /* Handle cond expr. */
6807 for (j = 0; j < ncopies; j++)
6809 gimple new_stmt = NULL;
6810 if (j == 0)
6812 if (slp_node)
6814 auto_vec<tree, 4> ops;
6815 auto_vec<vec<tree>, 4> vec_defs;
6817 ops.safe_push (TREE_OPERAND (cond_expr, 0));
6818 ops.safe_push (TREE_OPERAND (cond_expr, 1));
6819 ops.safe_push (then_clause);
6820 ops.safe_push (else_clause);
6821 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
6822 vec_oprnds3 = vec_defs.pop ();
6823 vec_oprnds2 = vec_defs.pop ();
6824 vec_oprnds1 = vec_defs.pop ();
6825 vec_oprnds0 = vec_defs.pop ();
6827 ops.release ();
6828 vec_defs.release ();
6830 else
6832 gimple gtemp;
6833 vec_cond_lhs =
6834 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
6835 stmt, NULL);
6836 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6837 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
6839 vec_cond_rhs =
6840 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
6841 stmt, NULL);
6842 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6843 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
6844 if (reduc_index == 1)
6845 vec_then_clause = reduc_def;
6846 else
6848 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
6849 stmt, NULL);
6850 vect_is_simple_use (then_clause, stmt, loop_vinfo,
6851 NULL, &gtemp, &def, &dts[2]);
6853 if (reduc_index == 2)
6854 vec_else_clause = reduc_def;
6855 else
6857 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
6858 stmt, NULL);
6859 vect_is_simple_use (else_clause, stmt, loop_vinfo,
6860 NULL, &gtemp, &def, &dts[3]);
6864 else
6866 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
6867 vec_oprnds0.pop ());
6868 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
6869 vec_oprnds1.pop ());
6870 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
6871 vec_oprnds2.pop ());
6872 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
6873 vec_oprnds3.pop ());
6876 if (!slp_node)
6878 vec_oprnds0.quick_push (vec_cond_lhs);
6879 vec_oprnds1.quick_push (vec_cond_rhs);
6880 vec_oprnds2.quick_push (vec_then_clause);
6881 vec_oprnds3.quick_push (vec_else_clause);
6884 /* Arguments are ready. Create the new vector stmt. */
6885 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
6887 vec_cond_rhs = vec_oprnds1[i];
6888 vec_then_clause = vec_oprnds2[i];
6889 vec_else_clause = vec_oprnds3[i];
6891 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6892 vec_cond_lhs, vec_cond_rhs);
6893 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6894 vec_compare, vec_then_clause, vec_else_clause);
6896 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6897 new_temp = make_ssa_name (vec_dest, new_stmt);
6898 gimple_assign_set_lhs (new_stmt, new_temp);
6899 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6900 if (slp_node)
6901 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6904 if (slp_node)
6905 continue;
6907 if (j == 0)
6908 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6909 else
6910 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6912 prev_stmt_info = vinfo_for_stmt (new_stmt);
6915 vec_oprnds0.release ();
6916 vec_oprnds1.release ();
6917 vec_oprnds2.release ();
6918 vec_oprnds3.release ();
6920 return true;
6924 /* Make sure the statement is vectorizable. */
6926 bool
6927 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
6929 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6930 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6931 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
6932 bool ok;
6933 tree scalar_type, vectype;
6934 gimple pattern_stmt;
6935 gimple_seq pattern_def_seq;
6937 if (dump_enabled_p ())
6939 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
6940 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6941 dump_printf (MSG_NOTE, "\n");
6944 if (gimple_has_volatile_ops (stmt))
6946 if (dump_enabled_p ())
6947 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6948 "not vectorized: stmt has volatile operands\n");
6950 return false;
6953 /* Skip stmts that do not need to be vectorized. In loops this is expected
6954 to include:
6955 - the COND_EXPR which is the loop exit condition
6956 - any LABEL_EXPRs in the loop
6957 - computations that are used only for array indexing or loop control.
6958 In basic blocks we only analyze statements that are a part of some SLP
6959 instance, therefore, all the statements are relevant.
6961 Pattern statement needs to be analyzed instead of the original statement
6962 if the original statement is not relevant. Otherwise, we analyze both
6963 statements. In basic blocks we are called from some SLP instance
6964 traversal, so don't analyze pattern stmts instead; the pattern stmts
6965 will already be part of some SLP instance. */
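   /* E.g. an induction variable increment i_1 = i_2 + 1 that is used only
      by the loop's exit test falls under "loop control" above and is
      skipped here.  */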
6967 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
6968 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6969 && !STMT_VINFO_LIVE_P (stmt_info))
6971 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6972 && pattern_stmt
6973 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6974 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6976 /* Analyze PATTERN_STMT instead of the original stmt. */
6977 stmt = pattern_stmt;
6978 stmt_info = vinfo_for_stmt (pattern_stmt);
6979 if (dump_enabled_p ())
6981 dump_printf_loc (MSG_NOTE, vect_location,
6982 "==> examining pattern statement: ");
6983 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6984 dump_printf (MSG_NOTE, "\n");
6987 else
6989 if (dump_enabled_p ())
6990 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
6992 return true;
6995 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6996 && node == NULL
6997 && pattern_stmt
6998 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6999 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7001 /* Analyze PATTERN_STMT too. */
7002 if (dump_enabled_p ())
7004 dump_printf_loc (MSG_NOTE, vect_location,
7005 "==> examining pattern statement: ");
7006 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7007 dump_printf (MSG_NOTE, "\n");
7010 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7011 return false;
7014 if (is_pattern_stmt_p (stmt_info)
7015 && node == NULL
7016 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7018 gimple_stmt_iterator si;
7020 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7022 gimple pattern_def_stmt = gsi_stmt (si);
7023 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7024 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7026 /* Analyze def stmt of STMT if it's a pattern stmt. */
7027 if (dump_enabled_p ())
7029 dump_printf_loc (MSG_NOTE, vect_location,
7030 "==> examining pattern def statement: ");
7031 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7032 dump_printf (MSG_NOTE, "\n");
7035 if (!vect_analyze_stmt (pattern_def_stmt,
7036 need_to_vectorize, node))
7037 return false;
7042 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7044 case vect_internal_def:
7045 break;
7047 case vect_reduction_def:
7048 case vect_nested_cycle:
7049 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
7050 || relevance == vect_used_in_outer_by_reduction
7051 || relevance == vect_unused_in_scope));
7052 break;
7054 case vect_induction_def:
7055 case vect_constant_def:
7056 case vect_external_def:
7057 case vect_unknown_def_type:
7058 default:
7059 gcc_unreachable ();
7062 if (bb_vinfo)
7064 gcc_assert (PURE_SLP_STMT (stmt_info));
7066 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7067 if (dump_enabled_p ())
7069 dump_printf_loc (MSG_NOTE, vect_location,
7070 "get vectype for scalar type: ");
7071 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7072 dump_printf (MSG_NOTE, "\n");
7075 vectype = get_vectype_for_scalar_type (scalar_type);
7076 if (!vectype)
7078 if (dump_enabled_p ())
7080 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7081 "not SLPed: unsupported data-type ");
7082 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7083 scalar_type);
7084 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7086 return false;
7089 if (dump_enabled_p ())
7091 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7092 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7093 dump_printf (MSG_NOTE, "\n");
7096 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7099 if (STMT_VINFO_RELEVANT_P (stmt_info))
7101 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7102 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7103 || (is_gimple_call (stmt)
7104 && gimple_call_lhs (stmt) == NULL_TREE));
7105 *need_to_vectorize = true;
7108 ok = true;
7109 if (!bb_vinfo
7110 && (STMT_VINFO_RELEVANT_P (stmt_info)
7111 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7112 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7113 || vectorizable_conversion (stmt, NULL, NULL, NULL)
7114 || vectorizable_shift (stmt, NULL, NULL, NULL)
7115 || vectorizable_operation (stmt, NULL, NULL, NULL)
7116 || vectorizable_assignment (stmt, NULL, NULL, NULL)
7117 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
7118 || vectorizable_call (stmt, NULL, NULL, NULL)
7119 || vectorizable_store (stmt, NULL, NULL, NULL)
7120 || vectorizable_reduction (stmt, NULL, NULL, NULL)
7121 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
7122 else
7124 if (bb_vinfo)
7125 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7126 || vectorizable_conversion (stmt, NULL, NULL, node)
7127 || vectorizable_shift (stmt, NULL, NULL, node)
7128 || vectorizable_operation (stmt, NULL, NULL, node)
7129 || vectorizable_assignment (stmt, NULL, NULL, node)
7130 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7131 || vectorizable_call (stmt, NULL, NULL, node)
7132 || vectorizable_store (stmt, NULL, NULL, node)
7133 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7136 if (!ok)
7138 if (dump_enabled_p ())
7140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7141 "not vectorized: relevant stmt not ");
7142 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7143 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7144 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7147 return false;
7150 if (bb_vinfo)
7151 return true;
7153 /* Stmts that are (also) "live" (i.e. that are used out of the loop)
7154 need extra handling, except for vectorizable reductions. */
7155 if (STMT_VINFO_LIVE_P (stmt_info)
7156 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7157 ok = vectorizable_live_operation (stmt, NULL, NULL);
7159 if (!ok)
7161 if (dump_enabled_p ())
7163 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7164 "not vectorized: live stmt not ");
7165 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7166 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7167 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7170 return false;
7173 return true;
7177 /* Function vect_transform_stmt.
7179 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7181 bool
7182 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7183 bool *grouped_store, slp_tree slp_node,
7184 slp_instance slp_node_instance)
7186 bool is_store = false;
7187 gimple vec_stmt = NULL;
7188 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7189 bool done;
7191 switch (STMT_VINFO_TYPE (stmt_info))
7193 case type_demotion_vec_info_type:
7194 case type_promotion_vec_info_type:
7195 case type_conversion_vec_info_type:
7196 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7197 gcc_assert (done);
7198 break;
7200 case induc_vec_info_type:
7201 gcc_assert (!slp_node);
7202 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7203 gcc_assert (done);
7204 break;
7206 case shift_vec_info_type:
7207 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7208 gcc_assert (done);
7209 break;
7211 case op_vec_info_type:
7212 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7213 gcc_assert (done);
7214 break;
7216 case assignment_vec_info_type:
7217 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7218 gcc_assert (done);
7219 break;
7221 case load_vec_info_type:
7222 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7223 slp_node_instance);
7224 gcc_assert (done);
7225 break;
7227 case store_vec_info_type:
7228 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7229 gcc_assert (done);
7230 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7232 /* In case of interleaving, the whole chain is vectorized when the
7233 last store in the chain is reached. Store stmts before the last
7234 one are skipped, and their stmt_vec_info shouldn't be freed
7235 meanwhile. */
7236 *grouped_store = true;
7237 if (STMT_VINFO_VEC_STMT (stmt_info))
7238 is_store = true;
7240 else
7241 is_store = true;
7242 break;
7244 case condition_vec_info_type:
7245 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7246 gcc_assert (done);
7247 break;
7249 case call_vec_info_type:
7250 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7251 stmt = gsi_stmt (*gsi);
7252 if (is_gimple_call (stmt)
7253 && gimple_call_internal_p (stmt)
7254 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7255 is_store = true;
7256 break;
7258 case call_simd_clone_vec_info_type:
7259 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7260 stmt = gsi_stmt (*gsi);
7261 break;
7263 case reduc_vec_info_type:
7264 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7265 gcc_assert (done);
7266 break;
7268 default:
7269 if (!STMT_VINFO_LIVE_P (stmt_info))
7271 if (dump_enabled_p ())
7272 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7273 "stmt not supported.\n");
7274 gcc_unreachable ();
7278 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7279 is being vectorized, but outside the immediately enclosing loop. */
7280 if (vec_stmt
7281 && STMT_VINFO_LOOP_VINFO (stmt_info)
7282 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7283 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7284 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7285 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7286 || STMT_VINFO_RELEVANT (stmt_info) ==
7287 vect_used_in_outer_by_reduction))
7289 struct loop *innerloop = LOOP_VINFO_LOOP (
7290 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7291 imm_use_iterator imm_iter;
7292 use_operand_p use_p;
7293 tree scalar_dest;
7294 gimple exit_phi;
7296 if (dump_enabled_p ())
7297 dump_printf_loc (MSG_NOTE, vect_location,
7298 "Record the vdef for outer-loop vectorization.\n");
7300 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7301 (to be used when vectorizing outer-loop stmts that use the DEF of
7302 STMT). */
7303 if (gimple_code (stmt) == GIMPLE_PHI)
7304 scalar_dest = PHI_RESULT (stmt);
7305 else
7306 scalar_dest = gimple_assign_lhs (stmt);
7308 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7310 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7312 exit_phi = USE_STMT (use_p);
7313 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7318 /* Handle stmts whose DEF is used outside the loop-nest that is
7319 being vectorized. */
7320 if (STMT_VINFO_LIVE_P (stmt_info)
7321 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7323 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7324 gcc_assert (done);
7327 if (vec_stmt)
7328 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7330 return is_store;
7334 /* Remove a group of stores (for SLP or interleaving), free their
7335 stmt_vec_info. */
7337 void
7338 vect_remove_stores (gimple first_stmt)
7340 gimple next = first_stmt;
7341 gimple tmp;
7342 gimple_stmt_iterator next_si;
7344 while (next)
7346 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7348 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7349 if (is_pattern_stmt_p (stmt_info))
7350 next = STMT_VINFO_RELATED_STMT (stmt_info);
7351 /* Free the attached stmt_vec_info and remove the stmt. */
7352 next_si = gsi_for_stmt (next);
7353 unlink_stmt_vdef (next);
7354 gsi_remove (&next_si, true);
7355 release_defs (next);
7356 free_stmt_vec_info (next);
7357 next = tmp;
7362 /* Function new_stmt_vec_info.
7364 Create and initialize a new stmt_vec_info struct for STMT. */
7366 stmt_vec_info
7367 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7368 bb_vec_info bb_vinfo)
7370 stmt_vec_info res;
7371 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7373 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7374 STMT_VINFO_STMT (res) = stmt;
7375 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7376 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7377 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7378 STMT_VINFO_LIVE_P (res) = false;
7379 STMT_VINFO_VECTYPE (res) = NULL;
7380 STMT_VINFO_VEC_STMT (res) = NULL;
7381 STMT_VINFO_VECTORIZABLE (res) = true;
7382 STMT_VINFO_IN_PATTERN_P (res) = false;
7383 STMT_VINFO_RELATED_STMT (res) = NULL;
7384 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7385 STMT_VINFO_DATA_REF (res) = NULL;
7387 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7388 STMT_VINFO_DR_OFFSET (res) = NULL;
7389 STMT_VINFO_DR_INIT (res) = NULL;
7390 STMT_VINFO_DR_STEP (res) = NULL;
7391 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7393 if (gimple_code (stmt) == GIMPLE_PHI
7394 && is_loop_header_bb_p (gimple_bb (stmt)))
7395 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7396 else
7397 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7399 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7400 STMT_SLP_TYPE (res) = loop_vect;
7401 GROUP_FIRST_ELEMENT (res) = NULL;
7402 GROUP_NEXT_ELEMENT (res) = NULL;
7403 GROUP_SIZE (res) = 0;
7404 GROUP_STORE_COUNT (res) = 0;
7405 GROUP_GAP (res) = 0;
7406 GROUP_SAME_DR_STMT (res) = NULL;
7408 return res;
7412 /* Create the vector that holds the stmt_vec_info structs. */
7414 void
7415 init_stmt_vec_info_vec (void)
7417 gcc_assert (!stmt_vec_info_vec.exists ());
7418 stmt_vec_info_vec.create (50);
7422 /* Free the stmt_vec_info vector. */
7424 void
7425 free_stmt_vec_info_vec (void)
7427 unsigned int i;
7428 vec_void_p info;
7429 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7430 if (info != NULL)
7431 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7432 gcc_assert (stmt_vec_info_vec.exists ());
7433 stmt_vec_info_vec.release ();
7437 /* Free stmt vectorization related info. */
7439 void
7440 free_stmt_vec_info (gimple stmt)
7442 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7444 if (!stmt_info)
7445 return;
7447 /* Check if this statement has a related "pattern stmt"
7448 (introduced by the vectorizer during the pattern recognition
7449 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7450 too. */
7451 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7453 stmt_vec_info patt_info
7454 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7455 if (patt_info)
7457 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7458 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7459 gimple_set_bb (patt_stmt, NULL);
7460 tree lhs = gimple_get_lhs (patt_stmt);
7461 if (TREE_CODE (lhs) == SSA_NAME)
7462 release_ssa_name (lhs);
7463 if (seq)
7465 gimple_stmt_iterator si;
7466 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7468 gimple seq_stmt = gsi_stmt (si);
7469 gimple_set_bb (seq_stmt, NULL);
7470 lhs = gimple_get_lhs (patt_stmt);
7471 if (TREE_CODE (lhs) == SSA_NAME)
7472 release_ssa_name (lhs);
7473 free_stmt_vec_info (seq_stmt);
7476 free_stmt_vec_info (patt_stmt);
7480 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7481 set_vinfo_for_stmt (stmt, NULL);
7482 free (stmt_info);
7486 /* Function get_vectype_for_scalar_type_and_size.
7488 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7489 by the target. */
7491 static tree
7492 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7494 machine_mode inner_mode = TYPE_MODE (scalar_type);
7495 machine_mode simd_mode;
7496 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7497 int nunits;
7498 tree vectype;
7500 if (nbytes == 0)
7501 return NULL_TREE;
7503 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7504 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7505 return NULL_TREE;
7507 /* For vector types of elements whose mode precision doesn't
7508 match their type's precision we use an element type of mode
7509 precision. The vectorization routines will have to make sure
7510 they support the proper result truncation/extension.
7511 We also make sure to build vector types with INTEGER_TYPE
7512 component type only. */
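/* For example, an enumeral type with SImode, or a boolean whose precision
   is smaller than its mode's bitsize, is replaced here by an INTEGER_TYPE
   of the mode's precision and the same signedness before the vector type
   is built.  */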
7513 if (INTEGRAL_TYPE_P (scalar_type)
7514 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7515 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7516 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7517 TYPE_UNSIGNED (scalar_type));
7519 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7520 When the component mode passes the above test simply use a type
7521 corresponding to that mode. The theory is that any use that
7522 would cause problems with this will disable vectorization anyway. */
7523 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7524 && !INTEGRAL_TYPE_P (scalar_type))
7525 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7527 /* We can't build a vector type of elements with alignment bigger than
7528 their size. */
7529 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7530 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7531 TYPE_UNSIGNED (scalar_type));
7533 /* If we fell back to using the mode, fail if there was
7534 no scalar type for it. */
7535 if (scalar_type == NULL_TREE)
7536 return NULL_TREE;
7538 /* If no size was supplied use the mode the target prefers. Otherwise
7539 look up a vector mode of the specified size. */
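/* For example, with SIZE == 16 and a 4-byte SImode element this looks for
   a 4-element vector mode (typically V4SImode) and NUNITS below becomes
   16 / 4 == 4.  */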
7540 if (size == 0)
7541 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7542 else
7543 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7544 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7545 if (nunits <= 1)
7546 return NULL_TREE;
7548 vectype = build_vector_type (scalar_type, nunits);
7550 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7551 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7552 return NULL_TREE;
7554 return vectype;
7557 unsigned int current_vector_size;
7559 /* Function get_vectype_for_scalar_type.
7561 Returns the vector type corresponding to SCALAR_TYPE as supported
7562 by the target. */
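/* Illustrative note: on the first successful query current_vector_size is
   still 0, so the target's preferred SIMD mode determines the width and that
   width is then latched; e.g. once a 16-byte V4SI has been chosen, later
   calls request 16-byte vectors (V8HI, V16QI, ...) as well.  */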
7564 tree
7565 get_vectype_for_scalar_type (tree scalar_type)
7567 tree vectype;
7568 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7569 current_vector_size);
7570 if (vectype
7571 && current_vector_size == 0)
7572 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7573 return vectype;
7576 /* Function get_same_sized_vectype
7578 Returns a vector type corresponding to SCALAR_TYPE of size
7579 VECTOR_TYPE if supported by the target. */
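/* For example (illustrative): with VECTOR_TYPE == V4SI (16 bytes) and
   SCALAR_TYPE == short, this returns the 16-byte short vector type, i.e.
   V8HI, when the target supports it, and NULL_TREE otherwise.  */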
7581 tree
7582 get_same_sized_vectype (tree scalar_type, tree vector_type)
7584 return get_vectype_for_scalar_type_and_size
7585 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7588 /* Function vect_is_simple_use.
7590 Input:
7591 LOOP_VINFO - the vect info of the loop that is being vectorized.
7592 BB_VINFO - the vect info of the basic block that is being vectorized.
7593 OPERAND - operand of STMT in the loop or bb.
7594 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7596 Returns whether a stmt with OPERAND can be vectorized.
7597 For loops, supportable operands are constants, loop invariants, and operands
7598 that are defined by the current iteration of the loop. Unsupportable
7599 operands are those that are defined by a previous iteration of the loop (as
7600 is the case in reduction/induction computations).
7601 For basic blocks, supportable operands are constants and bb invariants.
7602 For now, operands defined outside the basic block are not supported. */
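/* An illustrative sketch (hypothetical source; the names are made up):

       x = ...;                    defined before the loop
       for (i = 0; i < n; i++)
	 {
	   t = b[i] + x;           value loaded from 'b[i]': vect_internal_def
				   'x': vect_external_def (loop invariant)
	   a[i] = t + 3;           '3': vect_constant_def
	   sum = sum + t;          'sum' (reduction PHI result): vect_reduction_def
	 }
*/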
7604 bool
7605 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7606 bb_vec_info bb_vinfo, gimple *def_stmt,
7607 tree *def, enum vect_def_type *dt)
7609 basic_block bb;
7610 stmt_vec_info stmt_vinfo;
7611 struct loop *loop = NULL;
7613 if (loop_vinfo)
7614 loop = LOOP_VINFO_LOOP (loop_vinfo);
7616 *def_stmt = NULL;
7617 *def = NULL_TREE;
7619 if (dump_enabled_p ())
7621 dump_printf_loc (MSG_NOTE, vect_location,
7622 "vect_is_simple_use: operand ");
7623 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7624 dump_printf (MSG_NOTE, "\n");
7627 if (CONSTANT_CLASS_P (operand))
7629 *dt = vect_constant_def;
7630 return true;
7633 if (is_gimple_min_invariant (operand))
7635 *def = operand;
7636 *dt = vect_external_def;
7637 return true;
7640 if (TREE_CODE (operand) == PAREN_EXPR)
7642 if (dump_enabled_p ())
7643 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7644 operand = TREE_OPERAND (operand, 0);
7647 if (TREE_CODE (operand) != SSA_NAME)
7649 if (dump_enabled_p ())
7650 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7651 "not ssa-name.\n");
7652 return false;
7655 *def_stmt = SSA_NAME_DEF_STMT (operand);
7656 if (*def_stmt == NULL)
7658 if (dump_enabled_p ())
7659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7660 "no def_stmt.\n");
7661 return false;
7664 if (dump_enabled_p ())
7666 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7667 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7668 dump_printf (MSG_NOTE, "\n");
7671 /* An empty stmt is expected only in the case of a function argument
7672 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
7673 if (gimple_nop_p (*def_stmt))
7675 *def = operand;
7676 *dt = vect_external_def;
7677 return true;
7680 bb = gimple_bb (*def_stmt);
7682 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7683 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7684 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7685 *dt = vect_external_def;
7686 else
7688 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7689 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7692 if (*dt == vect_unknown_def_type
7693 || (stmt
7694 && *dt == vect_double_reduction_def
7695 && gimple_code (stmt) != GIMPLE_PHI))
7697 if (dump_enabled_p ())
7698 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7699 "Unsupported pattern.\n");
7700 return false;
7703 if (dump_enabled_p ())
7704 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
7706 switch (gimple_code (*def_stmt))
7708 case GIMPLE_PHI:
7709 *def = gimple_phi_result (*def_stmt);
7710 break;
7712 case GIMPLE_ASSIGN:
7713 *def = gimple_assign_lhs (*def_stmt);
7714 break;
7716 case GIMPLE_CALL:
7717 *def = gimple_call_lhs (*def_stmt);
7718 if (*def != NULL)
7719 break;
7720 /* FALLTHRU */
7721 default:
7722 if (dump_enabled_p ())
7723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7724 "unsupported defining stmt:\n");
7725 return false;
7728 return true;
7731 /* Function vect_is_simple_use_1.
7733 Same as vect_is_simple_use but also determines the vector operand
7734 type of OPERAND and stores it to *VECTYPE. If the definition of
7735 OPERAND is vect_uninitialized_def, vect_constant_def or
7736 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
7737 is responsible for computing the best suited vector type for the
7738 scalar operand. */
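/* A hypothetical caller-side sketch (not code from this file) of how the two
   outputs are typically consumed by the vectorizable_* routines:

       enum vect_def_type dt;
       tree def, vectype_in;
       gimple def_stmt;
       if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
				  &def_stmt, &def, &dt, &vectype_in))
	 return false;
       if (!vectype_in)
	 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));

   i.e. when the def is a constant or external the caller computes a suitable
   vector type for the use itself.  */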
7740 bool
7741 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7742 bb_vec_info bb_vinfo, gimple *def_stmt,
7743 tree *def, enum vect_def_type *dt, tree *vectype)
7745 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7746 def, dt))
7747 return false;
7749 /* Now get a vector type if the def is internal, otherwise supply
7750 NULL_TREE and leave it up to the caller to figure out a proper
7751 type for the use stmt. */
7752 if (*dt == vect_internal_def
7753 || *dt == vect_induction_def
7754 || *dt == vect_reduction_def
7755 || *dt == vect_double_reduction_def
7756 || *dt == vect_nested_cycle)
7758 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
7760 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7761 && !STMT_VINFO_RELEVANT (stmt_info)
7762 && !STMT_VINFO_LIVE_P (stmt_info))
7763 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7765 *vectype = STMT_VINFO_VECTYPE (stmt_info);
7766 gcc_assert (*vectype != NULL_TREE);
7768 else if (*dt == vect_uninitialized_def
7769 || *dt == vect_constant_def
7770 || *dt == vect_external_def)
7771 *vectype = NULL_TREE;
7772 else
7773 gcc_unreachable ();
7775 return true;
7779 /* Function supportable_widening_operation
7781 Check whether an operation represented by the code CODE is a
7782 widening operation that is supported by the target platform in
7783 vector form (i.e., when operating on arguments of type VECTYPE_IN
7784 producing a result of type VECTYPE_OUT).
7786 Widening operations we currently support are NOP (CONVERT), FLOAT,
7787 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations
7788 are supported by the target platform either directly (via vector
7789 tree-codes), or via target builtins.
7791 Output:
7792 - CODE1 and CODE2 are codes of vector operations to be used when
7793 vectorizing the operation, if available.
7794 - MULTI_STEP_CVT determines the number of required intermediate steps in
7795 case of multi-step conversion (like char->short->int - in that case
7796 MULTI_STEP_CVT will be 1).
7797 - INTERM_TYPES contains the intermediate type required to perform the
7798 widening operation (short in the above example). */
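/* An illustrative example (target-dependent): widening a V16QI char input to
   a V4SI int result usually cannot be done in one step; if the target only
   provides char->short and short->int unpacks, CODE1/CODE2 are
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR, MULTI_STEP_CVT is 1 and INTERM_TYPES
   holds the intermediate short vector type (e.g. V8HI).  */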
7800 bool
7801 supportable_widening_operation (enum tree_code code, gimple stmt,
7802 tree vectype_out, tree vectype_in,
7803 enum tree_code *code1, enum tree_code *code2,
7804 int *multi_step_cvt,
7805 vec<tree> *interm_types)
7807 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7808 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7809 struct loop *vect_loop = NULL;
7810 machine_mode vec_mode;
7811 enum insn_code icode1, icode2;
7812 optab optab1, optab2;
7813 tree vectype = vectype_in;
7814 tree wide_vectype = vectype_out;
7815 enum tree_code c1, c2;
7816 int i;
7817 tree prev_type, intermediate_type;
7818 machine_mode intermediate_mode, prev_mode;
7819 optab optab3, optab4;
7821 *multi_step_cvt = 0;
7822 if (loop_info)
7823 vect_loop = LOOP_VINFO_LOOP (loop_info);
7825 switch (code)
7827 case WIDEN_MULT_EXPR:
7828 /* The result of a vectorized widening operation usually requires
7829 two vectors (because the widened results do not fit into one vector).
7830 The generated vector results would normally be expected to be
7831 generated in the same order as in the original scalar computation,
7832 i.e. if 8 results are generated in each vector iteration, they are
7833 to be organized as follows:
7834 vect1: [res1,res2,res3,res4],
7835 vect2: [res5,res6,res7,res8].
7837 However, in the special case that the result of the widening
7838 operation is used in a reduction computation only, the order doesn't
7839 matter (because when vectorizing a reduction we change the order of
7840 the computation). Some targets can take advantage of this and
7841 generate more efficient code. For example, targets like Altivec,
7842 that support widen_mult using a sequence of {mult_even,mult_odd}
7843 generate the following vectors:
7844 vect1: [res1,res3,res5,res7],
7845 vect2: [res2,res4,res6,res8].
7847 When vectorizing outer-loops, we execute the inner-loop sequentially
7848 (each vectorized inner-loop iteration contributes to VF outer-loop
7849 iterations in parallel). We therefore don't allow changing the
7850 order of the computation in the inner-loop during outer-loop
7851 vectorization. */
7852 /* TODO: Another case in which order doesn't *really* matter is when we
7853 widen and then contract again, e.g. (short)((int)x * y >> 8).
7854 Normally, pack_trunc performs an even/odd permute, whereas the
7855 repack from an even/odd expansion would be an interleave, which
7856 would be significantly simpler for e.g. AVX2. */
7857 /* In any case, in order to avoid duplicating the code below, recurse
7858 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7859 are properly set up for the caller. If we fail, we'll continue with
7860 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7861 if (vect_loop
7862 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
7863 && !nested_in_vect_loop_p (vect_loop, stmt)
7864 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
7865 stmt, vectype_out, vectype_in,
7866 code1, code2, multi_step_cvt,
7867 interm_types))
7869 /* Elements in a vector with the vect_used_by_reduction property cannot
7870 be reordered if the use chain with this property does not have the
7871 same operation. One such example is s += a * b, where elements
7872 in a and b cannot be reordered. Here we check if the vector defined
7873 by STMT is only directly used in the reduction statement. */
7874 tree lhs = gimple_assign_lhs (stmt);
7875 use_operand_p dummy;
7876 gimple use_stmt;
7877 stmt_vec_info use_stmt_info = NULL;
7878 if (single_imm_use (lhs, &dummy, &use_stmt)
7879 && (use_stmt_info = vinfo_for_stmt (use_stmt))
7880 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
7881 return true;
7883 c1 = VEC_WIDEN_MULT_LO_EXPR;
7884 c2 = VEC_WIDEN_MULT_HI_EXPR;
7885 break;
7887 case VEC_WIDEN_MULT_EVEN_EXPR:
7888 /* Support the recursion induced just above. */
7889 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
7890 c2 = VEC_WIDEN_MULT_ODD_EXPR;
7891 break;
7893 case WIDEN_LSHIFT_EXPR:
7894 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
7895 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
7896 break;
7898 CASE_CONVERT:
7899 c1 = VEC_UNPACK_LO_EXPR;
7900 c2 = VEC_UNPACK_HI_EXPR;
7901 break;
7903 case FLOAT_EXPR:
7904 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
7905 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
7906 break;
7908 case FIX_TRUNC_EXPR:
7909 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7910 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7911 computing the operation. */
7912 return false;
7914 default:
7915 gcc_unreachable ();
7918 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
7920 enum tree_code ctmp = c1;
7921 c1 = c2;
7922 c2 = ctmp;
7925 if (code == FIX_TRUNC_EXPR)
7927 /* The signedness is determined from the output operand. */
7928 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
7929 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
7931 else
7933 optab1 = optab_for_tree_code (c1, vectype, optab_default);
7934 optab2 = optab_for_tree_code (c2, vectype, optab_default);
7937 if (!optab1 || !optab2)
7938 return false;
7940 vec_mode = TYPE_MODE (vectype);
7941 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
7942 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
7943 return false;
7945 *code1 = c1;
7946 *code2 = c2;
7948 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7949 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7950 return true;
7952 /* Check if it's a multi-step conversion that can be done using intermediate
7953 types. */
7955 prev_type = vectype;
7956 prev_mode = vec_mode;
7958 if (!CONVERT_EXPR_CODE_P (code))
7959 return false;
7961 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
7962 intermediate steps in the promotion sequence. We try
7963 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
7964 not. */
7965 interm_types->create (MAX_INTERM_CVT_STEPS);
7966 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
7968 intermediate_mode = insn_data[icode1].operand[0].mode;
7969 intermediate_type
7970 = lang_hooks.types.type_for_mode (intermediate_mode,
7971 TYPE_UNSIGNED (prev_type));
7972 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
7973 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
7975 if (!optab3 || !optab4
7976 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
7977 || insn_data[icode1].operand[0].mode != intermediate_mode
7978 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
7979 || insn_data[icode2].operand[0].mode != intermediate_mode
7980 || ((icode1 = optab_handler (optab3, intermediate_mode))
7981 == CODE_FOR_nothing)
7982 || ((icode2 = optab_handler (optab4, intermediate_mode))
7983 == CODE_FOR_nothing))
7984 break;
7986 interm_types->quick_push (intermediate_type);
7987 (*multi_step_cvt)++;
7989 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7990 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7991 return true;
7993 prev_type = intermediate_type;
7994 prev_mode = intermediate_mode;
7997 interm_types->release ();
7998 return false;
8002 /* Function supportable_narrowing_operation
8004 Check whether an operation represented by the code CODE is a
8005 narrowing operation that is supported by the target platform in
8006 vector form (i.e., when operating on arguments of type VECTYPE_IN
8007 and producing a result of type VECTYPE_OUT).
8009 Narrowing operations we currently support are NOP (CONVERT) and
8010 FIX_TRUNC. This function checks if these operations are supported by
8011 the target platform directly via vector tree-codes.
8013 Output:
8014 - CODE1 is the code of a vector operation to be used when
8015 vectorizing the operation, if available.
8016 - MULTI_STEP_CVT determines the number of required intermediate steps in
8017 case of multi-step conversion (like int->short->char - in that case
8018 MULTI_STEP_CVT will be 1).
8019 - INTERM_TYPES contains the intermediate type required to perform the
8020 narrowing operation (short in the above example). */
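/* An illustrative example (target-dependent): narrowing a V4SI int input to
   a V16QI char result typically goes int->short->char, i.e. CODE1 is
   VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT is 1 and INTERM_TYPES holds the
   intermediate short vector type (e.g. V8HI).  */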
8022 bool
8023 supportable_narrowing_operation (enum tree_code code,
8024 tree vectype_out, tree vectype_in,
8025 enum tree_code *code1, int *multi_step_cvt,
8026 vec<tree> *interm_types)
8028 machine_mode vec_mode;
8029 enum insn_code icode1;
8030 optab optab1, interm_optab;
8031 tree vectype = vectype_in;
8032 tree narrow_vectype = vectype_out;
8033 enum tree_code c1;
8034 tree intermediate_type;
8035 machine_mode intermediate_mode, prev_mode;
8036 int i;
8037 bool uns;
8039 *multi_step_cvt = 0;
8040 switch (code)
8042 CASE_CONVERT:
8043 c1 = VEC_PACK_TRUNC_EXPR;
8044 break;
8046 case FIX_TRUNC_EXPR:
8047 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8048 break;
8050 case FLOAT_EXPR:
8051 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8052 tree code and optabs used for computing the operation. */
8053 return false;
8055 default:
8056 gcc_unreachable ();
8059 if (code == FIX_TRUNC_EXPR)
8060 /* The signedness is determined from the output operand. */
8061 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8062 else
8063 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8065 if (!optab1)
8066 return false;
8068 vec_mode = TYPE_MODE (vectype);
8069 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8070 return false;
8072 *code1 = c1;
8074 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8075 return true;
8077 /* Check if it's a multi-step conversion that can be done using intermediate
8078 types. */
8079 prev_mode = vec_mode;
8080 if (code == FIX_TRUNC_EXPR)
8081 uns = TYPE_UNSIGNED (vectype_out);
8082 else
8083 uns = TYPE_UNSIGNED (vectype);
8085 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8086 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8087 costly than signed. */
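/* For illustration (assuming such a target): for double -> unsigned short the
   float->integer step can be done as a signed FIX_TRUNC into a signed integer
   vector, and the result is then packed down with VEC_PACK_TRUNC_EXPRs; the
   truncating packs discard the high bits, so the signedness of the
   intermediate integer vectors does not affect the result.  */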
8088 if (code == FIX_TRUNC_EXPR && uns)
8090 enum insn_code icode2;
8092 intermediate_type
8093 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8094 interm_optab
8095 = optab_for_tree_code (c1, intermediate_type, optab_default);
8096 if (interm_optab != unknown_optab
8097 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8098 && insn_data[icode1].operand[0].mode
8099 == insn_data[icode2].operand[0].mode)
8101 uns = false;
8102 optab1 = interm_optab;
8103 icode1 = icode2;
8107 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8108 intermediate steps in the narrowing sequence. We try
8109 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8110 interm_types->create (MAX_INTERM_CVT_STEPS);
8111 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8113 intermediate_mode = insn_data[icode1].operand[0].mode;
8114 intermediate_type
8115 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8116 interm_optab
8117 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8118 optab_default);
8119 if (!interm_optab
8120 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8121 || insn_data[icode1].operand[0].mode != intermediate_mode
8122 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8123 == CODE_FOR_nothing))
8124 break;
8126 interm_types->quick_push (intermediate_type);
8127 (*multi_step_cvt)++;
8129 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8130 return true;
8132 prev_mode = intermediate_mode;
8133 optab1 = interm_optab;
8136 interm_types->release ();
8137 return false;