gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "target.h"
30 #include "predict.h"
31 #include "vec.h"
32 #include "hashtab.h"
33 #include "hash-set.h"
34 #include "machmode.h"
35 #include "hard-reg-set.h"
36 #include "input.h"
37 #include "function.h"
38 #include "dominance.h"
39 #include "cfg.h"
40 #include "basic-block.h"
41 #include "gimple-pretty-print.h"
42 #include "tree-ssa-alias.h"
43 #include "internal-fn.h"
44 #include "tree-eh.h"
45 #include "gimple-expr.h"
46 #include "is-a.h"
47 #include "gimple.h"
48 #include "gimplify.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "gimple-ssa.h"
52 #include "tree-cfg.h"
53 #include "tree-phinodes.h"
54 #include "ssa-iterators.h"
55 #include "stringpool.h"
56 #include "tree-ssanames.h"
57 #include "tree-ssa-loop-manip.h"
58 #include "cfgloop.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-scalar-evolution.h"
61 #include "expr.h"
62 #include "recog.h" /* FIXME: for insn_data */
63 #include "insn-codes.h"
64 #include "optabs.h"
65 #include "diagnostic-core.h"
66 #include "tree-vectorizer.h"
67 #include "dumpfile.h"
68 #include "hash-map.h"
69 #include "plugin-api.h"
70 #include "ipa-ref.h"
71 #include "cgraph.h"
72 #include "builtins.h"
74 /* For lang_hooks.types.type_for_mode. */
75 #include "langhooks.h"
77 /* Return the vectorized type for the given statement. */
79 tree
80 stmt_vectype (struct _stmt_vec_info *stmt_info)
82 return STMT_VINFO_VECTYPE (stmt_info);
85 /* Return TRUE iff the given statement is in an inner loop relative to
86 the loop being vectorized. */
87 bool
88 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
90 gimple stmt = STMT_VINFO_STMT (stmt_info);
91 basic_block bb = gimple_bb (stmt);
92 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
93 struct loop* loop;
95 if (!loop_vinfo)
96 return false;
98 loop = LOOP_VINFO_LOOP (loop_vinfo);
100 return (bb->loop_father == loop->inner);
103 /* Record the cost of a statement, either by directly informing the
104 target model or by saving it in a vector for later processing.
105 Return a preliminary estimate of the statement's cost. */
107 unsigned
108 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
109 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
110 int misalign, enum vect_cost_model_location where)
112 if (body_cost_vec)
114 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
115 add_stmt_info_to_vec (body_cost_vec, count, kind,
116 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
117 misalign);
118 return (unsigned)
119 (builtin_vectorization_cost (kind, vectype, misalign) * count);
122 else
124 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
125 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
126 void *target_cost_data;
128 if (loop_vinfo)
129 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
130 else
131 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
133 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
134 misalign, where);
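/* A minimal usage sketch (values are hypothetical): during analysis a
   caller typically does

     unsigned est = record_stmt_cost (&body_cost_vec, ncopies, vector_stmt,
                                      stmt_info, 0, vect_body);

   which appends an entry to BODY_COST_VEC for later processing by the
   target cost model, whereas passing a NULL vector routes the cost
   straight to the target via add_stmt_cost as shown above.  */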
138 /* Return a variable of type ELEM_TYPE[NELEMS]. */
140 static tree
141 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
143 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
144 "vect_array");
147 /* ARRAY is an array of vectors created by create_vector_array.
148 Return an SSA_NAME for the vector in index N. The reference
149 is part of the vectorization of STMT and the vector is associated
150 with scalar destination SCALAR_DEST. */
152 static tree
153 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
154 tree array, unsigned HOST_WIDE_INT n)
156 tree vect_type, vect, vect_name, array_ref;
157 gimple new_stmt;
159 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
160 vect_type = TREE_TYPE (TREE_TYPE (array));
161 vect = vect_create_destination_var (scalar_dest, vect_type);
162 array_ref = build4 (ARRAY_REF, vect_type, array,
163 build_int_cst (size_type_node, n),
164 NULL_TREE, NULL_TREE);
166 new_stmt = gimple_build_assign (vect, array_ref);
167 vect_name = make_ssa_name (vect, new_stmt);
168 gimple_assign_set_lhs (new_stmt, vect_name);
169 vect_finish_stmt_generation (stmt, new_stmt, gsi);
171 return vect_name;
174 /* ARRAY is an array of vectors created by create_vector_array.
175 Emit code to store SSA_NAME VECT in index N of the array.
176 The store is part of the vectorization of STMT. */
178 static void
179 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
180 tree array, unsigned HOST_WIDE_INT n)
182 tree array_ref;
183 gimple new_stmt;
185 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
186 build_int_cst (size_type_node, n),
187 NULL_TREE, NULL_TREE);
189 new_stmt = gimple_build_assign (array_ref, vect);
190 vect_finish_stmt_generation (stmt, new_stmt, gsi);
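/* Roughly, for an array of vectors these two helpers emit GIMPLE of the
   form (names are illustrative only):

     vect_array[2] = vx_1;      // write_vector_array, n == 2
     vy_3 = vect_array[2];      // read_vector_array,  n == 2

   i.e. plain ARRAY_REF stores and loads of whole vectors, as used around
   load-lanes/store-lanes sequences.  */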
193 /* PTR is a pointer to an array of type TYPE. Return a representation
194 of *PTR. The memory reference replaces those in FIRST_DR
195 (and its group). */
197 static tree
198 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
200 tree mem_ref, alias_ptr_type;
202 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
203 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
204 /* Arrays have the same alignment as their type. */
205 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
206 return mem_ref;
209 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
211 /* Function vect_mark_relevant.
213 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
215 static void
216 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
217 enum vect_relevant relevant, bool live_p,
218 bool used_in_pattern)
220 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
221 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
222 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
223 gimple pattern_stmt;
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "mark relevant %d, live %d.\n", relevant, live_p);
229 /* If this stmt is an original stmt in a pattern, we might need to mark its
230 related pattern stmt instead of the original stmt. However, such stmts
231 may have their own uses that are not in any pattern, in such cases the
232 stmt itself should be marked. */
233 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
235 bool found = false;
236 if (!used_in_pattern)
238 imm_use_iterator imm_iter;
239 use_operand_p use_p;
240 gimple use_stmt;
241 tree lhs;
242 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
243 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
245 if (is_gimple_assign (stmt))
246 lhs = gimple_assign_lhs (stmt);
247 else
248 lhs = gimple_call_lhs (stmt);
250 /* This is a use outside the pattern; if LHS has other uses that are
251 pattern uses, we should mark the stmt itself, and not the pattern
252 stmt. */
253 if (lhs && TREE_CODE (lhs) == SSA_NAME)
254 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
256 if (is_gimple_debug (USE_STMT (use_p)))
257 continue;
258 use_stmt = USE_STMT (use_p);
260 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
261 continue;
263 if (vinfo_for_stmt (use_stmt)
264 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
266 found = true;
267 break;
272 if (!found)
274 /* This is the last stmt in a sequence that was detected as a
275 pattern that can potentially be vectorized. Don't mark the stmt
276 as relevant/live because it's not going to be vectorized.
277 Instead mark the pattern-stmt that replaces it. */
279 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
281 if (dump_enabled_p ())
282 dump_printf_loc (MSG_NOTE, vect_location,
283 "last stmt in pattern. don't mark"
284 " relevant/live.\n");
285 stmt_info = vinfo_for_stmt (pattern_stmt);
286 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
287 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
288 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
289 stmt = pattern_stmt;
293 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
294 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
295 STMT_VINFO_RELEVANT (stmt_info) = relevant;
297 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
298 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
300 if (dump_enabled_p ())
301 dump_printf_loc (MSG_NOTE, vect_location,
302 "already marked relevant/live.\n");
303 return;
306 worklist->safe_push (stmt);
310 /* Function vect_stmt_relevant_p.
312 Return true if STMT in loop that is represented by LOOP_VINFO is
313 "relevant for vectorization".
315 A stmt is considered "relevant for vectorization" if:
316 - it has uses outside the loop.
317 - it has vdefs (it alters memory).
318 - control stmts in the loop (except for the exit condition).
320 CHECKME: what other side effects would the vectorizer allow? */
322 static bool
323 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
324 enum vect_relevant *relevant, bool *live_p)
326 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
327 ssa_op_iter op_iter;
328 imm_use_iterator imm_iter;
329 use_operand_p use_p;
330 def_operand_p def_p;
332 *relevant = vect_unused_in_scope;
333 *live_p = false;
335 /* cond stmt other than loop exit cond. */
336 if (is_ctrl_stmt (stmt)
337 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
338 != loop_exit_ctrl_vec_info_type)
339 *relevant = vect_used_in_scope;
341 /* changing memory. */
342 if (gimple_code (stmt) != GIMPLE_PHI)
343 if (gimple_vdef (stmt))
345 if (dump_enabled_p ())
346 dump_printf_loc (MSG_NOTE, vect_location,
347 "vec_stmt_relevant_p: stmt has vdefs.\n");
348 *relevant = vect_used_in_scope;
351 /* uses outside the loop. */
352 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
354 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
356 basic_block bb = gimple_bb (USE_STMT (use_p));
357 if (!flow_bb_inside_loop_p (loop, bb))
359 if (dump_enabled_p ())
360 dump_printf_loc (MSG_NOTE, vect_location,
361 "vec_stmt_relevant_p: used out of loop.\n");
363 if (is_gimple_debug (USE_STMT (use_p)))
364 continue;
366 /* We expect all such uses to be in the loop exit phis
367 (because of loop closed form) */
368 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
369 gcc_assert (bb == single_exit (loop)->dest);
371 *live_p = true;
376 return (*live_p || *relevant);
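/* As an illustration (not taken from the original comments), for

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;     // alters memory (vdef)      -> relevant
         s = s + b[i];        // s is used after the loop  -> live
       }
     ... = s;

   the store is marked vect_used_in_scope because it has a vdef, and the
   definition of s is marked live because its value escapes the loop via
   the loop-closed exit phi.  */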
380 /* Function exist_non_indexing_operands_for_use_p
382 USE is one of the uses attached to STMT. Check if USE is
383 used in STMT for anything other than indexing an array. */
385 static bool
386 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
388 tree operand;
389 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
391 /* USE corresponds to some operand in STMT. If there is no data
392 reference in STMT, then any operand that corresponds to USE
393 is not indexing an array. */
394 if (!STMT_VINFO_DATA_REF (stmt_info))
395 return true;
397 /* STMT has a data_ref. FORNOW this means that it is one of
398 the following forms:
399 -1- ARRAY_REF = var
400 -2- var = ARRAY_REF
401 (This should have been verified in analyze_data_refs).
403 'var' in the second case corresponds to a def, not a use,
404 so USE cannot correspond to any operands that are not used
405 for array indexing.
407 Therefore, all we need to check is if STMT falls into the
408 first case, and whether var corresponds to USE. */
410 if (!gimple_assign_copy_p (stmt))
412 if (is_gimple_call (stmt)
413 && gimple_call_internal_p (stmt))
414 switch (gimple_call_internal_fn (stmt))
416 case IFN_MASK_STORE:
417 operand = gimple_call_arg (stmt, 3);
418 if (operand == use)
419 return true;
420 /* FALLTHRU */
421 case IFN_MASK_LOAD:
422 operand = gimple_call_arg (stmt, 2);
423 if (operand == use)
424 return true;
425 break;
426 default:
427 break;
429 return false;
432 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
433 return false;
434 operand = gimple_assign_rhs1 (stmt);
435 if (TREE_CODE (operand) != SSA_NAME)
436 return false;
438 if (operand == use)
439 return true;
441 return false;
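/* Concretely (illustrative statement), for

     a[i_7] = x_5;

   the use x_5 is a non-indexing operand (it is the stored value), so the
   function returns true for it, while i_7 only feeds the address
   computation and the function returns false.  For IFN_MASK_STORE the
   mask and the stored value are likewise the non-indexing uses, and for
   IFN_MASK_LOAD the mask is.  */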
446 Function process_use.
448 Inputs:
449 - a USE in STMT in a loop represented by LOOP_VINFO
450 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
451 that defined USE. This is done by calling mark_relevant and passing it
452 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
453 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
454 be performed.
456 Outputs:
457 Generally, LIVE_P and RELEVANT are used to define the liveness and
458 relevance info of the DEF_STMT of this USE:
459 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
460 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
461 Exceptions:
462 - case 1: If USE is used only for address computations (e.g. array indexing),
463 which does not need to be directly vectorized, then the liveness/relevance
464 of the respective DEF_STMT is left unchanged.
465 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
466 skip DEF_STMT because it has already been processed.
467 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
468 be modified accordingly.
470 Return true if everything is as expected. Return false otherwise. */
472 static bool
473 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
474 enum vect_relevant relevant, vec<gimple> *worklist,
475 bool force)
477 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
478 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
479 stmt_vec_info dstmt_vinfo;
480 basic_block bb, def_bb;
481 tree def;
482 gimple def_stmt;
483 enum vect_def_type dt;
485 /* case 1: we are only interested in uses that need to be vectorized. Uses
486 that are used for address computation are not considered relevant. */
487 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
488 return true;
490 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
492 if (dump_enabled_p ())
493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
494 "not vectorized: unsupported use in stmt.\n");
495 return false;
498 if (!def_stmt || gimple_nop_p (def_stmt))
499 return true;
501 def_bb = gimple_bb (def_stmt);
502 if (!flow_bb_inside_loop_p (loop, def_bb))
504 if (dump_enabled_p ())
505 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
506 return true;
509 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
510 DEF_STMT must have already been processed, because this should be the
511 only way that STMT, which is a reduction-phi, was put in the worklist,
512 as there should be no other uses for DEF_STMT in the loop. So we just
513 check that everything is as expected, and we are done. */
514 dstmt_vinfo = vinfo_for_stmt (def_stmt);
515 bb = gimple_bb (stmt);
516 if (gimple_code (stmt) == GIMPLE_PHI
517 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
518 && gimple_code (def_stmt) != GIMPLE_PHI
519 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
520 && bb->loop_father == def_bb->loop_father)
522 if (dump_enabled_p ())
523 dump_printf_loc (MSG_NOTE, vect_location,
524 "reduc-stmt defining reduc-phi in the same nest.\n");
525 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
526 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
527 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
528 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
529 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
530 return true;
533 /* case 3a: outer-loop stmt defining an inner-loop stmt:
534 outer-loop-header-bb:
535 d = def_stmt
536 inner-loop:
537 stmt # use (d)
538 outer-loop-tail-bb:
539 ... */
540 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
542 if (dump_enabled_p ())
543 dump_printf_loc (MSG_NOTE, vect_location,
544 "outer-loop def-stmt defining inner-loop stmt.\n");
546 switch (relevant)
548 case vect_unused_in_scope:
549 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
550 vect_used_in_scope : vect_unused_in_scope;
551 break;
553 case vect_used_in_outer_by_reduction:
554 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
555 relevant = vect_used_by_reduction;
556 break;
558 case vect_used_in_outer:
559 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
560 relevant = vect_used_in_scope;
561 break;
563 case vect_used_in_scope:
564 break;
566 default:
567 gcc_unreachable ();
571 /* case 3b: inner-loop stmt defining an outer-loop stmt:
572 outer-loop-header-bb:
574 inner-loop:
575 d = def_stmt
576 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
577 stmt # use (d) */
578 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
580 if (dump_enabled_p ())
581 dump_printf_loc (MSG_NOTE, vect_location,
582 "inner-loop def-stmt defining outer-loop stmt.\n");
584 switch (relevant)
586 case vect_unused_in_scope:
587 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
588 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
589 vect_used_in_outer_by_reduction : vect_unused_in_scope;
590 break;
592 case vect_used_by_reduction:
593 relevant = vect_used_in_outer_by_reduction;
594 break;
596 case vect_used_in_scope:
597 relevant = vect_used_in_outer;
598 break;
600 default:
601 gcc_unreachable ();
605 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
606 is_pattern_stmt_p (stmt_vinfo));
607 return true;
611 /* Function vect_mark_stmts_to_be_vectorized.
613 Not all stmts in the loop need to be vectorized. For example:
615 for i...
616 for j...
617 1. T0 = i + j
618 2. T1 = a[T0]
620 3. j = j + 1
622 Stmt 1 and 3 do not need to be vectorized, because loop control and
623 addressing of vectorized data-refs are handled differently.
625 This pass detects such stmts. */
627 bool
628 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
630 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
631 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
632 unsigned int nbbs = loop->num_nodes;
633 gimple_stmt_iterator si;
634 gimple stmt;
635 unsigned int i;
636 stmt_vec_info stmt_vinfo;
637 basic_block bb;
638 gimple phi;
639 bool live_p;
640 enum vect_relevant relevant, tmp_relevant;
641 enum vect_def_type def_type;
643 if (dump_enabled_p ())
644 dump_printf_loc (MSG_NOTE, vect_location,
645 "=== vect_mark_stmts_to_be_vectorized ===\n");
647 auto_vec<gimple, 64> worklist;
649 /* 1. Init worklist. */
650 for (i = 0; i < nbbs; i++)
652 bb = bbs[i];
653 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
655 phi = gsi_stmt (si);
656 if (dump_enabled_p ())
658 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
660 dump_printf (MSG_NOTE, "\n");
663 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
664 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
666 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
668 stmt = gsi_stmt (si);
669 if (dump_enabled_p ())
671 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
672 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
673 dump_printf (MSG_NOTE, "\n");
676 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
677 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
681 /* 2. Process_worklist */
682 while (worklist.length () > 0)
684 use_operand_p use_p;
685 ssa_op_iter iter;
687 stmt = worklist.pop ();
688 if (dump_enabled_p ())
690 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
691 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
692 dump_printf (MSG_NOTE, "\n");
695 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
696 (DEF_STMT) as relevant/irrelevant and live/dead according to the
697 liveness and relevance properties of STMT. */
698 stmt_vinfo = vinfo_for_stmt (stmt);
699 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
700 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
702 /* Generally, the liveness and relevance properties of STMT are
703 propagated as is to the DEF_STMTs of its USEs:
704 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
705 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
707 One exception is when STMT has been identified as defining a reduction
708 variable; in this case we set the liveness/relevance as follows:
709 live_p = false
710 relevant = vect_used_by_reduction
711 This is because we distinguish between two kinds of relevant stmts -
712 those that are used by a reduction computation, and those that are
713 (also) used by a regular computation. This allows us later on to
714 identify stmts that are used solely by a reduction, and therefore the
715 order of the results that they produce does not have to be kept. */
717 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
718 tmp_relevant = relevant;
719 switch (def_type)
721 case vect_reduction_def:
722 switch (tmp_relevant)
724 case vect_unused_in_scope:
725 relevant = vect_used_by_reduction;
726 break;
728 case vect_used_by_reduction:
729 if (gimple_code (stmt) == GIMPLE_PHI)
730 break;
731 /* fall through */
733 default:
734 if (dump_enabled_p ())
735 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
736 "unsupported use of reduction.\n");
737 return false;
740 live_p = false;
741 break;
743 case vect_nested_cycle:
744 if (tmp_relevant != vect_unused_in_scope
745 && tmp_relevant != vect_used_in_outer_by_reduction
746 && tmp_relevant != vect_used_in_outer)
748 if (dump_enabled_p ())
749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
750 "unsupported use of nested cycle.\n");
752 return false;
755 live_p = false;
756 break;
758 case vect_double_reduction_def:
759 if (tmp_relevant != vect_unused_in_scope
760 && tmp_relevant != vect_used_by_reduction)
762 if (dump_enabled_p ())
763 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
764 "unsupported use of double reduction.\n");
766 return false;
769 live_p = false;
770 break;
772 default:
773 break;
776 if (is_pattern_stmt_p (stmt_vinfo))
778 /* Pattern statements are not inserted into the code, so
779 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
780 have to scan the RHS or function arguments instead. */
781 if (is_gimple_assign (stmt))
783 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
784 tree op = gimple_assign_rhs1 (stmt);
786 i = 1;
787 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
789 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
790 live_p, relevant, &worklist, false)
791 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
792 live_p, relevant, &worklist, false))
793 return false;
794 i = 2;
796 for (; i < gimple_num_ops (stmt); i++)
798 op = gimple_op (stmt, i);
799 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
800 &worklist, false))
801 return false;
804 else if (is_gimple_call (stmt))
806 for (i = 0; i < gimple_call_num_args (stmt); i++)
808 tree arg = gimple_call_arg (stmt, i);
809 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
810 &worklist, false))
811 return false;
815 else
816 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
818 tree op = USE_FROM_PTR (use_p);
819 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
820 &worklist, false))
821 return false;
824 if (STMT_VINFO_GATHER_P (stmt_vinfo))
826 tree off;
827 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
828 gcc_assert (decl);
829 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
830 &worklist, true))
831 return false;
833 } /* while worklist */
835 return true;
839 /* Function vect_model_simple_cost.
841 Models cost for simple operations, i.e. those that only emit ncopies of a
842 single op. Right now, this does not account for multiple insns that could
843 be generated for the single vector op. We will handle that shortly. */
845 void
846 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
847 enum vect_def_type *dt,
848 stmt_vector_for_cost *prologue_cost_vec,
849 stmt_vector_for_cost *body_cost_vec)
851 int i;
852 int inside_cost = 0, prologue_cost = 0;
854 /* The SLP costs were already calculated during SLP tree build. */
855 if (PURE_SLP_STMT (stmt_info))
856 return;
858 /* FORNOW: Assuming maximum 2 args per stmts. */
859 for (i = 0; i < 2; i++)
860 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
861 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
862 stmt_info, 0, vect_prologue);
864 /* Pass the inside-of-loop statements to the target-specific cost model. */
865 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
866 stmt_info, 0, vect_body);
868 if (dump_enabled_p ())
869 dump_printf_loc (MSG_NOTE, vect_location,
870 "vect_model_simple_cost: inside_cost = %d, "
871 "prologue_cost = %d .\n", inside_cost, prologue_cost);
875 /* Model cost for type demotion and promotion operations. PWR is normally
876 zero for single-step promotions and demotions. It will be one if
877 two-step promotion/demotion is required, and so on. Each additional
878 step doubles the number of instructions required. */
880 static void
881 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
882 enum vect_def_type *dt, int pwr)
884 int i, tmp;
885 int inside_cost = 0, prologue_cost = 0;
886 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
887 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
888 void *target_cost_data;
890 /* The SLP costs were already calculated during SLP tree build. */
891 if (PURE_SLP_STMT (stmt_info))
892 return;
894 if (loop_vinfo)
895 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
896 else
897 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
899 for (i = 0; i < pwr + 1; i++)
901 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
902 (i + 1) : i;
903 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
904 vec_promote_demote, stmt_info, 0,
905 vect_body);
908 /* FORNOW: Assuming maximum 2 args per stmts. */
909 for (i = 0; i < 2; i++)
910 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
911 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
912 stmt_info, 0, vect_prologue);
914 if (dump_enabled_p ())
915 dump_printf_loc (MSG_NOTE, vect_location,
916 "vect_model_promotion_demotion_cost: inside_cost = %d, "
917 "prologue_cost = %d .\n", inside_cost, prologue_cost);
920 /* Function vect_cost_group_size
922 For grouped load or store, return the group_size only if it is the first
923 load or store of a group, else return 1. This ensures that group size is
924 only returned once per group. */
926 static int
927 vect_cost_group_size (stmt_vec_info stmt_info)
929 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
931 if (first_stmt == STMT_VINFO_STMT (stmt_info))
932 return GROUP_SIZE (stmt_info);
934 return 1;
938 /* Function vect_model_store_cost
940 Models cost for stores. In the case of grouped accesses, one access
941 has the overhead of the grouped access attributed to it. */
943 void
944 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
945 bool store_lanes_p, enum vect_def_type dt,
946 slp_tree slp_node,
947 stmt_vector_for_cost *prologue_cost_vec,
948 stmt_vector_for_cost *body_cost_vec)
950 int group_size;
951 unsigned int inside_cost = 0, prologue_cost = 0;
952 struct data_reference *first_dr;
953 gimple first_stmt;
955 /* The SLP costs were already calculated during SLP tree build. */
956 if (PURE_SLP_STMT (stmt_info))
957 return;
959 if (dt == vect_constant_def || dt == vect_external_def)
960 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
961 stmt_info, 0, vect_prologue);
963 /* Grouped access? */
964 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
966 if (slp_node)
968 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
969 group_size = 1;
971 else
973 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
974 group_size = vect_cost_group_size (stmt_info);
977 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
979 /* Not a grouped access. */
980 else
982 group_size = 1;
983 first_dr = STMT_VINFO_DATA_REF (stmt_info);
986 /* We assume that the cost of a single store-lanes instruction is
987 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
988 access is instead being provided by a permute-and-store operation,
989 include the cost of the permutes. */
990 if (!store_lanes_p && group_size > 1)
992 /* Uses high and low interleave or shuffle operations for each
993 needed permute. */
994 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
995 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
996 stmt_info, 0, vect_body);
998 if (dump_enabled_p ())
999 dump_printf_loc (MSG_NOTE, vect_location,
1000 "vect_model_store_cost: strided group_size = %d .\n",
1001 group_size);
1004 /* Costs of the stores. */
1005 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1007 if (dump_enabled_p ())
1008 dump_printf_loc (MSG_NOTE, vect_location,
1009 "vect_model_store_cost: inside_cost = %d, "
1010 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1014 /* Calculate cost of DR's memory access. */
1015 void
1016 vect_get_store_cost (struct data_reference *dr, int ncopies,
1017 unsigned int *inside_cost,
1018 stmt_vector_for_cost *body_cost_vec)
1020 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1021 gimple stmt = DR_STMT (dr);
1022 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1024 switch (alignment_support_scheme)
1026 case dr_aligned:
1028 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1029 vector_store, stmt_info, 0,
1030 vect_body);
1032 if (dump_enabled_p ())
1033 dump_printf_loc (MSG_NOTE, vect_location,
1034 "vect_model_store_cost: aligned.\n");
1035 break;
1038 case dr_unaligned_supported:
1040 /* Here, we assign an additional cost for the unaligned store. */
1041 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1042 unaligned_store, stmt_info,
1043 DR_MISALIGNMENT (dr), vect_body);
1044 if (dump_enabled_p ())
1045 dump_printf_loc (MSG_NOTE, vect_location,
1046 "vect_model_store_cost: unaligned supported by "
1047 "hardware.\n");
1048 break;
1051 case dr_unaligned_unsupported:
1053 *inside_cost = VECT_MAX_COST;
1055 if (dump_enabled_p ())
1056 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1057 "vect_model_store_cost: unsupported access.\n");
1058 break;
1061 default:
1062 gcc_unreachable ();
1067 /* Function vect_model_load_cost
1069 Models cost for loads. In the case of grouped accesses, the last access
1070 has the overhead of the grouped access attributed to it. Since unaligned
1071 accesses are supported for loads, we also account for the costs of the
1072 access scheme chosen. */
1074 void
1075 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1076 bool load_lanes_p, slp_tree slp_node,
1077 stmt_vector_for_cost *prologue_cost_vec,
1078 stmt_vector_for_cost *body_cost_vec)
1080 int group_size;
1081 gimple first_stmt;
1082 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1083 unsigned int inside_cost = 0, prologue_cost = 0;
1085 /* The SLP costs were already calculated during SLP tree build. */
1086 if (PURE_SLP_STMT (stmt_info))
1087 return;
1089 /* Grouped accesses? */
1090 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1091 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1093 group_size = vect_cost_group_size (stmt_info);
1094 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1096 /* Not a grouped access. */
1097 else
1099 group_size = 1;
1100 first_dr = dr;
1103 /* We assume that the cost of a single load-lanes instruction is
1104 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1105 access is instead being provided by a load-and-permute operation,
1106 include the cost of the permutes. */
1107 if (!load_lanes_p && group_size > 1)
1109 /* Uses even and odd extract operations or shuffle operations
1110 for each needed permute. */
1111 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1112 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1113 stmt_info, 0, vect_body);
1115 if (dump_enabled_p ())
1116 dump_printf_loc (MSG_NOTE, vect_location,
1117 "vect_model_load_cost: strided group_size = %d .\n",
1118 group_size);
1121 /* The loads themselves. */
1122 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1124 /* N scalar loads plus gathering them into a vector. */
1125 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1126 inside_cost += record_stmt_cost (body_cost_vec,
1127 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1128 scalar_load, stmt_info, 0, vect_body);
1129 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1130 stmt_info, 0, vect_body);
1132 else
1133 vect_get_load_cost (first_dr, ncopies,
1134 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1135 || group_size > 1 || slp_node),
1136 &inside_cost, &prologue_cost,
1137 prologue_cost_vec, body_cost_vec, true);
1139 if (dump_enabled_p ())
1140 dump_printf_loc (MSG_NOTE, vect_location,
1141 "vect_model_load_cost: inside_cost = %d, "
1142 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1146 /* Calculate cost of DR's memory access. */
1147 void
1148 vect_get_load_cost (struct data_reference *dr, int ncopies,
1149 bool add_realign_cost, unsigned int *inside_cost,
1150 unsigned int *prologue_cost,
1151 stmt_vector_for_cost *prologue_cost_vec,
1152 stmt_vector_for_cost *body_cost_vec,
1153 bool record_prologue_costs)
1155 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1156 gimple stmt = DR_STMT (dr);
1157 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1159 switch (alignment_support_scheme)
1161 case dr_aligned:
1163 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1164 stmt_info, 0, vect_body);
1166 if (dump_enabled_p ())
1167 dump_printf_loc (MSG_NOTE, vect_location,
1168 "vect_model_load_cost: aligned.\n");
1170 break;
1172 case dr_unaligned_supported:
1174 /* Here, we assign an additional cost for the unaligned load. */
1175 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1176 unaligned_load, stmt_info,
1177 DR_MISALIGNMENT (dr), vect_body);
1179 if (dump_enabled_p ())
1180 dump_printf_loc (MSG_NOTE, vect_location,
1181 "vect_model_load_cost: unaligned supported by "
1182 "hardware.\n");
1184 break;
1186 case dr_explicit_realign:
1188 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1189 vector_load, stmt_info, 0, vect_body);
1190 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1191 vec_perm, stmt_info, 0, vect_body);
1193 /* FIXME: If the misalignment remains fixed across the iterations of
1194 the containing loop, the following cost should be added to the
1195 prologue costs. */
1196 if (targetm.vectorize.builtin_mask_for_load)
1197 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1198 stmt_info, 0, vect_body);
1200 if (dump_enabled_p ())
1201 dump_printf_loc (MSG_NOTE, vect_location,
1202 "vect_model_load_cost: explicit realign\n");
1204 break;
1206 case dr_explicit_realign_optimized:
1208 if (dump_enabled_p ())
1209 dump_printf_loc (MSG_NOTE, vect_location,
1210 "vect_model_load_cost: unaligned software "
1211 "pipelined.\n");
1213 /* Unaligned software pipeline has a load of an address, an initial
1214 load, and possibly a mask operation to "prime" the loop. However,
1215 if this is an access in a group of loads, which provide grouped
1216 access, then the above cost should only be considered for one
1217 access in the group. Inside the loop, there is a load op
1218 and a realignment op. */
1220 if (add_realign_cost && record_prologue_costs)
1222 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1223 vector_stmt, stmt_info,
1224 0, vect_prologue);
1225 if (targetm.vectorize.builtin_mask_for_load)
1226 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1227 vector_stmt, stmt_info,
1228 0, vect_prologue);
1231 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1232 stmt_info, 0, vect_body);
1233 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1234 stmt_info, 0, vect_body);
1236 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_NOTE, vect_location,
1238 "vect_model_load_cost: explicit realign optimized"
1239 "\n");
1241 break;
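/* The software-pipelined scheme being costed here looks roughly like
   (illustrative only, names invented):

     prologue:  realign_mask = mask_for_load (addr);  // target builtin, if any
                msq = load from the floor-aligned address;  // priming load
     body:      lsq = load of the next aligned vector;
                vec = realign (msq, lsq, realign_mask);
                msq = lsq;

   hence the two (or three) one-off prologue statements charged per group
   and the vector_load plus vec_perm charged per copy.  */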
1244 case dr_unaligned_unsupported:
1246 *inside_cost = VECT_MAX_COST;
1248 if (dump_enabled_p ())
1249 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1250 "vect_model_load_cost: unsupported access.\n");
1251 break;
1254 default:
1255 gcc_unreachable ();
1259 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1260 the loop preheader for the vectorized stmt STMT. */
1262 static void
1263 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1265 if (gsi)
1266 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1267 else
1269 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1270 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1272 if (loop_vinfo)
1274 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1275 basic_block new_bb;
1276 edge pe;
1278 if (nested_in_vect_loop_p (loop, stmt))
1279 loop = loop->inner;
1281 pe = loop_preheader_edge (loop);
1282 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1283 gcc_assert (!new_bb);
1285 else
1287 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1288 basic_block bb;
1289 gimple_stmt_iterator gsi_bb_start;
1291 gcc_assert (bb_vinfo);
1292 bb = BB_VINFO_BB (bb_vinfo);
1293 gsi_bb_start = gsi_after_labels (bb);
1294 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1298 if (dump_enabled_p ())
1300 dump_printf_loc (MSG_NOTE, vect_location,
1301 "created new init_stmt: ");
1302 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1303 dump_printf (MSG_NOTE, "\n");
1307 /* Function vect_init_vector.
1309 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1310 TYPE with the value VAL. If TYPE is a vector type and VAL does not have a
1311 vector type, a vector with all elements equal to VAL is created first.
1312 Place the initialization at BSI if it is not NULL. Otherwise, place the
1313 initialization at the loop preheader.
1314 Return the DEF of INIT_STMT.
1315 It will be used in the vectorization of STMT. */
1317 tree
1318 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1320 tree new_var;
1321 gimple init_stmt;
1322 tree vec_oprnd;
1323 tree new_temp;
1325 if (TREE_CODE (type) == VECTOR_TYPE
1326 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1328 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1330 if (CONSTANT_CLASS_P (val))
1331 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1332 else
1334 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1335 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1336 new_temp, val);
1337 vect_init_vector_1 (stmt, init_stmt, gsi);
1338 val = new_temp;
1341 val = build_vector_from_val (type, val);
1344 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1345 init_stmt = gimple_build_assign (new_var, val);
1346 new_temp = make_ssa_name (new_var, init_stmt);
1347 gimple_assign_set_lhs (init_stmt, new_temp);
1348 vect_init_vector_1 (stmt, init_stmt, gsi);
1349 vec_oprnd = gimple_assign_lhs (init_stmt);
1350 return vec_oprnd;
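/* For example, initializing from the scalar constant 5 with a V4SI TYPE
   produces an init stmt of the form (SSA names illustrative):

     cst_1 = { 5, 5, 5, 5 };

   inserted either at GSI or on the loop preheader edge, and cst_1 is
   returned for use in the vectorized statement.  */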
1354 /* Function vect_get_vec_def_for_operand.
1356 OP is an operand in STMT. This function returns a (vector) def that will be
1357 used in the vectorized stmt for STMT.
1359 In the case that OP is an SSA_NAME which is defined in the loop, then
1360 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1362 In case OP is an invariant or constant, a new stmt that creates a vector def
1363 needs to be introduced. */
1365 tree
1366 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1368 tree vec_oprnd;
1369 gimple vec_stmt;
1370 gimple def_stmt;
1371 stmt_vec_info def_stmt_info = NULL;
1372 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1373 unsigned int nunits;
1374 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1375 tree def;
1376 enum vect_def_type dt;
1377 bool is_simple_use;
1378 tree vector_type;
1380 if (dump_enabled_p ())
1382 dump_printf_loc (MSG_NOTE, vect_location,
1383 "vect_get_vec_def_for_operand: ");
1384 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1385 dump_printf (MSG_NOTE, "\n");
1388 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1389 &def_stmt, &def, &dt);
1390 gcc_assert (is_simple_use);
1391 if (dump_enabled_p ())
1393 int loc_printed = 0;
1394 if (def)
1396 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1397 loc_printed = 1;
1398 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1399 dump_printf (MSG_NOTE, "\n");
1401 if (def_stmt)
1403 if (loc_printed)
1404 dump_printf (MSG_NOTE, " def_stmt = ");
1405 else
1406 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1407 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1408 dump_printf (MSG_NOTE, "\n");
1412 switch (dt)
1414 /* Case 1: operand is a constant. */
1415 case vect_constant_def:
1417 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1418 gcc_assert (vector_type);
1419 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1421 if (scalar_def)
1422 *scalar_def = op;
1424 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1425 if (dump_enabled_p ())
1426 dump_printf_loc (MSG_NOTE, vect_location,
1427 "Create vector_cst. nunits = %d\n", nunits);
1429 return vect_init_vector (stmt, op, vector_type, NULL);
1432 /* Case 2: operand is defined outside the loop - loop invariant. */
1433 case vect_external_def:
1435 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1436 gcc_assert (vector_type);
1438 if (scalar_def)
1439 *scalar_def = def;
1441 /* Create 'vec_inv = {inv,inv,..,inv}' */
1442 if (dump_enabled_p ())
1443 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1445 return vect_init_vector (stmt, def, vector_type, NULL);
1448 /* Case 3: operand is defined inside the loop. */
1449 case vect_internal_def:
1451 if (scalar_def)
1452 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1454 /* Get the def from the vectorized stmt. */
1455 def_stmt_info = vinfo_for_stmt (def_stmt);
1457 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1458 /* Get vectorized pattern statement. */
1459 if (!vec_stmt
1460 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1461 && !STMT_VINFO_RELEVANT (def_stmt_info))
1462 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1463 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1464 gcc_assert (vec_stmt);
1465 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1466 vec_oprnd = PHI_RESULT (vec_stmt);
1467 else if (is_gimple_call (vec_stmt))
1468 vec_oprnd = gimple_call_lhs (vec_stmt);
1469 else
1470 vec_oprnd = gimple_assign_lhs (vec_stmt);
1471 return vec_oprnd;
1474 /* Case 4: operand is defined by a loop header phi - reduction */
1475 case vect_reduction_def:
1476 case vect_double_reduction_def:
1477 case vect_nested_cycle:
1479 struct loop *loop;
1481 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1482 loop = (gimple_bb (def_stmt))->loop_father;
1484 /* Get the def before the loop */
1485 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1486 return get_initial_def_for_reduction (stmt, op, scalar_def);
1489 /* Case 5: operand is defined by loop-header phi - induction. */
1490 case vect_induction_def:
1492 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1494 /* Get the def from the vectorized stmt. */
1495 def_stmt_info = vinfo_for_stmt (def_stmt);
1496 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1497 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1498 vec_oprnd = PHI_RESULT (vec_stmt);
1499 else
1500 vec_oprnd = gimple_get_lhs (vec_stmt);
1501 return vec_oprnd;
1504 default:
1505 gcc_unreachable ();
1510 /* Function vect_get_vec_def_for_stmt_copy
1512 Return a vector-def for an operand. This function is used when the
1513 vectorized stmt to be created (by the caller to this function) is a "copy"
1514 created in case the vectorized result cannot fit in one vector, and several
1515 copies of the vector-stmt are required. In this case the vector-def is
1516 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1517 of the stmt that defines VEC_OPRND.
1518 DT is the type of the vector def VEC_OPRND.
1520 Context:
1521 In case the vectorization factor (VF) is bigger than the number
1522 of elements that can fit in a vectype (nunits), we have to generate
1523 more than one vector stmt to vectorize the scalar stmt. This situation
1524 arises when there are multiple data-types operated upon in the loop; the
1525 smallest data-type determines the VF, and as a result, when vectorizing
1526 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1527 vector stmt (each computing a vector of 'nunits' results, and together
1528 computing 'VF' results in each iteration). This function is called when
1529 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1530 which VF=16 and nunits=4, so the number of copies required is 4):
1532 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1534 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1535 VS1.1: vx.1 = memref1 VS1.2
1536 VS1.2: vx.2 = memref2 VS1.3
1537 VS1.3: vx.3 = memref3
1539 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1540 VSnew.1: vz1 = vx.1 + ... VSnew.2
1541 VSnew.2: vz2 = vx.2 + ... VSnew.3
1542 VSnew.3: vz3 = vx.3 + ...
1544 The vectorization of S1 is explained in vectorizable_load.
1545 The vectorization of S2:
1546 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1547 the function 'vect_get_vec_def_for_operand' is called to
1548 get the relevant vector-def for each operand of S2. For operand x it
1549 returns the vector-def 'vx.0'.
1551 To create the remaining copies of the vector-stmt (VSnew.j), this
1552 function is called to get the relevant vector-def for each operand. It is
1553 obtained from the respective VS1.j stmt, which is recorded in the
1554 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1556 For example, to obtain the vector-def 'vx.1' in order to create the
1557 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1558 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1559 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1560 and return its def ('vx.1').
1561 Overall, to create the above sequence this function will be called 3 times:
1562 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1563 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1564 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1566 tree
1567 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1569 gimple vec_stmt_for_operand;
1570 stmt_vec_info def_stmt_info;
1572 /* Do nothing; can reuse same def. */
1573 if (dt == vect_external_def || dt == vect_constant_def )
1574 return vec_oprnd;
1576 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1577 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1578 gcc_assert (def_stmt_info);
1579 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1580 gcc_assert (vec_stmt_for_operand);
1581 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1582 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1583 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1584 else
1585 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1586 return vec_oprnd;
1590 /* Get vectorized definitions for the operands to create a copy of an original
1591 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1593 static void
1594 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1595 vec<tree> *vec_oprnds0,
1596 vec<tree> *vec_oprnds1)
1598 tree vec_oprnd = vec_oprnds0->pop ();
1600 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1601 vec_oprnds0->quick_push (vec_oprnd);
1603 if (vec_oprnds1 && vec_oprnds1->length ())
1605 vec_oprnd = vec_oprnds1->pop ();
1606 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1607 vec_oprnds1->quick_push (vec_oprnd);
1612 /* Get vectorized definitions for OP0 and OP1.
1613 REDUC_INDEX is the index of reduction operand in case of reduction,
1614 and -1 otherwise. */
1616 void
1617 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1618 vec<tree> *vec_oprnds0,
1619 vec<tree> *vec_oprnds1,
1620 slp_tree slp_node, int reduc_index)
1622 if (slp_node)
1624 int nops = (op1 == NULL_TREE) ? 1 : 2;
1625 auto_vec<tree> ops (nops);
1626 auto_vec<vec<tree> > vec_defs (nops);
1628 ops.quick_push (op0);
1629 if (op1)
1630 ops.quick_push (op1);
1632 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1634 *vec_oprnds0 = vec_defs[0];
1635 if (op1)
1636 *vec_oprnds1 = vec_defs[1];
1638 else
1640 tree vec_oprnd;
1642 vec_oprnds0->create (1);
1643 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1644 vec_oprnds0->quick_push (vec_oprnd);
1646 if (op1)
1648 vec_oprnds1->create (1);
1649 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1650 vec_oprnds1->quick_push (vec_oprnd);
1656 /* Function vect_finish_stmt_generation.
1658 Insert a new stmt. */
1660 void
1661 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1662 gimple_stmt_iterator *gsi)
1664 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1665 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1666 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1668 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1670 if (!gsi_end_p (*gsi)
1671 && gimple_has_mem_ops (vec_stmt))
1673 gimple at_stmt = gsi_stmt (*gsi);
1674 tree vuse = gimple_vuse (at_stmt);
1675 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1677 tree vdef = gimple_vdef (at_stmt);
1678 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1679 /* If we have an SSA vuse and insert a store, update virtual
1680 SSA form to avoid triggering the renamer. Do so only
1681 if we can easily see all uses - which is what almost always
1682 happens with the way vectorized stmts are inserted. */
1683 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1684 && ((is_gimple_assign (vec_stmt)
1685 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1686 || (is_gimple_call (vec_stmt)
1687 && !(gimple_call_flags (vec_stmt)
1688 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1690 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1691 gimple_set_vdef (vec_stmt, new_vdef);
1692 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1696 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1698 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1699 bb_vinfo));
1701 if (dump_enabled_p ())
1703 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1704 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1705 dump_printf (MSG_NOTE, "\n");
1708 gimple_set_location (vec_stmt, gimple_location (stmt));
1710 /* While EH edges will generally prevent vectorization, stmt might
1711 e.g. be in a must-not-throw region. Ensure newly created stmts
1712 that could throw are part of the same region. */
1713 int lp_nr = lookup_stmt_eh_lp (stmt);
1714 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1715 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1718 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1719 a function declaration if the target has a vectorized version
1720 of the function, or NULL_TREE if the function cannot be vectorized. */
1722 tree
1723 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1725 tree fndecl = gimple_call_fndecl (call);
1727 /* We only handle functions that do not read or clobber memory -- i.e.
1728 const or novops ones. */
1729 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1730 return NULL_TREE;
1732 if (!fndecl
1733 || TREE_CODE (fndecl) != FUNCTION_DECL
1734 || !DECL_BUILT_IN (fndecl))
1735 return NULL_TREE;
1737 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1738 vectype_in);
1742 static tree permute_vec_elements (tree, tree, tree, gimple,
1743 gimple_stmt_iterator *);
1746 /* Function vectorizable_mask_load_store.
1748 Check if STMT performs a conditional load or store that can be vectorized.
1749 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1750 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1751 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
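/* A typical source form handled here (after if-conversion has replaced
   the conditional accesses with internal-function calls) is

     for (i = 0; i < n; i++)
       if (c[i])
         a[i] = b[i];

   which reaches the vectorizer as an IFN_MASK_STORE to a[i] (and, where
   the load could otherwise trap, an IFN_MASK_LOAD of b[i]), both guarded
   by the vector mask computed from c[i].  */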
1753 static bool
1754 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1755 gimple *vec_stmt, slp_tree slp_node)
1757 tree vec_dest = NULL;
1758 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1759 stmt_vec_info prev_stmt_info;
1760 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1761 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1762 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1763 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1764 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1765 tree elem_type;
1766 gimple new_stmt;
1767 tree dummy;
1768 tree dataref_ptr = NULL_TREE;
1769 gimple ptr_incr;
1770 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1771 int ncopies;
1772 int i, j;
1773 bool inv_p;
1774 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1775 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1776 int gather_scale = 1;
1777 enum vect_def_type gather_dt = vect_unknown_def_type;
1778 bool is_store;
1779 tree mask;
1780 gimple def_stmt;
1781 tree def;
1782 enum vect_def_type dt;
1784 if (slp_node != NULL)
1785 return false;
1787 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1788 gcc_assert (ncopies >= 1);
1790 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1791 mask = gimple_call_arg (stmt, 2);
1792 if (TYPE_PRECISION (TREE_TYPE (mask))
1793 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1794 return false;
1796 /* FORNOW. This restriction should be relaxed. */
1797 if (nested_in_vect_loop && ncopies > 1)
1799 if (dump_enabled_p ())
1800 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1801 "multiple types in nested loop.");
1802 return false;
1805 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1806 return false;
1808 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1809 return false;
1811 if (!STMT_VINFO_DATA_REF (stmt_info))
1812 return false;
1814 elem_type = TREE_TYPE (vectype);
1816 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1817 return false;
1819 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1820 return false;
1822 if (STMT_VINFO_GATHER_P (stmt_info))
1824 gimple def_stmt;
1825 tree def;
1826 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1827 &gather_off, &gather_scale);
1828 gcc_assert (gather_decl);
1829 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1830 &def_stmt, &def, &gather_dt,
1831 &gather_off_vectype))
1833 if (dump_enabled_p ())
1834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1835 "gather index use not simple.");
1836 return false;
1839 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1840 tree masktype
1841 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1842 if (TREE_CODE (masktype) == INTEGER_TYPE)
1844 if (dump_enabled_p ())
1845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1846 "masked gather with integer mask not supported.");
1847 return false;
1850 else if (tree_int_cst_compare (nested_in_vect_loop
1851 ? STMT_VINFO_DR_STEP (stmt_info)
1852 : DR_STEP (dr), size_zero_node) <= 0)
1853 return false;
1854 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1855 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1856 return false;
1858 if (TREE_CODE (mask) != SSA_NAME)
1859 return false;
1861 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1862 &def_stmt, &def, &dt))
1863 return false;
1865 if (is_store)
1867 tree rhs = gimple_call_arg (stmt, 3);
1868 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1869 &def_stmt, &def, &dt))
1870 return false;
1873 if (!vec_stmt) /* transformation not required. */
1875 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1876 if (is_store)
1877 vect_model_store_cost (stmt_info, ncopies, false, dt,
1878 NULL, NULL, NULL);
1879 else
1880 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1881 return true;
1884 /** Transform. **/
1886 if (STMT_VINFO_GATHER_P (stmt_info))
1888 tree vec_oprnd0 = NULL_TREE, op;
1889 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1890 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1891 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1892 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1893 tree mask_perm_mask = NULL_TREE;
1894 edge pe = loop_preheader_edge (loop);
1895 gimple_seq seq;
1896 basic_block new_bb;
1897 enum { NARROW, NONE, WIDEN } modifier;
1898 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1900 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1901 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1902 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1903 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1904 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1905 scaletype = TREE_VALUE (arglist);
1906 gcc_checking_assert (types_compatible_p (srctype, rettype)
1907 && types_compatible_p (srctype, masktype));
1909 if (nunits == gather_off_nunits)
1910 modifier = NONE;
1911 else if (nunits == gather_off_nunits / 2)
1913 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1914 modifier = WIDEN;
1916 for (i = 0; i < gather_off_nunits; ++i)
1917 sel[i] = i | nunits;
1919 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1921 else if (nunits == gather_off_nunits * 2)
1923 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1924 modifier = NARROW;
1926 for (i = 0; i < nunits; ++i)
1927 sel[i] = i < gather_off_nunits
1928 ? i : i + nunits - gather_off_nunits;
1930 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1931 ncopies *= 2;
1932 for (i = 0; i < nunits; ++i)
1933 sel[i] = i | gather_off_nunits;
1934 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1936 else
1937 gcc_unreachable ();
1939 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1941 ptr = fold_convert (ptrtype, gather_base);
1942 if (!is_gimple_min_invariant (ptr))
1944 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1945 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1946 gcc_assert (!new_bb);
1949 scale = build_int_cst (scaletype, gather_scale);
1951 prev_stmt_info = NULL;
1952 for (j = 0; j < ncopies; ++j)
1954 if (modifier == WIDEN && (j & 1))
1955 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1956 perm_mask, stmt, gsi);
1957 else if (j == 0)
1958 op = vec_oprnd0
1959 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1960 else
1961 op = vec_oprnd0
1962 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1964 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1966 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1967 == TYPE_VECTOR_SUBPARTS (idxtype));
1968 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1969 var = make_ssa_name (var, NULL);
1970 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1971 new_stmt
1972 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op);
1973 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1974 op = var;
1977 if (mask_perm_mask && (j & 1))
1978 mask_op = permute_vec_elements (mask_op, mask_op,
1979 mask_perm_mask, stmt, gsi);
1980 else
1982 if (j == 0)
1983 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1984 else
1986 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1987 &def_stmt, &def, &dt);
1988 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1991 mask_op = vec_mask;
1992 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1994 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1995 == TYPE_VECTOR_SUBPARTS (masktype));
1996 var = vect_get_new_vect_var (masktype, vect_simple_var,
1997 NULL);
1998 var = make_ssa_name (var, NULL);
1999 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2000 new_stmt
2001 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
2002 mask_op);
2003 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2004 mask_op = var;
2008 new_stmt
2009 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2010 scale);
2012 if (!useless_type_conversion_p (vectype, rettype))
2014 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2015 == TYPE_VECTOR_SUBPARTS (rettype));
2016 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2017 op = make_ssa_name (var, new_stmt);
2018 gimple_call_set_lhs (new_stmt, op);
2019 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2020 var = make_ssa_name (vec_dest, NULL);
2021 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2022 new_stmt
2023 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op);
2025 else
2027 var = make_ssa_name (vec_dest, new_stmt);
2028 gimple_call_set_lhs (new_stmt, var);
2031 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2033 if (modifier == NARROW)
2035 if ((j & 1) == 0)
2037 prev_res = var;
2038 continue;
2040 var = permute_vec_elements (prev_res, var,
2041 perm_mask, stmt, gsi);
2042 new_stmt = SSA_NAME_DEF_STMT (var);
2045 if (prev_stmt_info == NULL)
2046 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2047 else
2048 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2049 prev_stmt_info = vinfo_for_stmt (new_stmt);
2052 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2053 from the IL. */
2054 tree lhs = gimple_call_lhs (stmt);
2055 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2056 set_vinfo_for_stmt (new_stmt, stmt_info);
2057 set_vinfo_for_stmt (stmt, NULL);
2058 STMT_VINFO_STMT (stmt_info) = new_stmt;
2059 gsi_replace (gsi, new_stmt, true);
2060 return true;
2062 else if (is_store)
2064 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2065 prev_stmt_info = NULL;
2066 for (i = 0; i < ncopies; i++)
2068 unsigned align, misalign;
2070 if (i == 0)
2072 tree rhs = gimple_call_arg (stmt, 3);
2073 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2074 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2075 /* We should have caught mismatched types earlier. */
2076 gcc_assert (useless_type_conversion_p (vectype,
2077 TREE_TYPE (vec_rhs)));
2078 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2079 NULL_TREE, &dummy, gsi,
2080 &ptr_incr, false, &inv_p);
2081 gcc_assert (!inv_p);
2083 else
2085 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2086 &def, &dt);
2087 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2088 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2089 &def, &dt);
2090 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2091 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2092 TYPE_SIZE_UNIT (vectype));
2095 align = TYPE_ALIGN_UNIT (vectype);
2096 if (aligned_access_p (dr))
2097 misalign = 0;
2098 else if (DR_MISALIGNMENT (dr) == -1)
2100 align = TYPE_ALIGN_UNIT (elem_type);
2101 misalign = 0;
2103 else
2104 misalign = DR_MISALIGNMENT (dr);
2105 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2106 misalign);
2107 new_stmt
2108 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2109 gimple_call_arg (stmt, 1),
2110 vec_mask, vec_rhs);
2111 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2112 if (i == 0)
2113 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2114 else
2115 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2116 prev_stmt_info = vinfo_for_stmt (new_stmt);
2119 else
2121 tree vec_mask = NULL_TREE;
2122 prev_stmt_info = NULL;
2123 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2124 for (i = 0; i < ncopies; i++)
2126 unsigned align, misalign;
2128 if (i == 0)
2130 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2131 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2132 NULL_TREE, &dummy, gsi,
2133 &ptr_incr, false, &inv_p);
2134 gcc_assert (!inv_p);
2136 else
2138 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2139 &def, &dt);
2140 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2141 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2142 TYPE_SIZE_UNIT (vectype));
2145 align = TYPE_ALIGN_UNIT (vectype);
2146 if (aligned_access_p (dr))
2147 misalign = 0;
2148 else if (DR_MISALIGNMENT (dr) == -1)
2150 align = TYPE_ALIGN_UNIT (elem_type);
2151 misalign = 0;
2153 else
2154 misalign = DR_MISALIGNMENT (dr);
2155 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2156 misalign);
2157 new_stmt
2158 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2159 gimple_call_arg (stmt, 1),
2160 vec_mask);
2161 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest, NULL));
2162 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2163 if (i == 0)
2164 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2165 else
2166 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2167 prev_stmt_info = vinfo_for_stmt (new_stmt);
2171 if (!is_store)
2173 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2174 from the IL. */
2175 tree lhs = gimple_call_lhs (stmt);
2176 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2177 set_vinfo_for_stmt (new_stmt, stmt_info);
2178 set_vinfo_for_stmt (stmt, NULL);
2179 STMT_VINFO_STMT (stmt_info) = new_stmt;
2180 gsi_replace (gsi, new_stmt, true);
2183 return true;
2187 /* Function vectorizable_call.
2189 Check if GS performs a function call that can be vectorized.
2190 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2191 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2192 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
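/* Editorial note, not part of the original sources: as an illustrative
   sketch, a loop such as

     for (i = 0; i < n; i++)
       a[i] = __builtin_sqrtf (b[i]);

   is handled here when the target advertises a vector variant of the
   builtin (see the vectorizable_function query below); otherwise,
   barring a few internal functions handled specially, the routine
   returns false.  */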
2194 static bool
2195 vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2196 slp_tree slp_node)
2198 gcall *stmt;
2199 tree vec_dest;
2200 tree scalar_dest;
2201 tree op, type;
2202 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2203 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2204 tree vectype_out, vectype_in;
2205 int nunits_in;
2206 int nunits_out;
2207 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2208 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2209 tree fndecl, new_temp, def, rhs_type;
2210 gimple def_stmt;
2211 enum vect_def_type dt[3]
2212 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2213 gimple new_stmt = NULL;
2214 int ncopies, j;
2215 vec<tree> vargs = vNULL;
2216 enum { NARROW, NONE, WIDEN } modifier;
2217 size_t i, nargs;
2218 tree lhs;
2220 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2221 return false;
2223 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2224 return false;
2226 /* Is GS a vectorizable call? */
2227 stmt = dyn_cast <gcall *> (gs);
2228 if (!stmt)
2229 return false;
2231 if (gimple_call_internal_p (stmt)
2232 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2233 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2234 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2235 slp_node);
2237 if (gimple_call_lhs (stmt) == NULL_TREE
2238 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2239 return false;
2241 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2243 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2245 /* Process function arguments. */
2246 rhs_type = NULL_TREE;
2247 vectype_in = NULL_TREE;
2248 nargs = gimple_call_num_args (stmt);
2250 /* Bail out if the function has more than three arguments; we do not have
2251 interesting builtin functions to vectorize with more than two arguments
2252 except for fma. A call with no arguments is not handled either. */
2253 if (nargs == 0 || nargs > 3)
2254 return false;
2256 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2257 if (gimple_call_internal_p (stmt)
2258 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2260 nargs = 0;
2261 rhs_type = unsigned_type_node;
2264 for (i = 0; i < nargs; i++)
2266 tree opvectype;
2268 op = gimple_call_arg (stmt, i);
2270 /* We can only handle calls with arguments of the same type. */
2271 if (rhs_type
2272 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2274 if (dump_enabled_p ())
2275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2276 "argument types differ.\n");
2277 return false;
2279 if (!rhs_type)
2280 rhs_type = TREE_TYPE (op);
2282 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2283 &def_stmt, &def, &dt[i], &opvectype))
2285 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2287 "use not simple.\n");
2288 return false;
2291 if (!vectype_in)
2292 vectype_in = opvectype;
2293 else if (opvectype
2294 && opvectype != vectype_in)
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2298 "argument vector types differ.\n");
2299 return false;
2302 /* If all arguments are external or constant defs, use a vector type with
2303 the same size as the output vector type. */
2304 if (!vectype_in)
2305 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2306 if (vec_stmt)
2307 gcc_assert (vectype_in);
2308 if (!vectype_in)
2310 if (dump_enabled_p ())
2312 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2313 "no vectype for scalar type ");
2314 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2315 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2318 return false;
2321 /* FORNOW */
2322 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2323 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2324 if (nunits_in == nunits_out / 2)
2325 modifier = NARROW;
2326 else if (nunits_out == nunits_in)
2327 modifier = NONE;
2328 else if (nunits_out == nunits_in / 2)
2329 modifier = WIDEN;
2330 else
2331 return false;
2333 /* For now, we only vectorize functions if a target specific builtin
2334 is available. TODO -- in some cases, it might be profitable to
2335 insert the calls for pieces of the vector, in order to be able
2336 to vectorize other operations in the loop. */
2337 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2338 if (fndecl == NULL_TREE)
2340 if (gimple_call_internal_p (stmt)
2341 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2342 && !slp_node
2343 && loop_vinfo
2344 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2345 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2346 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2347 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2349 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2350 { 0, 1, 2, ... vf - 1 } vector. */
2351 gcc_assert (nargs == 0);
2353 else
2355 if (dump_enabled_p ())
2356 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2357 "function is not vectorizable.\n");
2358 return false;
2362 gcc_assert (!gimple_vuse (stmt));
2364 if (slp_node || PURE_SLP_STMT (stmt_info))
2365 ncopies = 1;
2366 else if (modifier == NARROW)
2367 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2368 else
2369 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2371 /* Sanity check: make sure that at least one copy of the vectorized stmt
2372 needs to be generated. */
2373 gcc_assert (ncopies >= 1);
2375 if (!vec_stmt) /* transformation not required. */
2377 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2380 "\n");
2381 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2382 return true;
2385 /** Transform. **/
2387 if (dump_enabled_p ())
2388 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2390 /* Handle def. */
2391 scalar_dest = gimple_call_lhs (stmt);
2392 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2394 prev_stmt_info = NULL;
2395 switch (modifier)
2397 case NONE:
2398 for (j = 0; j < ncopies; ++j)
2400 /* Build argument list for the vectorized call. */
2401 if (j == 0)
2402 vargs.create (nargs);
2403 else
2404 vargs.truncate (0);
2406 if (slp_node)
2408 auto_vec<vec<tree> > vec_defs (nargs);
2409 vec<tree> vec_oprnds0;
2411 for (i = 0; i < nargs; i++)
2412 vargs.quick_push (gimple_call_arg (stmt, i));
2413 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2414 vec_oprnds0 = vec_defs[0];
2416 /* Arguments are ready. Create the new vector stmt. */
2417 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2419 size_t k;
2420 for (k = 0; k < nargs; k++)
2422 vec<tree> vec_oprndsk = vec_defs[k];
2423 vargs[k] = vec_oprndsk[i];
2425 new_stmt = gimple_build_call_vec (fndecl, vargs);
2426 new_temp = make_ssa_name (vec_dest, new_stmt);
2427 gimple_call_set_lhs (new_stmt, new_temp);
2428 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2429 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2432 for (i = 0; i < nargs; i++)
2434 vec<tree> vec_oprndsi = vec_defs[i];
2435 vec_oprndsi.release ();
2437 continue;
2440 for (i = 0; i < nargs; i++)
2442 op = gimple_call_arg (stmt, i);
2443 if (j == 0)
2444 vec_oprnd0
2445 = vect_get_vec_def_for_operand (op, stmt, NULL);
2446 else
2448 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2449 vec_oprnd0
2450 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2453 vargs.quick_push (vec_oprnd0);
2456 if (gimple_call_internal_p (stmt)
2457 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2459 tree *v = XALLOCAVEC (tree, nunits_out);
2460 int k;
2461 for (k = 0; k < nunits_out; ++k)
2462 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2463 tree cst = build_vector (vectype_out, v);
2464 tree new_var
2465 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2466 gimple init_stmt = gimple_build_assign (new_var, cst);
2467 new_temp = make_ssa_name (new_var, init_stmt);
2468 gimple_assign_set_lhs (init_stmt, new_temp);
2469 vect_init_vector_1 (stmt, init_stmt, NULL);
2470 new_temp = make_ssa_name (vec_dest, NULL);
2471 new_stmt = gimple_build_assign (new_temp,
2472 gimple_assign_lhs (init_stmt));
2474 else
2476 new_stmt = gimple_build_call_vec (fndecl, vargs);
2477 new_temp = make_ssa_name (vec_dest, new_stmt);
2478 gimple_call_set_lhs (new_stmt, new_temp);
2480 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2482 if (j == 0)
2483 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2484 else
2485 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2487 prev_stmt_info = vinfo_for_stmt (new_stmt);
2490 break;
2492 case NARROW:
2493 for (j = 0; j < ncopies; ++j)
2495 /* Build argument list for the vectorized call. */
2496 if (j == 0)
2497 vargs.create (nargs * 2);
2498 else
2499 vargs.truncate (0);
2501 if (slp_node)
2503 auto_vec<vec<tree> > vec_defs (nargs);
2504 vec<tree> vec_oprnds0;
2506 for (i = 0; i < nargs; i++)
2507 vargs.quick_push (gimple_call_arg (stmt, i));
2508 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2509 vec_oprnds0 = vec_defs[0];
2511 /* Arguments are ready. Create the new vector stmt. */
2512 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2514 size_t k;
2515 vargs.truncate (0);
2516 for (k = 0; k < nargs; k++)
2518 vec<tree> vec_oprndsk = vec_defs[k];
2519 vargs.quick_push (vec_oprndsk[i]);
2520 vargs.quick_push (vec_oprndsk[i + 1]);
2522 new_stmt = gimple_build_call_vec (fndecl, vargs);
2523 new_temp = make_ssa_name (vec_dest, new_stmt);
2524 gimple_call_set_lhs (new_stmt, new_temp);
2525 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2526 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2529 for (i = 0; i < nargs; i++)
2531 vec<tree> vec_oprndsi = vec_defs[i];
2532 vec_oprndsi.release ();
2534 continue;
2537 for (i = 0; i < nargs; i++)
2539 op = gimple_call_arg (stmt, i);
2540 if (j == 0)
2542 vec_oprnd0
2543 = vect_get_vec_def_for_operand (op, stmt, NULL);
2544 vec_oprnd1
2545 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2547 else
2549 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2550 vec_oprnd0
2551 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2552 vec_oprnd1
2553 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2556 vargs.quick_push (vec_oprnd0);
2557 vargs.quick_push (vec_oprnd1);
2560 new_stmt = gimple_build_call_vec (fndecl, vargs);
2561 new_temp = make_ssa_name (vec_dest, new_stmt);
2562 gimple_call_set_lhs (new_stmt, new_temp);
2563 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2565 if (j == 0)
2566 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2567 else
2568 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2570 prev_stmt_info = vinfo_for_stmt (new_stmt);
2573 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2575 break;
2577 case WIDEN:
2578 /* No current target implements this case. */
2579 return false;
2582 vargs.release ();
2584 /* The call in STMT might prevent it from being removed in dce.
2585 However, we cannot remove it here, due to the way the ssa name
2586 it defines is mapped to the new definition. So just replace the
2587 rhs of the statement with something harmless. */
2589 if (slp_node)
2590 return true;
2592 type = TREE_TYPE (scalar_dest);
2593 if (is_pattern_stmt_p (stmt_info))
2594 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2595 else
2596 lhs = gimple_call_lhs (stmt);
2597 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2598 set_vinfo_for_stmt (new_stmt, stmt_info);
2599 set_vinfo_for_stmt (stmt, NULL);
2600 STMT_VINFO_STMT (stmt_info) = new_stmt;
2601 gsi_replace (gsi, new_stmt, false);
2603 return true;
2607 struct simd_call_arg_info
2609 tree vectype;
2610 tree op;
2611 enum vect_def_type dt;
2612 HOST_WIDE_INT linear_step;
2613 unsigned int align;
2616 /* Function vectorizable_simd_clone_call.
2618 Check if STMT performs a function call that can be vectorized
2619 by calling a simd clone of the function.
2620 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2621 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2622 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
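/* Editorial note, not part of the original sources: an illustrative
   sketch, assuming a function annotated with OpenMP declare simd:

     #pragma omp declare simd
     float foo (float x, float y);

     for (i = 0; i < n; i++)
       a[i] = foo (b[i], c[i]);

   The compiler emits simd clones of foo, and this routine picks the
   most suitable clone (see the badness computation below) and replaces
   the scalar call with a call to that clone on whole vectors.  */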
2624 static bool
2625 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2626 gimple *vec_stmt, slp_tree slp_node)
2628 tree vec_dest;
2629 tree scalar_dest;
2630 tree op, type;
2631 tree vec_oprnd0 = NULL_TREE;
2632 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2633 tree vectype;
2634 unsigned int nunits;
2635 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2636 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2637 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2638 tree fndecl, new_temp, def;
2639 gimple def_stmt;
2640 gimple new_stmt = NULL;
2641 int ncopies, j;
2642 vec<simd_call_arg_info> arginfo = vNULL;
2643 vec<tree> vargs = vNULL;
2644 size_t i, nargs;
2645 tree lhs, rtype, ratype;
2646 vec<constructor_elt, va_gc> *ret_ctor_elts;
2648 /* Is STMT a vectorizable call? */
2649 if (!is_gimple_call (stmt))
2650 return false;
2652 fndecl = gimple_call_fndecl (stmt);
2653 if (fndecl == NULL_TREE)
2654 return false;
2656 struct cgraph_node *node = cgraph_node::get (fndecl);
2657 if (node == NULL || node->simd_clones == NULL)
2658 return false;
2660 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2661 return false;
2663 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2664 return false;
2666 if (gimple_call_lhs (stmt)
2667 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2668 return false;
2670 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2672 vectype = STMT_VINFO_VECTYPE (stmt_info);
2674 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2675 return false;
2677 /* FORNOW */
2678 if (slp_node || PURE_SLP_STMT (stmt_info))
2679 return false;
2681 /* Process function arguments. */
2682 nargs = gimple_call_num_args (stmt);
2684 /* Bail out if the function has zero arguments. */
2685 if (nargs == 0)
2686 return false;
2688 arginfo.create (nargs);
2690 for (i = 0; i < nargs; i++)
2692 simd_call_arg_info thisarginfo;
2693 affine_iv iv;
2695 thisarginfo.linear_step = 0;
2696 thisarginfo.align = 0;
2697 thisarginfo.op = NULL_TREE;
2699 op = gimple_call_arg (stmt, i);
2700 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2701 &def_stmt, &def, &thisarginfo.dt,
2702 &thisarginfo.vectype)
2703 || thisarginfo.dt == vect_uninitialized_def)
2705 if (dump_enabled_p ())
2706 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2707 "use not simple.\n");
2708 arginfo.release ();
2709 return false;
2712 if (thisarginfo.dt == vect_constant_def
2713 || thisarginfo.dt == vect_external_def)
2714 gcc_assert (thisarginfo.vectype == NULL_TREE);
2715 else
2716 gcc_assert (thisarginfo.vectype != NULL_TREE);
2718 if (thisarginfo.dt != vect_constant_def
2719 && thisarginfo.dt != vect_external_def
2720 && loop_vinfo
2721 && TREE_CODE (op) == SSA_NAME
2722 && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
2723 && tree_fits_shwi_p (iv.step))
2725 thisarginfo.linear_step = tree_to_shwi (iv.step);
2726 thisarginfo.op = iv.base;
2728 else if ((thisarginfo.dt == vect_constant_def
2729 || thisarginfo.dt == vect_external_def)
2730 && POINTER_TYPE_P (TREE_TYPE (op)))
2731 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2733 arginfo.quick_push (thisarginfo);
2736 unsigned int badness = 0;
2737 struct cgraph_node *bestn = NULL;
2738 if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
2739 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
2740 else
2741 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2742 n = n->simdclone->next_clone)
2744 unsigned int this_badness = 0;
2745 if (n->simdclone->simdlen
2746 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2747 || n->simdclone->nargs != nargs)
2748 continue;
2749 if (n->simdclone->simdlen
2750 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2751 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2752 - exact_log2 (n->simdclone->simdlen)) * 1024;
2753 if (n->simdclone->inbranch)
2754 this_badness += 2048;
2755 int target_badness = targetm.simd_clone.usable (n);
2756 if (target_badness < 0)
2757 continue;
2758 this_badness += target_badness * 512;
2759 /* FORNOW: Have to add code to add the mask argument. */
2760 if (n->simdclone->inbranch)
2761 continue;
2762 for (i = 0; i < nargs; i++)
2764 switch (n->simdclone->args[i].arg_type)
2766 case SIMD_CLONE_ARG_TYPE_VECTOR:
2767 if (!useless_type_conversion_p
2768 (n->simdclone->args[i].orig_type,
2769 TREE_TYPE (gimple_call_arg (stmt, i))))
2770 i = -1;
2771 else if (arginfo[i].dt == vect_constant_def
2772 || arginfo[i].dt == vect_external_def
2773 || arginfo[i].linear_step)
2774 this_badness += 64;
2775 break;
2776 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2777 if (arginfo[i].dt != vect_constant_def
2778 && arginfo[i].dt != vect_external_def)
2779 i = -1;
2780 break;
2781 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2782 if (arginfo[i].dt == vect_constant_def
2783 || arginfo[i].dt == vect_external_def
2784 || (arginfo[i].linear_step
2785 != n->simdclone->args[i].linear_step))
2786 i = -1;
2787 break;
2788 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2789 /* FORNOW */
2790 i = -1;
2791 break;
2792 case SIMD_CLONE_ARG_TYPE_MASK:
2793 gcc_unreachable ();
2795 if (i == (size_t) -1)
2796 break;
2797 if (n->simdclone->args[i].alignment > arginfo[i].align)
2799 i = -1;
2800 break;
2802 if (arginfo[i].align)
2803 this_badness += (exact_log2 (arginfo[i].align)
2804 - exact_log2 (n->simdclone->args[i].alignment));
2806 if (i == (size_t) -1)
2807 continue;
2808 if (bestn == NULL || this_badness < badness)
2810 bestn = n;
2811 badness = this_badness;
2815 if (bestn == NULL)
2817 arginfo.release ();
2818 return false;
2821 for (i = 0; i < nargs; i++)
2822 if ((arginfo[i].dt == vect_constant_def
2823 || arginfo[i].dt == vect_external_def)
2824 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2826 arginfo[i].vectype
2827 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2828 i)));
2829 if (arginfo[i].vectype == NULL
2830 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2831 > bestn->simdclone->simdlen))
2833 arginfo.release ();
2834 return false;
2838 fndecl = bestn->decl;
2839 nunits = bestn->simdclone->simdlen;
2840 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2842 /* If the function isn't const, only allow it in simd loops where the user
2843 has asserted that at least nunits consecutive iterations can be
2844 performed using SIMD instructions. */
2845 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2846 && gimple_vuse (stmt))
2848 arginfo.release ();
2849 return false;
2852 /* Sanity check: make sure that at least one copy of the vectorized stmt
2853 needs to be generated. */
2854 gcc_assert (ncopies >= 1);
2856 if (!vec_stmt) /* transformation not required. */
2858 STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
2859 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2860 if (dump_enabled_p ())
2861 dump_printf_loc (MSG_NOTE, vect_location,
2862 "=== vectorizable_simd_clone_call ===\n");
2863 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2864 arginfo.release ();
2865 return true;
2868 /** Transform. **/
2870 if (dump_enabled_p ())
2871 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2873 /* Handle def. */
2874 scalar_dest = gimple_call_lhs (stmt);
2875 vec_dest = NULL_TREE;
2876 rtype = NULL_TREE;
2877 ratype = NULL_TREE;
2878 if (scalar_dest)
2880 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2881 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2882 if (TREE_CODE (rtype) == ARRAY_TYPE)
2884 ratype = rtype;
2885 rtype = TREE_TYPE (ratype);
2889 prev_stmt_info = NULL;
2890 for (j = 0; j < ncopies; ++j)
2892 /* Build argument list for the vectorized call. */
2893 if (j == 0)
2894 vargs.create (nargs);
2895 else
2896 vargs.truncate (0);
2898 for (i = 0; i < nargs; i++)
2900 unsigned int k, l, m, o;
2901 tree atype;
2902 op = gimple_call_arg (stmt, i);
2903 switch (bestn->simdclone->args[i].arg_type)
2905 case SIMD_CLONE_ARG_TYPE_VECTOR:
2906 atype = bestn->simdclone->args[i].vector_type;
2907 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2908 for (m = j * o; m < (j + 1) * o; m++)
2910 if (TYPE_VECTOR_SUBPARTS (atype)
2911 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2913 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2914 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2915 / TYPE_VECTOR_SUBPARTS (atype));
2916 gcc_assert ((k & (k - 1)) == 0);
2917 if (m == 0)
2918 vec_oprnd0
2919 = vect_get_vec_def_for_operand (op, stmt, NULL);
2920 else
2922 vec_oprnd0 = arginfo[i].op;
2923 if ((m & (k - 1)) == 0)
2924 vec_oprnd0
2925 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2926 vec_oprnd0);
2928 arginfo[i].op = vec_oprnd0;
2929 vec_oprnd0
2930 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2931 size_int (prec),
2932 bitsize_int ((m & (k - 1)) * prec));
2933 new_stmt
2934 = gimple_build_assign (make_ssa_name (atype, NULL),
2935 vec_oprnd0);
2936 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2937 vargs.safe_push (gimple_assign_lhs (new_stmt));
2939 else
2941 k = (TYPE_VECTOR_SUBPARTS (atype)
2942 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2943 gcc_assert ((k & (k - 1)) == 0);
2944 vec<constructor_elt, va_gc> *ctor_elts;
2945 if (k != 1)
2946 vec_alloc (ctor_elts, k);
2947 else
2948 ctor_elts = NULL;
2949 for (l = 0; l < k; l++)
2951 if (m == 0 && l == 0)
2952 vec_oprnd0
2953 = vect_get_vec_def_for_operand (op, stmt, NULL);
2954 else
2955 vec_oprnd0
2956 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2957 arginfo[i].op);
2958 arginfo[i].op = vec_oprnd0;
2959 if (k == 1)
2960 break;
2961 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
2962 vec_oprnd0);
2964 if (k == 1)
2965 vargs.safe_push (vec_oprnd0);
2966 else
2968 vec_oprnd0 = build_constructor (atype, ctor_elts);
2969 new_stmt
2970 = gimple_build_assign (make_ssa_name (atype, NULL),
2971 vec_oprnd0);
2972 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2973 vargs.safe_push (gimple_assign_lhs (new_stmt));
2977 break;
2978 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2979 vargs.safe_push (op);
2980 break;
2981 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2982 if (j == 0)
2984 gimple_seq stmts;
2985 arginfo[i].op
2986 = force_gimple_operand (arginfo[i].op, &stmts, true,
2987 NULL_TREE);
2988 if (stmts != NULL)
2990 basic_block new_bb;
2991 edge pe = loop_preheader_edge (loop);
2992 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2993 gcc_assert (!new_bb);
2995 tree phi_res = copy_ssa_name (op, NULL);
2996 gphi *new_phi = create_phi_node (phi_res, loop->header);
2997 set_vinfo_for_stmt (new_phi,
2998 new_stmt_vec_info (new_phi, loop_vinfo,
2999 NULL));
3000 add_phi_arg (new_phi, arginfo[i].op,
3001 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3002 enum tree_code code
3003 = POINTER_TYPE_P (TREE_TYPE (op))
3004 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3005 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3006 ? sizetype : TREE_TYPE (op);
3007 widest_int cst
3008 = wi::mul (bestn->simdclone->args[i].linear_step,
3009 ncopies * nunits);
3010 tree tcst = wide_int_to_tree (type, cst);
3011 tree phi_arg = copy_ssa_name (op, NULL);
3012 new_stmt = gimple_build_assign_with_ops (code, phi_arg,
3013 phi_res, tcst);
3014 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3015 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3016 set_vinfo_for_stmt (new_stmt,
3017 new_stmt_vec_info (new_stmt, loop_vinfo,
3018 NULL));
3019 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3020 UNKNOWN_LOCATION);
3021 arginfo[i].op = phi_res;
3022 vargs.safe_push (phi_res);
3024 else
3026 enum tree_code code
3027 = POINTER_TYPE_P (TREE_TYPE (op))
3028 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3029 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3030 ? sizetype : TREE_TYPE (op);
3031 widest_int cst
3032 = wi::mul (bestn->simdclone->args[i].linear_step,
3033 j * nunits);
3034 tree tcst = wide_int_to_tree (type, cst);
3035 new_temp = make_ssa_name (TREE_TYPE (op), NULL);
3036 new_stmt
3037 = gimple_build_assign_with_ops (code, new_temp,
3038 arginfo[i].op, tcst);
3039 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3040 vargs.safe_push (new_temp);
3042 break;
3043 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3044 default:
3045 gcc_unreachable ();
3049 new_stmt = gimple_build_call_vec (fndecl, vargs);
3050 if (vec_dest)
3052 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3053 if (ratype)
3054 new_temp = create_tmp_var (ratype, NULL);
3055 else if (TYPE_VECTOR_SUBPARTS (vectype)
3056 == TYPE_VECTOR_SUBPARTS (rtype))
3057 new_temp = make_ssa_name (vec_dest, new_stmt);
3058 else
3059 new_temp = make_ssa_name (rtype, new_stmt);
3060 gimple_call_set_lhs (new_stmt, new_temp);
3062 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3064 if (vec_dest)
3066 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3068 unsigned int k, l;
3069 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3070 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3071 gcc_assert ((k & (k - 1)) == 0);
3072 for (l = 0; l < k; l++)
3074 tree t;
3075 if (ratype)
3077 t = build_fold_addr_expr (new_temp);
3078 t = build2 (MEM_REF, vectype, t,
3079 build_int_cst (TREE_TYPE (t),
3080 l * prec / BITS_PER_UNIT));
3082 else
3083 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3084 size_int (prec), bitsize_int (l * prec));
3085 new_stmt
3086 = gimple_build_assign (make_ssa_name (vectype, NULL), t);
3087 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3088 if (j == 0 && l == 0)
3089 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3090 else
3091 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3093 prev_stmt_info = vinfo_for_stmt (new_stmt);
3096 if (ratype)
3098 tree clobber = build_constructor (ratype, NULL);
3099 TREE_THIS_VOLATILE (clobber) = 1;
3100 new_stmt = gimple_build_assign (new_temp, clobber);
3101 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3103 continue;
3105 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3107 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3108 / TYPE_VECTOR_SUBPARTS (rtype));
3109 gcc_assert ((k & (k - 1)) == 0);
3110 if ((j & (k - 1)) == 0)
3111 vec_alloc (ret_ctor_elts, k);
3112 if (ratype)
3114 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3115 for (m = 0; m < o; m++)
3117 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3118 size_int (m), NULL_TREE, NULL_TREE);
3119 new_stmt
3120 = gimple_build_assign (make_ssa_name (rtype, NULL),
3121 tem);
3122 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3123 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3124 gimple_assign_lhs (new_stmt));
3126 tree clobber = build_constructor (ratype, NULL);
3127 TREE_THIS_VOLATILE (clobber) = 1;
3128 new_stmt = gimple_build_assign (new_temp, clobber);
3129 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3131 else
3132 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3133 if ((j & (k - 1)) != k - 1)
3134 continue;
3135 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3136 new_stmt
3137 = gimple_build_assign (make_ssa_name (vec_dest, NULL),
3138 vec_oprnd0);
3139 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3141 if ((unsigned) j == k - 1)
3142 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3143 else
3144 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3146 prev_stmt_info = vinfo_for_stmt (new_stmt);
3147 continue;
3149 else if (ratype)
3151 tree t = build_fold_addr_expr (new_temp);
3152 t = build2 (MEM_REF, vectype, t,
3153 build_int_cst (TREE_TYPE (t), 0));
3154 new_stmt
3155 = gimple_build_assign (make_ssa_name (vec_dest, NULL), t);
3156 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3157 tree clobber = build_constructor (ratype, NULL);
3158 TREE_THIS_VOLATILE (clobber) = 1;
3159 vect_finish_stmt_generation (stmt,
3160 gimple_build_assign (new_temp,
3161 clobber), gsi);
3165 if (j == 0)
3166 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3167 else
3168 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3170 prev_stmt_info = vinfo_for_stmt (new_stmt);
3173 vargs.release ();
3175 /* The call in STMT might prevent it from being removed in dce.
3176 However, we cannot remove it here, due to the way the ssa name
3177 it defines is mapped to the new definition. So just replace the
3178 rhs of the statement with something harmless. */
3180 if (slp_node)
3181 return true;
3183 if (scalar_dest)
3185 type = TREE_TYPE (scalar_dest);
3186 if (is_pattern_stmt_p (stmt_info))
3187 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3188 else
3189 lhs = gimple_call_lhs (stmt);
3190 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3192 else
3193 new_stmt = gimple_build_nop ();
3194 set_vinfo_for_stmt (new_stmt, stmt_info);
3195 set_vinfo_for_stmt (stmt, NULL);
3196 STMT_VINFO_STMT (stmt_info) = new_stmt;
3197 gsi_replace (gsi, new_stmt, true);
3198 unlink_stmt_vdef (stmt);
3200 return true;
3204 /* Function vect_gen_widened_results_half
3206 Create a vector stmt whose code is CODE, whose number of arguments is
3207 given by OP_TYPE, and whose result variable is VEC_DEST; its arguments
3208 are VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3209 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3210 needs to be created (DECL is a function-decl of a target-builtin).
3211 STMT is the original scalar stmt that we are vectorizing. */
3213 static gimple
3214 vect_gen_widened_results_half (enum tree_code code,
3215 tree decl,
3216 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3217 tree vec_dest, gimple_stmt_iterator *gsi,
3218 gimple stmt)
3220 gimple new_stmt;
3221 tree new_temp;
3223 /* Generate half of the widened result: */
3224 if (code == CALL_EXPR)
3226 /* Target specific support */
3227 if (op_type == binary_op)
3228 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3229 else
3230 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3231 new_temp = make_ssa_name (vec_dest, new_stmt);
3232 gimple_call_set_lhs (new_stmt, new_temp);
3234 else
3236 /* Generic support */
3237 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3238 if (op_type != binary_op)
3239 vec_oprnd1 = NULL;
3240 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
3241 vec_oprnd1);
3242 new_temp = make_ssa_name (vec_dest, new_stmt);
3243 gimple_assign_set_lhs (new_stmt, new_temp);
3245 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3247 return new_stmt;
3251 /* Get vectorized definitions for loop-based vectorization. For the first
3252 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3253 the scalar operand), and for the rest we get a copy with
3254 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3255 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3256 The vectors are collected into VEC_OPRNDS. */
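/* Editorial note, not part of the original sources: each invocation of
   this function pushes two vector defs (one for the current OPRND and
   one stmt copy) and then recurses MULTI_STEP_CVT more times, so a call
   with MULTI_STEP_CVT == 1 collects four vector defs in total.  */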
3258 static void
3259 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3260 vec<tree> *vec_oprnds, int multi_step_cvt)
3262 tree vec_oprnd;
3264 /* Get first vector operand. */
3265 /* All the vector operands except the very first one (that is the scalar oprnd)
3266 are stmt copies. */
3267 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3268 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3269 else
3270 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3272 vec_oprnds->quick_push (vec_oprnd);
3274 /* Get second vector operand. */
3275 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3276 vec_oprnds->quick_push (vec_oprnd);
3278 *oprnd = vec_oprnd;
3280 /* For conversion in multiple steps, continue to get operands
3281 recursively. */
3282 if (multi_step_cvt)
3283 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3287 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3288 For multi-step conversions store the resulting vectors and call the function
3289 recursively. */
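/* Editorial note, not part of the original sources: as an illustrative
   sketch, demoting int elements to char with V4SI inputs proceeds in
   two steps: pairs of V4SI vectors are first packed into V8HI vectors,
   and pairs of those are then packed into V16QI vectors; each recursive
   invocation below performs one such step using VEC_PACK_TRUNC_EXPR.  */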
3291 static void
3292 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3293 int multi_step_cvt, gimple stmt,
3294 vec<tree> vec_dsts,
3295 gimple_stmt_iterator *gsi,
3296 slp_tree slp_node, enum tree_code code,
3297 stmt_vec_info *prev_stmt_info)
3299 unsigned int i;
3300 tree vop0, vop1, new_tmp, vec_dest;
3301 gimple new_stmt;
3302 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3304 vec_dest = vec_dsts.pop ();
3306 for (i = 0; i < vec_oprnds->length (); i += 2)
3308 /* Create demotion operation. */
3309 vop0 = (*vec_oprnds)[i];
3310 vop1 = (*vec_oprnds)[i + 1];
3311 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3312 new_tmp = make_ssa_name (vec_dest, new_stmt);
3313 gimple_assign_set_lhs (new_stmt, new_tmp);
3314 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3316 if (multi_step_cvt)
3317 /* Store the resulting vector for next recursive call. */
3318 (*vec_oprnds)[i/2] = new_tmp;
3319 else
3321 /* This is the last step of the conversion sequence. Store the
3322 vectors in SLP_NODE or in the vector info of the scalar statement
3323 (or in the STMT_VINFO_RELATED_STMT chain). */
3324 if (slp_node)
3325 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3326 else
3328 if (!*prev_stmt_info)
3329 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3330 else
3331 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3333 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3338 /* For multi-step demotion operations we first generate demotion operations
3339 from the source type to the intermediate types, and then combine the
3340 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3341 type. */
3342 if (multi_step_cvt)
3344 /* At each level of recursion we have half of the operands we had at the
3345 previous level. */
3346 vec_oprnds->truncate ((i+1)/2);
3347 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3348 stmt, vec_dsts, gsi, slp_node,
3349 VEC_PACK_TRUNC_EXPR,
3350 prev_stmt_info);
3353 vec_dsts.quick_push (vec_dest);
3357 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3358 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3359 the resulting vectors and call the function recursively. */
3361 static void
3362 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3363 vec<tree> *vec_oprnds1,
3364 gimple stmt, tree vec_dest,
3365 gimple_stmt_iterator *gsi,
3366 enum tree_code code1,
3367 enum tree_code code2, tree decl1,
3368 tree decl2, int op_type)
3370 int i;
3371 tree vop0, vop1, new_tmp1, new_tmp2;
3372 gimple new_stmt1, new_stmt2;
3373 vec<tree> vec_tmp = vNULL;
3375 vec_tmp.create (vec_oprnds0->length () * 2);
3376 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3378 if (op_type == binary_op)
3379 vop1 = (*vec_oprnds1)[i];
3380 else
3381 vop1 = NULL_TREE;
3383 /* Generate the two halves of the promotion operation. */
3384 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3385 op_type, vec_dest, gsi, stmt);
3386 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3387 op_type, vec_dest, gsi, stmt);
3388 if (is_gimple_call (new_stmt1))
3390 new_tmp1 = gimple_call_lhs (new_stmt1);
3391 new_tmp2 = gimple_call_lhs (new_stmt2);
3393 else
3395 new_tmp1 = gimple_assign_lhs (new_stmt1);
3396 new_tmp2 = gimple_assign_lhs (new_stmt2);
3399 /* Store the results for the next step. */
3400 vec_tmp.quick_push (new_tmp1);
3401 vec_tmp.quick_push (new_tmp2);
3404 vec_oprnds0->release ();
3405 *vec_oprnds0 = vec_tmp;
3409 /* Check if STMT performs a conversion operation that can be vectorized.
3410 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3411 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3412 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
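/* Editorial note, not part of the original sources: an illustrative
   sketch of the three cases below.  A conversion such as

     for (i = 0; i < n; i++)
       d[i] = (double) f[i];

   (float to double) takes the WIDEN path, where each input vector
   produces two output vectors; the inverse double-to-float conversion
   takes the NARROW path, where two input vectors are packed into one
   output vector; and conversions between same-width types, such as
   int to float, take the NONE path.  */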
3414 static bool
3415 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3416 gimple *vec_stmt, slp_tree slp_node)
3418 tree vec_dest;
3419 tree scalar_dest;
3420 tree op0, op1 = NULL_TREE;
3421 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3422 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3423 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3424 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3425 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3426 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3427 tree new_temp;
3428 tree def;
3429 gimple def_stmt;
3430 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3431 gimple new_stmt = NULL;
3432 stmt_vec_info prev_stmt_info;
3433 int nunits_in;
3434 int nunits_out;
3435 tree vectype_out, vectype_in;
3436 int ncopies, i, j;
3437 tree lhs_type, rhs_type;
3438 enum { NARROW, NONE, WIDEN } modifier;
3439 vec<tree> vec_oprnds0 = vNULL;
3440 vec<tree> vec_oprnds1 = vNULL;
3441 tree vop0;
3442 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3443 int multi_step_cvt = 0;
3444 vec<tree> vec_dsts = vNULL;
3445 vec<tree> interm_types = vNULL;
3446 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3447 int op_type;
3448 machine_mode rhs_mode;
3449 unsigned short fltsz;
3451 /* Is STMT a vectorizable conversion? */
3453 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3454 return false;
3456 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3457 return false;
3459 if (!is_gimple_assign (stmt))
3460 return false;
3462 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3463 return false;
3465 code = gimple_assign_rhs_code (stmt);
3466 if (!CONVERT_EXPR_CODE_P (code)
3467 && code != FIX_TRUNC_EXPR
3468 && code != FLOAT_EXPR
3469 && code != WIDEN_MULT_EXPR
3470 && code != WIDEN_LSHIFT_EXPR)
3471 return false;
3473 op_type = TREE_CODE_LENGTH (code);
3475 /* Check types of lhs and rhs. */
3476 scalar_dest = gimple_assign_lhs (stmt);
3477 lhs_type = TREE_TYPE (scalar_dest);
3478 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3480 op0 = gimple_assign_rhs1 (stmt);
3481 rhs_type = TREE_TYPE (op0);
3483 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3484 && !((INTEGRAL_TYPE_P (lhs_type)
3485 && INTEGRAL_TYPE_P (rhs_type))
3486 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3487 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3488 return false;
3490 if ((INTEGRAL_TYPE_P (lhs_type)
3491 && (TYPE_PRECISION (lhs_type)
3492 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3493 || (INTEGRAL_TYPE_P (rhs_type)
3494 && (TYPE_PRECISION (rhs_type)
3495 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3497 if (dump_enabled_p ())
3498 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3499 "type conversion to/from bit-precision unsupported."
3500 "\n");
3501 return false;
3504 /* Check the operands of the operation. */
3505 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3506 &def_stmt, &def, &dt[0], &vectype_in))
3508 if (dump_enabled_p ())
3509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3510 "use not simple.\n");
3511 return false;
3513 if (op_type == binary_op)
3515 bool ok;
3517 op1 = gimple_assign_rhs2 (stmt);
3518 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3519 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3520 OP1. */
3521 if (CONSTANT_CLASS_P (op0))
3522 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3523 &def_stmt, &def, &dt[1], &vectype_in);
3524 else
3525 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3526 &def, &dt[1]);
3528 if (!ok)
3530 if (dump_enabled_p ())
3531 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3532 "use not simple.\n");
3533 return false;
3537 /* If op0 is an external or constant def, use a vector type of
3538 the same size as the output vector type. */
3539 if (!vectype_in)
3540 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3541 if (vec_stmt)
3542 gcc_assert (vectype_in);
3543 if (!vectype_in)
3545 if (dump_enabled_p ())
3547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3548 "no vectype for scalar type ");
3549 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3550 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3553 return false;
3556 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3557 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3558 if (nunits_in < nunits_out)
3559 modifier = NARROW;
3560 else if (nunits_out == nunits_in)
3561 modifier = NONE;
3562 else
3563 modifier = WIDEN;
3565 /* Multiple types in SLP are handled by creating the appropriate number of
3566 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3567 case of SLP. */
3568 if (slp_node || PURE_SLP_STMT (stmt_info))
3569 ncopies = 1;
3570 else if (modifier == NARROW)
3571 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3572 else
3573 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3575 /* Sanity check: make sure that at least one copy of the vectorized stmt
3576 needs to be generated. */
3577 gcc_assert (ncopies >= 1);
3579 /* Supportable by target? */
3580 switch (modifier)
3582 case NONE:
3583 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3584 return false;
3585 if (supportable_convert_operation (code, vectype_out, vectype_in,
3586 &decl1, &code1))
3587 break;
3588 /* FALLTHRU */
3589 unsupported:
3590 if (dump_enabled_p ())
3591 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3592 "conversion not supported by target.\n");
3593 return false;
3595 case WIDEN:
3596 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3597 &code1, &code2, &multi_step_cvt,
3598 &interm_types))
3600 /* Binary widening operation can only be supported directly by the
3601 architecture. */
3602 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3603 break;
3606 if (code != FLOAT_EXPR
3607 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3608 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3609 goto unsupported;
3611 rhs_mode = TYPE_MODE (rhs_type);
3612 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3613 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3614 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3615 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3617 cvt_type
3618 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3619 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3620 if (cvt_type == NULL_TREE)
3621 goto unsupported;
3623 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3625 if (!supportable_convert_operation (code, vectype_out,
3626 cvt_type, &decl1, &codecvt1))
3627 goto unsupported;
3629 else if (!supportable_widening_operation (code, stmt, vectype_out,
3630 cvt_type, &codecvt1,
3631 &codecvt2, &multi_step_cvt,
3632 &interm_types))
3633 continue;
3634 else
3635 gcc_assert (multi_step_cvt == 0);
3637 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3638 vectype_in, &code1, &code2,
3639 &multi_step_cvt, &interm_types))
3640 break;
3643 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3644 goto unsupported;
3646 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3647 codecvt2 = ERROR_MARK;
3648 else
3650 multi_step_cvt++;
3651 interm_types.safe_push (cvt_type);
3652 cvt_type = NULL_TREE;
3654 break;
3656 case NARROW:
3657 gcc_assert (op_type == unary_op);
3658 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3659 &code1, &multi_step_cvt,
3660 &interm_types))
3661 break;
3663 if (code != FIX_TRUNC_EXPR
3664 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3665 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3666 goto unsupported;
3668 rhs_mode = TYPE_MODE (rhs_type);
3669 cvt_type
3670 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3671 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3672 if (cvt_type == NULL_TREE)
3673 goto unsupported;
3674 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3675 &decl1, &codecvt1))
3676 goto unsupported;
3677 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3678 &code1, &multi_step_cvt,
3679 &interm_types))
3680 break;
3681 goto unsupported;
3683 default:
3684 gcc_unreachable ();
3687 if (!vec_stmt) /* transformation not required. */
3689 if (dump_enabled_p ())
3690 dump_printf_loc (MSG_NOTE, vect_location,
3691 "=== vectorizable_conversion ===\n");
3692 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3694 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3695 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3697 else if (modifier == NARROW)
3699 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3700 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3702 else
3704 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3705 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3707 interm_types.release ();
3708 return true;
3711 /** Transform. **/
3712 if (dump_enabled_p ())
3713 dump_printf_loc (MSG_NOTE, vect_location,
3714 "transform conversion. ncopies = %d.\n", ncopies);
3716 if (op_type == binary_op)
3718 if (CONSTANT_CLASS_P (op0))
3719 op0 = fold_convert (TREE_TYPE (op1), op0);
3720 else if (CONSTANT_CLASS_P (op1))
3721 op1 = fold_convert (TREE_TYPE (op0), op1);
3724 /* In case of multi-step conversion, we first generate conversion operations
3725 to the intermediate types, and then from those types to the final one.
3726 We create vector destinations for the intermediate types (TYPES) received
3727 from supportable_*_operation, and store them in the correct order
3728 for future use in vect_create_vectorized_*_stmts (). */
3729 vec_dsts.create (multi_step_cvt + 1);
3730 vec_dest = vect_create_destination_var (scalar_dest,
3731 (cvt_type && modifier == WIDEN)
3732 ? cvt_type : vectype_out);
3733 vec_dsts.quick_push (vec_dest);
3735 if (multi_step_cvt)
3737 for (i = interm_types.length () - 1;
3738 interm_types.iterate (i, &intermediate_type); i--)
3740 vec_dest = vect_create_destination_var (scalar_dest,
3741 intermediate_type);
3742 vec_dsts.quick_push (vec_dest);
3746 if (cvt_type)
3747 vec_dest = vect_create_destination_var (scalar_dest,
3748 modifier == WIDEN
3749 ? vectype_out : cvt_type);
3751 if (!slp_node)
3753 if (modifier == WIDEN)
3755 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3756 if (op_type == binary_op)
3757 vec_oprnds1.create (1);
3759 else if (modifier == NARROW)
3760 vec_oprnds0.create (
3761 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3763 else if (code == WIDEN_LSHIFT_EXPR)
3764 vec_oprnds1.create (slp_node->vec_stmts_size);
3766 last_oprnd = op0;
3767 prev_stmt_info = NULL;
3768 switch (modifier)
3770 case NONE:
3771 for (j = 0; j < ncopies; j++)
3773 if (j == 0)
3774 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3775 -1);
3776 else
3777 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3779 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3781 /* Arguments are ready. Create the new vector stmt. */
3782 if (code1 == CALL_EXPR)
3784 new_stmt = gimple_build_call (decl1, 1, vop0);
3785 new_temp = make_ssa_name (vec_dest, new_stmt);
3786 gimple_call_set_lhs (new_stmt, new_temp);
3788 else
3790 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3791 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
3792 vop0);
3793 new_temp = make_ssa_name (vec_dest, new_stmt);
3794 gimple_assign_set_lhs (new_stmt, new_temp);
3797 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3798 if (slp_node)
3799 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3802 if (j == 0)
3803 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3804 else
3805 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3806 prev_stmt_info = vinfo_for_stmt (new_stmt);
3808 break;
3810 case WIDEN:
3811 /* In case the vectorization factor (VF) is bigger than the number
3812 of elements that we can fit in a vectype (nunits), we have to
3813 generate more than one vector stmt, i.e., we need to "unroll"
3814 the vector stmt by a factor VF/nunits. */
3815 for (j = 0; j < ncopies; j++)
3817 /* Handle uses. */
3818 if (j == 0)
3820 if (slp_node)
3822 if (code == WIDEN_LSHIFT_EXPR)
3824 unsigned int k;
3826 vec_oprnd1 = op1;
3827 /* Store vec_oprnd1 for every vector stmt to be created
3828 for SLP_NODE. We check during the analysis that all
3829 the shift arguments are the same. */
3830 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3831 vec_oprnds1.quick_push (vec_oprnd1);
3833 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3834 slp_node, -1);
3836 else
3837 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3838 &vec_oprnds1, slp_node, -1);
3840 else
3842 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3843 vec_oprnds0.quick_push (vec_oprnd0);
3844 if (op_type == binary_op)
3846 if (code == WIDEN_LSHIFT_EXPR)
3847 vec_oprnd1 = op1;
3848 else
3849 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3850 NULL);
3851 vec_oprnds1.quick_push (vec_oprnd1);
3855 else
3857 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3858 vec_oprnds0.truncate (0);
3859 vec_oprnds0.quick_push (vec_oprnd0);
3860 if (op_type == binary_op)
3862 if (code == WIDEN_LSHIFT_EXPR)
3863 vec_oprnd1 = op1;
3864 else
3865 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3866 vec_oprnd1);
3867 vec_oprnds1.truncate (0);
3868 vec_oprnds1.quick_push (vec_oprnd1);
3872 /* Arguments are ready. Create the new vector stmts. */
3873 for (i = multi_step_cvt; i >= 0; i--)
3875 tree this_dest = vec_dsts[i];
3876 enum tree_code c1 = code1, c2 = code2;
3877 if (i == 0 && codecvt2 != ERROR_MARK)
3879 c1 = codecvt1;
3880 c2 = codecvt2;
3882 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3883 &vec_oprnds1,
3884 stmt, this_dest, gsi,
3885 c1, c2, decl1, decl2,
3886 op_type);
3889 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3891 if (cvt_type)
3893 if (codecvt1 == CALL_EXPR)
3895 new_stmt = gimple_build_call (decl1, 1, vop0);
3896 new_temp = make_ssa_name (vec_dest, new_stmt);
3897 gimple_call_set_lhs (new_stmt, new_temp);
3899 else
3901 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3902 new_temp = make_ssa_name (vec_dest, NULL);
3903 new_stmt = gimple_build_assign_with_ops (codecvt1,
3904 new_temp, vop0);
3907 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3909 else
3910 new_stmt = SSA_NAME_DEF_STMT (vop0);
3912 if (slp_node)
3913 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3914 else
3916 if (!prev_stmt_info)
3917 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3918 else
3919 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3920 prev_stmt_info = vinfo_for_stmt (new_stmt);
3925 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3926 break;
3928 case NARROW:
3929 /* In case the vectorization factor (VF) is bigger than the number
3930 of elements that we can fit in a vectype (nunits), we have to
3931 generate more than one vector stmt, i.e., we need to "unroll"
3932 the vector stmt by a factor VF/nunits. */
3933 for (j = 0; j < ncopies; j++)
3935 /* Handle uses. */
3936 if (slp_node)
3937 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3938 slp_node, -1);
3939 else
3941 vec_oprnds0.truncate (0);
3942 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3943 vect_pow2 (multi_step_cvt) - 1);
3946 /* Arguments are ready. Create the new vector stmts. */
3947 if (cvt_type)
3948 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3950 if (codecvt1 == CALL_EXPR)
3952 new_stmt = gimple_build_call (decl1, 1, vop0);
3953 new_temp = make_ssa_name (vec_dest, new_stmt);
3954 gimple_call_set_lhs (new_stmt, new_temp);
3956 else
3958 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3959 new_temp = make_ssa_name (vec_dest, NULL);
3960 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
3961 vop0);
3964 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3965 vec_oprnds0[i] = new_temp;
3968 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
3969 stmt, vec_dsts, gsi,
3970 slp_node, code1,
3971 &prev_stmt_info);
3974 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3975 break;
3978 vec_oprnds0.release ();
3979 vec_oprnds1.release ();
3980 vec_dsts.release ();
3981 interm_types.release ();
3983 return true;
3987 /* Function vectorizable_assignment.
3989 Check if STMT performs an assignment (copy) that can be vectorized.
3990 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3991 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3992 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
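 /* For illustration (a summary, not an exhaustive list): this handles plain
    SSA copies such as a_1 = b_2, as well as NOP/CONVERT and VIEW_CONVERT_EXPR
    assignments, provided the conversion changes neither the number of vector
    elements nor the vector size, as checked below.  */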
3994 static bool
3995 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
3996 gimple *vec_stmt, slp_tree slp_node)
3998 tree vec_dest;
3999 tree scalar_dest;
4000 tree op;
4001 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4002 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4003 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4004 tree new_temp;
4005 tree def;
4006 gimple def_stmt;
4007 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4008 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4009 int ncopies;
4010 int i, j;
4011 vec<tree> vec_oprnds = vNULL;
4012 tree vop;
4013 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4014 gimple new_stmt = NULL;
4015 stmt_vec_info prev_stmt_info = NULL;
4016 enum tree_code code;
4017 tree vectype_in;
4019 /* Multiple types in SLP are handled by creating the appropriate number of
4020 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4021 case of SLP. */
4022 if (slp_node || PURE_SLP_STMT (stmt_info))
4023 ncopies = 1;
4024 else
4025 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4027 gcc_assert (ncopies >= 1);
4029 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4030 return false;
4032 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4033 return false;
4035 /* Is vectorizable assignment? */
4036 if (!is_gimple_assign (stmt))
4037 return false;
4039 scalar_dest = gimple_assign_lhs (stmt);
4040 if (TREE_CODE (scalar_dest) != SSA_NAME)
4041 return false;
4043 code = gimple_assign_rhs_code (stmt);
4044 if (gimple_assign_single_p (stmt)
4045 || code == PAREN_EXPR
4046 || CONVERT_EXPR_CODE_P (code))
4047 op = gimple_assign_rhs1 (stmt);
4048 else
4049 return false;
4051 if (code == VIEW_CONVERT_EXPR)
4052 op = TREE_OPERAND (op, 0);
4054 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4055 &def_stmt, &def, &dt[0], &vectype_in))
4057 if (dump_enabled_p ())
4058 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4059 "use not simple.\n");
4060 return false;
4063 /* We can handle NOP_EXPR conversions that do not change the number
4064 of elements or the vector size. */
4065 if ((CONVERT_EXPR_CODE_P (code)
4066 || code == VIEW_CONVERT_EXPR)
4067 && (!vectype_in
4068 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4069 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4070 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4071 return false;
4073 /* We do not handle bit-precision changes. */
4074 if ((CONVERT_EXPR_CODE_P (code)
4075 || code == VIEW_CONVERT_EXPR)
4076 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4077 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4078 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4079 || ((TYPE_PRECISION (TREE_TYPE (op))
4080 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4081 /* But a conversion that does not change the bit-pattern is ok. */
4082 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4083 > TYPE_PRECISION (TREE_TYPE (op)))
4084 && TYPE_UNSIGNED (TREE_TYPE (op))))
4086 if (dump_enabled_p ())
4087 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4088 "type conversion to/from bit-precision "
4089 "unsupported.\n");
4090 return false;
4093 if (!vec_stmt) /* transformation not required. */
4095 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4096 if (dump_enabled_p ())
4097 dump_printf_loc (MSG_NOTE, vect_location,
4098 "=== vectorizable_assignment ===\n");
4099 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4100 return true;
4103 /** Transform. **/
4104 if (dump_enabled_p ())
4105 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4107 /* Handle def. */
4108 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4110 /* Handle use. */
4111 for (j = 0; j < ncopies; j++)
4113 /* Handle uses. */
4114 if (j == 0)
4115 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4116 else
4117 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4119 /* Arguments are ready.  Create the new vector stmt. */
4120 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4122 if (CONVERT_EXPR_CODE_P (code)
4123 || code == VIEW_CONVERT_EXPR)
4124 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4125 new_stmt = gimple_build_assign (vec_dest, vop);
4126 new_temp = make_ssa_name (vec_dest, new_stmt);
4127 gimple_assign_set_lhs (new_stmt, new_temp);
4128 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4129 if (slp_node)
4130 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4133 if (slp_node)
4134 continue;
4136 if (j == 0)
4137 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4138 else
4139 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4141 prev_stmt_info = vinfo_for_stmt (new_stmt);
4144 vec_oprnds.release ();
4145 return true;
4149 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4150 either as shift by a scalar or by a vector. */
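 /* Hedged usage sketch (callers live elsewhere in the vectorizer): a caller
    may ask, e.g., vect_supportable_shift (RSHIFT_EXPR, short_integer_type_node)
    to learn whether right shifts of shorts are vectorizable at all; both the
    scalar-shift and the vector-shift optabs are tried below.  */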
4152 bool
4153 vect_supportable_shift (enum tree_code code, tree scalar_type)
4156 machine_mode vec_mode;
4157 optab optab;
4158 int icode;
4159 tree vectype;
4161 vectype = get_vectype_for_scalar_type (scalar_type);
4162 if (!vectype)
4163 return false;
4165 optab = optab_for_tree_code (code, vectype, optab_scalar);
4166 if (!optab
4167 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4169 optab = optab_for_tree_code (code, vectype, optab_vector);
4170 if (!optab
4171 || (optab_handler (optab, TYPE_MODE (vectype))
4172 == CODE_FOR_nothing))
4173 return false;
4176 vec_mode = TYPE_MODE (vectype);
4177 icode = (int) optab_handler (optab, vec_mode);
4178 if (icode == CODE_FOR_nothing)
4179 return false;
4181 return true;
4185 /* Function vectorizable_shift.
4187 Check if STMT performs a shift operation that can be vectorized.
4188 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4189 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4190 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4192 static bool
4193 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4194 gimple *vec_stmt, slp_tree slp_node)
4196 tree vec_dest;
4197 tree scalar_dest;
4198 tree op0, op1 = NULL;
4199 tree vec_oprnd1 = NULL_TREE;
4200 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4201 tree vectype;
4202 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4203 enum tree_code code;
4204 machine_mode vec_mode;
4205 tree new_temp;
4206 optab optab;
4207 int icode;
4208 machine_mode optab_op2_mode;
4209 tree def;
4210 gimple def_stmt;
4211 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4212 gimple new_stmt = NULL;
4213 stmt_vec_info prev_stmt_info;
4214 int nunits_in;
4215 int nunits_out;
4216 tree vectype_out;
4217 tree op1_vectype;
4218 int ncopies;
4219 int j, i;
4220 vec<tree> vec_oprnds0 = vNULL;
4221 vec<tree> vec_oprnds1 = vNULL;
4222 tree vop0, vop1;
4223 unsigned int k;
4224 bool scalar_shift_arg = true;
4225 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4226 int vf;
4228 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4229 return false;
4231 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4232 return false;
4234 /* Is STMT a vectorizable binary/unary operation? */
4235 if (!is_gimple_assign (stmt))
4236 return false;
4238 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4239 return false;
4241 code = gimple_assign_rhs_code (stmt);
4243 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4244 || code == RROTATE_EXPR))
4245 return false;
4247 scalar_dest = gimple_assign_lhs (stmt);
4248 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4249 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4250 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4252 if (dump_enabled_p ())
4253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4254 "bit-precision shifts not supported.\n");
4255 return false;
4258 op0 = gimple_assign_rhs1 (stmt);
4259 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4260 &def_stmt, &def, &dt[0], &vectype))
4262 if (dump_enabled_p ())
4263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4264 "use not simple.\n");
4265 return false;
4267 /* If op0 is an external or constant def, use a vector type with
4268 the same size as the output vector type. */
4269 if (!vectype)
4270 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4271 if (vec_stmt)
4272 gcc_assert (vectype);
4273 if (!vectype)
4275 if (dump_enabled_p ())
4276 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4277 "no vectype for scalar type\n");
4278 return false;
4281 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4282 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4283 if (nunits_out != nunits_in)
4284 return false;
4286 op1 = gimple_assign_rhs2 (stmt);
4287 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4288 &def, &dt[1], &op1_vectype))
4290 if (dump_enabled_p ())
4291 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4292 "use not simple.\n");
4293 return false;
4296 if (loop_vinfo)
4297 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4298 else
4299 vf = 1;
4301 /* Multiple types in SLP are handled by creating the appropriate number of
4302 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4303 case of SLP. */
4304 if (slp_node || PURE_SLP_STMT (stmt_info))
4305 ncopies = 1;
4306 else
4307 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4309 gcc_assert (ncopies >= 1);
4311 /* Determine whether the shift amount is a vector or a scalar.  If the
4312 shift/rotate amount is a vector, use the vector/vector shift optabs. */
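   /* For illustration (hypothetical statements, not from this code):
      a[i] = b[i] << 3, or a[i] = b[i] << n with loop-invariant n, keeps the
      shift amount a scalar, whereas a[i] = b[i] << c[i] makes dt[1]
      vect_internal_def and forces the vector/vector shift path.  */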
4314 if (dt[1] == vect_internal_def && !slp_node)
4315 scalar_shift_arg = false;
4316 else if (dt[1] == vect_constant_def
4317 || dt[1] == vect_external_def
4318 || dt[1] == vect_internal_def)
4320 /* In SLP, we need to check whether the shift count is the same for
4321 all the statements; in loops, if it is a constant or invariant,
4322 it is always a scalar shift. */
4323 if (slp_node)
4325 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4326 gimple slpstmt;
4328 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4329 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4330 scalar_shift_arg = false;
4333 else
4335 if (dump_enabled_p ())
4336 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4337 "operand mode requires invariant argument.\n");
4338 return false;
4341 /* Vector shifted by vector. */
4342 if (!scalar_shift_arg)
4344 optab = optab_for_tree_code (code, vectype, optab_vector);
4345 if (dump_enabled_p ())
4346 dump_printf_loc (MSG_NOTE, vect_location,
4347 "vector/vector shift/rotate found.\n");
4349 if (!op1_vectype)
4350 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4351 if (op1_vectype == NULL_TREE
4352 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4354 if (dump_enabled_p ())
4355 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4356 "unusable type for last operand in"
4357 " vector/vector shift/rotate.\n");
4358 return false;
4361 /* See if the machine has a vector-shift-by-scalar insn, and if not,
4362 then see if it has a vector-shift-by-vector insn. */
4363 else
4365 optab = optab_for_tree_code (code, vectype, optab_scalar);
4366 if (optab
4367 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4369 if (dump_enabled_p ())
4370 dump_printf_loc (MSG_NOTE, vect_location,
4371 "vector/scalar shift/rotate found.\n");
4373 else
4375 optab = optab_for_tree_code (code, vectype, optab_vector);
4376 if (optab
4377 && (optab_handler (optab, TYPE_MODE (vectype))
4378 != CODE_FOR_nothing))
4380 scalar_shift_arg = false;
4382 if (dump_enabled_p ())
4383 dump_printf_loc (MSG_NOTE, vect_location,
4384 "vector/vector shift/rotate found.\n");
4386 /* Unlike the other binary operators, shifts/rotates have
4387 an int rhs rather than one of the same type as the lhs,
4388 so make sure the scalar has the right type if we are
4389 dealing with vectors of long long/long/short/char. */
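   /* Illustrative example (hypothetical values): shifting a vector of
      long long by the int constant 2: the constant is fold_convert'ed to
      long long just below so that the vector/vector shift sees matching
      element types.  */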
4390 if (dt[1] == vect_constant_def)
4391 op1 = fold_convert (TREE_TYPE (vectype), op1);
4392 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4393 TREE_TYPE (op1)))
4395 if (slp_node
4396 && TYPE_MODE (TREE_TYPE (vectype))
4397 != TYPE_MODE (TREE_TYPE (op1)))
4399 if (dump_enabled_p ())
4400 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4401 "unusable type for last operand in"
4402 " vector/vector shift/rotate.\n");
4403 return false;
4405 if (vec_stmt && !slp_node)
4407 op1 = fold_convert (TREE_TYPE (vectype), op1);
4408 op1 = vect_init_vector (stmt, op1,
4409 TREE_TYPE (vectype), NULL);
4416 /* Supportable by target? */
4417 if (!optab)
4419 if (dump_enabled_p ())
4420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4421 "no optab.\n");
4422 return false;
4424 vec_mode = TYPE_MODE (vectype);
4425 icode = (int) optab_handler (optab, vec_mode);
4426 if (icode == CODE_FOR_nothing)
4428 if (dump_enabled_p ())
4429 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4430 "op not supported by target.\n");
4431 /* Check only during analysis. */
4432 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4433 || (vf < vect_min_worthwhile_factor (code)
4434 && !vec_stmt))
4435 return false;
4436 if (dump_enabled_p ())
4437 dump_printf_loc (MSG_NOTE, vect_location,
4438 "proceeding using word mode.\n");
4441 /* Worthwhile without SIMD support? Check only during analysis. */
4442 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4443 && vf < vect_min_worthwhile_factor (code)
4444 && !vec_stmt)
4446 if (dump_enabled_p ())
4447 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4448 "not worthwhile without SIMD support.\n");
4449 return false;
4452 if (!vec_stmt) /* transformation not required. */
4454 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4455 if (dump_enabled_p ())
4456 dump_printf_loc (MSG_NOTE, vect_location,
4457 "=== vectorizable_shift ===\n");
4458 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4459 return true;
4462 /** Transform. **/
4464 if (dump_enabled_p ())
4465 dump_printf_loc (MSG_NOTE, vect_location,
4466 "transform binary/unary operation.\n");
4468 /* Handle def. */
4469 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4471 prev_stmt_info = NULL;
4472 for (j = 0; j < ncopies; j++)
4474 /* Handle uses. */
4475 if (j == 0)
4477 if (scalar_shift_arg)
4479 /* Vector shl and shr insn patterns can be defined with scalar
4480 operand 2 (shift operand). In this case, use constant or loop
4481 invariant op1 directly, without extending it to vector mode
4482 first. */
4483 optab_op2_mode = insn_data[icode].operand[2].mode;
4484 if (!VECTOR_MODE_P (optab_op2_mode))
4486 if (dump_enabled_p ())
4487 dump_printf_loc (MSG_NOTE, vect_location,
4488 "operand 1 using scalar mode.\n");
4489 vec_oprnd1 = op1;
4490 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4491 vec_oprnds1.quick_push (vec_oprnd1);
4492 if (slp_node)
4494 /* Store vec_oprnd1 for every vector stmt to be created
4495 for SLP_NODE. We check during the analysis that all
4496 the shift arguments are the same.
4497 TODO: Allow different constants for different vector
4498 stmts generated for an SLP instance. */
4499 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4500 vec_oprnds1.quick_push (vec_oprnd1);
4505 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4506 (a special case for certain kinds of vector shifts); otherwise,
4507 operand 1 should be of a vector type (the usual case). */
4508 if (vec_oprnd1)
4509 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4510 slp_node, -1);
4511 else
4512 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4513 slp_node, -1);
4515 else
4516 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4518 /* Arguments are ready. Create the new vector stmt. */
4519 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4521 vop1 = vec_oprnds1[i];
4522 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
4523 new_temp = make_ssa_name (vec_dest, new_stmt);
4524 gimple_assign_set_lhs (new_stmt, new_temp);
4525 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4526 if (slp_node)
4527 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4530 if (slp_node)
4531 continue;
4533 if (j == 0)
4534 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4535 else
4536 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4537 prev_stmt_info = vinfo_for_stmt (new_stmt);
4540 vec_oprnds0.release ();
4541 vec_oprnds1.release ();
4543 return true;
4547 /* Function vectorizable_operation.
4549 Check if STMT performs a binary, unary or ternary operation that can
4550 be vectorized.
4551 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4552 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4553 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4555 static bool
4556 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4557 gimple *vec_stmt, slp_tree slp_node)
4559 tree vec_dest;
4560 tree scalar_dest;
4561 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4562 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4563 tree vectype;
4564 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4565 enum tree_code code;
4566 machine_mode vec_mode;
4567 tree new_temp;
4568 int op_type;
4569 optab optab;
4570 int icode;
4571 tree def;
4572 gimple def_stmt;
4573 enum vect_def_type dt[3]
4574 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4575 gimple new_stmt = NULL;
4576 stmt_vec_info prev_stmt_info;
4577 int nunits_in;
4578 int nunits_out;
4579 tree vectype_out;
4580 int ncopies;
4581 int j, i;
4582 vec<tree> vec_oprnds0 = vNULL;
4583 vec<tree> vec_oprnds1 = vNULL;
4584 vec<tree> vec_oprnds2 = vNULL;
4585 tree vop0, vop1, vop2;
4586 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4587 int vf;
4589 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4590 return false;
4592 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4593 return false;
4595 /* Is STMT a vectorizable binary/unary operation? */
4596 if (!is_gimple_assign (stmt))
4597 return false;
4599 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4600 return false;
4602 code = gimple_assign_rhs_code (stmt);
4604 /* For pointer addition, we should use the normal plus for
4605 the vector addition. */
4606 if (code == POINTER_PLUS_EXPR)
4607 code = PLUS_EXPR;
4609 /* Support only unary or binary operations. */
4610 op_type = TREE_CODE_LENGTH (code);
4611 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4613 if (dump_enabled_p ())
4614 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4615 "num. args = %d (not unary/binary/ternary op).\n",
4616 op_type);
4617 return false;
4620 scalar_dest = gimple_assign_lhs (stmt);
4621 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4623 /* Most operations cannot handle bit-precision types without extra
4624 truncations. */
4625 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4626 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4627 /* Exception are bitwise binary operations. */
4628 && code != BIT_IOR_EXPR
4629 && code != BIT_XOR_EXPR
4630 && code != BIT_AND_EXPR)
4632 if (dump_enabled_p ())
4633 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4634 "bit-precision arithmetic not supported.\n");
4635 return false;
4638 op0 = gimple_assign_rhs1 (stmt);
4639 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4640 &def_stmt, &def, &dt[0], &vectype))
4642 if (dump_enabled_p ())
4643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4644 "use not simple.\n");
4645 return false;
4647 /* If op0 is an external or constant def, use a vector type with
4648 the same size as the output vector type. */
4649 if (!vectype)
4650 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4651 if (vec_stmt)
4652 gcc_assert (vectype);
4653 if (!vectype)
4655 if (dump_enabled_p ())
4657 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4658 "no vectype for scalar type ");
4659 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4660 TREE_TYPE (op0));
4661 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4664 return false;
4667 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4668 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4669 if (nunits_out != nunits_in)
4670 return false;
4672 if (op_type == binary_op || op_type == ternary_op)
4674 op1 = gimple_assign_rhs2 (stmt);
4675 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4676 &def, &dt[1]))
4678 if (dump_enabled_p ())
4679 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4680 "use not simple.\n");
4681 return false;
4684 if (op_type == ternary_op)
4686 op2 = gimple_assign_rhs3 (stmt);
4687 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4688 &def, &dt[2]))
4690 if (dump_enabled_p ())
4691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4692 "use not simple.\n");
4693 return false;
4697 if (loop_vinfo)
4698 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4699 else
4700 vf = 1;
4702 /* Multiple types in SLP are handled by creating the appropriate number of
4703 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4704 case of SLP. */
4705 if (slp_node || PURE_SLP_STMT (stmt_info))
4706 ncopies = 1;
4707 else
4708 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4710 gcc_assert (ncopies >= 1);
4712 /* Shifts are handled in vectorizable_shift (). */
4713 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4714 || code == RROTATE_EXPR)
4715 return false;
4717 /* Supportable by target? */
4719 vec_mode = TYPE_MODE (vectype);
4720 if (code == MULT_HIGHPART_EXPR)
4722 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4723 icode = LAST_INSN_CODE;
4724 else
4725 icode = CODE_FOR_nothing;
4727 else
4729 optab = optab_for_tree_code (code, vectype, optab_default);
4730 if (!optab)
4732 if (dump_enabled_p ())
4733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4734 "no optab.\n");
4735 return false;
4737 icode = (int) optab_handler (optab, vec_mode);
4740 if (icode == CODE_FOR_nothing)
4742 if (dump_enabled_p ())
4743 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4744 "op not supported by target.\n");
4745 /* Check only during analysis. */
4746 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4747 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4748 return false;
4749 if (dump_enabled_p ())
4750 dump_printf_loc (MSG_NOTE, vect_location,
4751 "proceeding using word mode.\n");
4754 /* Worthwhile without SIMD support? Check only during analysis. */
4755 if (!VECTOR_MODE_P (vec_mode)
4756 && !vec_stmt
4757 && vf < vect_min_worthwhile_factor (code))
4759 if (dump_enabled_p ())
4760 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4761 "not worthwhile without SIMD support.\n");
4762 return false;
4765 if (!vec_stmt) /* transformation not required. */
4767 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4768 if (dump_enabled_p ())
4769 dump_printf_loc (MSG_NOTE, vect_location,
4770 "=== vectorizable_operation ===\n");
4771 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4772 return true;
4775 /** Transform. **/
4777 if (dump_enabled_p ())
4778 dump_printf_loc (MSG_NOTE, vect_location,
4779 "transform binary/unary operation.\n");
4781 /* Handle def. */
4782 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4784 /* In case the vectorization factor (VF) is bigger than the number
4785 of elements that we can fit in a vectype (nunits), we have to generate
4786 more than one vector stmt, i.e., we need to "unroll" the
4787 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4788 from one copy of the vector stmt to the next, in the field
4789 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4790 stages to find the correct vector defs to be used when vectorizing
4791 stmts that use the defs of the current stmt. The example below
4792 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4793 we need to create 4 vectorized stmts):
4795 before vectorization:
4796 RELATED_STMT VEC_STMT
4797 S1: x = memref - -
4798 S2: z = x + 1 - -
4800 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4801 there):
4802 RELATED_STMT VEC_STMT
4803 VS1_0: vx0 = memref0 VS1_1 -
4804 VS1_1: vx1 = memref1 VS1_2 -
4805 VS1_2: vx2 = memref2 VS1_3 -
4806 VS1_3: vx3 = memref3 - -
4807 S1: x = load - VS1_0
4808 S2: z = x + 1 - -
4810 step2: vectorize stmt S2 (done here):
4811 To vectorize stmt S2 we first need to find the relevant vector
4812 def for the first operand 'x'. This is, as usual, obtained from
4813 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4814 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4815 relevant vector def 'vx0'. Having found 'vx0' we can generate
4816 the vector stmt VS2_0, and as usual, record it in the
4817 STMT_VINFO_VEC_STMT of stmt S2.
4818 When creating the second copy (VS2_1), we obtain the relevant vector
4819 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4820 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4821 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4822 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4823 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4824 chain of stmts and pointers:
4825 RELATED_STMT VEC_STMT
4826 VS1_0: vx0 = memref0 VS1_1 -
4827 VS1_1: vx1 = memref1 VS1_2 -
4828 VS1_2: vx2 = memref2 VS1_3 -
4829 VS1_3: vx3 = memref3 - -
4830 S1: x = load - VS1_0
4831 VS2_0: vz0 = vx0 + v1 VS2_1 -
4832 VS2_1: vz1 = vx1 + v1 VS2_2 -
4833 VS2_2: vz2 = vx2 + v1 VS2_3 -
4834 VS2_3: vz3 = vx3 + v1 - -
4835 S2: z = x + 1 - VS2_0 */
4837 prev_stmt_info = NULL;
4838 for (j = 0; j < ncopies; j++)
4840 /* Handle uses. */
4841 if (j == 0)
4843 if (op_type == binary_op || op_type == ternary_op)
4844 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4845 slp_node, -1);
4846 else
4847 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4848 slp_node, -1);
4849 if (op_type == ternary_op)
4851 vec_oprnds2.create (1);
4852 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4853 stmt,
4854 NULL));
4857 else
4859 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4860 if (op_type == ternary_op)
4862 tree vec_oprnd = vec_oprnds2.pop ();
4863 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4864 vec_oprnd));
4868 /* Arguments are ready. Create the new vector stmt. */
4869 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4871 vop1 = ((op_type == binary_op || op_type == ternary_op)
4872 ? vec_oprnds1[i] : NULL_TREE);
4873 vop2 = ((op_type == ternary_op)
4874 ? vec_oprnds2[i] : NULL_TREE);
4875 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
4876 vop0, vop1, vop2);
4877 new_temp = make_ssa_name (vec_dest, new_stmt);
4878 gimple_assign_set_lhs (new_stmt, new_temp);
4879 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4880 if (slp_node)
4881 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4884 if (slp_node)
4885 continue;
4887 if (j == 0)
4888 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4889 else
4890 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4891 prev_stmt_info = vinfo_for_stmt (new_stmt);
4894 vec_oprnds0.release ();
4895 vec_oprnds1.release ();
4896 vec_oprnds2.release ();
4898 return true;
4901 /* A helper function to ensure data reference DR's base alignment
4902 for STMT_INFO. */
4904 static void
4905 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4907 if (!dr->aux)
4908 return;
4910 if (((dataref_aux *)dr->aux)->base_misaligned)
4912 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4913 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4915 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4916 DECL_USER_ALIGN (base_decl) = 1;
4917 ((dataref_aux *)dr->aux)->base_misaligned = false;
4922 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4923 reversal of the vector elements. If that is impossible to do,
4924 returns NULL. */
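 /* For example (this follows directly from the loop below): for a
    four-element vector the reversal selector is {3, 2, 1, 0}, i.e. element i
    of the result is taken from element NUNITS - 1 - i of the input.  */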
4926 static tree
4927 perm_mask_for_reverse (tree vectype)
4929 int i, nunits;
4930 unsigned char *sel;
4932 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4933 sel = XALLOCAVEC (unsigned char, nunits);
4935 for (i = 0; i < nunits; ++i)
4936 sel[i] = nunits - 1 - i;
4938 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4939 return NULL_TREE;
4940 return vect_gen_perm_mask_checked (vectype, sel);
4943 /* Function vectorizable_store.
4945 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4946 can be vectorized.
4947 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4948 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4949 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4951 static bool
4952 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4953 slp_tree slp_node)
4955 tree scalar_dest;
4956 tree data_ref;
4957 tree op;
4958 tree vec_oprnd = NULL_TREE;
4959 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4960 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4961 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4962 tree elem_type;
4963 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4964 struct loop *loop = NULL;
4965 machine_mode vec_mode;
4966 tree dummy;
4967 enum dr_alignment_support alignment_support_scheme;
4968 tree def;
4969 gimple def_stmt;
4970 enum vect_def_type dt;
4971 stmt_vec_info prev_stmt_info = NULL;
4972 tree dataref_ptr = NULL_TREE;
4973 tree dataref_offset = NULL_TREE;
4974 gimple ptr_incr = NULL;
4975 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4976 int ncopies;
4977 int j;
4978 gimple next_stmt, first_stmt = NULL;
4979 bool grouped_store = false;
4980 bool store_lanes_p = false;
4981 unsigned int group_size, i;
4982 vec<tree> dr_chain = vNULL;
4983 vec<tree> oprnds = vNULL;
4984 vec<tree> result_chain = vNULL;
4985 bool inv_p;
4986 bool negative = false;
4987 tree offset = NULL_TREE;
4988 vec<tree> vec_oprnds = vNULL;
4989 bool slp = (slp_node != NULL);
4990 unsigned int vec_num;
4991 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4992 tree aggr_type;
4994 if (loop_vinfo)
4995 loop = LOOP_VINFO_LOOP (loop_vinfo);
4997 /* Multiple types in SLP are handled by creating the appropriate number of
4998 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4999 case of SLP. */
5000 if (slp || PURE_SLP_STMT (stmt_info))
5001 ncopies = 1;
5002 else
5003 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5005 gcc_assert (ncopies >= 1);
5007 /* FORNOW. This restriction should be relaxed. */
5008 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5010 if (dump_enabled_p ())
5011 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5012 "multiple types in nested loop.\n");
5013 return false;
5016 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5017 return false;
5019 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5020 return false;
5022 /* Is vectorizable store? */
5024 if (!is_gimple_assign (stmt))
5025 return false;
5027 scalar_dest = gimple_assign_lhs (stmt);
5028 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5029 && is_pattern_stmt_p (stmt_info))
5030 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5031 if (TREE_CODE (scalar_dest) != ARRAY_REF
5032 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5033 && TREE_CODE (scalar_dest) != INDIRECT_REF
5034 && TREE_CODE (scalar_dest) != COMPONENT_REF
5035 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5036 && TREE_CODE (scalar_dest) != REALPART_EXPR
5037 && TREE_CODE (scalar_dest) != MEM_REF)
5038 return false;
5040 gcc_assert (gimple_assign_single_p (stmt));
5041 op = gimple_assign_rhs1 (stmt);
5042 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5043 &def, &dt))
5045 if (dump_enabled_p ())
5046 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5047 "use not simple.\n");
5048 return false;
5051 elem_type = TREE_TYPE (vectype);
5052 vec_mode = TYPE_MODE (vectype);
5054 /* FORNOW. In some cases can vectorize even if data-type not supported
5055 (e.g. - array initialization with 0). */
5056 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5057 return false;
5059 if (!STMT_VINFO_DATA_REF (stmt_info))
5060 return false;
5062 negative =
5063 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5064 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5065 size_zero_node) < 0;
5066 if (negative && ncopies > 1)
5068 if (dump_enabled_p ())
5069 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5070 "multiple types with negative step.\n");
5071 return false;
5074 if (negative)
5076 gcc_assert (!grouped_store);
5077 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5078 if (alignment_support_scheme != dr_aligned
5079 && alignment_support_scheme != dr_unaligned_supported)
5081 if (dump_enabled_p ())
5082 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5083 "negative step but alignment required.\n");
5084 return false;
5086 if (dt != vect_constant_def
5087 && dt != vect_external_def
5088 && !perm_mask_for_reverse (vectype))
5090 if (dump_enabled_p ())
5091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5092 "negative step and reversing not supported.\n");
5093 return false;
5097 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5099 grouped_store = true;
5100 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5101 if (!slp && !PURE_SLP_STMT (stmt_info))
5103 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5104 if (vect_store_lanes_supported (vectype, group_size))
5105 store_lanes_p = true;
5106 else if (!vect_grouped_store_supported (vectype, group_size))
5107 return false;
5110 if (first_stmt == stmt)
5112 /* STMT is the leader of the group. Check the operands of all the
5113 stmts of the group. */
5114 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5115 while (next_stmt)
5117 gcc_assert (gimple_assign_single_p (next_stmt));
5118 op = gimple_assign_rhs1 (next_stmt);
5119 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5120 &def_stmt, &def, &dt))
5122 if (dump_enabled_p ())
5123 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5124 "use not simple.\n");
5125 return false;
5127 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5132 if (!vec_stmt) /* transformation not required. */
5134 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5135 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5136 NULL, NULL, NULL);
5137 return true;
5140 /** Transform. **/
5142 ensure_base_align (stmt_info, dr);
5144 if (grouped_store)
5146 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5147 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5149 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5151 /* FORNOW */
5152 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5154 /* We vectorize all the stmts of the interleaving group when we
5155 reach the last stmt in the group. */
5156 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5157 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5158 && !slp)
5160 *vec_stmt = NULL;
5161 return true;
5164 if (slp)
5166 grouped_store = false;
5167 /* VEC_NUM is the number of vect stmts to be created for this
5168 group. */
5169 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5170 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5171 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5172 op = gimple_assign_rhs1 (first_stmt);
5174 else
5175 /* VEC_NUM is the number of vect stmts to be created for this
5176 group. */
5177 vec_num = group_size;
5179 else
5181 first_stmt = stmt;
5182 first_dr = dr;
5183 group_size = vec_num = 1;
5186 if (dump_enabled_p ())
5187 dump_printf_loc (MSG_NOTE, vect_location,
5188 "transform store. ncopies = %d\n", ncopies);
5190 dr_chain.create (group_size);
5191 oprnds.create (group_size);
5193 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5194 gcc_assert (alignment_support_scheme);
5195 /* Targets with store-lane instructions must not require explicit
5196 realignment. */
5197 gcc_assert (!store_lanes_p
5198 || alignment_support_scheme == dr_aligned
5199 || alignment_support_scheme == dr_unaligned_supported);
5201 if (negative)
5202 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5204 if (store_lanes_p)
5205 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5206 else
5207 aggr_type = vectype;
5209 /* In case the vectorization factor (VF) is bigger than the number
5210 of elements that we can fit in a vectype (nunits), we have to generate
5211 more than one vector stmt, i.e., we need to "unroll" the
5212 vector stmt by a factor VF/nunits. For more details see documentation in
5213 vect_get_vec_def_for_copy_stmt. */
5215 /* In case of interleaving (non-unit grouped access):
5217 S1: &base + 2 = x2
5218 S2: &base = x0
5219 S3: &base + 1 = x1
5220 S4: &base + 3 = x3
5222 We create vectorized stores starting from base address (the access of the
5223 first stmt in the chain (S2 in the above example), when the last store stmt
5224 of the chain (S4) is reached:
5226 VS1: &base = vx2
5227 VS2: &base + vec_size*1 = vx0
5228 VS3: &base + vec_size*2 = vx1
5229 VS4: &base + vec_size*3 = vx3
5231 Then permutation statements are generated:
5233 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5234 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5237 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5238 (the order of the data-refs in the output of vect_permute_store_chain
5239 corresponds to the order of scalar stmts in the interleaving chain - see
5240 the documentation of vect_permute_store_chain()).
5242 In case of both multiple types and interleaving, above vector stores and
5243 permutation stmts are created for every copy. The result vector stmts are
5244 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5245 STMT_VINFO_RELATED_STMT for the next copies.
5248 prev_stmt_info = NULL;
5249 for (j = 0; j < ncopies; j++)
5251 gimple new_stmt;
5253 if (j == 0)
5255 if (slp)
5257 /* Get vectorized arguments for SLP_NODE. */
5258 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5259 NULL, slp_node, -1);
5261 vec_oprnd = vec_oprnds[0];
5263 else
5265 /* For interleaved stores we collect vectorized defs for all the
5266 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5267 used as an input to vect_permute_store_chain(), and OPRNDS as
5268 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5270 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5271 OPRNDS are of size 1. */
5272 next_stmt = first_stmt;
5273 for (i = 0; i < group_size; i++)
5275 /* Since gaps are not supported for interleaved stores,
5276 GROUP_SIZE is the exact number of stmts in the chain.
5277 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5278 there is no interleaving, GROUP_SIZE is 1, and only one
5279 iteration of the loop will be executed. */
5280 gcc_assert (next_stmt
5281 && gimple_assign_single_p (next_stmt));
5282 op = gimple_assign_rhs1 (next_stmt);
5284 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5285 NULL);
5286 dr_chain.quick_push (vec_oprnd);
5287 oprnds.quick_push (vec_oprnd);
5288 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5292 /* We should have caught mismatched types earlier. */
5293 gcc_assert (useless_type_conversion_p (vectype,
5294 TREE_TYPE (vec_oprnd)));
5295 bool simd_lane_access_p
5296 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5297 if (simd_lane_access_p
5298 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5299 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5300 && integer_zerop (DR_OFFSET (first_dr))
5301 && integer_zerop (DR_INIT (first_dr))
5302 && alias_sets_conflict_p (get_alias_set (aggr_type),
5303 get_alias_set (DR_REF (first_dr))))
5305 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5306 dataref_offset = build_int_cst (reference_alias_ptr_type
5307 (DR_REF (first_dr)), 0);
5308 inv_p = false;
5310 else
5311 dataref_ptr
5312 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5313 simd_lane_access_p ? loop : NULL,
5314 offset, &dummy, gsi, &ptr_incr,
5315 simd_lane_access_p, &inv_p);
5316 gcc_assert (bb_vinfo || !inv_p);
5318 else
5320 /* For interleaved stores we created vectorized defs for all the
5321 defs stored in OPRNDS in the previous iteration (previous copy).
5322 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5323 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5324 next copy.
5325 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5326 OPRNDS are of size 1. */
5327 for (i = 0; i < group_size; i++)
5329 op = oprnds[i];
5330 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5331 &def, &dt);
5332 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5333 dr_chain[i] = vec_oprnd;
5334 oprnds[i] = vec_oprnd;
5336 if (dataref_offset)
5337 dataref_offset
5338 = int_const_binop (PLUS_EXPR, dataref_offset,
5339 TYPE_SIZE_UNIT (aggr_type));
5340 else
5341 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5342 TYPE_SIZE_UNIT (aggr_type));
5345 if (store_lanes_p)
5347 tree vec_array;
5349 /* Combine all the vectors into an array. */
5350 vec_array = create_vector_array (vectype, vec_num);
5351 for (i = 0; i < vec_num; i++)
5353 vec_oprnd = dr_chain[i];
5354 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5357 /* Emit:
5358 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5359 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5360 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5361 gimple_call_set_lhs (new_stmt, data_ref);
5362 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5364 else
5366 new_stmt = NULL;
5367 if (grouped_store)
5369 if (j == 0)
5370 result_chain.create (group_size);
5371 /* Permute. */
5372 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5373 &result_chain);
5376 next_stmt = first_stmt;
5377 for (i = 0; i < vec_num; i++)
5379 unsigned align, misalign;
5381 if (i > 0)
5382 /* Bump the vector pointer. */
5383 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5384 stmt, NULL_TREE);
5386 if (slp)
5387 vec_oprnd = vec_oprnds[i];
5388 else if (grouped_store)
5389 /* For grouped stores vectorized defs are interleaved in
5390 vect_permute_store_chain(). */
5391 vec_oprnd = result_chain[i];
5393 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5394 dataref_offset
5395 ? dataref_offset
5396 : build_int_cst (reference_alias_ptr_type
5397 (DR_REF (first_dr)), 0));
5398 align = TYPE_ALIGN_UNIT (vectype);
5399 if (aligned_access_p (first_dr))
5400 misalign = 0;
5401 else if (DR_MISALIGNMENT (first_dr) == -1)
5403 TREE_TYPE (data_ref)
5404 = build_aligned_type (TREE_TYPE (data_ref),
5405 TYPE_ALIGN (elem_type));
5406 align = TYPE_ALIGN_UNIT (elem_type);
5407 misalign = 0;
5409 else
5411 TREE_TYPE (data_ref)
5412 = build_aligned_type (TREE_TYPE (data_ref),
5413 TYPE_ALIGN (elem_type));
5414 misalign = DR_MISALIGNMENT (first_dr);
5416 if (dataref_offset == NULL_TREE)
5417 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5418 misalign);
5420 if (negative
5421 && dt != vect_constant_def
5422 && dt != vect_external_def)
5424 tree perm_mask = perm_mask_for_reverse (vectype);
5425 tree perm_dest
5426 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5427 vectype);
5428 tree new_temp = make_ssa_name (perm_dest, NULL);
5430 /* Generate the permute statement. */
5431 gimple perm_stmt
5432 = gimple_build_assign_with_ops (VEC_PERM_EXPR, new_temp,
5433 vec_oprnd, vec_oprnd,
5434 perm_mask);
5435 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5437 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5438 vec_oprnd = new_temp;
5441 /* Arguments are ready. Create the new vector stmt. */
5442 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5443 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5445 if (slp)
5446 continue;
5448 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5449 if (!next_stmt)
5450 break;
5453 if (!slp)
5455 if (j == 0)
5456 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5457 else
5458 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5459 prev_stmt_info = vinfo_for_stmt (new_stmt);
5463 dr_chain.release ();
5464 oprnds.release ();
5465 result_chain.release ();
5466 vec_oprnds.release ();
5468 return true;
5471 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5472 VECTOR_CST mask. No checks are made that the target platform supports the
5473 mask, so callers may wish to test can_vec_perm_p separately, or use
5474 vect_gen_perm_mask_checked. */
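 /* For example (this follows from the construction below): with NUNITS == 4
    and SEL == {1, 0, 3, 2} the result is the VECTOR_CST {1, 0, 3, 2} of the
    integer mask element type chosen via int_mode_for_mode, which pairwise
    swaps neighbouring elements when used in a VEC_PERM_EXPR.  */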
5476 tree
5477 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5479 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5480 int i, nunits;
5482 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5484 mask_elt_type = lang_hooks.types.type_for_mode
5485 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5486 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5488 mask_elts = XALLOCAVEC (tree, nunits);
5489 for (i = nunits - 1; i >= 0; i--)
5490 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5491 mask_vec = build_vector (mask_type, mask_elts);
5493 return mask_vec;
5496 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5497 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5499 tree
5500 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5502 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5503 return vect_gen_perm_mask_any (vectype, sel);
5506 /* Given vector variables X and Y that were generated for the scalar
5507 STMT, generate instructions to permute the vector elements of X and Y
5508 using permutation mask MASK_VEC, insert them at *GSI and return the
5509 permuted vector variable. */
5511 static tree
5512 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5513 gimple_stmt_iterator *gsi)
5515 tree vectype = TREE_TYPE (x);
5516 tree perm_dest, data_ref;
5517 gimple perm_stmt;
5519 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5520 data_ref = make_ssa_name (perm_dest, NULL);
5522 /* Generate the permute statement. */
5523 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5524 x, y, mask_vec);
5525 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5527 return data_ref;
5530 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5531 inserting them on the loop's preheader edge. Returns true if we
5532 were successful in doing so (and thus STMT can then be moved),
5533 otherwise returns false. */
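 /* Illustrative example (hypothetical names): if STMT is x_1 = *p_2 and
    p_2 = &a_3[0] is defined inside LOOP from loop-invariant operands only,
    that defining statement is moved to the preheader so STMT itself can be
    hoisted afterwards; a PHI definition or a deeper use web makes us give up
    and return false.  */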
5535 static bool
5536 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5538 ssa_op_iter i;
5539 tree op;
5540 bool any = false;
5542 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5544 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5545 if (!gimple_nop_p (def_stmt)
5546 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5548 /* Make sure we don't need to recurse. While we could do
5549 so in simple cases, when there are more complex use webs
5550 we don't have an easy way to preserve stmt order to fulfil
5551 dependencies within them. */
5552 tree op2;
5553 ssa_op_iter i2;
5554 if (gimple_code (def_stmt) == GIMPLE_PHI)
5555 return false;
5556 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5558 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5559 if (!gimple_nop_p (def_stmt2)
5560 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5561 return false;
5563 any = true;
5567 if (!any)
5568 return true;
5570 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5572 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5573 if (!gimple_nop_p (def_stmt)
5574 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5576 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5577 gsi_remove (&gsi, false);
5578 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5582 return true;
5585 /* vectorizable_load.
5587 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5588 can be vectorized.
5589 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5590 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5591 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5593 static bool
5594 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5595 slp_tree slp_node, slp_instance slp_node_instance)
5597 tree scalar_dest;
5598 tree vec_dest = NULL;
5599 tree data_ref = NULL;
5600 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5601 stmt_vec_info prev_stmt_info;
5602 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5603 struct loop *loop = NULL;
5604 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5605 bool nested_in_vect_loop = false;
5606 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5607 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5608 tree elem_type;
5609 tree new_temp;
5610 machine_mode mode;
5611 gimple new_stmt = NULL;
5612 tree dummy;
5613 enum dr_alignment_support alignment_support_scheme;
5614 tree dataref_ptr = NULL_TREE;
5615 tree dataref_offset = NULL_TREE;
5616 gimple ptr_incr = NULL;
5617 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5618 int ncopies;
5619 int i, j, group_size, group_gap;
5620 tree msq = NULL_TREE, lsq;
5621 tree offset = NULL_TREE;
5622 tree byte_offset = NULL_TREE;
5623 tree realignment_token = NULL_TREE;
5624 gphi *phi = NULL;
5625 vec<tree> dr_chain = vNULL;
5626 bool grouped_load = false;
5627 bool load_lanes_p = false;
5628 gimple first_stmt;
5629 bool inv_p;
5630 bool negative = false;
5631 bool compute_in_loop = false;
5632 struct loop *at_loop;
5633 int vec_num;
5634 bool slp = (slp_node != NULL);
5635 bool slp_perm = false;
5636 enum tree_code code;
5637 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5638 int vf;
5639 tree aggr_type;
5640 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5641 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5642 int gather_scale = 1;
5643 enum vect_def_type gather_dt = vect_unknown_def_type;
5645 if (loop_vinfo)
5647 loop = LOOP_VINFO_LOOP (loop_vinfo);
5648 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5649 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5651 else
5652 vf = 1;
5654 /* Multiple types in SLP are handled by creating the appropriate number of
5655 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5656 case of SLP. */
5657 if (slp || PURE_SLP_STMT (stmt_info))
5658 ncopies = 1;
5659 else
5660 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5662 gcc_assert (ncopies >= 1);
5664 /* FORNOW. This restriction should be relaxed. */
5665 if (nested_in_vect_loop && ncopies > 1)
5667 if (dump_enabled_p ())
5668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5669 "multiple types in nested loop.\n");
5670 return false;
5673 /* Invalidate assumptions made by dependence analysis when vectorization
5674 on the unrolled body effectively re-orders stmts. */
5675 if (ncopies > 1
5676 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5677 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5678 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5680 if (dump_enabled_p ())
5681 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5682 "cannot perform implicit CSE when unrolling "
5683 "with negative dependence distance\n");
5684 return false;
5687 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5688 return false;
5690 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5691 return false;
5693 /* Is vectorizable load? */
5694 if (!is_gimple_assign (stmt))
5695 return false;
5697 scalar_dest = gimple_assign_lhs (stmt);
5698 if (TREE_CODE (scalar_dest) != SSA_NAME)
5699 return false;
5701 code = gimple_assign_rhs_code (stmt);
5702 if (code != ARRAY_REF
5703 && code != BIT_FIELD_REF
5704 && code != INDIRECT_REF
5705 && code != COMPONENT_REF
5706 && code != IMAGPART_EXPR
5707 && code != REALPART_EXPR
5708 && code != MEM_REF
5709 && TREE_CODE_CLASS (code) != tcc_declaration)
5710 return false;
5712 if (!STMT_VINFO_DATA_REF (stmt_info))
5713 return false;
5715 elem_type = TREE_TYPE (vectype);
5716 mode = TYPE_MODE (vectype);
5718 /* FORNOW. In some cases can vectorize even if data-type not supported
5719 (e.g. - data copies). */
5720 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5722 if (dump_enabled_p ())
5723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5724 "Aligned load, but unsupported type.\n");
5725 return false;
5728 /* Check if the load is a part of an interleaving chain. */
5729 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5731 grouped_load = true;
5732 /* FORNOW */
5733 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5735 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5736 if (!slp && !PURE_SLP_STMT (stmt_info))
5738 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5739 if (vect_load_lanes_supported (vectype, group_size))
5740 load_lanes_p = true;
5741 else if (!vect_grouped_load_supported (vectype, group_size))
5742 return false;
5745 /* Invalidate assumptions made by dependence analysis when vectorization
5746 on the unrolled body effectively re-orders stmts. */
5747 if (!PURE_SLP_STMT (stmt_info)
5748 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5749 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5750 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5752 if (dump_enabled_p ())
5753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5754 "cannot perform implicit CSE when performing "
5755 "group loads with negative dependence distance\n");
5756 return false;
5761 if (STMT_VINFO_GATHER_P (stmt_info))
5763 gimple def_stmt;
5764 tree def;
5765 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5766 &gather_off, &gather_scale);
5767 gcc_assert (gather_decl);
5768 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5769 &def_stmt, &def, &gather_dt,
5770 &gather_off_vectype))
5772 if (dump_enabled_p ())
5773 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5774 "gather index use not simple.\n");
5775 return false;
5778 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5780 else
5782 negative = tree_int_cst_compare (nested_in_vect_loop
5783 ? STMT_VINFO_DR_STEP (stmt_info)
5784 : DR_STEP (dr),
5785 size_zero_node) < 0;
5786 if (negative && ncopies > 1)
5788 if (dump_enabled_p ())
5789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5790 "multiple types with negative step.\n");
5791 return false;
5794 if (negative)
5796 if (grouped_load)
5798 if (dump_enabled_p ())
5799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5800 "negative step for group load not supported"
5801 "\n");
5802 return false;
5804 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5805 if (alignment_support_scheme != dr_aligned
5806 && alignment_support_scheme != dr_unaligned_supported)
5808 if (dump_enabled_p ())
5809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5810 "negative step but alignment required.\n");
5811 return false;
5813 if (!perm_mask_for_reverse (vectype))
5815 if (dump_enabled_p ())
5816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5817 "negative step and reversing not supported."
5818 "\n");
5819 return false;
5824 if (!vec_stmt) /* transformation not required. */
5826 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5827 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
5828 return true;
5831 if (dump_enabled_p ())
5832 dump_printf_loc (MSG_NOTE, vect_location,
5833 "transform load. ncopies = %d\n", ncopies);
5835 /** Transform. **/
5837 ensure_base_align (stmt_info, dr);
5839 if (STMT_VINFO_GATHER_P (stmt_info))
5841 tree vec_oprnd0 = NULL_TREE, op;
5842 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5843 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5844 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
5845 edge pe = loop_preheader_edge (loop);
5846 gimple_seq seq;
5847 basic_block new_bb;
5848 enum { NARROW, NONE, WIDEN } modifier;
5849 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
5851 if (nunits == gather_off_nunits)
5852 modifier = NONE;
5853 else if (nunits == gather_off_nunits / 2)
5855 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5856 modifier = WIDEN;
5858 for (i = 0; i < gather_off_nunits; ++i)
5859 sel[i] = i | nunits;
5861 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
5863 else if (nunits == gather_off_nunits * 2)
5865 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5866 modifier = NARROW;
5868 for (i = 0; i < nunits; ++i)
5869 sel[i] = i < gather_off_nunits
5870 ? i : i + nunits - gather_off_nunits;
5872 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5873 ncopies *= 2;
5875 else
5876 gcc_unreachable ();
5878 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5879 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5880 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5881 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5882 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5883 scaletype = TREE_VALUE (arglist);
5884 gcc_checking_assert (types_compatible_p (srctype, rettype));
5886 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5888 ptr = fold_convert (ptrtype, gather_base);
5889 if (!is_gimple_min_invariant (ptr))
5891 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5892 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5893 gcc_assert (!new_bb);
5896 /* Currently we support only unconditional gather loads,
5897 so mask should be all ones. */
5898 if (TREE_CODE (masktype) == INTEGER_TYPE)
5899 mask = build_int_cst (masktype, -1);
5900 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5902 mask = build_int_cst (TREE_TYPE (masktype), -1);
5903 mask = build_vector_from_val (masktype, mask);
5904 mask = vect_init_vector (stmt, mask, masktype, NULL);
5906 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
5908 REAL_VALUE_TYPE r;
5909 long tmp[6];
5910 for (j = 0; j < 6; ++j)
5911 tmp[j] = -1;
5912 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5913 mask = build_real (TREE_TYPE (masktype), r);
5914 mask = build_vector_from_val (masktype, mask);
5915 mask = vect_init_vector (stmt, mask, masktype, NULL);
5917 else
5918 gcc_unreachable ();
5920 scale = build_int_cst (scaletype, gather_scale);
5922 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
5923 merge = build_int_cst (TREE_TYPE (rettype), 0);
5924 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
5926 REAL_VALUE_TYPE r;
5927 long tmp[6];
5928 for (j = 0; j < 6; ++j)
5929 tmp[j] = 0;
5930 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
5931 merge = build_real (TREE_TYPE (rettype), r);
5933 else
5934 gcc_unreachable ();
5935 merge = build_vector_from_val (rettype, merge);
5936 merge = vect_init_vector (stmt, merge, rettype, NULL);
5938 prev_stmt_info = NULL;
5939 for (j = 0; j < ncopies; ++j)
5941 if (modifier == WIDEN && (j & 1))
5942 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
5943 perm_mask, stmt, gsi);
5944 else if (j == 0)
5945 op = vec_oprnd0
5946 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
5947 else
5948 op = vec_oprnd0
5949 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
5951 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5953 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5954 == TYPE_VECTOR_SUBPARTS (idxtype));
5955 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
5956 var = make_ssa_name (var, NULL);
5957 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5958 new_stmt
5959 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op);
5960 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5961 op = var;
5964 new_stmt
5965 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
5967 if (!useless_type_conversion_p (vectype, rettype))
5969 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
5970 == TYPE_VECTOR_SUBPARTS (rettype));
5971 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
5972 op = make_ssa_name (var, new_stmt);
5973 gimple_call_set_lhs (new_stmt, op);
5974 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5975 var = make_ssa_name (vec_dest, NULL);
5976 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
5977 new_stmt
5978 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op);
5980 else
5982 var = make_ssa_name (vec_dest, new_stmt);
5983 gimple_call_set_lhs (new_stmt, var);
5986 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5988 if (modifier == NARROW)
5990 if ((j & 1) == 0)
5992 prev_res = var;
5993 continue;
5995 var = permute_vec_elements (prev_res, var,
5996 perm_mask, stmt, gsi);
5997 new_stmt = SSA_NAME_DEF_STMT (var);
6000 if (prev_stmt_info == NULL)
6001 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6002 else
6003 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6004 prev_stmt_info = vinfo_for_stmt (new_stmt);
6006 return true;
6008 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
6010 gimple_stmt_iterator incr_gsi;
6011 bool insert_after;
6012 gimple incr;
6013 tree offvar;
6014 tree ivstep;
6015 tree running_off;
6016 vec<constructor_elt, va_gc> *v = NULL;
6017 gimple_seq stmts = NULL;
6018 tree stride_base, stride_step, alias_off;
6020 gcc_assert (!nested_in_vect_loop);
6022 stride_base
6023 = fold_build_pointer_plus
6024 (unshare_expr (DR_BASE_ADDRESS (dr)),
6025 size_binop (PLUS_EXPR,
6026 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
6027 convert_to_ptrofftype (DR_INIT (dr))));
6028 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
6030 /* For a load with loop-invariant (but other than power-of-2)
6031 stride (i.e. not a grouped access) like so:
6033 for (i = 0; i < n; i += stride)
6034 ... = array[i];
6036 we generate a new induction variable and new accesses to
6037 form a new vector (or vectors, depending on ncopies):
6039 for (j = 0; ; j += VF*stride)
6040 tmp1 = array[j];
6041 tmp2 = array[j + stride];
6043 vectemp = {tmp1, tmp2, ...}
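   A concrete (purely illustrative) instance with nunits == 4 and
   ncopies == 1:

     for (j = 0; ; j += 4 * stride)
       {
         tmp0 = array[j];
         tmp1 = array[j + stride];
         tmp2 = array[j + 2 * stride];
         tmp3 = array[j + 3 * stride];
         vectemp = {tmp0, tmp1, tmp2, tmp3};
       }

   which is what the CONSTRUCTOR built in the per-element loop below
   amounts to.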
6046 ivstep = stride_step;
6047 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6048 build_int_cst (TREE_TYPE (ivstep), vf));
6050 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6052 create_iv (stride_base, ivstep, NULL,
6053 loop, &incr_gsi, insert_after,
6054 &offvar, NULL);
6055 incr = gsi_stmt (incr_gsi);
6056 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6058 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6059 if (stmts)
6060 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6062 prev_stmt_info = NULL;
6063 running_off = offvar;
6064 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
6065 for (j = 0; j < ncopies; j++)
6067 tree vec_inv;
6069 vec_alloc (v, nunits);
6070 for (i = 0; i < nunits; i++)
6072 tree newref, newoff;
6073 gimple incr;
6074 newref = build2 (MEM_REF, TREE_TYPE (vectype),
6075 running_off, alias_off);
6077 newref = force_gimple_operand_gsi (gsi, newref, true,
6078 NULL_TREE, true,
6079 GSI_SAME_STMT);
6080 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6081 newoff = copy_ssa_name (running_off, NULL);
6082 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
6083 running_off, stride_step);
6084 vect_finish_stmt_generation (stmt, incr, gsi);
6086 running_off = newoff;
6089 vec_inv = build_constructor (vectype, v);
6090 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6091 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6093 if (j == 0)
6094 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6095 else
6096 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6097 prev_stmt_info = vinfo_for_stmt (new_stmt);
6099 return true;
6102 if (grouped_load)
6104 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6105 if (slp
6106 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6107 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6108 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6110 /* Check if the chain of loads is already vectorized. */
6111 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6112 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6113 ??? But we can only do so if there is exactly one
6114 as we have no way to get at the rest. Leave the CSE
6115 opportunity alone.
6116 ??? With the group load eventually participating
6117 in multiple different permutations (having multiple
6118 slp nodes which refer to the same group) the CSE
6119 is even wrong code. See PR56270. */
6120 && !slp)
6122 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6123 return true;
6125 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6126 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6128 /* VEC_NUM is the number of vect stmts to be created for this group. */
6129 if (slp)
6131 grouped_load = false;
6132 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6133 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6134 slp_perm = true;
6135 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6137 else
6139 vec_num = group_size;
6140 group_gap = 0;
6143 else
6145 first_stmt = stmt;
6146 first_dr = dr;
6147 group_size = vec_num = 1;
6148 group_gap = 0;
6151 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6152 gcc_assert (alignment_support_scheme);
6153 /* Targets with load-lane instructions must not require explicit
6154 realignment. */
6155 gcc_assert (!load_lanes_p
6156 || alignment_support_scheme == dr_aligned
6157 || alignment_support_scheme == dr_unaligned_supported);
6159 /* In case the vectorization factor (VF) is bigger than the number
6160 of elements that we can fit in a vectype (nunits), we have to generate
6161 more than one vector stmt - i.e - we need to "unroll" the
6162 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6163 from one copy of the vector stmt to the next, in the field
6164 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6165 stages to find the correct vector defs to be used when vectorizing
6166 stmts that use the defs of the current stmt. The example below
6167 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6168 need to create 4 vectorized stmts):
6170 before vectorization:
6171 RELATED_STMT VEC_STMT
6172 S1: x = memref - -
6173 S2: z = x + 1 - -
6175 step 1: vectorize stmt S1:
6176 We first create the vector stmt VS1_0, and, as usual, record a
6177 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6178 Next, we create the vector stmt VS1_1, and record a pointer to
6179 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6180 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6181 stmts and pointers:
6182 RELATED_STMT VEC_STMT
6183 VS1_0: vx0 = memref0 VS1_1 -
6184 VS1_1: vx1 = memref1 VS1_2 -
6185 VS1_2: vx2 = memref2 VS1_3 -
6186 VS1_3: vx3 = memref3 - -
6187 S1: x = load - VS1_0
6188 S2: z = x + 1 - -
6190 See the documentation of vect_get_vec_def_for_stmt_copy for how the
6191 information we recorded in the RELATED_STMT field is used to vectorize
6192 stmt S2. */
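/* A minimal sketch (assuming only the accessors already used in this
   file) of how a later stage can walk from the scalar stmt S1 above to
   all of VS1_0 .. VS1_3 via the recorded chain:

     gimple vs = STMT_VINFO_VEC_STMT (vinfo_for_stmt (scalar_stmt));
     while (vs)
       {
         ... use vs ...;
         stmt_vec_info vsi = vinfo_for_stmt (vs);
         vs = vsi ? STMT_VINFO_RELATED_STMT (vsi) : NULL;
       }
*/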
6194 /* In case of interleaving (non-unit grouped access):
6196 S1: x2 = &base + 2
6197 S2: x0 = &base
6198 S3: x1 = &base + 1
6199 S4: x3 = &base + 3
6201 Vectorized loads are created in the order of memory accesses
6202 starting from the access of the first stmt of the chain:
6204 VS1: vx0 = &base
6205 VS2: vx1 = &base + vec_size*1
6206 VS3: vx3 = &base + vec_size*2
6207 VS4: vx4 = &base + vec_size*3
6209 Then permutation statements are generated:
6211 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6212 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6215 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6216 (the order of the data-refs in the output of vect_permute_load_chain
6217 corresponds to the order of scalar stmts in the interleaving chain - see
6218 the documentation of vect_permute_load_chain()).
6219 The generation of permutation stmts and recording them in
6220 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6222 In case of both multiple types and interleaving, the vector loads and
6223 permutation stmts above are created for every copy. The result vector
6224 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6225 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
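/* Concrete illustration (hypothetical, group_size == 2, nunits == 4):
   for the interleaved pair

       x0 = base[2*i];   x1 = base[2*i + 1];

   two contiguous vector loads vx0 = {base[0..3]} and vx1 = {base[4..7]}
   are emitted, followed by

       VEC_PERM_EXPR <vx0, vx1, { 0, 2, 4, 6 }>   -> the x0 values
       VEC_PERM_EXPR <vx0, vx1, { 1, 3, 5, 7 }>   -> the x1 values

   matching the general pattern described above.  */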
6227 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6228 on a target that supports unaligned accesses (dr_unaligned_supported)
6229 we generate the following code:
6230 p = initial_addr;
6231 indx = 0;
6232 loop {
6233 p = p + indx * vectype_size;
6234 vec_dest = *(p);
6235 indx = indx + 1;
6238 Otherwise, the data reference is potentially unaligned on a target that
6239 does not support unaligned accesses (dr_explicit_realign_optimized) -
6240 then generate the following code, in which the data in each iteration is
6241 obtained by two vector loads, one from the previous iteration, and one
6242 from the current iteration:
6243 p1 = initial_addr;
6244 msq_init = *(floor(p1))
6245 p2 = initial_addr + VS - 1;
6246 realignment_token = call target_builtin;
6247 indx = 0;
6248 loop {
6249 p2 = p2 + indx * vectype_size
6250 lsq = *(floor(p2))
6251 vec_dest = realign_load (msq, lsq, realignment_token)
6252 indx = indx + 1;
6253 msq = lsq;
6254 } */
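/* Worked example (illustrative): with 16-byte V4SF vectors and
   p1 == &a[1] (a[] itself 16-byte aligned), the scheme above performs
   two aligned loads, msq = *(floor(p1)) == {a[0],a[1],a[2],a[3]} and
   lsq = *(floor(p1 + 3)) == {a[4],a[5],a[6],a[7]}, and realign_load
   combines them into the desired misaligned value {a[1],a[2],a[3],a[4]}.  */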
6256 /* If the misalignment remains the same throughout the execution of the
6257 loop, we can create the init_addr and permutation mask at the loop
6258 preheader. Otherwise, it needs to be created inside the loop.
6259 This can only occur when vectorizing memory accesses in the inner-loop
6260 nested within an outer-loop that is being vectorized. */
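/* For instance (hypothetical numbers): an inner-loop access whose
   DR_STEP with respect to the vectorized outer loop is 4 bytes, under
   16-byte vectors, gives 4 % 16 != 0, so its misalignment changes from
   one outer-loop iteration to the next and the check below forces
   compute_in_loop.  */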
6262 if (nested_in_vect_loop
6263 && (TREE_INT_CST_LOW (DR_STEP (dr))
6264 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6266 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6267 compute_in_loop = true;
6270 if ((alignment_support_scheme == dr_explicit_realign_optimized
6271 || alignment_support_scheme == dr_explicit_realign)
6272 && !compute_in_loop)
6274 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6275 alignment_support_scheme, NULL_TREE,
6276 &at_loop);
6277 if (alignment_support_scheme == dr_explicit_realign_optimized)
6279 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
6280 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6281 size_one_node);
6284 else
6285 at_loop = loop;
6287 if (negative)
6288 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6290 if (load_lanes_p)
6291 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6292 else
6293 aggr_type = vectype;
6295 prev_stmt_info = NULL;
6296 for (j = 0; j < ncopies; j++)
6298 /* 1. Create the vector or array pointer update chain. */
6299 if (j == 0)
6301 bool simd_lane_access_p
6302 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6303 if (simd_lane_access_p
6304 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6305 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6306 && integer_zerop (DR_OFFSET (first_dr))
6307 && integer_zerop (DR_INIT (first_dr))
6308 && alias_sets_conflict_p (get_alias_set (aggr_type),
6309 get_alias_set (DR_REF (first_dr)))
6310 && (alignment_support_scheme == dr_aligned
6311 || alignment_support_scheme == dr_unaligned_supported))
6313 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6314 dataref_offset = build_int_cst (reference_alias_ptr_type
6315 (DR_REF (first_dr)), 0);
6316 inv_p = false;
6318 else
6319 dataref_ptr
6320 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6321 offset, &dummy, gsi, &ptr_incr,
6322 simd_lane_access_p, &inv_p,
6323 byte_offset);
6325 else if (dataref_offset)
6326 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6327 TYPE_SIZE_UNIT (aggr_type));
6328 else
6329 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6330 TYPE_SIZE_UNIT (aggr_type));
6332 if (grouped_load || slp_perm)
6333 dr_chain.create (vec_num);
6335 if (load_lanes_p)
6337 tree vec_array;
6339 vec_array = create_vector_array (vectype, vec_num);
6341 /* Emit:
6342 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6343 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6344 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6345 gimple_call_set_lhs (new_stmt, vec_array);
6346 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6348 /* Extract each vector into an SSA_NAME. */
6349 for (i = 0; i < vec_num; i++)
6351 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6352 vec_array, i);
6353 dr_chain.quick_push (new_temp);
6356 /* Record the mapping between SSA_NAMEs and statements. */
6357 vect_record_grouped_load_vectors (stmt, dr_chain);
6359 else
6361 for (i = 0; i < vec_num; i++)
6363 if (i > 0)
6364 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6365 stmt, NULL_TREE);
6367 /* 2. Create the vector-load in the loop. */
6368 switch (alignment_support_scheme)
6370 case dr_aligned:
6371 case dr_unaligned_supported:
6373 unsigned int align, misalign;
6375 data_ref
6376 = build2 (MEM_REF, vectype, dataref_ptr,
6377 dataref_offset
6378 ? dataref_offset
6379 : build_int_cst (reference_alias_ptr_type
6380 (DR_REF (first_dr)), 0));
6381 align = TYPE_ALIGN_UNIT (vectype);
6382 if (alignment_support_scheme == dr_aligned)
6384 gcc_assert (aligned_access_p (first_dr));
6385 misalign = 0;
6387 else if (DR_MISALIGNMENT (first_dr) == -1)
6389 TREE_TYPE (data_ref)
6390 = build_aligned_type (TREE_TYPE (data_ref),
6391 TYPE_ALIGN (elem_type));
6392 align = TYPE_ALIGN_UNIT (elem_type);
6393 misalign = 0;
6395 else
6397 TREE_TYPE (data_ref)
6398 = build_aligned_type (TREE_TYPE (data_ref),
6399 TYPE_ALIGN (elem_type));
6400 misalign = DR_MISALIGNMENT (first_dr);
6402 if (dataref_offset == NULL_TREE)
6403 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6404 align, misalign);
6405 break;
6407 case dr_explicit_realign:
6409 tree ptr, bump;
6410 tree vs_minus_1;
6412 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6414 if (compute_in_loop)
6415 msq = vect_setup_realignment (first_stmt, gsi,
6416 &realignment_token,
6417 dr_explicit_realign,
6418 dataref_ptr, NULL);
6420 ptr = copy_ssa_name (dataref_ptr, NULL);
6421 new_stmt = gimple_build_assign_with_ops
6422 (BIT_AND_EXPR, ptr, dataref_ptr,
6423 build_int_cst
6424 (TREE_TYPE (dataref_ptr),
6425 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6426 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6427 data_ref
6428 = build2 (MEM_REF, vectype, ptr,
6429 build_int_cst (reference_alias_ptr_type
6430 (DR_REF (first_dr)), 0));
6431 vec_dest = vect_create_destination_var (scalar_dest,
6432 vectype);
6433 new_stmt = gimple_build_assign (vec_dest, data_ref);
6434 new_temp = make_ssa_name (vec_dest, new_stmt);
6435 gimple_assign_set_lhs (new_stmt, new_temp);
6436 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6437 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6438 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6439 msq = new_temp;
6441 bump = size_binop (MULT_EXPR, vs_minus_1,
6442 TYPE_SIZE_UNIT (elem_type));
6443 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6444 new_stmt = gimple_build_assign_with_ops
6445 (BIT_AND_EXPR, NULL_TREE, ptr,
6446 build_int_cst
6447 (TREE_TYPE (ptr),
6448 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6449 ptr = copy_ssa_name (dataref_ptr, new_stmt);
6450 gimple_assign_set_lhs (new_stmt, ptr);
6451 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6452 data_ref
6453 = build2 (MEM_REF, vectype, ptr,
6454 build_int_cst (reference_alias_ptr_type
6455 (DR_REF (first_dr)), 0));
6456 break;
6458 case dr_explicit_realign_optimized:
6459 new_temp = copy_ssa_name (dataref_ptr, NULL);
6460 new_stmt = gimple_build_assign_with_ops
6461 (BIT_AND_EXPR, new_temp, dataref_ptr,
6462 build_int_cst
6463 (TREE_TYPE (dataref_ptr),
6464 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6465 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6466 data_ref
6467 = build2 (MEM_REF, vectype, new_temp,
6468 build_int_cst (reference_alias_ptr_type
6469 (DR_REF (first_dr)), 0));
6470 break;
6471 default:
6472 gcc_unreachable ();
6474 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6475 new_stmt = gimple_build_assign (vec_dest, data_ref);
6476 new_temp = make_ssa_name (vec_dest, new_stmt);
6477 gimple_assign_set_lhs (new_stmt, new_temp);
6478 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6480 /* 3. Handle explicit realignment if necessary/supported.
6481 Create in loop:
6482 vec_dest = realign_load (msq, lsq, realignment_token) */
6483 if (alignment_support_scheme == dr_explicit_realign_optimized
6484 || alignment_support_scheme == dr_explicit_realign)
6486 lsq = gimple_assign_lhs (new_stmt);
6487 if (!realignment_token)
6488 realignment_token = dataref_ptr;
6489 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6490 new_stmt
6491 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
6492 vec_dest, msq, lsq,
6493 realignment_token);
6494 new_temp = make_ssa_name (vec_dest, new_stmt);
6495 gimple_assign_set_lhs (new_stmt, new_temp);
6496 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6498 if (alignment_support_scheme == dr_explicit_realign_optimized)
6500 gcc_assert (phi);
6501 if (i == vec_num - 1 && j == ncopies - 1)
6502 add_phi_arg (phi, lsq,
6503 loop_latch_edge (containing_loop),
6504 UNKNOWN_LOCATION);
6505 msq = lsq;
6509 /* 4. Handle invariant-load. */
6510 if (inv_p && !bb_vinfo)
6512 gcc_assert (!grouped_load);
6513 /* If we have versioned for aliasing or the loop doesn't
6514 have any data dependencies that would preclude this,
6515 then we are sure this is a loop invariant load and
6516 thus we can insert it on the preheader edge. */
6517 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6518 && !nested_in_vect_loop
6519 && hoist_defs_of_uses (stmt, loop))
6521 if (dump_enabled_p ())
6523 dump_printf_loc (MSG_NOTE, vect_location,
6524 "hoisting out of the vectorized "
6525 "loop: ");
6526 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6527 dump_printf (MSG_NOTE, "\n");
6529 tree tem = copy_ssa_name (scalar_dest, NULL);
6530 gsi_insert_on_edge_immediate
6531 (loop_preheader_edge (loop),
6532 gimple_build_assign (tem,
6533 unshare_expr
6534 (gimple_assign_rhs1 (stmt))));
6535 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6537 else
6539 gimple_stmt_iterator gsi2 = *gsi;
6540 gsi_next (&gsi2);
6541 new_temp = vect_init_vector (stmt, scalar_dest,
6542 vectype, &gsi2);
6544 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6545 set_vinfo_for_stmt (new_stmt,
6546 new_stmt_vec_info (new_stmt, loop_vinfo,
6547 bb_vinfo));
6550 if (negative)
6552 tree perm_mask = perm_mask_for_reverse (vectype);
6553 new_temp = permute_vec_elements (new_temp, new_temp,
6554 perm_mask, stmt, gsi);
6555 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6558 /* Collect vector loads and later create their permutation in
6559 vect_transform_grouped_load (). */
6560 if (grouped_load || slp_perm)
6561 dr_chain.quick_push (new_temp);
6563 /* Store vector loads in the corresponding SLP_NODE. */
6564 if (slp && !slp_perm)
6565 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6567 /* Bump the vector pointer to account for a gap. */
6568 if (slp && group_gap != 0)
6570 tree bump = size_binop (MULT_EXPR,
6571 TYPE_SIZE_UNIT (elem_type),
6572 size_int (group_gap));
6573 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6574 stmt, bump);
6578 if (slp && !slp_perm)
6579 continue;
6581 if (slp_perm)
6583 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6584 slp_node_instance, false))
6586 dr_chain.release ();
6587 return false;
6590 else
6592 if (grouped_load)
6594 if (!load_lanes_p)
6595 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6596 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6598 else
6600 if (j == 0)
6601 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6602 else
6603 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6604 prev_stmt_info = vinfo_for_stmt (new_stmt);
6607 dr_chain.release ();
6610 return true;
6613 /* Function vect_is_simple_cond.
6615 Input:
6616 LOOP - the loop that is being vectorized.
6617 COND - Condition that is checked for simple use.
6619 Output:
6620 *COMP_VECTYPE - the vector type for the comparison.
6622 Returns whether a COND can be vectorized. Checks whether
6623 condition operands are supportable using vect_is_simple_use. */
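/* Example (illustrative): a condition such as  a_5 < b_7  where both
   operands are SSA names with vectorizable defs passes the checks below
   and *COMP_VECTYPE becomes the vectype of those defs, whereas an
   operand that is neither an SSA name nor an integer/real/fixed-point
   constant causes the function to return false.  */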
6625 static bool
6626 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6627 bb_vec_info bb_vinfo, tree *comp_vectype)
6629 tree lhs, rhs;
6630 tree def;
6631 enum vect_def_type dt;
6632 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6634 if (!COMPARISON_CLASS_P (cond))
6635 return false;
6637 lhs = TREE_OPERAND (cond, 0);
6638 rhs = TREE_OPERAND (cond, 1);
6640 if (TREE_CODE (lhs) == SSA_NAME)
6642 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6643 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6644 &lhs_def_stmt, &def, &dt, &vectype1))
6645 return false;
6647 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6648 && TREE_CODE (lhs) != FIXED_CST)
6649 return false;
6651 if (TREE_CODE (rhs) == SSA_NAME)
6653 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6654 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6655 &rhs_def_stmt, &def, &dt, &vectype2))
6656 return false;
6658 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6659 && TREE_CODE (rhs) != FIXED_CST)
6660 return false;
6662 *comp_vectype = vectype1 ? vectype1 : vectype2;
6663 return true;
6666 /* vectorizable_condition.
6668 Check if STMT is conditional modify expression that can be vectorized.
6669 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6670 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6671 at GSI.
6673 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
6674 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
6675 the else clause if it is 2).
6677 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
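/* Illustrative sketch (hypothetical SSA names): a scalar statement

       d_1 = a_2 < b_3 ? x_4 : y_5;

   is turned by the transformation below into

       vd = VEC_COND_EXPR <va < vb, vx, vy>;

   where the comparison is built in the signed integer vector type
   VEC_CMP_TYPE computed later in this function.  */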
6679 bool
6680 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6681 gimple *vec_stmt, tree reduc_def, int reduc_index,
6682 slp_tree slp_node)
6684 tree scalar_dest = NULL_TREE;
6685 tree vec_dest = NULL_TREE;
6686 tree cond_expr, then_clause, else_clause;
6687 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6688 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6689 tree comp_vectype = NULL_TREE;
6690 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6691 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6692 tree vec_compare, vec_cond_expr;
6693 tree new_temp;
6694 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6695 tree def;
6696 enum vect_def_type dt, dts[4];
6697 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6698 int ncopies;
6699 enum tree_code code;
6700 stmt_vec_info prev_stmt_info = NULL;
6701 int i, j;
6702 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6703 vec<tree> vec_oprnds0 = vNULL;
6704 vec<tree> vec_oprnds1 = vNULL;
6705 vec<tree> vec_oprnds2 = vNULL;
6706 vec<tree> vec_oprnds3 = vNULL;
6707 tree vec_cmp_type;
6709 if (slp_node || PURE_SLP_STMT (stmt_info))
6710 ncopies = 1;
6711 else
6712 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6714 gcc_assert (ncopies >= 1);
6715 if (reduc_index && ncopies > 1)
6716 return false; /* FORNOW */
6718 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6719 return false;
6721 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6722 return false;
6724 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6725 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6726 && reduc_def))
6727 return false;
6729 /* FORNOW: not yet supported. */
6730 if (STMT_VINFO_LIVE_P (stmt_info))
6732 if (dump_enabled_p ())
6733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6734 "value used after loop.\n");
6735 return false;
6738 /* Is vectorizable conditional operation? */
6739 if (!is_gimple_assign (stmt))
6740 return false;
6742 code = gimple_assign_rhs_code (stmt);
6744 if (code != COND_EXPR)
6745 return false;
6747 cond_expr = gimple_assign_rhs1 (stmt);
6748 then_clause = gimple_assign_rhs2 (stmt);
6749 else_clause = gimple_assign_rhs3 (stmt);
6751 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6752 &comp_vectype)
6753 || !comp_vectype)
6754 return false;
6756 if (TREE_CODE (then_clause) == SSA_NAME)
6758 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6759 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
6760 &then_def_stmt, &def, &dt))
6761 return false;
6763 else if (TREE_CODE (then_clause) != INTEGER_CST
6764 && TREE_CODE (then_clause) != REAL_CST
6765 && TREE_CODE (then_clause) != FIXED_CST)
6766 return false;
6768 if (TREE_CODE (else_clause) == SSA_NAME)
6770 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6771 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
6772 &else_def_stmt, &def, &dt))
6773 return false;
6775 else if (TREE_CODE (else_clause) != INTEGER_CST
6776 && TREE_CODE (else_clause) != REAL_CST
6777 && TREE_CODE (else_clause) != FIXED_CST)
6778 return false;
6780 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
6781 /* The result of a vector comparison should be of signed integer type. */
6782 tree cmp_type = build_nonstandard_integer_type (prec, 0);
6783 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
6784 if (vec_cmp_type == NULL_TREE)
6785 return false;
6787 if (!vec_stmt)
6789 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6790 return expand_vec_cond_expr_p (vectype, comp_vectype);
6793 /* Transform. */
6795 if (!slp_node)
6797 vec_oprnds0.create (1);
6798 vec_oprnds1.create (1);
6799 vec_oprnds2.create (1);
6800 vec_oprnds3.create (1);
6803 /* Handle def. */
6804 scalar_dest = gimple_assign_lhs (stmt);
6805 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6807 /* Handle cond expr. */
6808 for (j = 0; j < ncopies; j++)
6810 gassign *new_stmt = NULL;
6811 if (j == 0)
6813 if (slp_node)
6815 auto_vec<tree, 4> ops;
6816 auto_vec<vec<tree>, 4> vec_defs;
6818 ops.safe_push (TREE_OPERAND (cond_expr, 0));
6819 ops.safe_push (TREE_OPERAND (cond_expr, 1));
6820 ops.safe_push (then_clause);
6821 ops.safe_push (else_clause);
6822 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
6823 vec_oprnds3 = vec_defs.pop ();
6824 vec_oprnds2 = vec_defs.pop ();
6825 vec_oprnds1 = vec_defs.pop ();
6826 vec_oprnds0 = vec_defs.pop ();
6828 ops.release ();
6829 vec_defs.release ();
6831 else
6833 gimple gtemp;
6834 vec_cond_lhs =
6835 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
6836 stmt, NULL);
6837 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6838 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
6840 vec_cond_rhs =
6841 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
6842 stmt, NULL);
6843 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6844 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
6845 if (reduc_index == 1)
6846 vec_then_clause = reduc_def;
6847 else
6849 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
6850 stmt, NULL);
6851 vect_is_simple_use (then_clause, stmt, loop_vinfo,
6852 NULL, &gtemp, &def, &dts[2]);
6854 if (reduc_index == 2)
6855 vec_else_clause = reduc_def;
6856 else
6858 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
6859 stmt, NULL);
6860 vect_is_simple_use (else_clause, stmt, loop_vinfo,
6861 NULL, &gtemp, &def, &dts[3]);
6865 else
6867 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
6868 vec_oprnds0.pop ());
6869 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
6870 vec_oprnds1.pop ());
6871 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
6872 vec_oprnds2.pop ());
6873 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
6874 vec_oprnds3.pop ());
6877 if (!slp_node)
6879 vec_oprnds0.quick_push (vec_cond_lhs);
6880 vec_oprnds1.quick_push (vec_cond_rhs);
6881 vec_oprnds2.quick_push (vec_then_clause);
6882 vec_oprnds3.quick_push (vec_else_clause);
6885 /* Arguments are ready. Create the new vector stmt. */
6886 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
6888 vec_cond_rhs = vec_oprnds1[i];
6889 vec_then_clause = vec_oprnds2[i];
6890 vec_else_clause = vec_oprnds3[i];
6892 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6893 vec_cond_lhs, vec_cond_rhs);
6894 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6895 vec_compare, vec_then_clause, vec_else_clause);
6897 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6898 new_temp = make_ssa_name (vec_dest, new_stmt);
6899 gimple_assign_set_lhs (new_stmt, new_temp);
6900 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6901 if (slp_node)
6902 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6905 if (slp_node)
6906 continue;
6908 if (j == 0)
6909 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6910 else
6911 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6913 prev_stmt_info = vinfo_for_stmt (new_stmt);
6916 vec_oprnds0.release ();
6917 vec_oprnds1.release ();
6918 vec_oprnds2.release ();
6919 vec_oprnds3.release ();
6921 return true;
6925 /* Make sure the statement is vectorizable. */
6927 bool
6928 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
6930 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6931 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6932 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
6933 bool ok;
6934 tree scalar_type, vectype;
6935 gimple pattern_stmt;
6936 gimple_seq pattern_def_seq;
6938 if (dump_enabled_p ())
6940 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
6941 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6942 dump_printf (MSG_NOTE, "\n");
6945 if (gimple_has_volatile_ops (stmt))
6947 if (dump_enabled_p ())
6948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6949 "not vectorized: stmt has volatile operands\n");
6951 return false;
6954 /* Skip stmts that do not need to be vectorized. In loops this is expected
6955 to include:
6956 - the COND_EXPR which is the loop exit condition
6957 - any LABEL_EXPRs in the loop
6958 - computations that are used only for array indexing or loop control.
6959 In basic blocks we only analyze statements that are a part of some SLP
6960 instance, therefore, all the statements are relevant.
6962 A pattern statement needs to be analyzed instead of the original statement
6963 if the original statement is not relevant. Otherwise, we analyze both
6964 statements. In basic blocks we are called from some SLP instance
6965 traversal; don't analyze the pattern stmts there instead, since they
6966 will already be part of an SLP instance. */
6968 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
6969 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6970 && !STMT_VINFO_LIVE_P (stmt_info))
6972 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6973 && pattern_stmt
6974 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6975 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6977 /* Analyze PATTERN_STMT instead of the original stmt. */
6978 stmt = pattern_stmt;
6979 stmt_info = vinfo_for_stmt (pattern_stmt);
6980 if (dump_enabled_p ())
6982 dump_printf_loc (MSG_NOTE, vect_location,
6983 "==> examining pattern statement: ");
6984 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6985 dump_printf (MSG_NOTE, "\n");
6988 else
6990 if (dump_enabled_p ())
6991 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
6993 return true;
6996 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6997 && node == NULL
6998 && pattern_stmt
6999 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7000 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7002 /* Analyze PATTERN_STMT too. */
7003 if (dump_enabled_p ())
7005 dump_printf_loc (MSG_NOTE, vect_location,
7006 "==> examining pattern statement: ");
7007 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7008 dump_printf (MSG_NOTE, "\n");
7011 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7012 return false;
7015 if (is_pattern_stmt_p (stmt_info)
7016 && node == NULL
7017 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7019 gimple_stmt_iterator si;
7021 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7023 gimple pattern_def_stmt = gsi_stmt (si);
7024 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7025 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7027 /* Analyze def stmt of STMT if it's a pattern stmt. */
7028 if (dump_enabled_p ())
7030 dump_printf_loc (MSG_NOTE, vect_location,
7031 "==> examining pattern def statement: ");
7032 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7033 dump_printf (MSG_NOTE, "\n");
7036 if (!vect_analyze_stmt (pattern_def_stmt,
7037 need_to_vectorize, node))
7038 return false;
7043 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7045 case vect_internal_def:
7046 break;
7048 case vect_reduction_def:
7049 case vect_nested_cycle:
7050 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
7051 || relevance == vect_used_in_outer_by_reduction
7052 || relevance == vect_unused_in_scope));
7053 break;
7055 case vect_induction_def:
7056 case vect_constant_def:
7057 case vect_external_def:
7058 case vect_unknown_def_type:
7059 default:
7060 gcc_unreachable ();
7063 if (bb_vinfo)
7065 gcc_assert (PURE_SLP_STMT (stmt_info));
7067 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7068 if (dump_enabled_p ())
7070 dump_printf_loc (MSG_NOTE, vect_location,
7071 "get vectype for scalar type: ");
7072 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7073 dump_printf (MSG_NOTE, "\n");
7076 vectype = get_vectype_for_scalar_type (scalar_type);
7077 if (!vectype)
7079 if (dump_enabled_p ())
7081 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7082 "not SLPed: unsupported data-type ");
7083 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7084 scalar_type);
7085 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7087 return false;
7090 if (dump_enabled_p ())
7092 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7093 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7094 dump_printf (MSG_NOTE, "\n");
7097 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7100 if (STMT_VINFO_RELEVANT_P (stmt_info))
7102 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7103 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7104 || (is_gimple_call (stmt)
7105 && gimple_call_lhs (stmt) == NULL_TREE));
7106 *need_to_vectorize = true;
7109 ok = true;
7110 if (!bb_vinfo
7111 && (STMT_VINFO_RELEVANT_P (stmt_info)
7112 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7113 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7114 || vectorizable_conversion (stmt, NULL, NULL, NULL)
7115 || vectorizable_shift (stmt, NULL, NULL, NULL)
7116 || vectorizable_operation (stmt, NULL, NULL, NULL)
7117 || vectorizable_assignment (stmt, NULL, NULL, NULL)
7118 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
7119 || vectorizable_call (stmt, NULL, NULL, NULL)
7120 || vectorizable_store (stmt, NULL, NULL, NULL)
7121 || vectorizable_reduction (stmt, NULL, NULL, NULL)
7122 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
7123 else
7125 if (bb_vinfo)
7126 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7127 || vectorizable_conversion (stmt, NULL, NULL, node)
7128 || vectorizable_shift (stmt, NULL, NULL, node)
7129 || vectorizable_operation (stmt, NULL, NULL, node)
7130 || vectorizable_assignment (stmt, NULL, NULL, node)
7131 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7132 || vectorizable_call (stmt, NULL, NULL, node)
7133 || vectorizable_store (stmt, NULL, NULL, node)
7134 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7137 if (!ok)
7139 if (dump_enabled_p ())
7141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7142 "not vectorized: relevant stmt not ");
7143 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7144 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7145 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7148 return false;
7151 if (bb_vinfo)
7152 return true;
7154 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7155 need extra handling, except for vectorizable reductions. */
7156 if (STMT_VINFO_LIVE_P (stmt_info)
7157 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7158 ok = vectorizable_live_operation (stmt, NULL, NULL);
7160 if (!ok)
7162 if (dump_enabled_p ())
7164 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7165 "not vectorized: live stmt not ");
7166 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7167 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7168 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7171 return false;
7174 return true;
7178 /* Function vect_transform_stmt.
7180 Create a vectorized stmt to replace STMT, and insert it at GSI. */
7182 bool
7183 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7184 bool *grouped_store, slp_tree slp_node,
7185 slp_instance slp_node_instance)
7187 bool is_store = false;
7188 gimple vec_stmt = NULL;
7189 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7190 bool done;
7192 switch (STMT_VINFO_TYPE (stmt_info))
7194 case type_demotion_vec_info_type:
7195 case type_promotion_vec_info_type:
7196 case type_conversion_vec_info_type:
7197 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7198 gcc_assert (done);
7199 break;
7201 case induc_vec_info_type:
7202 gcc_assert (!slp_node);
7203 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7204 gcc_assert (done);
7205 break;
7207 case shift_vec_info_type:
7208 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7209 gcc_assert (done);
7210 break;
7212 case op_vec_info_type:
7213 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7214 gcc_assert (done);
7215 break;
7217 case assignment_vec_info_type:
7218 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7219 gcc_assert (done);
7220 break;
7222 case load_vec_info_type:
7223 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7224 slp_node_instance);
7225 gcc_assert (done);
7226 break;
7228 case store_vec_info_type:
7229 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7230 gcc_assert (done);
7231 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7233 /* In case of interleaving, the whole chain is vectorized when the
7234 last store in the chain is reached. Store stmts before the last
7235 one are skipped, and their vec_stmt_info shouldn't be freed
7236 meanwhile. */
7237 *grouped_store = true;
7238 if (STMT_VINFO_VEC_STMT (stmt_info))
7239 is_store = true;
7241 else
7242 is_store = true;
7243 break;
7245 case condition_vec_info_type:
7246 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7247 gcc_assert (done);
7248 break;
7250 case call_vec_info_type:
7251 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7252 stmt = gsi_stmt (*gsi);
7253 if (is_gimple_call (stmt)
7254 && gimple_call_internal_p (stmt)
7255 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7256 is_store = true;
7257 break;
7259 case call_simd_clone_vec_info_type:
7260 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7261 stmt = gsi_stmt (*gsi);
7262 break;
7264 case reduc_vec_info_type:
7265 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7266 gcc_assert (done);
7267 break;
7269 default:
7270 if (!STMT_VINFO_LIVE_P (stmt_info))
7272 if (dump_enabled_p ())
7273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7274 "stmt not supported.\n");
7275 gcc_unreachable ();
7279 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7280 is being vectorized, but outside the immediately enclosing loop. */
7281 if (vec_stmt
7282 && STMT_VINFO_LOOP_VINFO (stmt_info)
7283 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7284 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7285 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7286 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7287 || STMT_VINFO_RELEVANT (stmt_info) ==
7288 vect_used_in_outer_by_reduction))
7290 struct loop *innerloop = LOOP_VINFO_LOOP (
7291 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7292 imm_use_iterator imm_iter;
7293 use_operand_p use_p;
7294 tree scalar_dest;
7295 gimple exit_phi;
7297 if (dump_enabled_p ())
7298 dump_printf_loc (MSG_NOTE, vect_location,
7299 "Record the vdef for outer-loop vectorization.\n");
7301 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7302 (to be used when vectorizing outer-loop stmts that use the DEF of
7303 STMT). */
7304 if (gimple_code (stmt) == GIMPLE_PHI)
7305 scalar_dest = PHI_RESULT (stmt);
7306 else
7307 scalar_dest = gimple_assign_lhs (stmt);
7309 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7311 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7313 exit_phi = USE_STMT (use_p);
7314 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7319 /* Handle stmts whose DEF is used outside the loop-nest that is
7320 being vectorized. */
7321 if (STMT_VINFO_LIVE_P (stmt_info)
7322 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7324 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7325 gcc_assert (done);
7328 if (vec_stmt)
7329 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7331 return is_store;
7335 /* Remove a group of stores (for SLP or interleaving), free their
7336 stmt_vec_info. */
7338 void
7339 vect_remove_stores (gimple first_stmt)
7341 gimple next = first_stmt;
7342 gimple tmp;
7343 gimple_stmt_iterator next_si;
7345 while (next)
7347 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7349 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7350 if (is_pattern_stmt_p (stmt_info))
7351 next = STMT_VINFO_RELATED_STMT (stmt_info);
7352 /* Free the attached stmt_vec_info and remove the stmt. */
7353 next_si = gsi_for_stmt (next);
7354 unlink_stmt_vdef (next);
7355 gsi_remove (&next_si, true);
7356 release_defs (next);
7357 free_stmt_vec_info (next);
7358 next = tmp;
7363 /* Function new_stmt_vec_info.
7365 Create and initialize a new stmt_vec_info struct for STMT. */
7367 stmt_vec_info
7368 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7369 bb_vec_info bb_vinfo)
7371 stmt_vec_info res;
7372 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7374 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7375 STMT_VINFO_STMT (res) = stmt;
7376 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7377 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7378 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7379 STMT_VINFO_LIVE_P (res) = false;
7380 STMT_VINFO_VECTYPE (res) = NULL;
7381 STMT_VINFO_VEC_STMT (res) = NULL;
7382 STMT_VINFO_VECTORIZABLE (res) = true;
7383 STMT_VINFO_IN_PATTERN_P (res) = false;
7384 STMT_VINFO_RELATED_STMT (res) = NULL;
7385 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7386 STMT_VINFO_DATA_REF (res) = NULL;
7388 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7389 STMT_VINFO_DR_OFFSET (res) = NULL;
7390 STMT_VINFO_DR_INIT (res) = NULL;
7391 STMT_VINFO_DR_STEP (res) = NULL;
7392 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7394 if (gimple_code (stmt) == GIMPLE_PHI
7395 && is_loop_header_bb_p (gimple_bb (stmt)))
7396 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7397 else
7398 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7400 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7401 STMT_SLP_TYPE (res) = loop_vect;
7402 GROUP_FIRST_ELEMENT (res) = NULL;
7403 GROUP_NEXT_ELEMENT (res) = NULL;
7404 GROUP_SIZE (res) = 0;
7405 GROUP_STORE_COUNT (res) = 0;
7406 GROUP_GAP (res) = 0;
7407 GROUP_SAME_DR_STMT (res) = NULL;
7409 return res;
7413 /* Create a hash table for stmt_vec_info. */
7415 void
7416 init_stmt_vec_info_vec (void)
7418 gcc_assert (!stmt_vec_info_vec.exists ());
7419 stmt_vec_info_vec.create (50);
7423 /* Free the vector of stmt_vec_info structs. */
7425 void
7426 free_stmt_vec_info_vec (void)
7428 unsigned int i;
7429 vec_void_p info;
7430 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7431 if (info != NULL)
7432 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7433 gcc_assert (stmt_vec_info_vec.exists ());
7434 stmt_vec_info_vec.release ();
7438 /* Free stmt vectorization related info. */
7440 void
7441 free_stmt_vec_info (gimple stmt)
7443 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7445 if (!stmt_info)
7446 return;
7448 /* Check if this statement has a related "pattern stmt"
7449 (introduced by the vectorizer during the pattern recognition
7450 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7451 too. */
7452 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7454 stmt_vec_info patt_info
7455 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7456 if (patt_info)
7458 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7459 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7460 gimple_set_bb (patt_stmt, NULL);
7461 tree lhs = gimple_get_lhs (patt_stmt);
7462 if (TREE_CODE (lhs) == SSA_NAME)
7463 release_ssa_name (lhs);
7464 if (seq)
7466 gimple_stmt_iterator si;
7467 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7469 gimple seq_stmt = gsi_stmt (si);
7470 gimple_set_bb (seq_stmt, NULL);
7471 lhs = gimple_get_lhs (seq_stmt);
7472 if (TREE_CODE (lhs) == SSA_NAME)
7473 release_ssa_name (lhs);
7474 free_stmt_vec_info (seq_stmt);
7477 free_stmt_vec_info (patt_stmt);
7481 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7482 set_vinfo_for_stmt (stmt, NULL);
7483 free (stmt_info);
7487 /* Function get_vectype_for_scalar_type_and_size.
7489 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7490 by the target. */
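/* For example (illustrative): on a target with 16-byte vectors,
   SCALAR_TYPE == int (4 bytes) and SIZE == 16 give
   nunits = 16 / 4 = 4 and hence a V4SI vector type; with SIZE == 0 the
   mode comes from targetm.vectorize.preferred_simd_mode instead.  */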
7492 static tree
7493 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7495 machine_mode inner_mode = TYPE_MODE (scalar_type);
7496 machine_mode simd_mode;
7497 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7498 int nunits;
7499 tree vectype;
7501 if (nbytes == 0)
7502 return NULL_TREE;
7504 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7505 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7506 return NULL_TREE;
7508 /* For vector types of elements whose mode precision doesn't
7509 match their type's precision we use an element type of mode
7510 precision. The vectorization routines will have to make sure
7511 they support the proper result truncation/extension.
7512 We also make sure to build vector types with INTEGER_TYPE
7513 component type only. */
7514 if (INTEGRAL_TYPE_P (scalar_type)
7515 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7516 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7517 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7518 TYPE_UNSIGNED (scalar_type));
7520 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7521 When the component mode passes the above test simply use a type
7522 corresponding to that mode. The theory is that any use that
7523 would cause problems with this will disable vectorization anyway. */
7524 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7525 && !INTEGRAL_TYPE_P (scalar_type))
7526 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7528 /* We can't build a vector type of elements with alignment bigger than
7529 their size. */
7530 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7531 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7532 TYPE_UNSIGNED (scalar_type));
7534 /* If we fell back to using the mode, fail if there was
7535 no scalar type for it. */
7536 if (scalar_type == NULL_TREE)
7537 return NULL_TREE;
7539 /* If no size was supplied use the mode the target prefers. Otherwise
7540 look up a vector mode of the specified size. */
7541 if (size == 0)
7542 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7543 else
7544 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7545 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7546 if (nunits <= 1)
7547 return NULL_TREE;
7549 vectype = build_vector_type (scalar_type, nunits);
7551 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7552 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7553 return NULL_TREE;
7555 return vectype;
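/* Editorial sketch (not part of GCC): how the SIZE argument above might be
   exercised.  With SIZE == 0 the target's preferred SIMD mode is used;
   otherwise a vector mode of SIZE bytes is looked up and the element count
   is SIZE divided by the element size.  The helper below is hypothetical.  */
#if 0
static tree
example_int_vectype_of_size (unsigned size_in_bytes)
{
  /* For a 4-byte int and SIZE_IN_BYTES == 16 this requests a four-element
     integer vector, or returns NULL_TREE if the target provides no such
     vector mode.  */
  return get_vectype_for_scalar_type_and_size (integer_type_node,
					       size_in_bytes);
}
#endif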
7558 unsigned int current_vector_size;
7560 /* Function get_vectype_for_scalar_type.
7562 Returns the vector type corresponding to SCALAR_TYPE as supported
7563 by the target. */
7565 tree
7566 get_vectype_for_scalar_type (tree scalar_type)
7568 tree vectype;
7569 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7570 current_vector_size);
7571 if (vectype
7572 && current_vector_size == 0)
7573 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7574 return vectype;
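/* Editorial sketch (not part of GCC): the first successful call with
   current_vector_size == 0 latches the vector size that subsequent calls
   will use, so all vector types chosen for one vectorization attempt have
   the same size.  Hypothetical illustration only.  */
#if 0
static void
example_latch_vector_size (void)
{
  current_vector_size = 0;
  tree vt = get_vectype_for_scalar_type (integer_type_node);
  /* If VT is non-NULL, current_vector_size now equals
     GET_MODE_SIZE (TYPE_MODE (vt)).  */
  (void) vt;
}
#endif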
7577 /* Function get_same_sized_vectype
7579 Returns a vector type corresponding to SCALAR_TYPE of size
7580 VECTOR_TYPE if supported by the target. */
7582 tree
7583 get_same_sized_vectype (tree scalar_type, tree vector_type)
7585 return get_vectype_for_scalar_type_and_size
7586 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
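/* Editorial sketch (not part of GCC): get_same_sized_vectype is typically
   used to pick a vector type for the other operand of a mixed-size
   operation while keeping the overall vector size fixed.  Hypothetical
   illustration only.  */
#if 0
static tree
example_matching_vectype (tree other_scalar_type, tree existing_vectype)
{
  /* Given a four-element int vector, asking for a short element type
     would yield an eight-element short vector of the same byte size,
     if the target supports one.  */
  return get_same_sized_vectype (other_scalar_type, existing_vectype);
}
#endif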
7589 /* Function vect_is_simple_use.
7591 Input:
7592 LOOP_VINFO - the vect info of the loop that is being vectorized.
7593 BB_VINFO - the vect info of the basic block that is being vectorized.
7594 OPERAND - operand of STMT in the loop or bb.
7595 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7597 Returns whether a stmt with OPERAND can be vectorized.
7598 For loops, supportable operands are constants, loop invariants, and operands
7599 that are defined by the current iteration of the loop. Unsupportable
7600 operands are those that are defined by a previous iteration of the loop (as
7601 is the case in reduction/induction computations).
7602 For basic blocks, supportable operands are constants and bb invariants.
7603 For now, operands defined outside the basic block are not supported. */
7605 bool
7606 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7607 bb_vec_info bb_vinfo, gimple *def_stmt,
7608 tree *def, enum vect_def_type *dt)
7610 basic_block bb;
7611 stmt_vec_info stmt_vinfo;
7612 struct loop *loop = NULL;
7614 if (loop_vinfo)
7615 loop = LOOP_VINFO_LOOP (loop_vinfo);
7617 *def_stmt = NULL;
7618 *def = NULL_TREE;
7620 if (dump_enabled_p ())
7622 dump_printf_loc (MSG_NOTE, vect_location,
7623 "vect_is_simple_use: operand ");
7624 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7625 dump_printf (MSG_NOTE, "\n");
7628 if (CONSTANT_CLASS_P (operand))
7630 *dt = vect_constant_def;
7631 return true;
7634 if (is_gimple_min_invariant (operand))
7636 *def = operand;
7637 *dt = vect_external_def;
7638 return true;
7641 if (TREE_CODE (operand) == PAREN_EXPR)
7643 if (dump_enabled_p ())
7644 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7645 operand = TREE_OPERAND (operand, 0);
7648 if (TREE_CODE (operand) != SSA_NAME)
7650 if (dump_enabled_p ())
7651 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7652 "not ssa-name.\n");
7653 return false;
7656 *def_stmt = SSA_NAME_DEF_STMT (operand);
7657 if (*def_stmt == NULL)
7659 if (dump_enabled_p ())
7660 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7661 "no def_stmt.\n");
7662 return false;
7665 if (dump_enabled_p ())
7667 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7668 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7669 dump_printf (MSG_NOTE, "\n");
7672   /* An empty stmt is expected only in the case of a function argument
7673      (otherwise we expect a PHI node or a GIMPLE_ASSIGN).  */
7674 if (gimple_nop_p (*def_stmt))
7676 *def = operand;
7677 *dt = vect_external_def;
7678 return true;
7681 bb = gimple_bb (*def_stmt);
7683 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7684 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7685 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7686 *dt = vect_external_def;
7687 else
7689 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7690 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7693 if (*dt == vect_unknown_def_type
7694 || (stmt
7695 && *dt == vect_double_reduction_def
7696 && gimple_code (stmt) != GIMPLE_PHI))
7698 if (dump_enabled_p ())
7699 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7700 "Unsupported pattern.\n");
7701 return false;
7704 if (dump_enabled_p ())
7705 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
7707 switch (gimple_code (*def_stmt))
7709 case GIMPLE_PHI:
7710 *def = gimple_phi_result (*def_stmt);
7711 break;
7713 case GIMPLE_ASSIGN:
7714 *def = gimple_assign_lhs (*def_stmt);
7715 break;
7717 case GIMPLE_CALL:
7718 *def = gimple_call_lhs (*def_stmt);
7719 if (*def != NULL)
7720 break;
7721 /* FALLTHRU */
7722 default:
7723 if (dump_enabled_p ())
7724 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7725 "unsupported defining stmt:\n");
7726 return false;
7729 return true;
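/* Editorial sketch (not part of GCC): a typical caller checks every operand
   of a candidate stmt with vect_is_simple_use and rejects the stmt if any
   operand's definition cannot be classified.  The helper below is
   hypothetical.  */
#if 0
static bool
example_operand_ok_p (tree op, gimple stmt, loop_vec_info loop_vinfo,
		      bb_vec_info bb_vinfo)
{
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;
  if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo,
			   &def_stmt, &def, &dt))
    return false;
  /* DT now distinguishes constants, external (invariant) defs and defs
     computed inside the loop or basic block being vectorized.  */
  return true;
}
#endif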
7732 /* Function vect_is_simple_use_1.
7734    Same as vect_is_simple_use but also determines the vector operand
7735    type of OPERAND and stores it to *VECTYPE.  If the definition of
7736    OPERAND is vect_uninitialized_def, vect_constant_def or
7737    vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
7738    is responsible for computing the best suited vector type for the
7739    scalar operand. */
7741 bool
7742 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7743 bb_vec_info bb_vinfo, gimple *def_stmt,
7744 tree *def, enum vect_def_type *dt, tree *vectype)
7746 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7747 def, dt))
7748 return false;
7750 /* Now get a vector type if the def is internal, otherwise supply
7751 NULL_TREE and leave it up to the caller to figure out a proper
7752 type for the use stmt. */
7753 if (*dt == vect_internal_def
7754 || *dt == vect_induction_def
7755 || *dt == vect_reduction_def
7756 || *dt == vect_double_reduction_def
7757 || *dt == vect_nested_cycle)
7759 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
7761 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7762 && !STMT_VINFO_RELEVANT (stmt_info)
7763 && !STMT_VINFO_LIVE_P (stmt_info))
7764 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7766 *vectype = STMT_VINFO_VECTYPE (stmt_info);
7767 gcc_assert (*vectype != NULL_TREE);
7769 else if (*dt == vect_uninitialized_def
7770 || *dt == vect_constant_def
7771 || *dt == vect_external_def)
7772 *vectype = NULL_TREE;
7773 else
7774 gcc_unreachable ();
7776 return true;
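/* Editorial sketch (not part of GCC): the _1 variant above additionally
   hands back the vector type of an internal definition, which callers use,
   e.g., to verify that both operands of a binary operation agree on the
   vector type.  Hypothetical helper.  */
#if 0
static bool
example_operand_vectype (tree op, gimple stmt, loop_vec_info loop_vinfo,
			 bb_vec_info bb_vinfo, tree *vectype_out)
{
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;
  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
			     &def_stmt, &def, &dt, vectype_out))
    return false;
  /* *VECTYPE_OUT is NULL_TREE for constant/external defs; the caller must
     then choose a suitable vector type itself.  */
  return true;
}
#endif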
7780 /* Function supportable_widening_operation
7782 Check whether an operation represented by the code CODE is a
7783 widening operation that is supported by the target platform in
7784 vector form (i.e., when operating on arguments of type VECTYPE_IN
7785 producing a result of type VECTYPE_OUT).
7787    Widening operations we currently support are NOP (CONVERT), FLOAT,
7788    WIDEN_MULT and WIDEN_LSHIFT.  This function checks if these operations
7789    are supported by the target platform either directly (via vector
7790    tree-codes), or via target builtins.
7792 Output:
7793 - CODE1 and CODE2 are codes of vector operations to be used when
7794 vectorizing the operation, if available.
7795 - MULTI_STEP_CVT determines the number of required intermediate steps in
7796 case of multi-step conversion (like char->short->int - in that case
7797 MULTI_STEP_CVT will be 1).
7798 - INTERM_TYPES contains the intermediate type required to perform the
7799 widening operation (short in the above example). */
7801 bool
7802 supportable_widening_operation (enum tree_code code, gimple stmt,
7803 tree vectype_out, tree vectype_in,
7804 enum tree_code *code1, enum tree_code *code2,
7805 int *multi_step_cvt,
7806 vec<tree> *interm_types)
7808 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7809 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7810 struct loop *vect_loop = NULL;
7811 machine_mode vec_mode;
7812 enum insn_code icode1, icode2;
7813 optab optab1, optab2;
7814 tree vectype = vectype_in;
7815 tree wide_vectype = vectype_out;
7816 enum tree_code c1, c2;
7817 int i;
7818 tree prev_type, intermediate_type;
7819 machine_mode intermediate_mode, prev_mode;
7820 optab optab3, optab4;
7822 *multi_step_cvt = 0;
7823 if (loop_info)
7824 vect_loop = LOOP_VINFO_LOOP (loop_info);
7826 switch (code)
7828 case WIDEN_MULT_EXPR:
7829 /* The result of a vectorized widening operation usually requires
7830 two vectors (because the widened results do not fit into one vector).
7831 The generated vector results would normally be expected to be
7832 generated in the same order as in the original scalar computation,
7833 i.e. if 8 results are generated in each vector iteration, they are
7834 to be organized as follows:
7835 vect1: [res1,res2,res3,res4],
7836 vect2: [res5,res6,res7,res8].
7838 However, in the special case that the result of the widening
7839 operation is used in a reduction computation only, the order doesn't
7840 matter (because when vectorizing a reduction we change the order of
7841 the computation). Some targets can take advantage of this and
7842 generate more efficient code. For example, targets like Altivec,
7843 that support widen_mult using a sequence of {mult_even,mult_odd}
7844 generate the following vectors:
7845 vect1: [res1,res3,res5,res7],
7846 vect2: [res2,res4,res6,res8].
7848 When vectorizing outer-loops, we execute the inner-loop sequentially
7849 (each vectorized inner-loop iteration contributes to VF outer-loop
7850      iterations in parallel).  We therefore don't allow changing the
7851 order of the computation in the inner-loop during outer-loop
7852 vectorization. */
7853 /* TODO: Another case in which order doesn't *really* matter is when we
7854 widen and then contract again, e.g. (short)((int)x * y >> 8).
7855 Normally, pack_trunc performs an even/odd permute, whereas the
7856 repack from an even/odd expansion would be an interleave, which
7857 would be significantly simpler for e.g. AVX2. */
7858 /* In any case, in order to avoid duplicating the code below, recurse
7859 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7860 are properly set up for the caller. If we fail, we'll continue with
7861 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7862 if (vect_loop
7863 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
7864 && !nested_in_vect_loop_p (vect_loop, stmt)
7865 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
7866 stmt, vectype_out, vectype_in,
7867 code1, code2, multi_step_cvt,
7868 interm_types))
7870 /* Elements in a vector with vect_used_by_reduction property cannot
7871 be reordered if the use chain with this property does not have the
7872 	     same operation.  One such example is s += a * b, where elements
7873 in a and b cannot be reordered. Here we check if the vector defined
7874 by STMT is only directly used in the reduction statement. */
7875 tree lhs = gimple_assign_lhs (stmt);
7876 use_operand_p dummy;
7877 gimple use_stmt;
7878 stmt_vec_info use_stmt_info = NULL;
7879 if (single_imm_use (lhs, &dummy, &use_stmt)
7880 && (use_stmt_info = vinfo_for_stmt (use_stmt))
7881 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
7882 return true;
7884 c1 = VEC_WIDEN_MULT_LO_EXPR;
7885 c2 = VEC_WIDEN_MULT_HI_EXPR;
7886 break;
7888 case VEC_WIDEN_MULT_EVEN_EXPR:
7889 /* Support the recursion induced just above. */
7890 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
7891 c2 = VEC_WIDEN_MULT_ODD_EXPR;
7892 break;
7894 case WIDEN_LSHIFT_EXPR:
7895 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
7896 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
7897 break;
7899 CASE_CONVERT:
7900 c1 = VEC_UNPACK_LO_EXPR;
7901 c2 = VEC_UNPACK_HI_EXPR;
7902 break;
7904 case FLOAT_EXPR:
7905 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
7906 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
7907 break;
7909 case FIX_TRUNC_EXPR:
7910 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7911 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7912 computing the operation. */
7913 return false;
7915 default:
7916 gcc_unreachable ();
7919 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
7921 enum tree_code ctmp = c1;
7922 c1 = c2;
7923 c2 = ctmp;
7926 if (code == FIX_TRUNC_EXPR)
7928       /* The signedness is determined from the output operand.  */
7929 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
7930 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
7932 else
7934 optab1 = optab_for_tree_code (c1, vectype, optab_default);
7935 optab2 = optab_for_tree_code (c2, vectype, optab_default);
7938 if (!optab1 || !optab2)
7939 return false;
7941 vec_mode = TYPE_MODE (vectype);
7942 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
7943 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
7944 return false;
7946 *code1 = c1;
7947 *code2 = c2;
7949 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7950 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7951 return true;
7953 /* Check if it's a multi-step conversion that can be done using intermediate
7954 types. */
7956 prev_type = vectype;
7957 prev_mode = vec_mode;
7959 if (!CONVERT_EXPR_CODE_P (code))
7960 return false;
7962   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
7963      intermediate steps in the promotion sequence.  We try
7964      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
7965      not.  */
7966 interm_types->create (MAX_INTERM_CVT_STEPS);
7967 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
7969 intermediate_mode = insn_data[icode1].operand[0].mode;
7970 intermediate_type
7971 = lang_hooks.types.type_for_mode (intermediate_mode,
7972 TYPE_UNSIGNED (prev_type));
7973 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
7974 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
7976 if (!optab3 || !optab4
7977 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
7978 || insn_data[icode1].operand[0].mode != intermediate_mode
7979 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
7980 || insn_data[icode2].operand[0].mode != intermediate_mode
7981 || ((icode1 = optab_handler (optab3, intermediate_mode))
7982 == CODE_FOR_nothing)
7983 || ((icode2 = optab_handler (optab4, intermediate_mode))
7984 == CODE_FOR_nothing))
7985 break;
7987 interm_types->quick_push (intermediate_type);
7988 (*multi_step_cvt)++;
7990 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7991 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7992 return true;
7994 prev_type = intermediate_type;
7995 prev_mode = intermediate_mode;
7998 interm_types->release ();
7999 return false;
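/* Editorial sketch (not part of GCC): querying support for a widening
   multiply.  On success CODE1/CODE2 name the lo/hi (or even/odd) vector
   codes, MULTI_STEP_CVT counts the intermediate steps and INTERM_TYPES
   collects the intermediate vector types.  Hypothetical illustration.  */
#if 0
static bool
example_widen_mult_supported_p (gimple stmt, tree vectype_out,
				tree vectype_in)
{
  enum tree_code code1, code2;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;
  bool ok = supportable_widening_operation (WIDEN_MULT_EXPR, stmt,
					    vectype_out, vectype_in,
					    &code1, &code2, &multi_step_cvt,
					    &interm_types);
  /* A char->int widening multiply would typically report
     multi_step_cvt == 1 with a short vector as the intermediate type.  */
  interm_types.release ();
  return ok;
}
#endif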
8003 /* Function supportable_narrowing_operation
8005 Check whether an operation represented by the code CODE is a
8006 narrowing operation that is supported by the target platform in
8007 vector form (i.e., when operating on arguments of type VECTYPE_IN
8008 and producing a result of type VECTYPE_OUT).
8010 Narrowing operations we currently support are NOP (CONVERT) and
8011 FIX_TRUNC. This function checks if these operations are supported by
8012 the target platform directly via vector tree-codes.
8014 Output:
8015 - CODE1 is the code of a vector operation to be used when
8016 vectorizing the operation, if available.
8017 - MULTI_STEP_CVT determines the number of required intermediate steps in
8018 case of multi-step conversion (like int->short->char - in that case
8019 MULTI_STEP_CVT will be 1).
8020 - INTERM_TYPES contains the intermediate type required to perform the
8021 narrowing operation (short in the above example). */
8023 bool
8024 supportable_narrowing_operation (enum tree_code code,
8025 tree vectype_out, tree vectype_in,
8026 enum tree_code *code1, int *multi_step_cvt,
8027 vec<tree> *interm_types)
8029 machine_mode vec_mode;
8030 enum insn_code icode1;
8031 optab optab1, interm_optab;
8032 tree vectype = vectype_in;
8033 tree narrow_vectype = vectype_out;
8034 enum tree_code c1;
8035 tree intermediate_type;
8036 machine_mode intermediate_mode, prev_mode;
8037 int i;
8038 bool uns;
8040 *multi_step_cvt = 0;
8041 switch (code)
8043 CASE_CONVERT:
8044 c1 = VEC_PACK_TRUNC_EXPR;
8045 break;
8047 case FIX_TRUNC_EXPR:
8048 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8049 break;
8051 case FLOAT_EXPR:
8052 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8053 tree code and optabs used for computing the operation. */
8054 return false;
8056 default:
8057 gcc_unreachable ();
8060 if (code == FIX_TRUNC_EXPR)
8061     /* The signedness is determined from the output operand.  */
8062 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8063 else
8064 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8066 if (!optab1)
8067 return false;
8069 vec_mode = TYPE_MODE (vectype);
8070 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8071 return false;
8073 *code1 = c1;
8075 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8076 return true;
8078 /* Check if it's a multi-step conversion that can be done using intermediate
8079 types. */
8080 prev_mode = vec_mode;
8081 if (code == FIX_TRUNC_EXPR)
8082 uns = TYPE_UNSIGNED (vectype_out);
8083 else
8084 uns = TYPE_UNSIGNED (vectype);
8086   /* For multi-step FIX_TRUNC_EXPR prefer a signed floating-to-integer
8087      conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
8088      more costly than signed.  */
8089 if (code == FIX_TRUNC_EXPR && uns)
8091 enum insn_code icode2;
8093 intermediate_type
8094 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8095 interm_optab
8096 = optab_for_tree_code (c1, intermediate_type, optab_default);
8097 if (interm_optab != unknown_optab
8098 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8099 && insn_data[icode1].operand[0].mode
8100 == insn_data[icode2].operand[0].mode)
8102 uns = false;
8103 optab1 = interm_optab;
8104 icode1 = icode2;
8108 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8109      intermediate steps in the narrowing sequence.  We try
8110 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8111 interm_types->create (MAX_INTERM_CVT_STEPS);
8112 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8114 intermediate_mode = insn_data[icode1].operand[0].mode;
8115 intermediate_type
8116 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8117 interm_optab
8118 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8119 optab_default);
8120 if (!interm_optab
8121 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8122 || insn_data[icode1].operand[0].mode != intermediate_mode
8123 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8124 == CODE_FOR_nothing))
8125 break;
8127 interm_types->quick_push (intermediate_type);
8128 (*multi_step_cvt)++;
8130 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8131 return true;
8133 prev_mode = intermediate_mode;
8134 optab1 = interm_optab;
8137 interm_types->release ();
8138 return false;
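/* Editorial sketch (not part of GCC): the narrowing counterpart.  For an
   int->char conversion the call below would report VEC_PACK_TRUNC_EXPR in
   CODE1 and, if no single-step pack exists, a short vector as the single
   intermediate type (MULTI_STEP_CVT == 1).  Hypothetical illustration.  */
#if 0
static bool
example_narrow_conv_supported_p (tree vectype_out, tree vectype_in)
{
  enum tree_code code1;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;
  bool ok = supportable_narrowing_operation (NOP_EXPR, vectype_out,
					     vectype_in, &code1,
					     &multi_step_cvt, &interm_types);
  interm_types.release ();
  return ok;
}
#endif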