[official-gcc.git] / gcc / tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "target.h"
30 #include "predict.h"
31 #include "vec.h"
32 #include "hashtab.h"
33 #include "hash-set.h"
34 #include "machmode.h"
35 #include "hard-reg-set.h"
36 #include "input.h"
37 #include "function.h"
38 #include "dominance.h"
39 #include "cfg.h"
40 #include "basic-block.h"
41 #include "gimple-pretty-print.h"
42 #include "tree-ssa-alias.h"
43 #include "internal-fn.h"
44 #include "tree-eh.h"
45 #include "gimple-expr.h"
46 #include "is-a.h"
47 #include "gimple.h"
48 #include "gimplify.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "gimple-ssa.h"
52 #include "tree-cfg.h"
53 #include "tree-phinodes.h"
54 #include "ssa-iterators.h"
55 #include "stringpool.h"
56 #include "tree-ssanames.h"
57 #include "tree-ssa-loop-manip.h"
58 #include "cfgloop.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-scalar-evolution.h"
61 #include "expr.h"
62 #include "recog.h" /* FIXME: for insn_data */
63 #include "optabs.h"
64 #include "diagnostic-core.h"
65 #include "tree-vectorizer.h"
66 #include "dumpfile.h"
67 #include "cgraph.h"
68 #include "builtins.h"
70 /* For lang_hooks.types.type_for_mode. */
71 #include "langhooks.h"
73 /* Return the vectorized type for the given statement. */
75 tree
76 stmt_vectype (struct _stmt_vec_info *stmt_info)
78 return STMT_VINFO_VECTYPE (stmt_info);
81 /* Return TRUE iff the given statement is in an inner loop relative to
82 the loop being vectorized. */
83 bool
84 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
86 gimple stmt = STMT_VINFO_STMT (stmt_info);
87 basic_block bb = gimple_bb (stmt);
88 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
89 struct loop* loop;
91 if (!loop_vinfo)
92 return false;
94 loop = LOOP_VINFO_LOOP (loop_vinfo);
96 return (bb->loop_father == loop->inner);
99 /* Record the cost of a statement, either by directly informing the
100 target model or by saving it in a vector for later processing.
101 Return a preliminary estimate of the statement's cost. */
103 unsigned
104 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
105 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
106 int misalign, enum vect_cost_model_location where)
108 if (body_cost_vec)
110 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
111 add_stmt_info_to_vec (body_cost_vec, count, kind,
112 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
113 misalign);
114 return (unsigned)
115 (builtin_vectorization_cost (kind, vectype, misalign) * count);
118 else
120 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
121 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
122 void *target_cost_data;
124 if (loop_vinfo)
125 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
126 else
127 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
129 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
130 misalign, where);
134 /* Return a variable of type ELEM_TYPE[NELEMS]. */
136 static tree
137 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
139 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
140 "vect_array");
143 /* ARRAY is an array of vectors created by create_vector_array.
144 Return an SSA_NAME for the vector in index N. The reference
145 is part of the vectorization of STMT and the vector is associated
146 with scalar destination SCALAR_DEST. */
148 static tree
149 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
150 tree array, unsigned HOST_WIDE_INT n)
152 tree vect_type, vect, vect_name, array_ref;
153 gimple new_stmt;
155 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
156 vect_type = TREE_TYPE (TREE_TYPE (array));
157 vect = vect_create_destination_var (scalar_dest, vect_type);
158 array_ref = build4 (ARRAY_REF, vect_type, array,
159 build_int_cst (size_type_node, n),
160 NULL_TREE, NULL_TREE);
162 new_stmt = gimple_build_assign (vect, array_ref);
163 vect_name = make_ssa_name (vect, new_stmt);
164 gimple_assign_set_lhs (new_stmt, vect_name);
165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
167 return vect_name;
170 /* ARRAY is an array of vectors created by create_vector_array.
171 Emit code to store SSA_NAME VECT in index N of the array.
172 The store is part of the vectorization of STMT. */
174 static void
175 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
176 tree array, unsigned HOST_WIDE_INT n)
178 tree array_ref;
179 gimple new_stmt;
181 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
182 build_int_cst (size_type_node, n),
183 NULL_TREE, NULL_TREE);
185 new_stmt = gimple_build_assign (array_ref, vect);
186 vect_finish_stmt_generation (stmt, new_stmt, gsi);
189 /* PTR is a pointer to an array of type TYPE. Return a representation
190 of *PTR. The memory reference replaces those in FIRST_DR
191 (and its group). */
193 static tree
194 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
196 tree mem_ref, alias_ptr_type;
198 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
199 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
200 /* Arrays have the same alignment as their type. */
201 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
202 return mem_ref;
205 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
207 /* Function vect_mark_relevant.
209 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
211 static void
212 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
213 enum vect_relevant relevant, bool live_p,
214 bool used_in_pattern)
216 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
217 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
218 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
219 gimple pattern_stmt;
221 if (dump_enabled_p ())
222 dump_printf_loc (MSG_NOTE, vect_location,
223 "mark relevant %d, live %d.\n", relevant, live_p);
225 /* If this stmt is an original stmt in a pattern, we might need to mark its
226 related pattern stmt instead of the original stmt. However, such stmts
227 may have their own uses that are not in any pattern; in such cases the
228 stmt itself should be marked. */
229 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
231 bool found = false;
232 if (!used_in_pattern)
234 imm_use_iterator imm_iter;
235 use_operand_p use_p;
236 gimple use_stmt;
237 tree lhs;
238 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
239 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
241 if (is_gimple_assign (stmt))
242 lhs = gimple_assign_lhs (stmt);
243 else
244 lhs = gimple_call_lhs (stmt);
246 /* This use is outside the pattern; if LHS has other uses that are
247 pattern uses, we should mark the stmt itself, and not the pattern
248 stmt. */
249 if (lhs && TREE_CODE (lhs) == SSA_NAME)
250 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
252 if (is_gimple_debug (USE_STMT (use_p)))
253 continue;
254 use_stmt = USE_STMT (use_p);
256 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
257 continue;
259 if (vinfo_for_stmt (use_stmt)
260 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
262 found = true;
263 break;
268 if (!found)
270 /* This is the last stmt in a sequence that was detected as a
271 pattern that can potentially be vectorized. Don't mark the stmt
272 as relevant/live because it's not going to be vectorized.
273 Instead mark the pattern-stmt that replaces it. */
275 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
277 if (dump_enabled_p ())
278 dump_printf_loc (MSG_NOTE, vect_location,
279 "last stmt in pattern. don't mark"
280 " relevant/live.\n");
281 stmt_info = vinfo_for_stmt (pattern_stmt);
282 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
283 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
284 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
285 stmt = pattern_stmt;
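/* Merge the new marking into what is already recorded for the stmt: OR in
   the liveness flag and keep the numerically larger (stronger) relevance.  */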
289 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
290 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
291 STMT_VINFO_RELEVANT (stmt_info) = relevant;
293 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
294 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
296 if (dump_enabled_p ())
297 dump_printf_loc (MSG_NOTE, vect_location,
298 "already marked relevant/live.\n");
299 return;
302 worklist->safe_push (stmt);
306 /* Function vect_stmt_relevant_p.
308 Return true if STMT in loop that is represented by LOOP_VINFO is
309 "relevant for vectorization".
311 A stmt is considered "relevant for vectorization" if:
312 - it has uses outside the loop.
313 - it has vdefs (it alters memory).
314 - it is a control stmt in the loop (other than the loop exit condition).
316 CHECKME: what other side effects would the vectorizer allow? */
318 static bool
319 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
320 enum vect_relevant *relevant, bool *live_p)
322 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
323 ssa_op_iter op_iter;
324 imm_use_iterator imm_iter;
325 use_operand_p use_p;
326 def_operand_p def_p;
328 *relevant = vect_unused_in_scope;
329 *live_p = false;
331 /* cond stmt other than loop exit cond. */
332 if (is_ctrl_stmt (stmt)
333 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
334 != loop_exit_ctrl_vec_info_type)
335 *relevant = vect_used_in_scope;
337 /* changing memory. */
338 if (gimple_code (stmt) != GIMPLE_PHI)
339 if (gimple_vdef (stmt))
341 if (dump_enabled_p ())
342 dump_printf_loc (MSG_NOTE, vect_location,
343 "vec_stmt_relevant_p: stmt has vdefs.\n");
344 *relevant = vect_used_in_scope;
347 /* uses outside the loop. */
348 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
350 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
352 basic_block bb = gimple_bb (USE_STMT (use_p));
353 if (!flow_bb_inside_loop_p (loop, bb))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: used out of loop.\n");
359 if (is_gimple_debug (USE_STMT (use_p)))
360 continue;
362 /* We expect all such uses to be in the loop exit phis
363 (because of loop closed form) */
364 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
365 gcc_assert (bb == single_exit (loop)->dest);
367 *live_p = true;
372 return (*live_p || *relevant);
376 /* Function exist_non_indexing_operands_for_use_p
378 USE is one of the uses attached to STMT. Check if USE is
379 used in STMT for anything other than indexing an array. */
381 static bool
382 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
384 tree operand;
385 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
387 /* USE corresponds to some operand in STMT. If there is no data
388 reference in STMT, then any operand that corresponds to USE
389 is not indexing an array. */
390 if (!STMT_VINFO_DATA_REF (stmt_info))
391 return true;
393 /* STMT has a data_ref. FORNOW this means that it's of one of
394 the following forms:
395 -1- ARRAY_REF = var
396 -2- var = ARRAY_REF
397 (This should have been verified in analyze_data_refs).
399 'var' in the second case corresponds to a def, not a use,
400 so USE cannot correspond to any operands that are not used
401 for array indexing.
403 Therefore, all we need to check is if STMT falls into the
404 first case, and whether var corresponds to USE. */
406 if (!gimple_assign_copy_p (stmt))
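/* Not a plain copy.  For internal-fn masked loads/stores only the mask
   (arg 2) and, for a store, the stored value (arg 3) are non-indexing
   uses; the remaining arguments only feed the address computation.  */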
408 if (is_gimple_call (stmt)
409 && gimple_call_internal_p (stmt))
410 switch (gimple_call_internal_fn (stmt))
412 case IFN_MASK_STORE:
413 operand = gimple_call_arg (stmt, 3);
414 if (operand == use)
415 return true;
416 /* FALLTHRU */
417 case IFN_MASK_LOAD:
418 operand = gimple_call_arg (stmt, 2);
419 if (operand == use)
420 return true;
421 break;
422 default:
423 break;
425 return false;
428 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
429 return false;
430 operand = gimple_assign_rhs1 (stmt);
431 if (TREE_CODE (operand) != SSA_NAME)
432 return false;
434 if (operand == use)
435 return true;
437 return false;
442 /* Function process_use.
444 Inputs:
445 - a USE in STMT in a loop represented by LOOP_VINFO
446 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
447 that defined USE. This is done by calling mark_relevant and passing it
448 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
449 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
450 be performed.
452 Outputs:
453 Generally, LIVE_P and RELEVANT are used to define the liveness and
454 relevance info of the DEF_STMT of this USE:
455 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
456 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
457 Exceptions:
458 - case 1: If USE is used only for address computations (e.g. array indexing),
459 which does not need to be directly vectorized, then the liveness/relevance
460 of the respective DEF_STMT is left unchanged.
461 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
462 skip DEF_STMT because it has already been processed.
463 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
464 be modified accordingly.
466 Return true if everything is as expected. Return false otherwise. */
468 static bool
469 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
470 enum vect_relevant relevant, vec<gimple> *worklist,
471 bool force)
473 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
474 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
475 stmt_vec_info dstmt_vinfo;
476 basic_block bb, def_bb;
477 tree def;
478 gimple def_stmt;
479 enum vect_def_type dt;
481 /* case 1: we are only interested in uses that need to be vectorized. Uses
482 that are used for address computation are not considered relevant. */
483 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
484 return true;
486 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
488 if (dump_enabled_p ())
489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
490 "not vectorized: unsupported use in stmt.\n");
491 return false;
494 if (!def_stmt || gimple_nop_p (def_stmt))
495 return true;
497 def_bb = gimple_bb (def_stmt);
498 if (!flow_bb_inside_loop_p (loop, def_bb))
500 if (dump_enabled_p ())
501 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
502 return true;
505 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
506 DEF_STMT must have already been processed, because this should be the
507 only way that STMT, which is a reduction-phi, was put in the worklist,
508 as there should be no other uses for DEF_STMT in the loop. So we just
509 check that everything is as expected, and we are done. */
510 dstmt_vinfo = vinfo_for_stmt (def_stmt);
511 bb = gimple_bb (stmt);
512 if (gimple_code (stmt) == GIMPLE_PHI
513 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
514 && gimple_code (def_stmt) != GIMPLE_PHI
515 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
516 && bb->loop_father == def_bb->loop_father)
518 if (dump_enabled_p ())
519 dump_printf_loc (MSG_NOTE, vect_location,
520 "reduc-stmt defining reduc-phi in the same nest.\n");
521 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
522 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
523 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
524 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
525 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
526 return true;
529 /* case 3a: outer-loop stmt defining an inner-loop stmt:
530 outer-loop-header-bb:
531 d = def_stmt
532 inner-loop:
533 stmt # use (d)
534 outer-loop-tail-bb:
535 ... */
536 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
538 if (dump_enabled_p ())
539 dump_printf_loc (MSG_NOTE, vect_location,
540 "outer-loop def-stmt defining inner-loop stmt.\n");
542 switch (relevant)
544 case vect_unused_in_scope:
545 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
546 vect_used_in_scope : vect_unused_in_scope;
547 break;
549 case vect_used_in_outer_by_reduction:
550 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
551 relevant = vect_used_by_reduction;
552 break;
554 case vect_used_in_outer:
555 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
556 relevant = vect_used_in_scope;
557 break;
559 case vect_used_in_scope:
560 break;
562 default:
563 gcc_unreachable ();
567 /* case 3b: inner-loop stmt defining an outer-loop stmt:
568 outer-loop-header-bb:
570 inner-loop:
571 d = def_stmt
572 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
573 stmt # use (d) */
574 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
576 if (dump_enabled_p ())
577 dump_printf_loc (MSG_NOTE, vect_location,
578 "inner-loop def-stmt defining outer-loop stmt.\n");
580 switch (relevant)
582 case vect_unused_in_scope:
583 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
584 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
585 vect_used_in_outer_by_reduction : vect_unused_in_scope;
586 break;
588 case vect_used_by_reduction:
589 relevant = vect_used_in_outer_by_reduction;
590 break;
592 case vect_used_in_scope:
593 relevant = vect_used_in_outer;
594 break;
596 default:
597 gcc_unreachable ();
601 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
602 is_pattern_stmt_p (stmt_vinfo));
603 return true;
607 /* Function vect_mark_stmts_to_be_vectorized.
609 Not all stmts in the loop need to be vectorized. For example:
611 for i...
612 for j...
613 1. T0 = i + j
614 2. T1 = a[T0]
616 3. j = j + 1
618 Stmt 1 and 3 do not need to be vectorized, because loop control and
619 addressing of vectorized data-refs are handled differently.
621 This pass detects such stmts. */
623 bool
624 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
626 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
627 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
628 unsigned int nbbs = loop->num_nodes;
629 gimple_stmt_iterator si;
630 gimple stmt;
631 unsigned int i;
632 stmt_vec_info stmt_vinfo;
633 basic_block bb;
634 gimple phi;
635 bool live_p;
636 enum vect_relevant relevant, tmp_relevant;
637 enum vect_def_type def_type;
639 if (dump_enabled_p ())
640 dump_printf_loc (MSG_NOTE, vect_location,
641 "=== vect_mark_stmts_to_be_vectorized ===\n");
643 auto_vec<gimple, 64> worklist;
645 /* 1. Init worklist. */
646 for (i = 0; i < nbbs; i++)
648 bb = bbs[i];
649 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
651 phi = gsi_stmt (si);
652 if (dump_enabled_p ())
654 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
655 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
656 dump_printf (MSG_NOTE, "\n");
659 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
660 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
662 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
664 stmt = gsi_stmt (si);
665 if (dump_enabled_p ())
667 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
668 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
669 dump_printf (MSG_NOTE, "\n");
672 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
673 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
677 /* 2. Process_worklist */
678 while (worklist.length () > 0)
680 use_operand_p use_p;
681 ssa_op_iter iter;
683 stmt = worklist.pop ();
684 if (dump_enabled_p ())
686 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
687 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
688 dump_printf (MSG_NOTE, "\n");
691 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
692 (DEF_STMT) as relevant/irrelevant and live/dead according to the
693 liveness and relevance properties of STMT. */
694 stmt_vinfo = vinfo_for_stmt (stmt);
695 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
696 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
698 /* Generally, the liveness and relevance properties of STMT are
699 propagated as is to the DEF_STMTs of its USEs:
700 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
701 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
703 One exception is when STMT has been identified as defining a reduction
704 variable; in this case we set the liveness/relevance as follows:
705 live_p = false
706 relevant = vect_used_by_reduction
707 This is because we distinguish between two kinds of relevant stmts -
708 those that are used by a reduction computation, and those that are
709 (also) used by a regular computation. This allows us later on to
710 identify stmts that are used solely by a reduction, and therefore the
711 order of the results that they produce does not have to be kept. */
713 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
714 tmp_relevant = relevant;
715 switch (def_type)
717 case vect_reduction_def:
718 switch (tmp_relevant)
720 case vect_unused_in_scope:
721 relevant = vect_used_by_reduction;
722 break;
724 case vect_used_by_reduction:
725 if (gimple_code (stmt) == GIMPLE_PHI)
726 break;
727 /* fall through */
729 default:
730 if (dump_enabled_p ())
731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
732 "unsupported use of reduction.\n");
733 return false;
736 live_p = false;
737 break;
739 case vect_nested_cycle:
740 if (tmp_relevant != vect_unused_in_scope
741 && tmp_relevant != vect_used_in_outer_by_reduction
742 && tmp_relevant != vect_used_in_outer)
744 if (dump_enabled_p ())
745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
746 "unsupported use of nested cycle.\n");
748 return false;
751 live_p = false;
752 break;
754 case vect_double_reduction_def:
755 if (tmp_relevant != vect_unused_in_scope
756 && tmp_relevant != vect_used_by_reduction)
758 if (dump_enabled_p ())
759 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
760 "unsupported use of double reduction.\n");
762 return false;
765 live_p = false;
766 break;
768 default:
769 break;
772 if (is_pattern_stmt_p (stmt_vinfo))
774 /* Pattern statements are not inserted into the code, so
775 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
776 have to scan the RHS or function arguments instead. */
777 if (is_gimple_assign (stmt))
779 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
780 tree op = gimple_assign_rhs1 (stmt);
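/* Operand 0 of an assignment is the lhs, so the scan of uses starts at
   operand 1.  For a COND_EXPR with an embedded comparison, handle the two
   comparison operands explicitly and start the scan at operand 2.  */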
782 i = 1;
783 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
785 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
786 live_p, relevant, &worklist, false)
787 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
788 live_p, relevant, &worklist, false))
789 return false;
790 i = 2;
792 for (; i < gimple_num_ops (stmt); i++)
794 op = gimple_op (stmt, i);
795 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
796 &worklist, false))
797 return false;
800 else if (is_gimple_call (stmt))
802 for (i = 0; i < gimple_call_num_args (stmt); i++)
804 tree arg = gimple_call_arg (stmt, i);
805 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
806 &worklist, false))
807 return false;
811 else
812 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
814 tree op = USE_FROM_PTR (use_p);
815 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
816 &worklist, false))
817 return false;
820 if (STMT_VINFO_GATHER_P (stmt_vinfo))
822 tree off;
823 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
824 gcc_assert (decl);
825 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
826 &worklist, true))
827 return false;
829 } /* while worklist */
831 return true;
835 /* Function vect_model_simple_cost.
837 Models cost for simple operations, i.e. those that only emit ncopies of a
838 single op. Right now, this does not account for multiple insns that could
839 be generated for the single vector op. We will handle that shortly. */
841 void
842 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
843 enum vect_def_type *dt,
844 stmt_vector_for_cost *prologue_cost_vec,
845 stmt_vector_for_cost *body_cost_vec)
847 int i;
848 int inside_cost = 0, prologue_cost = 0;
850 /* The SLP costs were already calculated during SLP tree build. */
851 if (PURE_SLP_STMT (stmt_info))
852 return;
854 /* FORNOW: Assuming maximum 2 args per stmts. */
855 for (i = 0; i < 2; i++)
856 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
857 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
858 stmt_info, 0, vect_prologue);
860 /* Pass the inside-of-loop statements to the target-specific cost model. */
861 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
862 stmt_info, 0, vect_body);
864 if (dump_enabled_p ())
865 dump_printf_loc (MSG_NOTE, vect_location,
866 "vect_model_simple_cost: inside_cost = %d, "
867 "prologue_cost = %d .\n", inside_cost, prologue_cost);
871 /* Model cost for type demotion and promotion operations. PWR is normally
872 zero for single-step promotions and demotions. It will be one if
873 two-step promotion/demotion is required, and so on. Each additional
874 step doubles the number of instructions required. */
876 static void
877 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
878 enum vect_def_type *dt, int pwr)
880 int i, tmp;
881 int inside_cost = 0, prologue_cost = 0;
882 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
883 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
884 void *target_cost_data;
886 /* The SLP costs were already calculated during SLP tree build. */
887 if (PURE_SLP_STMT (stmt_info))
888 return;
890 if (loop_vinfo)
891 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
892 else
893 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
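/* Sum the cost over all promotion/demotion steps.  Step I contributes
   vect_pow2 (I) vec_promote_demote stmts for a demotion and
   vect_pow2 (I + 1) for a promotion, matching the per-step doubling
   described above.  */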
895 for (i = 0; i < pwr + 1; i++)
897 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
898 (i + 1) : i;
899 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
900 vec_promote_demote, stmt_info, 0,
901 vect_body);
904 /* FORNOW: Assuming maximum 2 args per stmts. */
905 for (i = 0; i < 2; i++)
906 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
907 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
908 stmt_info, 0, vect_prologue);
910 if (dump_enabled_p ())
911 dump_printf_loc (MSG_NOTE, vect_location,
912 "vect_model_promotion_demotion_cost: inside_cost = %d, "
913 "prologue_cost = %d .\n", inside_cost, prologue_cost);
916 /* Function vect_cost_group_size
918 For grouped load or store, return the group_size only if it is the first
919 load or store of a group, else return 1. This ensures that group size is
920 only returned once per group. */
922 static int
923 vect_cost_group_size (stmt_vec_info stmt_info)
925 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
927 if (first_stmt == STMT_VINFO_STMT (stmt_info))
928 return GROUP_SIZE (stmt_info);
930 return 1;
934 /* Function vect_model_store_cost
936 Models cost for stores. In the case of grouped accesses, one access
937 has the overhead of the grouped access attributed to it. */
939 void
940 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
941 bool store_lanes_p, enum vect_def_type dt,
942 slp_tree slp_node,
943 stmt_vector_for_cost *prologue_cost_vec,
944 stmt_vector_for_cost *body_cost_vec)
946 int group_size;
947 unsigned int inside_cost = 0, prologue_cost = 0;
948 struct data_reference *first_dr;
949 gimple first_stmt;
951 /* The SLP costs were already calculated during SLP tree build. */
952 if (PURE_SLP_STMT (stmt_info))
953 return;
955 if (dt == vect_constant_def || dt == vect_external_def)
956 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
957 stmt_info, 0, vect_prologue);
959 /* Grouped access? */
960 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
962 if (slp_node)
964 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
965 group_size = 1;
967 else
969 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
970 group_size = vect_cost_group_size (stmt_info);
973 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
975 /* Not a grouped access. */
976 else
978 group_size = 1;
979 first_dr = STMT_VINFO_DATA_REF (stmt_info);
982 /* We assume that the cost of a single store-lanes instruction is
983 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
984 access is instead being provided by a permute-and-store operation,
985 include the cost of the permutes. */
986 if (!store_lanes_p && group_size > 1)
988 /* Uses high and low interleave or shuffle operations for each
989 needed permute. */
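/* Interleaving GROUP_SIZE vectors takes ceil_log2 (GROUP_SIZE) permute
   stages, each touching all GROUP_SIZE vectors, for every copy.  */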
990 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
991 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
992 stmt_info, 0, vect_body);
994 if (dump_enabled_p ())
995 dump_printf_loc (MSG_NOTE, vect_location,
996 "vect_model_store_cost: strided group_size = %d .\n",
997 group_size);
1000 /* Costs of the stores. */
1001 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1003 if (dump_enabled_p ())
1004 dump_printf_loc (MSG_NOTE, vect_location,
1005 "vect_model_store_cost: inside_cost = %d, "
1006 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1010 /* Calculate cost of DR's memory access. */
1011 void
1012 vect_get_store_cost (struct data_reference *dr, int ncopies,
1013 unsigned int *inside_cost,
1014 stmt_vector_for_cost *body_cost_vec)
1016 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1017 gimple stmt = DR_STMT (dr);
1018 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1020 switch (alignment_support_scheme)
1022 case dr_aligned:
1024 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1025 vector_store, stmt_info, 0,
1026 vect_body);
1028 if (dump_enabled_p ())
1029 dump_printf_loc (MSG_NOTE, vect_location,
1030 "vect_model_store_cost: aligned.\n");
1031 break;
1034 case dr_unaligned_supported:
1036 /* Here, we assign an additional cost for the unaligned store. */
1037 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1038 unaligned_store, stmt_info,
1039 DR_MISALIGNMENT (dr), vect_body);
1040 if (dump_enabled_p ())
1041 dump_printf_loc (MSG_NOTE, vect_location,
1042 "vect_model_store_cost: unaligned supported by "
1043 "hardware.\n");
1044 break;
1047 case dr_unaligned_unsupported:
1049 *inside_cost = VECT_MAX_COST;
1051 if (dump_enabled_p ())
1052 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1053 "vect_model_store_cost: unsupported access.\n");
1054 break;
1057 default:
1058 gcc_unreachable ();
1063 /* Function vect_model_load_cost
1065 Models cost for loads. In the case of grouped accesses, the last access
1066 has the overhead of the grouped access attributed to it. Since unaligned
1067 accesses are supported for loads, we also account for the costs of the
1068 access scheme chosen. */
1070 void
1071 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1072 bool load_lanes_p, slp_tree slp_node,
1073 stmt_vector_for_cost *prologue_cost_vec,
1074 stmt_vector_for_cost *body_cost_vec)
1076 int group_size;
1077 gimple first_stmt;
1078 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1079 unsigned int inside_cost = 0, prologue_cost = 0;
1081 /* The SLP costs were already calculated during SLP tree build. */
1082 if (PURE_SLP_STMT (stmt_info))
1083 return;
1085 /* Grouped accesses? */
1086 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1087 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1089 group_size = vect_cost_group_size (stmt_info);
1090 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1092 /* Not a grouped access. */
1093 else
1095 group_size = 1;
1096 first_dr = dr;
1099 /* We assume that the cost of a single load-lanes instruction is
1100 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1101 access is instead being provided by a load-and-permute operation,
1102 include the cost of the permutes. */
1103 if (!load_lanes_p && group_size > 1)
1105 /* Uses even and odd extract or shuffle operations
1106 for each needed permute. */
1107 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1108 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1109 stmt_info, 0, vect_body);
1111 if (dump_enabled_p ())
1112 dump_printf_loc (MSG_NOTE, vect_location,
1113 "vect_model_load_cost: strided group_size = %d .\n",
1114 group_size);
1117 /* The loads themselves. */
1118 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1120 /* N scalar loads plus gathering them into a vector. */
1121 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1122 inside_cost += record_stmt_cost (body_cost_vec,
1123 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1124 scalar_load, stmt_info, 0, vect_body);
1125 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1126 stmt_info, 0, vect_body);
1128 else
1129 vect_get_load_cost (first_dr, ncopies,
1130 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1131 || group_size > 1 || slp_node),
1132 &inside_cost, &prologue_cost,
1133 prologue_cost_vec, body_cost_vec, true);
1135 if (dump_enabled_p ())
1136 dump_printf_loc (MSG_NOTE, vect_location,
1137 "vect_model_load_cost: inside_cost = %d, "
1138 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1142 /* Calculate cost of DR's memory access. */
1143 void
1144 vect_get_load_cost (struct data_reference *dr, int ncopies,
1145 bool add_realign_cost, unsigned int *inside_cost,
1146 unsigned int *prologue_cost,
1147 stmt_vector_for_cost *prologue_cost_vec,
1148 stmt_vector_for_cost *body_cost_vec,
1149 bool record_prologue_costs)
1151 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1152 gimple stmt = DR_STMT (dr);
1153 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1155 switch (alignment_support_scheme)
1157 case dr_aligned:
1159 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1160 stmt_info, 0, vect_body);
1162 if (dump_enabled_p ())
1163 dump_printf_loc (MSG_NOTE, vect_location,
1164 "vect_model_load_cost: aligned.\n");
1166 break;
1168 case dr_unaligned_supported:
1170 /* Here, we assign an additional cost for the unaligned load. */
1171 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1172 unaligned_load, stmt_info,
1173 DR_MISALIGNMENT (dr), vect_body);
1175 if (dump_enabled_p ())
1176 dump_printf_loc (MSG_NOTE, vect_location,
1177 "vect_model_load_cost: unaligned supported by "
1178 "hardware.\n");
1180 break;
1182 case dr_explicit_realign:
1184 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1185 vector_load, stmt_info, 0, vect_body);
1186 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1187 vec_perm, stmt_info, 0, vect_body);
1189 /* FIXME: If the misalignment remains fixed across the iterations of
1190 the containing loop, the following cost should be added to the
1191 prologue costs. */
1192 if (targetm.vectorize.builtin_mask_for_load)
1193 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1194 stmt_info, 0, vect_body);
1196 if (dump_enabled_p ())
1197 dump_printf_loc (MSG_NOTE, vect_location,
1198 "vect_model_load_cost: explicit realign\n");
1200 break;
1202 case dr_explicit_realign_optimized:
1204 if (dump_enabled_p ())
1205 dump_printf_loc (MSG_NOTE, vect_location,
1206 "vect_model_load_cost: unaligned software "
1207 "pipelined.\n");
1209 /* Unaligned software pipeline has a load of an address, an initial
1210 load, and possibly a mask operation to "prime" the loop. However,
1211 if this is an access in a group of loads, which provide grouped
1212 access, then the above cost should only be considered for one
1213 access in the group. Inside the loop, there is a load op
1214 and a realignment op. */
1216 if (add_realign_cost && record_prologue_costs)
1218 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1219 vector_stmt, stmt_info,
1220 0, vect_prologue);
1221 if (targetm.vectorize.builtin_mask_for_load)
1222 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1223 vector_stmt, stmt_info,
1224 0, vect_prologue);
1227 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1228 stmt_info, 0, vect_body);
1229 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1230 stmt_info, 0, vect_body);
1232 if (dump_enabled_p ())
1233 dump_printf_loc (MSG_NOTE, vect_location,
1234 "vect_model_load_cost: explicit realign optimized"
1235 "\n");
1237 break;
1240 case dr_unaligned_unsupported:
1242 *inside_cost = VECT_MAX_COST;
1244 if (dump_enabled_p ())
1245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1246 "vect_model_load_cost: unsupported access.\n");
1247 break;
1250 default:
1251 gcc_unreachable ();
1255 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1256 the loop preheader for the vectorized stmt STMT. */
1258 static void
1259 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1261 if (gsi)
1262 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1263 else
1265 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1266 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1268 if (loop_vinfo)
1270 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1271 basic_block new_bb;
1272 edge pe;
1274 if (nested_in_vect_loop_p (loop, stmt))
1275 loop = loop->inner;
1277 pe = loop_preheader_edge (loop);
1278 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1279 gcc_assert (!new_bb);
1281 else
1283 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1284 basic_block bb;
1285 gimple_stmt_iterator gsi_bb_start;
1287 gcc_assert (bb_vinfo);
1288 bb = BB_VINFO_BB (bb_vinfo);
1289 gsi_bb_start = gsi_after_labels (bb);
1290 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1294 if (dump_enabled_p ())
1296 dump_printf_loc (MSG_NOTE, vect_location,
1297 "created new init_stmt: ");
1298 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1299 dump_printf (MSG_NOTE, "\n");
1303 /* Function vect_init_vector.
1305 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1306 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1307 a vector type, a vector with all elements equal to VAL is created first.
1308 Place the initialization at GSI if it is not NULL. Otherwise, place the
1309 initialization at the loop preheader.
1310 Return the DEF of INIT_STMT.
1311 It will be used in the vectorization of STMT. */
1313 tree
1314 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1316 tree new_var;
1317 gimple init_stmt;
1318 tree vec_oprnd;
1319 tree new_temp;
1321 if (TREE_CODE (type) == VECTOR_TYPE
1322 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
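/* VAL is a scalar to be splat into a vector of TYPE.  First make VAL
   match TYPE's element type: fold constants directly and emit a separate
   conversion stmt for non-constants, then build the vector from it.  */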
1324 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1326 if (CONSTANT_CLASS_P (val))
1327 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1328 else
1330 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1331 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1332 new_temp, val,
1333 NULL_TREE);
1334 vect_init_vector_1 (stmt, init_stmt, gsi);
1335 val = new_temp;
1338 val = build_vector_from_val (type, val);
1341 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1342 init_stmt = gimple_build_assign (new_var, val);
1343 new_temp = make_ssa_name (new_var, init_stmt);
1344 gimple_assign_set_lhs (init_stmt, new_temp);
1345 vect_init_vector_1 (stmt, init_stmt, gsi);
1346 vec_oprnd = gimple_assign_lhs (init_stmt);
1347 return vec_oprnd;
1351 /* Function vect_get_vec_def_for_operand.
1353 OP is an operand in STMT. This function returns a (vector) def that will be
1354 used in the vectorized stmt for STMT.
1356 In the case that OP is an SSA_NAME which is defined in the loop, then
1357 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1359 In case OP is an invariant or constant, a new stmt that creates a vector def
1360 needs to be introduced. */
1362 tree
1363 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1365 tree vec_oprnd;
1366 gimple vec_stmt;
1367 gimple def_stmt;
1368 stmt_vec_info def_stmt_info = NULL;
1369 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1370 unsigned int nunits;
1371 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1372 tree def;
1373 enum vect_def_type dt;
1374 bool is_simple_use;
1375 tree vector_type;
1377 if (dump_enabled_p ())
1379 dump_printf_loc (MSG_NOTE, vect_location,
1380 "vect_get_vec_def_for_operand: ");
1381 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1382 dump_printf (MSG_NOTE, "\n");
1385 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1386 &def_stmt, &def, &dt);
1387 gcc_assert (is_simple_use);
1388 if (dump_enabled_p ())
1390 int loc_printed = 0;
1391 if (def)
1393 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1394 loc_printed = 1;
1395 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1396 dump_printf (MSG_NOTE, "\n");
1398 if (def_stmt)
1400 if (loc_printed)
1401 dump_printf (MSG_NOTE, " def_stmt = ");
1402 else
1403 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1404 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1405 dump_printf (MSG_NOTE, "\n");
1409 switch (dt)
1411 /* Case 1: operand is a constant. */
1412 case vect_constant_def:
1414 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1415 gcc_assert (vector_type);
1416 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1418 if (scalar_def)
1419 *scalar_def = op;
1421 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1422 if (dump_enabled_p ())
1423 dump_printf_loc (MSG_NOTE, vect_location,
1424 "Create vector_cst. nunits = %d\n", nunits);
1426 return vect_init_vector (stmt, op, vector_type, NULL);
1429 /* Case 2: operand is defined outside the loop - loop invariant. */
1430 case vect_external_def:
1432 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1433 gcc_assert (vector_type);
1435 if (scalar_def)
1436 *scalar_def = def;
1438 /* Create 'vec_inv = {inv,inv,..,inv}' */
1439 if (dump_enabled_p ())
1440 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1442 return vect_init_vector (stmt, def, vector_type, NULL);
1445 /* Case 3: operand is defined inside the loop. */
1446 case vect_internal_def:
1448 if (scalar_def)
1449 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1451 /* Get the def from the vectorized stmt. */
1452 def_stmt_info = vinfo_for_stmt (def_stmt);
1454 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1455 /* Get vectorized pattern statement. */
1456 if (!vec_stmt
1457 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1458 && !STMT_VINFO_RELEVANT (def_stmt_info))
1459 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1460 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1461 gcc_assert (vec_stmt);
1462 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1463 vec_oprnd = PHI_RESULT (vec_stmt);
1464 else if (is_gimple_call (vec_stmt))
1465 vec_oprnd = gimple_call_lhs (vec_stmt);
1466 else
1467 vec_oprnd = gimple_assign_lhs (vec_stmt);
1468 return vec_oprnd;
1471 /* Case 4: operand is defined by a loop header phi - reduction */
1472 case vect_reduction_def:
1473 case vect_double_reduction_def:
1474 case vect_nested_cycle:
1476 struct loop *loop;
1478 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1479 loop = (gimple_bb (def_stmt))->loop_father;
1481 /* Get the def before the loop */
1482 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1483 return get_initial_def_for_reduction (stmt, op, scalar_def);
1486 /* Case 5: operand is defined by loop-header phi - induction. */
1487 case vect_induction_def:
1489 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1491 /* Get the def from the vectorized stmt. */
1492 def_stmt_info = vinfo_for_stmt (def_stmt);
1493 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1494 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1495 vec_oprnd = PHI_RESULT (vec_stmt);
1496 else
1497 vec_oprnd = gimple_get_lhs (vec_stmt);
1498 return vec_oprnd;
1501 default:
1502 gcc_unreachable ();
1507 /* Function vect_get_vec_def_for_stmt_copy
1509 Return a vector-def for an operand. This function is used when the
1510 vectorized stmt to be created (by the caller to this function) is a "copy"
1511 created in case the vectorized result cannot fit in one vector, and several
1512 copies of the vector-stmt are required. In this case the vector-def is
1513 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1514 of the stmt that defines VEC_OPRND.
1515 DT is the type of the vector def VEC_OPRND.
1517 Context:
1518 In case the vectorization factor (VF) is bigger than the number
1519 of elements that can fit in a vectype (nunits), we have to generate
1520 more than one vector stmt to vectorize the scalar stmt. This situation
1521 arises when there are multiple data-types operated upon in the loop; the
1522 smallest data-type determines the VF, and as a result, when vectorizing
1523 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1524 vector stmt (each computing a vector of 'nunits' results, and together
1525 computing 'VF' results in each iteration). This function is called when
1526 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1527 which VF=16 and nunits=4, so the number of copies required is 4):
1529 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1531 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1532 VS1.1: vx.1 = memref1 VS1.2
1533 VS1.2: vx.2 = memref2 VS1.3
1534 VS1.3: vx.3 = memref3
1536 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1537 VSnew.1: vz1 = vx.1 + ... VSnew.2
1538 VSnew.2: vz2 = vx.2 + ... VSnew.3
1539 VSnew.3: vz3 = vx.3 + ...
1541 The vectorization of S1 is explained in vectorizable_load.
1542 The vectorization of S2:
1543 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1544 the function 'vect_get_vec_def_for_operand' is called to
1545 get the relevant vector-def for each operand of S2. For operand x it
1546 returns the vector-def 'vx.0'.
1548 To create the remaining copies of the vector-stmt (VSnew.j), this
1549 function is called to get the relevant vector-def for each operand. It is
1550 obtained from the respective VS1.j stmt, which is recorded in the
1551 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1553 For example, to obtain the vector-def 'vx.1' in order to create the
1554 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1555 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1556 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1557 and return its def ('vx.1').
1558 Overall, to create the above sequence this function will be called 3 times:
1559 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1560 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1561 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1563 tree
1564 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1566 gimple vec_stmt_for_operand;
1567 stmt_vec_info def_stmt_info;
1569 /* Do nothing; can reuse same def. */
1570 if (dt == vect_external_def || dt == vect_constant_def )
1571 return vec_oprnd;
1573 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1574 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1575 gcc_assert (def_stmt_info);
1576 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1577 gcc_assert (vec_stmt_for_operand);
1578 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1579 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1580 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1581 else
1582 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1583 return vec_oprnd;
1587 /* Get vectorized definitions for the operands to create a copy of an original
1588 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1590 static void
1591 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1592 vec<tree> *vec_oprnds0,
1593 vec<tree> *vec_oprnds1)
1595 tree vec_oprnd = vec_oprnds0->pop ();
1597 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1598 vec_oprnds0->quick_push (vec_oprnd);
1600 if (vec_oprnds1 && vec_oprnds1->length ())
1602 vec_oprnd = vec_oprnds1->pop ();
1603 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1604 vec_oprnds1->quick_push (vec_oprnd);
1609 /* Get vectorized definitions for OP0 and OP1.
1610 REDUC_INDEX is the index of the reduction operand in case of reduction,
1611 and -1 otherwise. */
1613 void
1614 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1615 vec<tree> *vec_oprnds0,
1616 vec<tree> *vec_oprnds1,
1617 slp_tree slp_node, int reduc_index)
1619 if (slp_node)
1621 int nops = (op1 == NULL_TREE) ? 1 : 2;
1622 auto_vec<tree> ops (nops);
1623 auto_vec<vec<tree> > vec_defs (nops);
1625 ops.quick_push (op0);
1626 if (op1)
1627 ops.quick_push (op1);
1629 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1631 *vec_oprnds0 = vec_defs[0];
1632 if (op1)
1633 *vec_oprnds1 = vec_defs[1];
1635 else
1637 tree vec_oprnd;
1639 vec_oprnds0->create (1);
1640 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1641 vec_oprnds0->quick_push (vec_oprnd);
1643 if (op1)
1645 vec_oprnds1->create (1);
1646 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1647 vec_oprnds1->quick_push (vec_oprnd);
1653 /* Function vect_finish_stmt_generation.
1655 Insert a new stmt. */
1657 void
1658 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1659 gimple_stmt_iterator *gsi)
1661 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1662 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1663 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1665 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1667 if (!gsi_end_p (*gsi)
1668 && gimple_has_mem_ops (vec_stmt))
1670 gimple at_stmt = gsi_stmt (*gsi);
1671 tree vuse = gimple_vuse (at_stmt);
1672 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1674 tree vdef = gimple_vdef (at_stmt);
1675 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1676 /* If we have an SSA vuse and insert a store, update virtual
1677 SSA form to avoid triggering the renamer. Do so only
1678 if we can easily see all uses - which is what almost always
1679 happens with the way vectorized stmts are inserted. */
1680 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1681 && ((is_gimple_assign (vec_stmt)
1682 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1683 || (is_gimple_call (vec_stmt)
1684 && !(gimple_call_flags (vec_stmt)
1685 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1687 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1688 gimple_set_vdef (vec_stmt, new_vdef);
1689 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1693 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1695 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1696 bb_vinfo));
1698 if (dump_enabled_p ())
1700 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1701 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1702 dump_printf (MSG_NOTE, "\n");
1705 gimple_set_location (vec_stmt, gimple_location (stmt));
1707 /* While EH edges will generally prevent vectorization, stmt might
1708 e.g. be in a must-not-throw region. Ensure newly created stmts
1709 that could throw are part of the same region. */
1710 int lp_nr = lookup_stmt_eh_lp (stmt);
1711 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1712 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1715 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1716 a function declaration if the target has a vectorized version
1717 of the function, or NULL_TREE if the function cannot be vectorized. */
1719 tree
1720 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1722 tree fndecl = gimple_call_fndecl (call);
1724 /* We only handle functions that do not read or clobber memory -- i.e.
1725 const or novops ones. */
1726 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1727 return NULL_TREE;
1729 if (!fndecl
1730 || TREE_CODE (fndecl) != FUNCTION_DECL
1731 || !DECL_BUILT_IN (fndecl))
1732 return NULL_TREE;
1734 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1735 vectype_in);
1739 static tree permute_vec_elements (tree, tree, tree, gimple,
1740 gimple_stmt_iterator *);
1743 /* Function vectorizable_mask_load_store.
1745 Check if STMT performs a conditional load or store that can be vectorized.
1746 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1747 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1748 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1750 static bool
1751 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1752 gimple *vec_stmt, slp_tree slp_node)
1754 tree vec_dest = NULL;
1755 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1756 stmt_vec_info prev_stmt_info;
1757 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1758 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1759 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1760 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1761 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1762 tree elem_type;
1763 gimple new_stmt;
1764 tree dummy;
1765 tree dataref_ptr = NULL_TREE;
1766 gimple ptr_incr;
1767 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1768 int ncopies;
1769 int i, j;
1770 bool inv_p;
1771 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1772 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1773 int gather_scale = 1;
1774 enum vect_def_type gather_dt = vect_unknown_def_type;
1775 bool is_store;
1776 tree mask;
1777 gimple def_stmt;
1778 tree def;
1779 enum vect_def_type dt;
1781 if (slp_node != NULL)
1782 return false;
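/* NCOPIES is the number of vector stmts needed to cover the
   vectorization factor with vectors of NUNITS elements.  */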
1784 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1785 gcc_assert (ncopies >= 1);
1787 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1788 mask = gimple_call_arg (stmt, 2);
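/* The mask elements must be exactly as wide as the data elements,
   otherwise the mask cannot drive a masked load/store of this
   vector type.  */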
1789 if (TYPE_PRECISION (TREE_TYPE (mask))
1790 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1791 return false;
1793 /* FORNOW. This restriction should be relaxed. */
1794 if (nested_in_vect_loop && ncopies > 1)
1796 if (dump_enabled_p ())
1797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1798 "multiple types in nested loop.");
1799 return false;
1802 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1803 return false;
1805 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1806 return false;
1808 if (!STMT_VINFO_DATA_REF (stmt_info))
1809 return false;
1811 elem_type = TREE_TYPE (vectype);
1813 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1814 return false;
1816 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1817 return false;
1819 if (STMT_VINFO_GATHER_P (stmt_info))
1821 gimple def_stmt;
1822 tree def;
1823 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1824 &gather_off, &gather_scale);
1825 gcc_assert (gather_decl);
1826 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1827 &def_stmt, &def, &gather_dt,
1828 &gather_off_vectype))
1830 if (dump_enabled_p ())
1831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1832 "gather index use not simple.");
1833 return false;
1836 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1837 tree masktype
1838 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1839 if (TREE_CODE (masktype) == INTEGER_TYPE)
1841 if (dump_enabled_p ())
1842 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1843 "masked gather with integer mask not supported.");
1844 return false;
1847 else if (tree_int_cst_compare (nested_in_vect_loop
1848 ? STMT_VINFO_DR_STEP (stmt_info)
1849 : DR_STEP (dr), size_zero_node) <= 0)
1850 return false;
1851 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1852 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1853 return false;
1855 if (TREE_CODE (mask) != SSA_NAME)
1856 return false;
1858 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1859 &def_stmt, &def, &dt))
1860 return false;
1862 if (is_store)
1864 tree rhs = gimple_call_arg (stmt, 3);
1865 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1866 &def_stmt, &def, &dt))
1867 return false;
1870 if (!vec_stmt) /* transformation not required. */
1872 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1873 if (is_store)
1874 vect_model_store_cost (stmt_info, ncopies, false, dt,
1875 NULL, NULL, NULL);
1876 else
1877 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1878 return true;
1881 /** Transform. **/
1883 if (STMT_VINFO_GATHER_P (stmt_info))
1885 tree vec_oprnd0 = NULL_TREE, op;
1886 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1887 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1888 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1889 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1890 tree mask_perm_mask = NULL_TREE;
1891 edge pe = loop_preheader_edge (loop);
1892 gimple_seq seq;
1893 basic_block new_bb;
1894 enum { NARROW, NONE, WIDEN } modifier;
1895 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1897 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1898 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1899 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1900 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1901 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1902 scaletype = TREE_VALUE (arglist);
1903 gcc_checking_assert (types_compatible_p (srctype, rettype)
1904 && types_compatible_p (srctype, masktype));
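/* A mismatch between the number of elements in the data vector (nunits)
   and in the offset vector is handled by permutation: if the offset
   vector has twice as many elements, odd copies reuse its upper half
   (WIDEN); if it has half as many, two gather results are merged into
   one data vector and twice as many copies are generated (NARROW). */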
1906 if (nunits == gather_off_nunits)
1907 modifier = NONE;
1908 else if (nunits == gather_off_nunits / 2)
1910 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1911 modifier = WIDEN;
1913 for (i = 0; i < gather_off_nunits; ++i)
1914 sel[i] = i | nunits;
1916 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
1917 gcc_assert (perm_mask != NULL_TREE);
1919 else if (nunits == gather_off_nunits * 2)
1921 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1922 modifier = NARROW;
1924 for (i = 0; i < nunits; ++i)
1925 sel[i] = i < gather_off_nunits
1926 ? i : i + nunits - gather_off_nunits;
1928 perm_mask = vect_gen_perm_mask (vectype, sel);
1929 gcc_assert (perm_mask != NULL_TREE);
1930 ncopies *= 2;
1931 for (i = 0; i < nunits; ++i)
1932 sel[i] = i | gather_off_nunits;
1933 mask_perm_mask = vect_gen_perm_mask (masktype, sel);
1934 gcc_assert (mask_perm_mask != NULL_TREE);
1936 else
1937 gcc_unreachable ();
1939 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1941 ptr = fold_convert (ptrtype, gather_base);
1942 if (!is_gimple_min_invariant (ptr))
1944 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1945 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1946 gcc_assert (!new_bb);
1949 scale = build_int_cst (scaletype, gather_scale);
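/* Generate one gather builtin call per copy. Each iteration prepares the
   vectorized offset and mask operands (either creating new defs or
   permuting the previous ones), emits the call, and view-converts the
   result back to VECTYPE when the builtin returns a different
   same-sized vector type. */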
1951 prev_stmt_info = NULL;
1952 for (j = 0; j < ncopies; ++j)
1954 if (modifier == WIDEN && (j & 1))
1955 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1956 perm_mask, stmt, gsi);
1957 else if (j == 0)
1958 op = vec_oprnd0
1959 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1960 else
1961 op = vec_oprnd0
1962 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1964 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1966 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1967 == TYPE_VECTOR_SUBPARTS (idxtype));
1968 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1969 var = make_ssa_name (var, NULL);
1970 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1971 new_stmt
1972 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
1973 op, NULL_TREE);
1974 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1975 op = var;
1978 if (mask_perm_mask && (j & 1))
1979 mask_op = permute_vec_elements (mask_op, mask_op,
1980 mask_perm_mask, stmt, gsi);
1981 else
1983 if (j == 0)
1984 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1985 else
1987 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1988 &def_stmt, &def, &dt);
1989 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1992 mask_op = vec_mask;
1993 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1995 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1996 == TYPE_VECTOR_SUBPARTS (masktype));
1997 var = vect_get_new_vect_var (masktype, vect_simple_var,
1998 NULL);
1999 var = make_ssa_name (var, NULL);
2000 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2001 new_stmt
2002 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
2003 mask_op, NULL_TREE);
2004 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2005 mask_op = var;
2009 new_stmt
2010 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2011 scale);
2013 if (!useless_type_conversion_p (vectype, rettype))
2015 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2016 == TYPE_VECTOR_SUBPARTS (rettype));
2017 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2018 op = make_ssa_name (var, new_stmt);
2019 gimple_call_set_lhs (new_stmt, op);
2020 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2021 var = make_ssa_name (vec_dest, NULL);
2022 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2023 new_stmt
2024 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
2025 NULL_TREE);
2027 else
2029 var = make_ssa_name (vec_dest, new_stmt);
2030 gimple_call_set_lhs (new_stmt, var);
2033 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2035 if (modifier == NARROW)
2037 if ((j & 1) == 0)
2039 prev_res = var;
2040 continue;
2042 var = permute_vec_elements (prev_res, var,
2043 perm_mask, stmt, gsi);
2044 new_stmt = SSA_NAME_DEF_STMT (var);
2047 if (prev_stmt_info == NULL)
2048 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2049 else
2050 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2051 prev_stmt_info = vinfo_for_stmt (new_stmt);
2054 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2055 from the IL. */
2056 tree lhs = gimple_call_lhs (stmt);
2057 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2058 set_vinfo_for_stmt (new_stmt, stmt_info);
2059 set_vinfo_for_stmt (stmt, NULL);
2060 STMT_VINFO_STMT (stmt_info) = new_stmt;
2061 gsi_replace (gsi, new_stmt, true);
2062 return true;
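/* A masked store is vectorized into NCOPIES IFN_MASK_STORE calls, each
   taking the current data pointer, the alignment argument of the scalar
   call, the vectorized mask and the vectorized rhs. */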
2064 else if (is_store)
2066 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2067 prev_stmt_info = NULL;
2068 for (i = 0; i < ncopies; i++)
2070 unsigned align, misalign;
2072 if (i == 0)
2074 tree rhs = gimple_call_arg (stmt, 3);
2075 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2076 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2077 /* We should have caught mismatched types earlier. */
2078 gcc_assert (useless_type_conversion_p (vectype,
2079 TREE_TYPE (vec_rhs)));
2080 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2081 NULL_TREE, &dummy, gsi,
2082 &ptr_incr, false, &inv_p);
2083 gcc_assert (!inv_p);
2085 else
2087 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2088 &def, &dt);
2089 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2090 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2091 &def, &dt);
2092 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2093 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2094 TYPE_SIZE_UNIT (vectype));
2097 align = TYPE_ALIGN_UNIT (vectype);
2098 if (aligned_access_p (dr))
2099 misalign = 0;
2100 else if (DR_MISALIGNMENT (dr) == -1)
2102 align = TYPE_ALIGN_UNIT (elem_type);
2103 misalign = 0;
2105 else
2106 misalign = DR_MISALIGNMENT (dr);
2107 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2108 misalign);
2109 new_stmt
2110 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2111 gimple_call_arg (stmt, 1),
2112 vec_mask, vec_rhs);
2113 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2114 if (i == 0)
2115 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2116 else
2117 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2118 prev_stmt_info = vinfo_for_stmt (new_stmt);
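/* A masked load is vectorized into NCOPIES IFN_MASK_LOAD calls, each
   defining a fresh vector SSA name from the current data pointer, the
   alignment argument of the scalar call and the vectorized mask. */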
2121 else
2123 tree vec_mask = NULL_TREE;
2124 prev_stmt_info = NULL;
2125 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2126 for (i = 0; i < ncopies; i++)
2128 unsigned align, misalign;
2130 if (i == 0)
2132 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2133 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2134 NULL_TREE, &dummy, gsi,
2135 &ptr_incr, false, &inv_p);
2136 gcc_assert (!inv_p);
2138 else
2140 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2141 &def, &dt);
2142 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2143 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2144 TYPE_SIZE_UNIT (vectype));
2147 align = TYPE_ALIGN_UNIT (vectype);
2148 if (aligned_access_p (dr))
2149 misalign = 0;
2150 else if (DR_MISALIGNMENT (dr) == -1)
2152 align = TYPE_ALIGN_UNIT (elem_type);
2153 misalign = 0;
2155 else
2156 misalign = DR_MISALIGNMENT (dr);
2157 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2158 misalign);
2159 new_stmt
2160 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2161 gimple_call_arg (stmt, 1),
2162 vec_mask);
2163 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest, NULL));
2164 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2165 if (i == 0)
2166 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2167 else
2168 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2169 prev_stmt_info = vinfo_for_stmt (new_stmt);
2173 if (!is_store)
2175 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2176 from the IL. */
2177 tree lhs = gimple_call_lhs (stmt);
2178 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2179 set_vinfo_for_stmt (new_stmt, stmt_info);
2180 set_vinfo_for_stmt (stmt, NULL);
2181 STMT_VINFO_STMT (stmt_info) = new_stmt;
2182 gsi_replace (gsi, new_stmt, true);
2185 return true;
2189 /* Function vectorizable_call.
2191 Check if GS performs a function call that can be vectorized.
2192 If VEC_STMT is also passed, vectorize GS: create a vectorized
2193 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2194 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2196 static bool
2197 vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2198 slp_tree slp_node)
2200 gcall *stmt;
2201 tree vec_dest;
2202 tree scalar_dest;
2203 tree op, type;
2204 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2205 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2206 tree vectype_out, vectype_in;
2207 int nunits_in;
2208 int nunits_out;
2209 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2210 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2211 tree fndecl, new_temp, def, rhs_type;
2212 gimple def_stmt;
2213 enum vect_def_type dt[3]
2214 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2215 gimple new_stmt = NULL;
2216 int ncopies, j;
2217 vec<tree> vargs = vNULL;
2218 enum { NARROW, NONE, WIDEN } modifier;
2219 size_t i, nargs;
2220 tree lhs;
2222 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2223 return false;
2225 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2226 return false;
2228 /* Is GS a vectorizable call? */
2229 stmt = dyn_cast <gcall *> (gs);
2230 if (!stmt)
2231 return false;
2233 if (gimple_call_internal_p (stmt)
2234 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2235 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2236 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2237 slp_node);
2239 if (gimple_call_lhs (stmt) == NULL_TREE
2240 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2241 return false;
2243 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2245 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2247 /* Process function arguments. */
2248 rhs_type = NULL_TREE;
2249 vectype_in = NULL_TREE;
2250 nargs = gimple_call_num_args (stmt);
2252 /* Bail out if the function has more than three arguments; we do not have
2253 interesting builtin functions to vectorize with more than two arguments
2254 except for fma. Calls with no arguments are not handled either. */
2255 if (nargs == 0 || nargs > 3)
2256 return false;
2258 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2259 if (gimple_call_internal_p (stmt)
2260 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2262 nargs = 0;
2263 rhs_type = unsigned_type_node;
2266 for (i = 0; i < nargs; i++)
2268 tree opvectype;
2270 op = gimple_call_arg (stmt, i);
2272 /* We can only handle calls with arguments of the same type. */
2273 if (rhs_type
2274 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2276 if (dump_enabled_p ())
2277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2278 "argument types differ.\n");
2279 return false;
2281 if (!rhs_type)
2282 rhs_type = TREE_TYPE (op);
2284 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2285 &def_stmt, &def, &dt[i], &opvectype))
2287 if (dump_enabled_p ())
2288 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2289 "use not simple.\n");
2290 return false;
2293 if (!vectype_in)
2294 vectype_in = opvectype;
2295 else if (opvectype
2296 && opvectype != vectype_in)
2298 if (dump_enabled_p ())
2299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2300 "argument vector types differ.\n");
2301 return false;
2304 /* If all arguments are external or constant defs, use a vector type with
2305 the same size as the output vector type. */
2306 if (!vectype_in)
2307 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2308 if (vec_stmt)
2309 gcc_assert (vectype_in);
2310 if (!vectype_in)
2312 if (dump_enabled_p ())
2314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2315 "no vectype for scalar type ");
2316 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2317 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2320 return false;
2323 /* FORNOW */
2324 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2325 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
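/* Classify the call by the ratio of input to output vector lanes: NONE
   needs one vectorized call per copy, NARROW means every output vector
   is computed from two input vectors (the scalar result is narrower
   than the arguments), and the WIDEN case is not implemented because no
   current target provides such builtins. */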
2326 if (nunits_in == nunits_out / 2)
2327 modifier = NARROW;
2328 else if (nunits_out == nunits_in)
2329 modifier = NONE;
2330 else if (nunits_out == nunits_in / 2)
2331 modifier = WIDEN;
2332 else
2333 return false;
2335 /* For now, we only vectorize functions if a target specific builtin
2336 is available. TODO -- in some cases, it might be profitable to
2337 insert the calls for pieces of the vector, in order to be able
2338 to vectorize other operations in the loop. */
2339 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2340 if (fndecl == NULL_TREE)
2342 if (gimple_call_internal_p (stmt)
2343 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2344 && !slp_node
2345 && loop_vinfo
2346 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2347 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2348 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2349 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2351 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2352 { 0, 1, 2, ... vf - 1 } vector. */
2353 gcc_assert (nargs == 0);
2355 else
2357 if (dump_enabled_p ())
2358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2359 "function is not vectorizable.\n");
2360 return false;
2364 gcc_assert (!gimple_vuse (stmt));
2366 if (slp_node || PURE_SLP_STMT (stmt_info))
2367 ncopies = 1;
2368 else if (modifier == NARROW)
2369 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2370 else
2371 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2373 /* Sanity check: make sure that at least one copy of the vectorized stmt
2374 needs to be generated. */
2375 gcc_assert (ncopies >= 1);
2377 if (!vec_stmt) /* transformation not required. */
2379 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2380 if (dump_enabled_p ())
2381 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2382 "\n");
2383 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2384 return true;
2387 /** Transform. **/
2389 if (dump_enabled_p ())
2390 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2392 /* Handle def. */
2393 scalar_dest = gimple_call_lhs (stmt);
2394 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2396 prev_stmt_info = NULL;
2397 switch (modifier)
2399 case NONE:
2400 for (j = 0; j < ncopies; ++j)
2402 /* Build argument list for the vectorized call. */
2403 if (j == 0)
2404 vargs.create (nargs);
2405 else
2406 vargs.truncate (0);
2408 if (slp_node)
2410 auto_vec<vec<tree> > vec_defs (nargs);
2411 vec<tree> vec_oprnds0;
2413 for (i = 0; i < nargs; i++)
2414 vargs.quick_push (gimple_call_arg (stmt, i));
2415 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2416 vec_oprnds0 = vec_defs[0];
2418 /* Arguments are ready. Create the new vector stmt. */
2419 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2421 size_t k;
2422 for (k = 0; k < nargs; k++)
2424 vec<tree> vec_oprndsk = vec_defs[k];
2425 vargs[k] = vec_oprndsk[i];
2427 new_stmt = gimple_build_call_vec (fndecl, vargs);
2428 new_temp = make_ssa_name (vec_dest, new_stmt);
2429 gimple_call_set_lhs (new_stmt, new_temp);
2430 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2431 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2434 for (i = 0; i < nargs; i++)
2436 vec<tree> vec_oprndsi = vec_defs[i];
2437 vec_oprndsi.release ();
2439 continue;
2442 for (i = 0; i < nargs; i++)
2444 op = gimple_call_arg (stmt, i);
2445 if (j == 0)
2446 vec_oprnd0
2447 = vect_get_vec_def_for_operand (op, stmt, NULL);
2448 else
2450 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2451 vec_oprnd0
2452 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2455 vargs.quick_push (vec_oprnd0);
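/* IFN_GOMP_SIMD_LANE has no target builtin; it simply produces the lane
   numbers, so materialize the constant vector { j * nunits_out, ...,
   j * nunits_out + nunits_out - 1 } for this copy instead of emitting
   a call. */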
2458 if (gimple_call_internal_p (stmt)
2459 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2461 tree *v = XALLOCAVEC (tree, nunits_out);
2462 int k;
2463 for (k = 0; k < nunits_out; ++k)
2464 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2465 tree cst = build_vector (vectype_out, v);
2466 tree new_var
2467 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2468 gimple init_stmt = gimple_build_assign (new_var, cst);
2469 new_temp = make_ssa_name (new_var, init_stmt);
2470 gimple_assign_set_lhs (init_stmt, new_temp);
2471 vect_init_vector_1 (stmt, init_stmt, NULL);
2472 new_temp = make_ssa_name (vec_dest, NULL);
2473 new_stmt = gimple_build_assign (new_temp,
2474 gimple_assign_lhs (init_stmt));
2476 else
2478 new_stmt = gimple_build_call_vec (fndecl, vargs);
2479 new_temp = make_ssa_name (vec_dest, new_stmt);
2480 gimple_call_set_lhs (new_stmt, new_temp);
2482 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2484 if (j == 0)
2485 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2486 else
2487 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2489 prev_stmt_info = vinfo_for_stmt (new_stmt);
2492 break;
2494 case NARROW:
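/* For a narrowing call every result vector consumes two vector defs of
   each argument, so the argument list holds nargs * 2 entries and SLP
   defs are pushed in pairs. */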
2495 for (j = 0; j < ncopies; ++j)
2497 /* Build argument list for the vectorized call. */
2498 if (j == 0)
2499 vargs.create (nargs * 2);
2500 else
2501 vargs.truncate (0);
2503 if (slp_node)
2505 auto_vec<vec<tree> > vec_defs (nargs);
2506 vec<tree> vec_oprnds0;
2508 for (i = 0; i < nargs; i++)
2509 vargs.quick_push (gimple_call_arg (stmt, i));
2510 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2511 vec_oprnds0 = vec_defs[0];
2513 /* Arguments are ready. Create the new vector stmt. */
2514 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2516 size_t k;
2517 vargs.truncate (0);
2518 for (k = 0; k < nargs; k++)
2520 vec<tree> vec_oprndsk = vec_defs[k];
2521 vargs.quick_push (vec_oprndsk[i]);
2522 vargs.quick_push (vec_oprndsk[i + 1]);
2524 new_stmt = gimple_build_call_vec (fndecl, vargs);
2525 new_temp = make_ssa_name (vec_dest, new_stmt);
2526 gimple_call_set_lhs (new_stmt, new_temp);
2527 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2528 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2531 for (i = 0; i < nargs; i++)
2533 vec<tree> vec_oprndsi = vec_defs[i];
2534 vec_oprndsi.release ();
2536 continue;
2539 for (i = 0; i < nargs; i++)
2541 op = gimple_call_arg (stmt, i);
2542 if (j == 0)
2544 vec_oprnd0
2545 = vect_get_vec_def_for_operand (op, stmt, NULL);
2546 vec_oprnd1
2547 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2549 else
2551 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2552 vec_oprnd0
2553 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2554 vec_oprnd1
2555 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2558 vargs.quick_push (vec_oprnd0);
2559 vargs.quick_push (vec_oprnd1);
2562 new_stmt = gimple_build_call_vec (fndecl, vargs);
2563 new_temp = make_ssa_name (vec_dest, new_stmt);
2564 gimple_call_set_lhs (new_stmt, new_temp);
2565 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2567 if (j == 0)
2568 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2569 else
2570 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2572 prev_stmt_info = vinfo_for_stmt (new_stmt);
2575 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2577 break;
2579 case WIDEN:
2580 /* No current target implements this case. */
2581 return false;
2584 vargs.release ();
2586 /* The call in STMT might prevent it from being removed in dce.
2587 We cannot remove it here, however, because of the way the ssa name
2588 it defines is mapped to the new definition. So just replace the
2589 rhs of the statement with something harmless. */
2591 if (slp_node)
2592 return true;
2594 type = TREE_TYPE (scalar_dest);
2595 if (is_pattern_stmt_p (stmt_info))
2596 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2597 else
2598 lhs = gimple_call_lhs (stmt);
2599 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2600 set_vinfo_for_stmt (new_stmt, stmt_info);
2601 set_vinfo_for_stmt (stmt, NULL);
2602 STMT_VINFO_STMT (stmt_info) = new_stmt;
2603 gsi_replace (gsi, new_stmt, false);
2605 return true;
2609 struct simd_call_arg_info
2611 tree vectype;
2612 tree op;
2613 enum vect_def_type dt;
2614 HOST_WIDE_INT linear_step;
2615 unsigned int align;
2618 /* Function vectorizable_simd_clone_call.
2620 Check if STMT performs a function call that can be vectorized
2621 by calling a simd clone of the function.
2622 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2623 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2624 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2626 static bool
2627 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2628 gimple *vec_stmt, slp_tree slp_node)
2630 tree vec_dest;
2631 tree scalar_dest;
2632 tree op, type;
2633 tree vec_oprnd0 = NULL_TREE;
2634 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2635 tree vectype;
2636 unsigned int nunits;
2637 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2638 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2639 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2640 tree fndecl, new_temp, def;
2641 gimple def_stmt;
2642 gimple new_stmt = NULL;
2643 int ncopies, j;
2644 vec<simd_call_arg_info> arginfo = vNULL;
2645 vec<tree> vargs = vNULL;
2646 size_t i, nargs;
2647 tree lhs, rtype, ratype;
2648 vec<constructor_elt, va_gc> *ret_ctor_elts;
2650 /* Is STMT a vectorizable call? */
2651 if (!is_gimple_call (stmt))
2652 return false;
2654 fndecl = gimple_call_fndecl (stmt);
2655 if (fndecl == NULL_TREE)
2656 return false;
2658 struct cgraph_node *node = cgraph_node::get (fndecl);
2659 if (node == NULL || node->simd_clones == NULL)
2660 return false;
2662 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2663 return false;
2665 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2666 return false;
2668 if (gimple_call_lhs (stmt)
2669 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2670 return false;
2672 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2674 vectype = STMT_VINFO_VECTYPE (stmt_info);
2676 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2677 return false;
2679 /* FORNOW */
2680 if (slp_node || PURE_SLP_STMT (stmt_info))
2681 return false;
2683 /* Process function arguments. */
2684 nargs = gimple_call_num_args (stmt);
2686 /* Bail out if the function has zero arguments. */
2687 if (nargs == 0)
2688 return false;
2690 arginfo.create (nargs);
2692 for (i = 0; i < nargs; i++)
2694 simd_call_arg_info thisarginfo;
2695 affine_iv iv;
2697 thisarginfo.linear_step = 0;
2698 thisarginfo.align = 0;
2699 thisarginfo.op = NULL_TREE;
2701 op = gimple_call_arg (stmt, i);
2702 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2703 &def_stmt, &def, &thisarginfo.dt,
2704 &thisarginfo.vectype)
2705 || thisarginfo.dt == vect_uninitialized_def)
2707 if (dump_enabled_p ())
2708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2709 "use not simple.\n");
2710 arginfo.release ();
2711 return false;
2714 if (thisarginfo.dt == vect_constant_def
2715 || thisarginfo.dt == vect_external_def)
2716 gcc_assert (thisarginfo.vectype == NULL_TREE);
2717 else
2718 gcc_assert (thisarginfo.vectype != NULL_TREE);
2720 if (thisarginfo.dt != vect_constant_def
2721 && thisarginfo.dt != vect_external_def
2722 && loop_vinfo
2723 && TREE_CODE (op) == SSA_NAME
2724 && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
2725 && tree_fits_shwi_p (iv.step))
2727 thisarginfo.linear_step = tree_to_shwi (iv.step);
2728 thisarginfo.op = iv.base;
2730 else if ((thisarginfo.dt == vect_constant_def
2731 || thisarginfo.dt == vect_external_def)
2732 && POINTER_TYPE_P (TREE_TYPE (op)))
2733 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2735 arginfo.quick_push (thisarginfo);
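/* Select which simd clone to call. Clones whose simdlen exceeds the
   vectorization factor, whose argument count or argument kinds do not
   match, that the target refuses, that require a mask (inbranch), or
   that demand more alignment than an argument provides are skipped;
   the remaining candidates are scored (a simdlen smaller than the
   vectorization factor and target-specific penalties increase the
   badness) and the clone with the lowest score wins. */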
2738 unsigned int badness = 0;
2739 struct cgraph_node *bestn = NULL;
2740 if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
2741 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
2742 else
2743 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2744 n = n->simdclone->next_clone)
2746 unsigned int this_badness = 0;
2747 if (n->simdclone->simdlen
2748 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2749 || n->simdclone->nargs != nargs)
2750 continue;
2751 if (n->simdclone->simdlen
2752 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2753 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2754 - exact_log2 (n->simdclone->simdlen)) * 1024;
2755 if (n->simdclone->inbranch)
2756 this_badness += 2048;
2757 int target_badness = targetm.simd_clone.usable (n);
2758 if (target_badness < 0)
2759 continue;
2760 this_badness += target_badness * 512;
2761 /* FORNOW: Have to add code to pass the mask argument. */
2762 if (n->simdclone->inbranch)
2763 continue;
2764 for (i = 0; i < nargs; i++)
2766 switch (n->simdclone->args[i].arg_type)
2768 case SIMD_CLONE_ARG_TYPE_VECTOR:
2769 if (!useless_type_conversion_p
2770 (n->simdclone->args[i].orig_type,
2771 TREE_TYPE (gimple_call_arg (stmt, i))))
2772 i = -1;
2773 else if (arginfo[i].dt == vect_constant_def
2774 || arginfo[i].dt == vect_external_def
2775 || arginfo[i].linear_step)
2776 this_badness += 64;
2777 break;
2778 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2779 if (arginfo[i].dt != vect_constant_def
2780 && arginfo[i].dt != vect_external_def)
2781 i = -1;
2782 break;
2783 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2784 if (arginfo[i].dt == vect_constant_def
2785 || arginfo[i].dt == vect_external_def
2786 || (arginfo[i].linear_step
2787 != n->simdclone->args[i].linear_step))
2788 i = -1;
2789 break;
2790 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2791 /* FORNOW */
2792 i = -1;
2793 break;
2794 case SIMD_CLONE_ARG_TYPE_MASK:
2795 gcc_unreachable ();
2797 if (i == (size_t) -1)
2798 break;
2799 if (n->simdclone->args[i].alignment > arginfo[i].align)
2801 i = -1;
2802 break;
2804 if (arginfo[i].align)
2805 this_badness += (exact_log2 (arginfo[i].align)
2806 - exact_log2 (n->simdclone->args[i].alignment));
2808 if (i == (size_t) -1)
2809 continue;
2810 if (bestn == NULL || this_badness < badness)
2812 bestn = n;
2813 badness = this_badness;
2817 if (bestn == NULL)
2819 arginfo.release ();
2820 return false;
2823 for (i = 0; i < nargs; i++)
2824 if ((arginfo[i].dt == vect_constant_def
2825 || arginfo[i].dt == vect_external_def)
2826 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2828 arginfo[i].vectype
2829 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2830 i)));
2831 if (arginfo[i].vectype == NULL
2832 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2833 > bestn->simdclone->simdlen))
2835 arginfo.release ();
2836 return false;
2840 fndecl = bestn->decl;
2841 nunits = bestn->simdclone->simdlen;
2842 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2844 /* If the function isn't const, only allow it in simd loops where the user
2845 has asserted that at least nunits consecutive iterations can be
2846 performed using SIMD instructions. */
2847 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2848 && gimple_vuse (stmt))
2850 arginfo.release ();
2851 return false;
2854 /* Sanity check: make sure that at least one copy of the vectorized stmt
2855 needs to be generated. */
2856 gcc_assert (ncopies >= 1);
2858 if (!vec_stmt) /* transformation not required. */
2860 STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
2861 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2862 if (dump_enabled_p ())
2863 dump_printf_loc (MSG_NOTE, vect_location,
2864 "=== vectorizable_simd_clone_call ===\n");
2865 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2866 arginfo.release ();
2867 return true;
2870 /** Transform. **/
2872 if (dump_enabled_p ())
2873 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2875 /* Handle def. */
2876 scalar_dest = gimple_call_lhs (stmt);
2877 vec_dest = NULL_TREE;
2878 rtype = NULL_TREE;
2879 ratype = NULL_TREE;
2880 if (scalar_dest)
2882 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2883 rtype = TREE_TYPE (TREE_TYPE (fndecl));
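/* A simd clone may return its result as an array of vectors; in that
   case remember the array type in RATYPE and use its element vector
   type as RTYPE. */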
2884 if (TREE_CODE (rtype) == ARRAY_TYPE)
2886 ratype = rtype;
2887 rtype = TREE_TYPE (ratype);
2891 prev_stmt_info = NULL;
2892 for (j = 0; j < ncopies; ++j)
2894 /* Build argument list for the vectorized call. */
2895 if (j == 0)
2896 vargs.create (nargs);
2897 else
2898 vargs.truncate (0);
2900 for (i = 0; i < nargs; i++)
2902 unsigned int k, l, m, o;
2903 tree atype;
2904 op = gimple_call_arg (stmt, i);
2905 switch (bestn->simdclone->args[i].arg_type)
2907 case SIMD_CLONE_ARG_TYPE_VECTOR:
2908 atype = bestn->simdclone->args[i].vector_type;
2909 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2910 for (m = j * o; m < (j + 1) * o; m++)
2912 if (TYPE_VECTOR_SUBPARTS (atype)
2913 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2915 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2916 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2917 / TYPE_VECTOR_SUBPARTS (atype));
2918 gcc_assert ((k & (k - 1)) == 0);
2919 if (m == 0)
2920 vec_oprnd0
2921 = vect_get_vec_def_for_operand (op, stmt, NULL);
2922 else
2924 vec_oprnd0 = arginfo[i].op;
2925 if ((m & (k - 1)) == 0)
2926 vec_oprnd0
2927 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2928 vec_oprnd0);
2930 arginfo[i].op = vec_oprnd0;
2931 vec_oprnd0
2932 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2933 size_int (prec),
2934 bitsize_int ((m & (k - 1)) * prec));
2935 new_stmt
2936 = gimple_build_assign (make_ssa_name (atype, NULL),
2937 vec_oprnd0);
2938 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2939 vargs.safe_push (gimple_assign_lhs (new_stmt));
2941 else
2943 k = (TYPE_VECTOR_SUBPARTS (atype)
2944 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2945 gcc_assert ((k & (k - 1)) == 0);
2946 vec<constructor_elt, va_gc> *ctor_elts;
2947 if (k != 1)
2948 vec_alloc (ctor_elts, k);
2949 else
2950 ctor_elts = NULL;
2951 for (l = 0; l < k; l++)
2953 if (m == 0 && l == 0)
2954 vec_oprnd0
2955 = vect_get_vec_def_for_operand (op, stmt, NULL);
2956 else
2957 vec_oprnd0
2958 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2959 arginfo[i].op);
2960 arginfo[i].op = vec_oprnd0;
2961 if (k == 1)
2962 break;
2963 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
2964 vec_oprnd0);
2966 if (k == 1)
2967 vargs.safe_push (vec_oprnd0);
2968 else
2970 vec_oprnd0 = build_constructor (atype, ctor_elts);
2971 new_stmt
2972 = gimple_build_assign (make_ssa_name (atype, NULL),
2973 vec_oprnd0);
2974 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2975 vargs.safe_push (gimple_assign_lhs (new_stmt));
2979 break;
2980 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2981 vargs.safe_push (op);
2982 break;
2983 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
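/* A linear argument advances by a constant step per lane. For the
   first copy build a PHI in the loop header that starts at the
   (gimplified) base value and is incremented by
   linear_step * ncopies * nunits on the latch edge; later copies
   simply add j * nunits times the step to that PHI result. */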
2984 if (j == 0)
2986 gimple_seq stmts;
2987 arginfo[i].op
2988 = force_gimple_operand (arginfo[i].op, &stmts, true,
2989 NULL_TREE);
2990 if (stmts != NULL)
2992 basic_block new_bb;
2993 edge pe = loop_preheader_edge (loop);
2994 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2995 gcc_assert (!new_bb);
2997 tree phi_res = copy_ssa_name (op, NULL);
2998 gphi *new_phi = create_phi_node (phi_res, loop->header);
2999 set_vinfo_for_stmt (new_phi,
3000 new_stmt_vec_info (new_phi, loop_vinfo,
3001 NULL));
3002 add_phi_arg (new_phi, arginfo[i].op,
3003 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3004 enum tree_code code
3005 = POINTER_TYPE_P (TREE_TYPE (op))
3006 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3007 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3008 ? sizetype : TREE_TYPE (op);
3009 widest_int cst
3010 = wi::mul (bestn->simdclone->args[i].linear_step,
3011 ncopies * nunits);
3012 tree tcst = wide_int_to_tree (type, cst);
3013 tree phi_arg = copy_ssa_name (op, NULL);
3014 new_stmt = gimple_build_assign_with_ops (code, phi_arg,
3015 phi_res, tcst);
3016 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3017 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3018 set_vinfo_for_stmt (new_stmt,
3019 new_stmt_vec_info (new_stmt, loop_vinfo,
3020 NULL));
3021 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3022 UNKNOWN_LOCATION);
3023 arginfo[i].op = phi_res;
3024 vargs.safe_push (phi_res);
3026 else
3028 enum tree_code code
3029 = POINTER_TYPE_P (TREE_TYPE (op))
3030 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3031 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3032 ? sizetype : TREE_TYPE (op);
3033 widest_int cst
3034 = wi::mul (bestn->simdclone->args[i].linear_step,
3035 j * nunits);
3036 tree tcst = wide_int_to_tree (type, cst);
3037 new_temp = make_ssa_name (TREE_TYPE (op), NULL);
3038 new_stmt
3039 = gimple_build_assign_with_ops (code, new_temp,
3040 arginfo[i].op, tcst);
3041 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3042 vargs.safe_push (new_temp);
3044 break;
3045 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3046 default:
3047 gcc_unreachable ();
3051 new_stmt = gimple_build_call_vec (fndecl, vargs);
3052 if (vec_dest)
3054 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3055 if (ratype)
3056 new_temp = create_tmp_var (ratype, NULL);
3057 else if (TYPE_VECTOR_SUBPARTS (vectype)
3058 == TYPE_VECTOR_SUBPARTS (rtype))
3059 new_temp = make_ssa_name (vec_dest, new_stmt);
3060 else
3061 new_temp = make_ssa_name (rtype, new_stmt);
3062 gimple_call_set_lhs (new_stmt, new_temp);
3064 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3066 if (vec_dest)
3068 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3070 unsigned int k, l;
3071 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3072 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3073 gcc_assert ((k & (k - 1)) == 0);
3074 for (l = 0; l < k; l++)
3076 tree t;
3077 if (ratype)
3079 t = build_fold_addr_expr (new_temp);
3080 t = build2 (MEM_REF, vectype, t,
3081 build_int_cst (TREE_TYPE (t),
3082 l * prec / BITS_PER_UNIT));
3084 else
3085 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3086 size_int (prec), bitsize_int (l * prec));
3087 new_stmt
3088 = gimple_build_assign (make_ssa_name (vectype, NULL), t);
3089 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3090 if (j == 0 && l == 0)
3091 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3092 else
3093 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3095 prev_stmt_info = vinfo_for_stmt (new_stmt);
3098 if (ratype)
3100 tree clobber = build_constructor (ratype, NULL);
3101 TREE_THIS_VOLATILE (clobber) = 1;
3102 new_stmt = gimple_build_assign (new_temp, clobber);
3103 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3105 continue;
3107 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3109 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3110 / TYPE_VECTOR_SUBPARTS (rtype));
3111 gcc_assert ((k & (k - 1)) == 0);
3112 if ((j & (k - 1)) == 0)
3113 vec_alloc (ret_ctor_elts, k);
3114 if (ratype)
3116 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3117 for (m = 0; m < o; m++)
3119 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3120 size_int (m), NULL_TREE, NULL_TREE);
3121 new_stmt
3122 = gimple_build_assign (make_ssa_name (rtype, NULL),
3123 tem);
3124 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3125 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3126 gimple_assign_lhs (new_stmt));
3128 tree clobber = build_constructor (ratype, NULL);
3129 TREE_THIS_VOLATILE (clobber) = 1;
3130 new_stmt = gimple_build_assign (new_temp, clobber);
3131 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3133 else
3134 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3135 if ((j & (k - 1)) != k - 1)
3136 continue;
3137 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3138 new_stmt
3139 = gimple_build_assign (make_ssa_name (vec_dest, NULL),
3140 vec_oprnd0);
3141 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3143 if ((unsigned) j == k - 1)
3144 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3145 else
3146 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3148 prev_stmt_info = vinfo_for_stmt (new_stmt);
3149 continue;
3151 else if (ratype)
3153 tree t = build_fold_addr_expr (new_temp);
3154 t = build2 (MEM_REF, vectype, t,
3155 build_int_cst (TREE_TYPE (t), 0));
3156 new_stmt
3157 = gimple_build_assign (make_ssa_name (vec_dest, NULL), t);
3158 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3159 tree clobber = build_constructor (ratype, NULL);
3160 TREE_THIS_VOLATILE (clobber) = 1;
3161 vect_finish_stmt_generation (stmt,
3162 gimple_build_assign (new_temp,
3163 clobber), gsi);
3167 if (j == 0)
3168 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3169 else
3170 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3172 prev_stmt_info = vinfo_for_stmt (new_stmt);
3175 vargs.release ();
3177 /* The call in STMT might prevent it from being removed in dce.
3178 We cannot remove it here, however, because of the way the ssa name
3179 it defines is mapped to the new definition. So just replace the
3180 rhs of the statement with something harmless. */
3182 if (slp_node)
3183 return true;
3185 if (scalar_dest)
3187 type = TREE_TYPE (scalar_dest);
3188 if (is_pattern_stmt_p (stmt_info))
3189 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3190 else
3191 lhs = gimple_call_lhs (stmt);
3192 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3194 else
3195 new_stmt = gimple_build_nop ();
3196 set_vinfo_for_stmt (new_stmt, stmt_info);
3197 set_vinfo_for_stmt (stmt, NULL);
3198 STMT_VINFO_STMT (stmt_info) = new_stmt;
3199 gsi_replace (gsi, new_stmt, false);
3200 unlink_stmt_vdef (stmt);
3202 return true;
3206 /* Function vect_gen_widened_results_half
3208 Create a vector stmt whose code, number of arguments, and result
3209 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3210 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3211 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3212 needs to be created (DECL is a function-decl of a target-builtin).
3213 STMT is the original scalar stmt that we are vectorizing. */
3215 static gimple
3216 vect_gen_widened_results_half (enum tree_code code,
3217 tree decl,
3218 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3219 tree vec_dest, gimple_stmt_iterator *gsi,
3220 gimple stmt)
3222 gimple new_stmt;
3223 tree new_temp;
3225 /* Generate half of the widened result: */
3226 if (code == CALL_EXPR)
3228 /* Target specific support */
3229 if (op_type == binary_op)
3230 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3231 else
3232 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3233 new_temp = make_ssa_name (vec_dest, new_stmt);
3234 gimple_call_set_lhs (new_stmt, new_temp);
3236 else
3238 /* Generic support */
3239 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3240 if (op_type != binary_op)
3241 vec_oprnd1 = NULL;
3242 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
3243 vec_oprnd1);
3244 new_temp = make_ssa_name (vec_dest, new_stmt);
3245 gimple_assign_set_lhs (new_stmt, new_temp);
3247 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3249 return new_stmt;
3253 /* Get vectorized definitions for loop-based vectorization. For the first
3254 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3255 scalar operand), and for the rest we get a copy with
3256 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3257 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3258 The vectors are collected into VEC_OPRNDS. */
3260 static void
3261 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3262 vec<tree> *vec_oprnds, int multi_step_cvt)
3264 tree vec_oprnd;
3266 /* Get first vector operand. */
3267 /* All the vector operands except the very first one (that is the scalar oprnd)
3268 are stmt copies. */
3269 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3270 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3271 else
3272 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3274 vec_oprnds->quick_push (vec_oprnd);
3276 /* Get second vector operand. */
3277 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3278 vec_oprnds->quick_push (vec_oprnd);
3280 *oprnd = vec_oprnd;
3282 /* For conversion in multiple steps, continue to get operands
3283 recursively. */
3284 if (multi_step_cvt)
3285 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3289 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3290 For multi-step conversions store the resulting vectors and call the function
3291 recursively. */
3293 static void
3294 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3295 int multi_step_cvt, gimple stmt,
3296 vec<tree> vec_dsts,
3297 gimple_stmt_iterator *gsi,
3298 slp_tree slp_node, enum tree_code code,
3299 stmt_vec_info *prev_stmt_info)
3301 unsigned int i;
3302 tree vop0, vop1, new_tmp, vec_dest;
3303 gimple new_stmt;
3304 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3306 vec_dest = vec_dsts.pop ();
3308 for (i = 0; i < vec_oprnds->length (); i += 2)
3310 /* Create demotion operation. */
3311 vop0 = (*vec_oprnds)[i];
3312 vop1 = (*vec_oprnds)[i + 1];
3313 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3314 new_tmp = make_ssa_name (vec_dest, new_stmt);
3315 gimple_assign_set_lhs (new_stmt, new_tmp);
3316 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3318 if (multi_step_cvt)
3319 /* Store the resulting vector for next recursive call. */
3320 (*vec_oprnds)[i/2] = new_tmp;
3321 else
3323 /* This is the last step of the conversion sequence. Store the
3324 vectors in SLP_NODE or in vector info of the scalar statement
3325 (or in STMT_VINFO_RELATED_STMT chain). */
3326 if (slp_node)
3327 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3328 else
3330 if (!*prev_stmt_info)
3331 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3332 else
3333 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3335 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3340 /* For multi-step demotion operations we first generate demotion operations
3341 from the source type to the intermediate types, and then combine the
3342 results (stored in VEC_OPRNDS) in a demotion operation to the destination
3343 type. */
3344 if (multi_step_cvt)
3346 /* At each level of recursion we have half of the operands we had at the
3347 previous level. */
3348 vec_oprnds->truncate ((i+1)/2);
3349 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3350 stmt, vec_dsts, gsi, slp_node,
3351 VEC_PACK_TRUNC_EXPR,
3352 prev_stmt_info);
3355 vec_dsts.quick_push (vec_dest);
3359 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3360 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3361 the resulting vectors and call the function recursively. */
3363 static void
3364 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3365 vec<tree> *vec_oprnds1,
3366 gimple stmt, tree vec_dest,
3367 gimple_stmt_iterator *gsi,
3368 enum tree_code code1,
3369 enum tree_code code2, tree decl1,
3370 tree decl2, int op_type)
3372 int i;
3373 tree vop0, vop1, new_tmp1, new_tmp2;
3374 gimple new_stmt1, new_stmt2;
3375 vec<tree> vec_tmp = vNULL;
3377 vec_tmp.create (vec_oprnds0->length () * 2);
3378 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3380 if (op_type == binary_op)
3381 vop1 = (*vec_oprnds1)[i];
3382 else
3383 vop1 = NULL_TREE;
3385 /* Generate the two halves of promotion operation. */
3386 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3387 op_type, vec_dest, gsi, stmt);
3388 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3389 op_type, vec_dest, gsi, stmt);
3390 if (is_gimple_call (new_stmt1))
3392 new_tmp1 = gimple_call_lhs (new_stmt1);
3393 new_tmp2 = gimple_call_lhs (new_stmt2);
3395 else
3397 new_tmp1 = gimple_assign_lhs (new_stmt1);
3398 new_tmp2 = gimple_assign_lhs (new_stmt2);
3401 /* Store the results for the next step. */
3402 vec_tmp.quick_push (new_tmp1);
3403 vec_tmp.quick_push (new_tmp2);
3406 vec_oprnds0->release ();
3407 *vec_oprnds0 = vec_tmp;
3411 /* Check if STMT performs a conversion operation, that can be vectorized.
3412 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3413 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3414 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3416 static bool
3417 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3418 gimple *vec_stmt, slp_tree slp_node)
3420 tree vec_dest;
3421 tree scalar_dest;
3422 tree op0, op1 = NULL_TREE;
3423 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3424 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3425 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3426 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3427 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3428 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3429 tree new_temp;
3430 tree def;
3431 gimple def_stmt;
3432 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3433 gimple new_stmt = NULL;
3434 stmt_vec_info prev_stmt_info;
3435 int nunits_in;
3436 int nunits_out;
3437 tree vectype_out, vectype_in;
3438 int ncopies, i, j;
3439 tree lhs_type, rhs_type;
3440 enum { NARROW, NONE, WIDEN } modifier;
3441 vec<tree> vec_oprnds0 = vNULL;
3442 vec<tree> vec_oprnds1 = vNULL;
3443 tree vop0;
3444 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3445 int multi_step_cvt = 0;
3446 vec<tree> vec_dsts = vNULL;
3447 vec<tree> interm_types = vNULL;
3448 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3449 int op_type;
3450 enum machine_mode rhs_mode;
3451 unsigned short fltsz;
3453 /* Is STMT a vectorizable conversion? */
3455 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3456 return false;
3458 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3459 return false;
3461 if (!is_gimple_assign (stmt))
3462 return false;
3464 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3465 return false;
3467 code = gimple_assign_rhs_code (stmt);
3468 if (!CONVERT_EXPR_CODE_P (code)
3469 && code != FIX_TRUNC_EXPR
3470 && code != FLOAT_EXPR
3471 && code != WIDEN_MULT_EXPR
3472 && code != WIDEN_LSHIFT_EXPR)
3473 return false;
3475 op_type = TREE_CODE_LENGTH (code);
3477 /* Check types of lhs and rhs. */
3478 scalar_dest = gimple_assign_lhs (stmt);
3479 lhs_type = TREE_TYPE (scalar_dest);
3480 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3482 op0 = gimple_assign_rhs1 (stmt);
3483 rhs_type = TREE_TYPE (op0);
3485 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3486 && !((INTEGRAL_TYPE_P (lhs_type)
3487 && INTEGRAL_TYPE_P (rhs_type))
3488 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3489 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3490 return false;
3492 if ((INTEGRAL_TYPE_P (lhs_type)
3493 && (TYPE_PRECISION (lhs_type)
3494 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3495 || (INTEGRAL_TYPE_P (rhs_type)
3496 && (TYPE_PRECISION (rhs_type)
3497 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3499 if (dump_enabled_p ())
3500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3501 "type conversion to/from bit-precision unsupported."
3502 "\n");
3503 return false;
3506 /* Check the operands of the operation. */
3507 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3508 &def_stmt, &def, &dt[0], &vectype_in))
3510 if (dump_enabled_p ())
3511 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3512 "use not simple.\n");
3513 return false;
3515 if (op_type == binary_op)
3517 bool ok;
3519 op1 = gimple_assign_rhs2 (stmt);
3520 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3521 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3522 OP1. */
3523 if (CONSTANT_CLASS_P (op0))
3524 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3525 &def_stmt, &def, &dt[1], &vectype_in);
3526 else
3527 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3528 &def, &dt[1]);
3530 if (!ok)
3532 if (dump_enabled_p ())
3533 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3534 "use not simple.\n");
3535 return false;
3539 /* If op0 is an external or constant def, use a vector type of
3540 the same size as the output vector type. */
3541 if (!vectype_in)
3542 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3543 if (vec_stmt)
3544 gcc_assert (vectype_in);
3545 if (!vectype_in)
3547 if (dump_enabled_p ())
3549 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3550 "no vectype for scalar type ");
3551 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3552 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3555 return false;
3558 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3559 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3560 if (nunits_in < nunits_out)
3561 modifier = NARROW;
3562 else if (nunits_out == nunits_in)
3563 modifier = NONE;
3564 else
3565 modifier = WIDEN;
3567 /* Multiple types in SLP are handled by creating the appropriate number of
3568 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3569 case of SLP. */
3570 if (slp_node || PURE_SLP_STMT (stmt_info))
3571 ncopies = 1;
3572 else if (modifier == NARROW)
3573 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3574 else
3575 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3577 /* Sanity check: make sure that at least one copy of the vectorized stmt
3578 needs to be generated. */
3579 gcc_assert (ncopies >= 1);
3581 /* Supportable by target? */
3582 switch (modifier)
3584 case NONE:
3585 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3586 return false;
3587 if (supportable_convert_operation (code, vectype_out, vectype_in,
3588 &decl1, &code1))
3589 break;
3590 /* FALLTHRU */
3591 unsupported:
3592 if (dump_enabled_p ())
3593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3594 "conversion not supported by target.\n");
3595 return false;
3597 case WIDEN:
3598 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3599 &code1, &code2, &multi_step_cvt,
3600 &interm_types))
3602 /* A binary widening operation can only be supported directly by the
3603 architecture. */
3604 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3605 break;
3608 if (code != FLOAT_EXPR
3609 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3610 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3611 goto unsupported;
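/* A FLOAT_EXPR whose result is wider than its integer source may still
   be handled in two stages: widen the integer input (NOP_EXPR, possibly
   over several steps) to an intermediate integer type no larger than
   the float type, then convert that intermediate vector to the float
   vector type. */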
3613 rhs_mode = TYPE_MODE (rhs_type);
3614 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3615 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3616 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3617 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3619 cvt_type
3620 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3621 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3622 if (cvt_type == NULL_TREE)
3623 goto unsupported;
3625 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3627 if (!supportable_convert_operation (code, vectype_out,
3628 cvt_type, &decl1, &codecvt1))
3629 goto unsupported;
3631 else if (!supportable_widening_operation (code, stmt, vectype_out,
3632 cvt_type, &codecvt1,
3633 &codecvt2, &multi_step_cvt,
3634 &interm_types))
3635 continue;
3636 else
3637 gcc_assert (multi_step_cvt == 0);
3639 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3640 vectype_in, &code1, &code2,
3641 &multi_step_cvt, &interm_types))
3642 break;
3645 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3646 goto unsupported;
3648 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3649 codecvt2 = ERROR_MARK;
3650 else
3652 multi_step_cvt++;
3653 interm_types.safe_push (cvt_type);
3654 cvt_type = NULL_TREE;
3656 break;
3658 case NARROW:
3659 gcc_assert (op_type == unary_op);
3660 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3661 &code1, &multi_step_cvt,
3662 &interm_types))
3663 break;
3665 if (code != FIX_TRUNC_EXPR
3666 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3667 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3668 goto unsupported;
3670 rhs_mode = TYPE_MODE (rhs_type);
3671 cvt_type
3672 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3673 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3674 if (cvt_type == NULL_TREE)
3675 goto unsupported;
3676 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3677 &decl1, &codecvt1))
3678 goto unsupported;
3679 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3680 &code1, &multi_step_cvt,
3681 &interm_types))
3682 break;
3683 goto unsupported;
3685 default:
3686 gcc_unreachable ();
3689 if (!vec_stmt) /* transformation not required. */
3691 if (dump_enabled_p ())
3692 dump_printf_loc (MSG_NOTE, vect_location,
3693 "=== vectorizable_conversion ===\n");
3694 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3696 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3697 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3699 else if (modifier == NARROW)
3701 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3702 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3704 else
3706 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3707 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3709 interm_types.release ();
3710 return true;
3713 /** Transform. **/
3714 if (dump_enabled_p ())
3715 dump_printf_loc (MSG_NOTE, vect_location,
3716 "transform conversion. ncopies = %d.\n", ncopies);
3718 if (op_type == binary_op)
3720 if (CONSTANT_CLASS_P (op0))
3721 op0 = fold_convert (TREE_TYPE (op1), op0);
3722 else if (CONSTANT_CLASS_P (op1))
3723 op1 = fold_convert (TREE_TYPE (op0), op1);
3726 /* In case of multi-step conversion, we first generate conversion operations
3727 to the intermediate types, and then from those types to the final one.
3728 We create vector destinations for the intermediate types (TYPES) received
3729 from supportable_*_operation, and store them in the correct order
3730 for future use in vect_create_vectorized_*_stmts (). */
3731 vec_dsts.create (multi_step_cvt + 1);
3732 vec_dest = vect_create_destination_var (scalar_dest,
3733 (cvt_type && modifier == WIDEN)
3734 ? cvt_type : vectype_out);
3735 vec_dsts.quick_push (vec_dest);
3737 if (multi_step_cvt)
3739 for (i = interm_types.length () - 1;
3740 interm_types.iterate (i, &intermediate_type); i--)
3742 vec_dest = vect_create_destination_var (scalar_dest,
3743 intermediate_type);
3744 vec_dsts.quick_push (vec_dest);
3748 if (cvt_type)
3749 vec_dest = vect_create_destination_var (scalar_dest,
3750 modifier == WIDEN
3751 ? vectype_out : cvt_type);
3753 if (!slp_node)
3755 if (modifier == WIDEN)
3757 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3758 if (op_type == binary_op)
3759 vec_oprnds1.create (1);
3761 else if (modifier == NARROW)
3762 vec_oprnds0.create (
3763 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3765 else if (code == WIDEN_LSHIFT_EXPR)
3766 vec_oprnds1.create (slp_node->vec_stmts_size);
3768 last_oprnd = op0;
3769 prev_stmt_info = NULL;
3770 switch (modifier)
3772 case NONE:
3773 for (j = 0; j < ncopies; j++)
3775 if (j == 0)
3776 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3777 -1);
3778 else
3779 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3781 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3783 /* Arguments are ready.  Create the new vector stmt. */
3784 if (code1 == CALL_EXPR)
3786 new_stmt = gimple_build_call (decl1, 1, vop0);
3787 new_temp = make_ssa_name (vec_dest, new_stmt);
3788 gimple_call_set_lhs (new_stmt, new_temp);
3790 else
3792 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3793 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
3794 vop0, NULL);
3795 new_temp = make_ssa_name (vec_dest, new_stmt);
3796 gimple_assign_set_lhs (new_stmt, new_temp);
3799 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3800 if (slp_node)
3801 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3804 if (j == 0)
3805 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3806 else
3807 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3808 prev_stmt_info = vinfo_for_stmt (new_stmt);
3810 break;
3812 case WIDEN:
3813 /* In case the vectorization factor (VF) is bigger than the number
3814 of elements that we can fit in a vectype (nunits), we have to
3815 generate more than one vector stmt, i.e., we need to "unroll"
3816 the vector stmt by a factor VF/nunits. */
3817 for (j = 0; j < ncopies; j++)
3819 /* Handle uses. */
3820 if (j == 0)
3822 if (slp_node)
3824 if (code == WIDEN_LSHIFT_EXPR)
3826 unsigned int k;
3828 vec_oprnd1 = op1;
3829 /* Store vec_oprnd1 for every vector stmt to be created
3830 for SLP_NODE. We check during the analysis that all
3831 the shift arguments are the same. */
3832 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3833 vec_oprnds1.quick_push (vec_oprnd1);
3835 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3836 slp_node, -1);
3838 else
3839 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3840 &vec_oprnds1, slp_node, -1);
3842 else
3844 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3845 vec_oprnds0.quick_push (vec_oprnd0);
3846 if (op_type == binary_op)
3848 if (code == WIDEN_LSHIFT_EXPR)
3849 vec_oprnd1 = op1;
3850 else
3851 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3852 NULL);
3853 vec_oprnds1.quick_push (vec_oprnd1);
3857 else
3859 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3860 vec_oprnds0.truncate (0);
3861 vec_oprnds0.quick_push (vec_oprnd0);
3862 if (op_type == binary_op)
3864 if (code == WIDEN_LSHIFT_EXPR)
3865 vec_oprnd1 = op1;
3866 else
3867 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3868 vec_oprnd1);
3869 vec_oprnds1.truncate (0);
3870 vec_oprnds1.quick_push (vec_oprnd1);
3874 /* Arguments are ready. Create the new vector stmts. */
3875 for (i = multi_step_cvt; i >= 0; i--)
3877 tree this_dest = vec_dsts[i];
3878 enum tree_code c1 = code1, c2 = code2;
3879 if (i == 0 && codecvt2 != ERROR_MARK)
3881 c1 = codecvt1;
3882 c2 = codecvt2;
3884 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3885 &vec_oprnds1,
3886 stmt, this_dest, gsi,
3887 c1, c2, decl1, decl2,
3888 op_type);
3891 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3893 if (cvt_type)
3895 if (codecvt1 == CALL_EXPR)
3897 new_stmt = gimple_build_call (decl1, 1, vop0);
3898 new_temp = make_ssa_name (vec_dest, new_stmt);
3899 gimple_call_set_lhs (new_stmt, new_temp);
3901 else
3903 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3904 new_temp = make_ssa_name (vec_dest, NULL);
3905 new_stmt = gimple_build_assign_with_ops (codecvt1,
3906 new_temp,
3907 vop0, NULL);
3910 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3912 else
3913 new_stmt = SSA_NAME_DEF_STMT (vop0);
3915 if (slp_node)
3916 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3917 else
3919 if (!prev_stmt_info)
3920 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3921 else
3922 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3923 prev_stmt_info = vinfo_for_stmt (new_stmt);
3928 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3929 break;
3931 case NARROW:
3932 /* In case the vectorization factor (VF) is bigger than the number
3933 of elements that we can fit in a vectype (nunits), we have to
3934 generate more than one vector stmt, i.e., we need to "unroll"
3935 the vector stmt by a factor VF/nunits. */
3936 for (j = 0; j < ncopies; j++)
3938 /* Handle uses. */
3939 if (slp_node)
3940 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3941 slp_node, -1);
3942 else
3944 vec_oprnds0.truncate (0);
3945 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3946 vect_pow2 (multi_step_cvt) - 1);
3949 /* Arguments are ready. Create the new vector stmts. */
3950 if (cvt_type)
3951 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3953 if (codecvt1 == CALL_EXPR)
3955 new_stmt = gimple_build_call (decl1, 1, vop0);
3956 new_temp = make_ssa_name (vec_dest, new_stmt);
3957 gimple_call_set_lhs (new_stmt, new_temp);
3959 else
3961 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3962 new_temp = make_ssa_name (vec_dest, NULL);
3963 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
3964 vop0, NULL);
3967 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3968 vec_oprnds0[i] = new_temp;
3971 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
3972 stmt, vec_dsts, gsi,
3973 slp_node, code1,
3974 &prev_stmt_info);
3977 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3978 break;
3981 vec_oprnds0.release ();
3982 vec_oprnds1.release ();
3983 vec_dsts.release ();
3984 interm_types.release ();
3986 return true;
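 /* Editorial illustration (assumption, not from the original sources): for a
 widening conversion such as

   short s;  int x;
   S1: x_1 = (int) s_2;

 where twice as many shorts as ints fit in a vector, the WIDEN path above
 emits, per copy, a low-part and a high-part statement roughly like

   vect_x_lo = [widen-lo] <vect_s>;
   vect_x_hi = [widen-hi] <vect_s>;

 with the actual codes (for instance VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR,
 or a target builtin emitted as a CALL_EXPR) chosen earlier by
 supportable_widening_operation during analysis.  */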
3990 /* Function vectorizable_assignment.
3992 Check if STMT performs an assignment (copy) that can be vectorized.
3993 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3994 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3995 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3997 static bool
3998 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
3999 gimple *vec_stmt, slp_tree slp_node)
4001 tree vec_dest;
4002 tree scalar_dest;
4003 tree op;
4004 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4005 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4006 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4007 tree new_temp;
4008 tree def;
4009 gimple def_stmt;
4010 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4011 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4012 int ncopies;
4013 int i, j;
4014 vec<tree> vec_oprnds = vNULL;
4015 tree vop;
4016 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4017 gimple new_stmt = NULL;
4018 stmt_vec_info prev_stmt_info = NULL;
4019 enum tree_code code;
4020 tree vectype_in;
4022 /* Multiple types in SLP are handled by creating the appropriate number of
4023 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4024 case of SLP. */
4025 if (slp_node || PURE_SLP_STMT (stmt_info))
4026 ncopies = 1;
4027 else
4028 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4030 gcc_assert (ncopies >= 1);
4032 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4033 return false;
4035 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4036 return false;
4038 /* Is vectorizable assignment? */
4039 if (!is_gimple_assign (stmt))
4040 return false;
4042 scalar_dest = gimple_assign_lhs (stmt);
4043 if (TREE_CODE (scalar_dest) != SSA_NAME)
4044 return false;
4046 code = gimple_assign_rhs_code (stmt);
4047 if (gimple_assign_single_p (stmt)
4048 || code == PAREN_EXPR
4049 || CONVERT_EXPR_CODE_P (code))
4050 op = gimple_assign_rhs1 (stmt);
4051 else
4052 return false;
4054 if (code == VIEW_CONVERT_EXPR)
4055 op = TREE_OPERAND (op, 0);
4057 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4058 &def_stmt, &def, &dt[0], &vectype_in))
4060 if (dump_enabled_p ())
4061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4062 "use not simple.\n");
4063 return false;
4066 /* We can handle NOP_EXPR conversions that do not change the number
4067 of elements or the vector size. */
4068 if ((CONVERT_EXPR_CODE_P (code)
4069 || code == VIEW_CONVERT_EXPR)
4070 && (!vectype_in
4071 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4072 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4073 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4074 return false;
4076 /* We do not handle bit-precision changes. */
4077 if ((CONVERT_EXPR_CODE_P (code)
4078 || code == VIEW_CONVERT_EXPR)
4079 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4080 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4081 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4082 || ((TYPE_PRECISION (TREE_TYPE (op))
4083 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4084 /* But a conversion that does not change the bit-pattern is ok. */
4085 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4086 > TYPE_PRECISION (TREE_TYPE (op)))
4087 && TYPE_UNSIGNED (TREE_TYPE (op))))
4089 if (dump_enabled_p ())
4090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4091 "type conversion to/from bit-precision "
4092 "unsupported.\n");
4093 return false;
4096 if (!vec_stmt) /* transformation not required. */
4098 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4099 if (dump_enabled_p ())
4100 dump_printf_loc (MSG_NOTE, vect_location,
4101 "=== vectorizable_assignment ===\n");
4102 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4103 return true;
4106 /** Transform. **/
4107 if (dump_enabled_p ())
4108 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4110 /* Handle def. */
4111 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4113 /* Handle use. */
4114 for (j = 0; j < ncopies; j++)
4116 /* Handle uses. */
4117 if (j == 0)
4118 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4119 else
4120 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4122 /* Arguments are ready.  Create the new vector stmt. */
4123 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4125 if (CONVERT_EXPR_CODE_P (code)
4126 || code == VIEW_CONVERT_EXPR)
4127 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4128 new_stmt = gimple_build_assign (vec_dest, vop);
4129 new_temp = make_ssa_name (vec_dest, new_stmt);
4130 gimple_assign_set_lhs (new_stmt, new_temp);
4131 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4132 if (slp_node)
4133 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4136 if (slp_node)
4137 continue;
4139 if (j == 0)
4140 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4141 else
4142 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4144 prev_stmt_info = vinfo_for_stmt (new_stmt);
4147 vec_oprnds.release ();
4148 return true;
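 /* Editorial sketch (illustration only): a copy whose rhs is a same-sized
 conversion, e.g.

   S1: x_1 = (unsigned int) y_2;

 is accepted by the checks above, and each vector copy becomes a single

   vect_x = VIEW_CONVERT_EXPR <vector(4) unsigned int> (vect_y);

 assignment; this is why the analysis phase costs it with
 vect_model_simple_cost like any other one-statement operation.  */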
4152 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4153 either as shift by a scalar or by a vector. */
4155 bool
4156 vect_supportable_shift (enum tree_code code, tree scalar_type)
4159 enum machine_mode vec_mode;
4160 optab optab;
4161 int icode;
4162 tree vectype;
4164 vectype = get_vectype_for_scalar_type (scalar_type);
4165 if (!vectype)
4166 return false;
4168 optab = optab_for_tree_code (code, vectype, optab_scalar);
4169 if (!optab
4170 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4172 optab = optab_for_tree_code (code, vectype, optab_vector);
4173 if (!optab
4174 || (optab_handler (optab, TYPE_MODE (vectype))
4175 == CODE_FOR_nothing))
4176 return false;
4179 vec_mode = TYPE_MODE (vectype);
4180 icode = (int) optab_handler (optab, vec_mode);
4181 if (icode == CODE_FOR_nothing)
4182 return false;
4184 return true;
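 /* Editorial usage sketch (hypothetical caller, not from the original
 sources):

   if (vect_supportable_shift (LSHIFT_EXPR, TREE_TYPE (oprnd0)))
     ... keep the shift in the pattern being built ...

 i.e. the function only answers whether the target has either a
 vector-by-scalar or a vector-by-vector form of the shift for vectors of
 SCALAR_TYPE; it does not commit to which of the two forms is used.  */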
4188 /* Function vectorizable_shift.
4190 Check if STMT performs a shift operation that can be vectorized.
4191 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4192 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4193 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4195 static bool
4196 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4197 gimple *vec_stmt, slp_tree slp_node)
4199 tree vec_dest;
4200 tree scalar_dest;
4201 tree op0, op1 = NULL;
4202 tree vec_oprnd1 = NULL_TREE;
4203 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4204 tree vectype;
4205 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4206 enum tree_code code;
4207 enum machine_mode vec_mode;
4208 tree new_temp;
4209 optab optab;
4210 int icode;
4211 enum machine_mode optab_op2_mode;
4212 tree def;
4213 gimple def_stmt;
4214 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4215 gimple new_stmt = NULL;
4216 stmt_vec_info prev_stmt_info;
4217 int nunits_in;
4218 int nunits_out;
4219 tree vectype_out;
4220 tree op1_vectype;
4221 int ncopies;
4222 int j, i;
4223 vec<tree> vec_oprnds0 = vNULL;
4224 vec<tree> vec_oprnds1 = vNULL;
4225 tree vop0, vop1;
4226 unsigned int k;
4227 bool scalar_shift_arg = true;
4228 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4229 int vf;
4231 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4232 return false;
4234 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4235 return false;
4237 /* Is STMT a vectorizable binary/unary operation? */
4238 if (!is_gimple_assign (stmt))
4239 return false;
4241 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4242 return false;
4244 code = gimple_assign_rhs_code (stmt);
4246 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4247 || code == RROTATE_EXPR))
4248 return false;
4250 scalar_dest = gimple_assign_lhs (stmt);
4251 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4252 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4253 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4255 if (dump_enabled_p ())
4256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4257 "bit-precision shifts not supported.\n");
4258 return false;
4261 op0 = gimple_assign_rhs1 (stmt);
4262 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4263 &def_stmt, &def, &dt[0], &vectype))
4265 if (dump_enabled_p ())
4266 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4267 "use not simple.\n");
4268 return false;
4270 /* If op0 is an external or constant def use a vector type with
4271 the same size as the output vector type. */
4272 if (!vectype)
4273 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4274 if (vec_stmt)
4275 gcc_assert (vectype);
4276 if (!vectype)
4278 if (dump_enabled_p ())
4279 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4280 "no vectype for scalar type\n");
4281 return false;
4284 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4285 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4286 if (nunits_out != nunits_in)
4287 return false;
4289 op1 = gimple_assign_rhs2 (stmt);
4290 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4291 &def, &dt[1], &op1_vectype))
4293 if (dump_enabled_p ())
4294 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4295 "use not simple.\n");
4296 return false;
4299 if (loop_vinfo)
4300 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4301 else
4302 vf = 1;
4304 /* Multiple types in SLP are handled by creating the appropriate number of
4305 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4306 case of SLP. */
4307 if (slp_node || PURE_SLP_STMT (stmt_info))
4308 ncopies = 1;
4309 else
4310 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4312 gcc_assert (ncopies >= 1);
4314 /* Determine whether the shift amount is a vector, or scalar. If the
4315 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4317 if (dt[1] == vect_internal_def && !slp_node)
4318 scalar_shift_arg = false;
4319 else if (dt[1] == vect_constant_def
4320 || dt[1] == vect_external_def
4321 || dt[1] == vect_internal_def)
4323 /* In SLP, we need to check whether the shift count is the same;
4324 in loops, if it is a constant or invariant, it is always
4325 a scalar shift. */
4326 if (slp_node)
4328 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4329 gimple slpstmt;
4331 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4332 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4333 scalar_shift_arg = false;
4336 else
4338 if (dump_enabled_p ())
4339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4340 "operand mode requires invariant argument.\n");
4341 return false;
4344 /* Vector shifted by vector. */
4345 if (!scalar_shift_arg)
4347 optab = optab_for_tree_code (code, vectype, optab_vector);
4348 if (dump_enabled_p ())
4349 dump_printf_loc (MSG_NOTE, vect_location,
4350 "vector/vector shift/rotate found.\n");
4352 if (!op1_vectype)
4353 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4354 if (op1_vectype == NULL_TREE
4355 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4357 if (dump_enabled_p ())
4358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4359 "unusable type for last operand in"
4360 " vector/vector shift/rotate.\n");
4361 return false;
4364 /* See if the machine has a vector shifted by scalar insn and if not
4365 then see if it has a vector shifted by vector insn. */
4366 else
4368 optab = optab_for_tree_code (code, vectype, optab_scalar);
4369 if (optab
4370 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4372 if (dump_enabled_p ())
4373 dump_printf_loc (MSG_NOTE, vect_location,
4374 "vector/scalar shift/rotate found.\n");
4376 else
4378 optab = optab_for_tree_code (code, vectype, optab_vector);
4379 if (optab
4380 && (optab_handler (optab, TYPE_MODE (vectype))
4381 != CODE_FOR_nothing))
4383 scalar_shift_arg = false;
4385 if (dump_enabled_p ())
4386 dump_printf_loc (MSG_NOTE, vect_location,
4387 "vector/vector shift/rotate found.\n");
4389 /* Unlike the other binary operators, shifts/rotates have an
4390 int-typed rhs rather than one of the same type as the lhs,
4391 so make sure the scalar is the right type if we are
4392 dealing with vectors of long long/long/short/char. */
4393 if (dt[1] == vect_constant_def)
4394 op1 = fold_convert (TREE_TYPE (vectype), op1);
4395 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4396 TREE_TYPE (op1)))
4398 if (slp_node
4399 && TYPE_MODE (TREE_TYPE (vectype))
4400 != TYPE_MODE (TREE_TYPE (op1)))
4402 if (dump_enabled_p ())
4403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4404 "unusable type for last operand in"
4405 " vector/vector shift/rotate.\n");
4406 return false;
4408 if (vec_stmt && !slp_node)
4410 op1 = fold_convert (TREE_TYPE (vectype), op1);
4411 op1 = vect_init_vector (stmt, op1,
4412 TREE_TYPE (vectype), NULL);
4419 /* Supportable by target? */
4420 if (!optab)
4422 if (dump_enabled_p ())
4423 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4424 "no optab.\n");
4425 return false;
4427 vec_mode = TYPE_MODE (vectype);
4428 icode = (int) optab_handler (optab, vec_mode);
4429 if (icode == CODE_FOR_nothing)
4431 if (dump_enabled_p ())
4432 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4433 "op not supported by target.\n");
4434 /* Check only during analysis. */
4435 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4436 || (vf < vect_min_worthwhile_factor (code)
4437 && !vec_stmt))
4438 return false;
4439 if (dump_enabled_p ())
4440 dump_printf_loc (MSG_NOTE, vect_location,
4441 "proceeding using word mode.\n");
4444 /* Worthwhile without SIMD support? Check only during analysis. */
4445 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4446 && vf < vect_min_worthwhile_factor (code)
4447 && !vec_stmt)
4449 if (dump_enabled_p ())
4450 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4451 "not worthwhile without SIMD support.\n");
4452 return false;
4455 if (!vec_stmt) /* transformation not required. */
4457 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4458 if (dump_enabled_p ())
4459 dump_printf_loc (MSG_NOTE, vect_location,
4460 "=== vectorizable_shift ===\n");
4461 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4462 return true;
4465 /** Transform. **/
4467 if (dump_enabled_p ())
4468 dump_printf_loc (MSG_NOTE, vect_location,
4469 "transform binary/unary operation.\n");
4471 /* Handle def. */
4472 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4474 prev_stmt_info = NULL;
4475 for (j = 0; j < ncopies; j++)
4477 /* Handle uses. */
4478 if (j == 0)
4480 if (scalar_shift_arg)
4482 /* Vector shl and shr insn patterns can be defined with scalar
4483 operand 2 (shift operand). In this case, use constant or loop
4484 invariant op1 directly, without extending it to vector mode
4485 first. */
4486 optab_op2_mode = insn_data[icode].operand[2].mode;
4487 if (!VECTOR_MODE_P (optab_op2_mode))
4489 if (dump_enabled_p ())
4490 dump_printf_loc (MSG_NOTE, vect_location,
4491 "operand 1 using scalar mode.\n");
4492 vec_oprnd1 = op1;
4493 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4494 vec_oprnds1.quick_push (vec_oprnd1);
4495 if (slp_node)
4497 /* Store vec_oprnd1 for every vector stmt to be created
4498 for SLP_NODE. We check during the analysis that all
4499 the shift arguments are the same.
4500 TODO: Allow different constants for different vector
4501 stmts generated for an SLP instance. */
4502 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4503 vec_oprnds1.quick_push (vec_oprnd1);
4508 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4509 (a special case for certain kinds of vector shifts); otherwise,
4510 operand 1 should be of a vector type (the usual case). */
4511 if (vec_oprnd1)
4512 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4513 slp_node, -1);
4514 else
4515 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4516 slp_node, -1);
4518 else
4519 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4521 /* Arguments are ready. Create the new vector stmt. */
4522 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4524 vop1 = vec_oprnds1[i];
4525 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
4526 new_temp = make_ssa_name (vec_dest, new_stmt);
4527 gimple_assign_set_lhs (new_stmt, new_temp);
4528 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4529 if (slp_node)
4530 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4533 if (slp_node)
4534 continue;
4536 if (j == 0)
4537 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4538 else
4539 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4540 prev_stmt_info = vinfo_for_stmt (new_stmt);
4543 vec_oprnds0.release ();
4544 vec_oprnds1.release ();
4546 return true;
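 /* Editorial illustration (not from the original sources): given

   S1: x_1 = y_2 << 3;        constant shift count
   S2: a_3 = b_4 << c_5;      count defined inside the loop

 S1 keeps SCALAR_SHIFT_ARG true, and when the target's operand 2 mode is
 scalar the constant 3 is used directly without building a vector of counts;
 S2 forces the vector/vector form, so the counts are vectorized like any
 other operand and the emitted statement is roughly

   vect_a = vect_b << vect_c;
 */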
4550 /* Function vectorizable_operation.
4552 Check if STMT performs a binary, unary or ternary operation that can
4553 be vectorized.
4554 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4555 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4556 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4558 static bool
4559 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4560 gimple *vec_stmt, slp_tree slp_node)
4562 tree vec_dest;
4563 tree scalar_dest;
4564 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4565 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4566 tree vectype;
4567 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4568 enum tree_code code;
4569 enum machine_mode vec_mode;
4570 tree new_temp;
4571 int op_type;
4572 optab optab;
4573 int icode;
4574 tree def;
4575 gimple def_stmt;
4576 enum vect_def_type dt[3]
4577 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4578 gimple new_stmt = NULL;
4579 stmt_vec_info prev_stmt_info;
4580 int nunits_in;
4581 int nunits_out;
4582 tree vectype_out;
4583 int ncopies;
4584 int j, i;
4585 vec<tree> vec_oprnds0 = vNULL;
4586 vec<tree> vec_oprnds1 = vNULL;
4587 vec<tree> vec_oprnds2 = vNULL;
4588 tree vop0, vop1, vop2;
4589 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4590 int vf;
4592 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4593 return false;
4595 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4596 return false;
4598 /* Is STMT a vectorizable binary/unary operation? */
4599 if (!is_gimple_assign (stmt))
4600 return false;
4602 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4603 return false;
4605 code = gimple_assign_rhs_code (stmt);
4607 /* For pointer addition, we should use the normal plus for
4608 the vector addition. */
4609 if (code == POINTER_PLUS_EXPR)
4610 code = PLUS_EXPR;
4612 /* Support only unary or binary operations. */
4613 op_type = TREE_CODE_LENGTH (code);
4614 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4616 if (dump_enabled_p ())
4617 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4618 "num. args = %d (not unary/binary/ternary op).\n",
4619 op_type);
4620 return false;
4623 scalar_dest = gimple_assign_lhs (stmt);
4624 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4626 /* Most operations cannot handle bit-precision types without extra
4627 truncations. */
4628 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4629 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4630 /* Exceptions are bitwise binary operations. */
4631 && code != BIT_IOR_EXPR
4632 && code != BIT_XOR_EXPR
4633 && code != BIT_AND_EXPR)
4635 if (dump_enabled_p ())
4636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4637 "bit-precision arithmetic not supported.\n");
4638 return false;
4641 op0 = gimple_assign_rhs1 (stmt);
4642 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4643 &def_stmt, &def, &dt[0], &vectype))
4645 if (dump_enabled_p ())
4646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4647 "use not simple.\n");
4648 return false;
4650 /* If op0 is an external or constant def use a vector type with
4651 the same size as the output vector type. */
4652 if (!vectype)
4653 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4654 if (vec_stmt)
4655 gcc_assert (vectype);
4656 if (!vectype)
4658 if (dump_enabled_p ())
4660 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4661 "no vectype for scalar type ");
4662 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4663 TREE_TYPE (op0));
4664 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4667 return false;
4670 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4671 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4672 if (nunits_out != nunits_in)
4673 return false;
4675 if (op_type == binary_op || op_type == ternary_op)
4677 op1 = gimple_assign_rhs2 (stmt);
4678 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4679 &def, &dt[1]))
4681 if (dump_enabled_p ())
4682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4683 "use not simple.\n");
4684 return false;
4687 if (op_type == ternary_op)
4689 op2 = gimple_assign_rhs3 (stmt);
4690 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4691 &def, &dt[2]))
4693 if (dump_enabled_p ())
4694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4695 "use not simple.\n");
4696 return false;
4700 if (loop_vinfo)
4701 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4702 else
4703 vf = 1;
4705 /* Multiple types in SLP are handled by creating the appropriate number of
4706 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4707 case of SLP. */
4708 if (slp_node || PURE_SLP_STMT (stmt_info))
4709 ncopies = 1;
4710 else
4711 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4713 gcc_assert (ncopies >= 1);
4715 /* Shifts are handled in vectorizable_shift (). */
4716 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4717 || code == RROTATE_EXPR)
4718 return false;
4720 /* Supportable by target? */
4722 vec_mode = TYPE_MODE (vectype);
4723 if (code == MULT_HIGHPART_EXPR)
4725 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4726 icode = LAST_INSN_CODE;
4727 else
4728 icode = CODE_FOR_nothing;
4730 else
4732 optab = optab_for_tree_code (code, vectype, optab_default);
4733 if (!optab)
4735 if (dump_enabled_p ())
4736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4737 "no optab.\n");
4738 return false;
4740 icode = (int) optab_handler (optab, vec_mode);
4743 if (icode == CODE_FOR_nothing)
4745 if (dump_enabled_p ())
4746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4747 "op not supported by target.\n");
4748 /* Check only during analysis. */
4749 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4750 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4751 return false;
4752 if (dump_enabled_p ())
4753 dump_printf_loc (MSG_NOTE, vect_location,
4754 "proceeding using word mode.\n");
4757 /* Worthwhile without SIMD support? Check only during analysis. */
4758 if (!VECTOR_MODE_P (vec_mode)
4759 && !vec_stmt
4760 && vf < vect_min_worthwhile_factor (code))
4762 if (dump_enabled_p ())
4763 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4764 "not worthwhile without SIMD support.\n");
4765 return false;
4768 if (!vec_stmt) /* transformation not required. */
4770 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4771 if (dump_enabled_p ())
4772 dump_printf_loc (MSG_NOTE, vect_location,
4773 "=== vectorizable_operation ===\n");
4774 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4775 return true;
4778 /** Transform. **/
4780 if (dump_enabled_p ())
4781 dump_printf_loc (MSG_NOTE, vect_location,
4782 "transform binary/unary operation.\n");
4784 /* Handle def. */
4785 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4787 /* In case the vectorization factor (VF) is bigger than the number
4788 of elements that we can fit in a vectype (nunits), we have to generate
4789 more than one vector stmt, i.e., we need to "unroll" the
4790 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4791 from one copy of the vector stmt to the next, in the field
4792 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4793 stages to find the correct vector defs to be used when vectorizing
4794 stmts that use the defs of the current stmt. The example below
4795 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4796 we need to create 4 vectorized stmts):
4798 before vectorization:
4799 RELATED_STMT VEC_STMT
4800 S1: x = memref - -
4801 S2: z = x + 1 - -
4803 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4804 there):
4805 RELATED_STMT VEC_STMT
4806 VS1_0: vx0 = memref0 VS1_1 -
4807 VS1_1: vx1 = memref1 VS1_2 -
4808 VS1_2: vx2 = memref2 VS1_3 -
4809 VS1_3: vx3 = memref3 - -
4810 S1: x = load - VS1_0
4811 S2: z = x + 1 - -
4813 step2: vectorize stmt S2 (done here):
4814 To vectorize stmt S2 we first need to find the relevant vector
4815 def for the first operand 'x'. This is, as usual, obtained from
4816 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4817 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4818 relevant vector def 'vx0'. Having found 'vx0' we can generate
4819 the vector stmt VS2_0, and as usual, record it in the
4820 STMT_VINFO_VEC_STMT of stmt S2.
4821 When creating the second copy (VS2_1), we obtain the relevant vector
4822 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4823 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4824 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4825 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4826 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4827 chain of stmts and pointers:
4828 RELATED_STMT VEC_STMT
4829 VS1_0: vx0 = memref0 VS1_1 -
4830 VS1_1: vx1 = memref1 VS1_2 -
4831 VS1_2: vx2 = memref2 VS1_3 -
4832 VS1_3: vx3 = memref3 - -
4833 S1: x = load - VS1_0
4834 VS2_0: vz0 = vx0 + v1 VS2_1 -
4835 VS2_1: vz1 = vx1 + v1 VS2_2 -
4836 VS2_2: vz2 = vx2 + v1 VS2_3 -
4837 VS2_3: vz3 = vx3 + v1 - -
4838 S2: z = x + 1 - VS2_0 */
4840 prev_stmt_info = NULL;
4841 for (j = 0; j < ncopies; j++)
4843 /* Handle uses. */
4844 if (j == 0)
4846 if (op_type == binary_op || op_type == ternary_op)
4847 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4848 slp_node, -1);
4849 else
4850 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4851 slp_node, -1);
4852 if (op_type == ternary_op)
4854 vec_oprnds2.create (1);
4855 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4856 stmt,
4857 NULL));
4860 else
4862 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4863 if (op_type == ternary_op)
4865 tree vec_oprnd = vec_oprnds2.pop ();
4866 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4867 vec_oprnd));
4871 /* Arguments are ready. Create the new vector stmt. */
4872 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4874 vop1 = ((op_type == binary_op || op_type == ternary_op)
4875 ? vec_oprnds1[i] : NULL_TREE);
4876 vop2 = ((op_type == ternary_op)
4877 ? vec_oprnds2[i] : NULL_TREE);
4878 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
4879 vop0, vop1, vop2);
4880 new_temp = make_ssa_name (vec_dest, new_stmt);
4881 gimple_assign_set_lhs (new_stmt, new_temp);
4882 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4883 if (slp_node)
4884 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4887 if (slp_node)
4888 continue;
4890 if (j == 0)
4891 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4892 else
4893 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4894 prev_stmt_info = vinfo_for_stmt (new_stmt);
4897 vec_oprnds0.release ();
4898 vec_oprnds1.release ();
4899 vec_oprnds2.release ();
4901 return true;
4904 /* A helper function to ensure data reference DR's base alignment
4905 for STMT_INFO. */
4907 static void
4908 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4910 if (!dr->aux)
4911 return;
4913 if (((dataref_aux *)dr->aux)->base_misaligned)
4915 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4916 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4918 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4919 DECL_USER_ALIGN (base_decl) = 1;
4920 ((dataref_aux *)dr->aux)->base_misaligned = false;
4925 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4926 reversal of the vector elements. If that is impossible to do,
4927 returns NULL. */
4929 static tree
4930 perm_mask_for_reverse (tree vectype)
4932 int i, nunits;
4933 unsigned char *sel;
4935 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4936 sel = XALLOCAVEC (unsigned char, nunits);
4938 for (i = 0; i < nunits; ++i)
4939 sel[i] = nunits - 1 - i;
4941 return vect_gen_perm_mask (vectype, sel);
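 /* Editorial example (illustration only): for a 4-element vector the
 selector built above is { 3, 2, 1, 0 }, so a statement of the form

   reversed = VEC_PERM_EXPR <v, v, { 3, 2, 1, 0 }>;

 yields the elements of V in reverse order; vect_gen_perm_mask returns
 NULL_TREE when the target cannot perform that permutation, in which case
 the negative-step paths below give up.  */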
4944 /* Function vectorizable_store.
4946 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4947 can be vectorized.
4948 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4949 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4950 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4952 static bool
4953 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4954 slp_tree slp_node)
4956 tree scalar_dest;
4957 tree data_ref;
4958 tree op;
4959 tree vec_oprnd = NULL_TREE;
4960 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4961 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4962 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4963 tree elem_type;
4964 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4965 struct loop *loop = NULL;
4966 enum machine_mode vec_mode;
4967 tree dummy;
4968 enum dr_alignment_support alignment_support_scheme;
4969 tree def;
4970 gimple def_stmt;
4971 enum vect_def_type dt;
4972 stmt_vec_info prev_stmt_info = NULL;
4973 tree dataref_ptr = NULL_TREE;
4974 tree dataref_offset = NULL_TREE;
4975 gimple ptr_incr = NULL;
4976 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4977 int ncopies;
4978 int j;
4979 gimple next_stmt, first_stmt = NULL;
4980 bool grouped_store = false;
4981 bool store_lanes_p = false;
4982 unsigned int group_size, i;
4983 vec<tree> dr_chain = vNULL;
4984 vec<tree> oprnds = vNULL;
4985 vec<tree> result_chain = vNULL;
4986 bool inv_p;
4987 bool negative = false;
4988 tree offset = NULL_TREE;
4989 vec<tree> vec_oprnds = vNULL;
4990 bool slp = (slp_node != NULL);
4991 unsigned int vec_num;
4992 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4993 tree aggr_type;
4995 if (loop_vinfo)
4996 loop = LOOP_VINFO_LOOP (loop_vinfo);
4998 /* Multiple types in SLP are handled by creating the appropriate number of
4999 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5000 case of SLP. */
5001 if (slp || PURE_SLP_STMT (stmt_info))
5002 ncopies = 1;
5003 else
5004 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5006 gcc_assert (ncopies >= 1);
5008 /* FORNOW. This restriction should be relaxed. */
5009 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5011 if (dump_enabled_p ())
5012 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5013 "multiple types in nested loop.\n");
5014 return false;
5017 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5018 return false;
5020 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5021 return false;
5023 /* Is vectorizable store? */
5025 if (!is_gimple_assign (stmt))
5026 return false;
5028 scalar_dest = gimple_assign_lhs (stmt);
5029 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5030 && is_pattern_stmt_p (stmt_info))
5031 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5032 if (TREE_CODE (scalar_dest) != ARRAY_REF
5033 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5034 && TREE_CODE (scalar_dest) != INDIRECT_REF
5035 && TREE_CODE (scalar_dest) != COMPONENT_REF
5036 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5037 && TREE_CODE (scalar_dest) != REALPART_EXPR
5038 && TREE_CODE (scalar_dest) != MEM_REF)
5039 return false;
5041 gcc_assert (gimple_assign_single_p (stmt));
5042 op = gimple_assign_rhs1 (stmt);
5043 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5044 &def, &dt))
5046 if (dump_enabled_p ())
5047 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5048 "use not simple.\n");
5049 return false;
5052 elem_type = TREE_TYPE (vectype);
5053 vec_mode = TYPE_MODE (vectype);
5055 /* FORNOW. In some cases can vectorize even if data-type not supported
5056 (e.g. - array initialization with 0). */
5057 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5058 return false;
5060 if (!STMT_VINFO_DATA_REF (stmt_info))
5061 return false;
5063 negative =
5064 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5065 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5066 size_zero_node) < 0;
5067 if (negative && ncopies > 1)
5069 if (dump_enabled_p ())
5070 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5071 "multiple types with negative step.\n");
5072 return false;
5075 if (negative)
5077 gcc_assert (!grouped_store);
5078 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5079 if (alignment_support_scheme != dr_aligned
5080 && alignment_support_scheme != dr_unaligned_supported)
5082 if (dump_enabled_p ())
5083 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5084 "negative step but alignment required.\n");
5085 return false;
5087 if (dt != vect_constant_def
5088 && dt != vect_external_def
5089 && !perm_mask_for_reverse (vectype))
5091 if (dump_enabled_p ())
5092 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5093 "negative step and reversing not supported.\n");
5094 return false;
5098 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5100 grouped_store = true;
5101 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5102 if (!slp && !PURE_SLP_STMT (stmt_info))
5104 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5105 if (vect_store_lanes_supported (vectype, group_size))
5106 store_lanes_p = true;
5107 else if (!vect_grouped_store_supported (vectype, group_size))
5108 return false;
5111 if (first_stmt == stmt)
5113 /* STMT is the leader of the group. Check the operands of all the
5114 stmts of the group. */
5115 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5116 while (next_stmt)
5118 gcc_assert (gimple_assign_single_p (next_stmt));
5119 op = gimple_assign_rhs1 (next_stmt);
5120 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5121 &def_stmt, &def, &dt))
5123 if (dump_enabled_p ())
5124 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5125 "use not simple.\n");
5126 return false;
5128 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5133 if (!vec_stmt) /* transformation not required. */
5135 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5136 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5137 NULL, NULL, NULL);
5138 return true;
5141 /** Transform. **/
5143 ensure_base_align (stmt_info, dr);
5145 if (grouped_store)
5147 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5148 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5150 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5152 /* FORNOW */
5153 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5155 /* We vectorize all the stmts of the interleaving group when we
5156 reach the last stmt in the group. */
5157 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5158 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5159 && !slp)
5161 *vec_stmt = NULL;
5162 return true;
5165 if (slp)
5167 grouped_store = false;
5168 /* VEC_NUM is the number of vect stmts to be created for this
5169 group. */
5170 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5171 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5172 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5173 op = gimple_assign_rhs1 (first_stmt);
5175 else
5176 /* VEC_NUM is the number of vect stmts to be created for this
5177 group. */
5178 vec_num = group_size;
5180 else
5182 first_stmt = stmt;
5183 first_dr = dr;
5184 group_size = vec_num = 1;
5187 if (dump_enabled_p ())
5188 dump_printf_loc (MSG_NOTE, vect_location,
5189 "transform store. ncopies = %d\n", ncopies);
5191 dr_chain.create (group_size);
5192 oprnds.create (group_size);
5194 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5195 gcc_assert (alignment_support_scheme);
5196 /* Targets with store-lane instructions must not require explicit
5197 realignment. */
5198 gcc_assert (!store_lanes_p
5199 || alignment_support_scheme == dr_aligned
5200 || alignment_support_scheme == dr_unaligned_supported);
5202 if (negative)
5203 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5205 if (store_lanes_p)
5206 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5207 else
5208 aggr_type = vectype;
5210 /* In case the vectorization factor (VF) is bigger than the number
5211 of elements that we can fit in a vectype (nunits), we have to generate
5212 more than one vector stmt, i.e., we need to "unroll" the
5213 vector stmt by a factor VF/nunits. For more details see documentation in
5214 vect_get_vec_def_for_copy_stmt. */
5216 /* In case of interleaving (non-unit grouped access):
5218 S1: &base + 2 = x2
5219 S2: &base = x0
5220 S3: &base + 1 = x1
5221 S4: &base + 3 = x3
5223 We create vectorized stores starting from base address (the access of the
5224 first stmt in the chain (S2 in the above example), when the last store stmt
5225 of the chain (S4) is reached:
5227 VS1: &base = vx2
5228 VS2: &base + vec_size*1 = vx0
5229 VS3: &base + vec_size*2 = vx1
5230 VS4: &base + vec_size*3 = vx3
5232 Then permutation statements are generated:
5234 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5235 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5238 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5239 (the order of the data-refs in the output of vect_permute_store_chain
5240 corresponds to the order of scalar stmts in the interleaving chain - see
5241 the documentation of vect_permute_store_chain()).
5243 In case of both multiple types and interleaving, above vector stores and
5244 permutation stmts are created for every copy. The result vector stmts are
5245 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5246 STMT_VINFO_RELATED_STMT for the next copies.
5249 prev_stmt_info = NULL;
5250 for (j = 0; j < ncopies; j++)
5252 gimple new_stmt;
5254 if (j == 0)
5256 if (slp)
5258 /* Get vectorized arguments for SLP_NODE. */
5259 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5260 NULL, slp_node, -1);
5262 vec_oprnd = vec_oprnds[0];
5264 else
5266 /* For interleaved stores we collect vectorized defs for all the
5267 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5268 used as an input to vect_permute_store_chain(), and OPRNDS as
5269 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5271 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5272 OPRNDS are of size 1. */
5273 next_stmt = first_stmt;
5274 for (i = 0; i < group_size; i++)
5276 /* Since gaps are not supported for interleaved stores,
5277 GROUP_SIZE is the exact number of stmts in the chain.
5278 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5279 there is no interleaving, GROUP_SIZE is 1, and only one
5280 iteration of the loop will be executed. */
5281 gcc_assert (next_stmt
5282 && gimple_assign_single_p (next_stmt));
5283 op = gimple_assign_rhs1 (next_stmt);
5285 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5286 NULL);
5287 dr_chain.quick_push (vec_oprnd);
5288 oprnds.quick_push (vec_oprnd);
5289 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5293 /* We should have caught mismatched types earlier. */
5294 gcc_assert (useless_type_conversion_p (vectype,
5295 TREE_TYPE (vec_oprnd)));
5296 bool simd_lane_access_p
5297 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5298 if (simd_lane_access_p
5299 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5300 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5301 && integer_zerop (DR_OFFSET (first_dr))
5302 && integer_zerop (DR_INIT (first_dr))
5303 && alias_sets_conflict_p (get_alias_set (aggr_type),
5304 get_alias_set (DR_REF (first_dr))))
5306 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5307 dataref_offset = build_int_cst (reference_alias_ptr_type
5308 (DR_REF (first_dr)), 0);
5309 inv_p = false;
5311 else
5312 dataref_ptr
5313 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5314 simd_lane_access_p ? loop : NULL,
5315 offset, &dummy, gsi, &ptr_incr,
5316 simd_lane_access_p, &inv_p);
5317 gcc_assert (bb_vinfo || !inv_p);
5319 else
5321 /* For interleaved stores we created vectorized defs for all the
5322 defs stored in OPRNDS in the previous iteration (previous copy).
5323 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5324 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5325 next copy.
5326 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5327 OPRNDS are of size 1. */
5328 for (i = 0; i < group_size; i++)
5330 op = oprnds[i];
5331 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5332 &def, &dt);
5333 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5334 dr_chain[i] = vec_oprnd;
5335 oprnds[i] = vec_oprnd;
5337 if (dataref_offset)
5338 dataref_offset
5339 = int_const_binop (PLUS_EXPR, dataref_offset,
5340 TYPE_SIZE_UNIT (aggr_type));
5341 else
5342 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5343 TYPE_SIZE_UNIT (aggr_type));
5346 if (store_lanes_p)
5348 tree vec_array;
5350 /* Combine all the vectors into an array. */
5351 vec_array = create_vector_array (vectype, vec_num);
5352 for (i = 0; i < vec_num; i++)
5354 vec_oprnd = dr_chain[i];
5355 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5358 /* Emit:
5359 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5360 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5361 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5362 gimple_call_set_lhs (new_stmt, data_ref);
5363 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5365 else
5367 new_stmt = NULL;
5368 if (grouped_store)
5370 if (j == 0)
5371 result_chain.create (group_size);
5372 /* Permute. */
5373 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5374 &result_chain);
5377 next_stmt = first_stmt;
5378 for (i = 0; i < vec_num; i++)
5380 unsigned align, misalign;
5382 if (i > 0)
5383 /* Bump the vector pointer. */
5384 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5385 stmt, NULL_TREE);
5387 if (slp)
5388 vec_oprnd = vec_oprnds[i];
5389 else if (grouped_store)
5390 /* For grouped stores vectorized defs are interleaved in
5391 vect_permute_store_chain(). */
5392 vec_oprnd = result_chain[i];
5394 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5395 dataref_offset
5396 ? dataref_offset
5397 : build_int_cst (reference_alias_ptr_type
5398 (DR_REF (first_dr)), 0));
5399 align = TYPE_ALIGN_UNIT (vectype);
5400 if (aligned_access_p (first_dr))
5401 misalign = 0;
5402 else if (DR_MISALIGNMENT (first_dr) == -1)
5404 TREE_TYPE (data_ref)
5405 = build_aligned_type (TREE_TYPE (data_ref),
5406 TYPE_ALIGN (elem_type));
5407 align = TYPE_ALIGN_UNIT (elem_type);
5408 misalign = 0;
5410 else
5412 TREE_TYPE (data_ref)
5413 = build_aligned_type (TREE_TYPE (data_ref),
5414 TYPE_ALIGN (elem_type));
5415 misalign = DR_MISALIGNMENT (first_dr);
5417 if (dataref_offset == NULL_TREE)
5418 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5419 misalign);
5421 if (negative
5422 && dt != vect_constant_def
5423 && dt != vect_external_def)
5425 tree perm_mask = perm_mask_for_reverse (vectype);
5426 tree perm_dest
5427 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5428 vectype);
5429 tree new_temp = make_ssa_name (perm_dest, NULL);
5431 /* Generate the permute statement. */
5432 gimple perm_stmt
5433 = gimple_build_assign_with_ops (VEC_PERM_EXPR, new_temp,
5434 vec_oprnd, vec_oprnd,
5435 perm_mask);
5436 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5438 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5439 vec_oprnd = new_temp;
5442 /* Arguments are ready. Create the new vector stmt. */
5443 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5444 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5446 if (slp)
5447 continue;
5449 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5450 if (!next_stmt)
5451 break;
5454 if (!slp)
5456 if (j == 0)
5457 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5458 else
5459 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5460 prev_stmt_info = vinfo_for_stmt (new_stmt);
5464 dr_chain.release ();
5465 oprnds.release ();
5466 result_chain.release ();
5467 vec_oprnds.release ();
5469 return true;
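 /* Editorial illustration (assumption, not from the original sources): a
 negative-step store such as

   for (i = 0; i < n; i++)
     a[x - i] = b[i];

 takes the NEGATIVE path above: the data-ref pointer is biased by
 -nunits + 1 elements through OFFSET, and each vector of values is reversed
 before being stored, roughly

   vect_r = VEC_PERM_EXPR <vect_b, vect_b, { 3, 2, 1, 0 }>;
   MEM[ptr] = vect_r;

 (shown for 4 elements per vector).  */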
5472 /* Given a vector type VECTYPE and permutation SEL returns
5473 the VECTOR_CST mask that implements the permutation of the
5474 vector elements. If that is impossible to do, returns NULL. */
5476 tree
5477 vect_gen_perm_mask (tree vectype, unsigned char *sel)
5479 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5480 int i, nunits;
5482 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5484 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5485 return NULL;
5487 mask_elt_type = lang_hooks.types.type_for_mode
5488 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5489 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5491 mask_elts = XALLOCAVEC (tree, nunits);
5492 for (i = nunits - 1; i >= 0; i--)
5493 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5494 mask_vec = build_vector (mask_type, mask_elts);
5496 return mask_vec;
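 /* Editorial note (illustration only): SEL gives, for every output element,
 the index of the element to take, with the two inputs of the eventual
 VEC_PERM_EXPR counted as one sequence of 2 * nunits elements.  E.g. with
 nunits == 4, sel = { 0, 4, 1, 5 } produces the mask that interleaves the
 low halves of the two input vectors.  */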
5499 /* Given vector variables X and Y that were generated for the scalar
5500 STMT, generate instructions to permute the vector elements of X and Y
5501 using permutation mask MASK_VEC, insert them at *GSI and return the
5502 permuted vector variable. */
5504 static tree
5505 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5506 gimple_stmt_iterator *gsi)
5508 tree vectype = TREE_TYPE (x);
5509 tree perm_dest, data_ref;
5510 gimple perm_stmt;
5512 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5513 data_ref = make_ssa_name (perm_dest, NULL);
5515 /* Generate the permute statement. */
5516 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5517 x, y, mask_vec);
5518 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5520 return data_ref;
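 /* Editorial usage sketch (assumption): callers use this roughly as

   new_temp = permute_vec_elements (vec_lo, vec_hi, mask_vec, stmt, gsi);

 after which NEW_TEMP holds VEC_PERM_EXPR <vec_lo, vec_hi, mask_vec>,
 inserted at GSI just like any other vectorized statement.  */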
5523 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5524 inserting them on the loop's preheader edge. Returns true if we
5525 were successful in doing so (and thus STMT can be moved then),
5526 otherwise returns false. */
5528 static bool
5529 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5531 ssa_op_iter i;
5532 tree op;
5533 bool any = false;
5535 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5537 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5538 if (!gimple_nop_p (def_stmt)
5539 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5541 /* Make sure we don't need to recurse. While we could do
5542 so in simple cases when there are more complex use webs
5543 we don't have an easy way to preserve stmt order to fulfil
5544 dependencies within them. */
5545 tree op2;
5546 ssa_op_iter i2;
5547 if (gimple_code (def_stmt) == GIMPLE_PHI)
5548 return false;
5549 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5551 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5552 if (!gimple_nop_p (def_stmt2)
5553 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5554 return false;
5556 any = true;
5560 if (!any)
5561 return true;
5563 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5565 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5566 if (!gimple_nop_p (def_stmt)
5567 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5569 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5570 gsi_remove (&gsi, false);
5571 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5575 return true;
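 /* Editorial illustration (assumption, not from the original sources):
 suppose STMT is an invariant load  _6 = *_5  whose address  _5 = p_1 + 16
 happens to be computed by a statement inside LOOP even though p_1 is
 defined outside it.  That address statement is moved onto the preheader
 edge, after which STMT itself can legally be hoisted out of the loop by
 the invariant-load handling in vectorizable_load.  */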
5578 /* vectorizable_load.
5580 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5581 can be vectorized.
5582 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5583 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5584 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5586 static bool
5587 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5588 slp_tree slp_node, slp_instance slp_node_instance)
5590 tree scalar_dest;
5591 tree vec_dest = NULL;
5592 tree data_ref = NULL;
5593 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5594 stmt_vec_info prev_stmt_info;
5595 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5596 struct loop *loop = NULL;
5597 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5598 bool nested_in_vect_loop = false;
5599 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5600 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5601 tree elem_type;
5602 tree new_temp;
5603 enum machine_mode mode;
5604 gimple new_stmt = NULL;
5605 tree dummy;
5606 enum dr_alignment_support alignment_support_scheme;
5607 tree dataref_ptr = NULL_TREE;
5608 tree dataref_offset = NULL_TREE;
5609 gimple ptr_incr = NULL;
5610 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5611 int ncopies;
5612 int i, j, group_size, group_gap;
5613 tree msq = NULL_TREE, lsq;
5614 tree offset = NULL_TREE;
5615 tree byte_offset = NULL_TREE;
5616 tree realignment_token = NULL_TREE;
5617 gphi *phi = NULL;
5618 vec<tree> dr_chain = vNULL;
5619 bool grouped_load = false;
5620 bool load_lanes_p = false;
5621 gimple first_stmt;
5622 bool inv_p;
5623 bool negative = false;
5624 bool compute_in_loop = false;
5625 struct loop *at_loop;
5626 int vec_num;
5627 bool slp = (slp_node != NULL);
5628 bool slp_perm = false;
5629 enum tree_code code;
5630 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5631 int vf;
5632 tree aggr_type;
5633 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5634 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5635 int gather_scale = 1;
5636 enum vect_def_type gather_dt = vect_unknown_def_type;
5638 if (loop_vinfo)
5640 loop = LOOP_VINFO_LOOP (loop_vinfo);
5641 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5642 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5644 else
5645 vf = 1;
5647 /* Multiple types in SLP are handled by creating the appropriate number of
5648 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5649 case of SLP. */
5650 if (slp || PURE_SLP_STMT (stmt_info))
5651 ncopies = 1;
5652 else
5653 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5655 gcc_assert (ncopies >= 1);
5657 /* FORNOW. This restriction should be relaxed. */
5658 if (nested_in_vect_loop && ncopies > 1)
5660 if (dump_enabled_p ())
5661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5662 "multiple types in nested loop.\n");
5663 return false;
5666 /* Invalidate assumptions made by dependence analysis when vectorization
5667 on the unrolled body effectively re-orders stmts. */
5668 if (ncopies > 1
5669 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5670 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5671 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5673 if (dump_enabled_p ())
5674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5675 "cannot perform implicit CSE when unrolling "
5676 "with negative dependence distance\n");
5677 return false;
5680 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5681 return false;
5683 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5684 return false;
5686 /* Is vectorizable load? */
5687 if (!is_gimple_assign (stmt))
5688 return false;
5690 scalar_dest = gimple_assign_lhs (stmt);
5691 if (TREE_CODE (scalar_dest) != SSA_NAME)
5692 return false;
5694 code = gimple_assign_rhs_code (stmt);
5695 if (code != ARRAY_REF
5696 && code != BIT_FIELD_REF
5697 && code != INDIRECT_REF
5698 && code != COMPONENT_REF
5699 && code != IMAGPART_EXPR
5700 && code != REALPART_EXPR
5701 && code != MEM_REF
5702 && TREE_CODE_CLASS (code) != tcc_declaration)
5703 return false;
5705 if (!STMT_VINFO_DATA_REF (stmt_info))
5706 return false;
5708 elem_type = TREE_TYPE (vectype);
5709 mode = TYPE_MODE (vectype);
5711 /* FORNOW. In some cases can vectorize even if data-type not supported
5712 (e.g. - data copies). */
5713 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5715 if (dump_enabled_p ())
5716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5717 "Aligned load, but unsupported type.\n");
5718 return false;
5721 /* Check if the load is a part of an interleaving chain. */
5722 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5724 grouped_load = true;
5725 /* FORNOW */
5726 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5728 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5729 if (!slp && !PURE_SLP_STMT (stmt_info))
5731 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5732 if (vect_load_lanes_supported (vectype, group_size))
5733 load_lanes_p = true;
5734 else if (!vect_grouped_load_supported (vectype, group_size))
5735 return false;
5738 /* Invalidate assumptions made by dependence analysis when vectorization
5739 on the unrolled body effectively re-orders stmts. */
5740 if (!PURE_SLP_STMT (stmt_info)
5741 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5742 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5743 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5745 if (dump_enabled_p ())
5746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5747 "cannot perform implicit CSE when performing "
5748 "group loads with negative dependence distance\n");
5749 return false;
5754 if (STMT_VINFO_GATHER_P (stmt_info))
5756 gimple def_stmt;
5757 tree def;
5758 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5759 &gather_off, &gather_scale);
5760 gcc_assert (gather_decl);
5761 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5762 &def_stmt, &def, &gather_dt,
5763 &gather_off_vectype))
5765 if (dump_enabled_p ())
5766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5767 "gather index use not simple.\n");
5768 return false;
5771 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5773 else
5775 negative = tree_int_cst_compare (nested_in_vect_loop
5776 ? STMT_VINFO_DR_STEP (stmt_info)
5777 : DR_STEP (dr),
5778 size_zero_node) < 0;
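  /* A negative step arises when the loop walks the array backwards,
     e.g. (illustrative):
       for (i = n - 1; i >= 0; i--)
         ... = a[i];
     The vector loaded from memory is then in reverse element order and
     has to be reversed with a permutation (see perm_mask_for_reverse
     below).  */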
5779 if (negative && ncopies > 1)
5781 if (dump_enabled_p ())
5782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5783 "multiple types with negative step.\n");
5784 return false;
5787 if (negative)
5789 if (grouped_load)
5791 if (dump_enabled_p ())
5792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5793 "negative step for group load not supported"
5794 "\n");
5795 return false;
5797 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5798 if (alignment_support_scheme != dr_aligned
5799 && alignment_support_scheme != dr_unaligned_supported)
5801 if (dump_enabled_p ())
5802 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5803 "negative step but alignment required.\n");
5804 return false;
5806 if (!perm_mask_for_reverse (vectype))
5808 if (dump_enabled_p ())
5809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5810 "negative step and reversing not supported."
5811 "\n");
5812 return false;
5817 if (!vec_stmt) /* transformation not required. */
5819 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5820 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
5821 return true;
5824 if (dump_enabled_p ())
5825 dump_printf_loc (MSG_NOTE, vect_location,
5826 "transform load. ncopies = %d\n", ncopies);
5828 /** Transform. **/
5830 ensure_base_align (stmt_info, dr);
5832 if (STMT_VINFO_GATHER_P (stmt_info))
5834 tree vec_oprnd0 = NULL_TREE, op;
5835 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5836 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5837 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
5838 edge pe = loop_preheader_edge (loop);
5839 gimple_seq seq;
5840 basic_block new_bb;
5841 enum { NARROW, NONE, WIDEN } modifier;
5842 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
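      /* The result vector and the gather offset vector may have different
         numbers of elements.  E.g. (illustrative): nunits == 4 with
         gather_off_nunits == 8 gives modifier WIDEN (one offset vector
         feeds two result vectors), while nunits == 8 with
         gather_off_nunits == 4 gives modifier NARROW (two gather results
         are combined into one result vector, doubling ncopies).  */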
5844 if (nunits == gather_off_nunits)
5845 modifier = NONE;
5846 else if (nunits == gather_off_nunits / 2)
5848 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5849 modifier = WIDEN;
5851 for (i = 0; i < gather_off_nunits; ++i)
5852 sel[i] = i | nunits;
5854 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
5855 gcc_assert (perm_mask != NULL_TREE);
5857 else if (nunits == gather_off_nunits * 2)
5859 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5860 modifier = NARROW;
5862 for (i = 0; i < nunits; ++i)
5863 sel[i] = i < gather_off_nunits
5864 ? i : i + nunits - gather_off_nunits;
5866 perm_mask = vect_gen_perm_mask (vectype, sel);
5867 gcc_assert (perm_mask != NULL_TREE);
5868 ncopies *= 2;
5870 else
5871 gcc_unreachable ();
5873 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5874 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5875 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5876 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5877 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5878 scaletype = TREE_VALUE (arglist);
5879 gcc_checking_assert (types_compatible_p (srctype, rettype));
5881 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5883 ptr = fold_convert (ptrtype, gather_base);
5884 if (!is_gimple_min_invariant (ptr))
5886 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5887 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5888 gcc_assert (!new_bb);
5891 /* Currently we support only unconditional gather loads,
5892 so mask should be all ones. */
5893 if (TREE_CODE (masktype) == INTEGER_TYPE)
5894 mask = build_int_cst (masktype, -1);
5895 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5897 mask = build_int_cst (TREE_TYPE (masktype), -1);
5898 mask = build_vector_from_val (masktype, mask);
5899 mask = vect_init_vector (stmt, mask, masktype, NULL);
5901 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
5903 REAL_VALUE_TYPE r;
5904 long tmp[6];
5905 for (j = 0; j < 6; ++j)
5906 tmp[j] = -1;
5907 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5908 mask = build_real (TREE_TYPE (masktype), r);
5909 mask = build_vector_from_val (masktype, mask);
5910 mask = vect_init_vector (stmt, mask, masktype, NULL);
5912 else
5913 gcc_unreachable ();
5915 scale = build_int_cst (scaletype, gather_scale);
5917 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
5918 merge = build_int_cst (TREE_TYPE (rettype), 0);
5919 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
5921 REAL_VALUE_TYPE r;
5922 long tmp[6];
5923 for (j = 0; j < 6; ++j)
5924 tmp[j] = 0;
5925 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
5926 merge = build_real (TREE_TYPE (rettype), r);
5928 else
5929 gcc_unreachable ();
5930 merge = build_vector_from_val (rettype, merge);
5931 merge = vect_init_vector (stmt, merge, rettype, NULL);
5933 prev_stmt_info = NULL;
5934 for (j = 0; j < ncopies; ++j)
5936 if (modifier == WIDEN && (j & 1))
5937 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
5938 perm_mask, stmt, gsi);
5939 else if (j == 0)
5940 op = vec_oprnd0
5941 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
5942 else
5943 op = vec_oprnd0
5944 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
5946 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5948 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5949 == TYPE_VECTOR_SUBPARTS (idxtype));
5950 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
5951 var = make_ssa_name (var, NULL);
5952 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5953 new_stmt
5954 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
5955 op, NULL_TREE);
5956 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5957 op = var;
5960 new_stmt
5961 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
5963 if (!useless_type_conversion_p (vectype, rettype))
5965 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
5966 == TYPE_VECTOR_SUBPARTS (rettype));
5967 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
5968 op = make_ssa_name (var, new_stmt);
5969 gimple_call_set_lhs (new_stmt, op);
5970 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5971 var = make_ssa_name (vec_dest, NULL);
5972 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
5973 new_stmt
5974 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
5975 NULL_TREE);
5977 else
5979 var = make_ssa_name (vec_dest, new_stmt);
5980 gimple_call_set_lhs (new_stmt, var);
5983 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5985 if (modifier == NARROW)
5987 if ((j & 1) == 0)
5989 prev_res = var;
5990 continue;
5992 var = permute_vec_elements (prev_res, var,
5993 perm_mask, stmt, gsi);
5994 new_stmt = SSA_NAME_DEF_STMT (var);
5997 if (prev_stmt_info == NULL)
5998 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5999 else
6000 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6001 prev_stmt_info = vinfo_for_stmt (new_stmt);
6003 return true;
6005 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
6007 gimple_stmt_iterator incr_gsi;
6008 bool insert_after;
6009 gimple incr;
6010 tree offvar;
6011 tree ivstep;
6012 tree running_off;
6013 vec<constructor_elt, va_gc> *v = NULL;
6014 gimple_seq stmts = NULL;
6015 tree stride_base, stride_step, alias_off;
6017 gcc_assert (!nested_in_vect_loop);
6019 stride_base
6020 = fold_build_pointer_plus
6021 (unshare_expr (DR_BASE_ADDRESS (dr)),
6022 size_binop (PLUS_EXPR,
6023 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
6024 convert_to_ptrofftype (DR_INIT (dr))));
6025 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
6027 /* For a load with loop-invariant (but other than power-of-2)
6028 stride (i.e. not a grouped access) like so:
6030 for (i = 0; i < n; i += stride)
6031 ... = array[i];
6033 we generate a new induction variable and new accesses to
6034 form a new vector (or vectors, depending on ncopies):
6036 for (j = 0; ; j += VF*stride)
6037 tmp1 = array[j];
6038 tmp2 = array[j + stride];
6040 vectemp = {tmp1, tmp2, ...}
6043 ivstep = stride_step;
6044 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6045 build_int_cst (TREE_TYPE (ivstep), vf));
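      /* The induction variable created below thus advances by
         stride_step * VF bytes per vector loop iteration, i.e. the
         byte-offset analogue of j += VF*stride in the sketch above.  */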
6047 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6049 create_iv (stride_base, ivstep, NULL,
6050 loop, &incr_gsi, insert_after,
6051 &offvar, NULL);
6052 incr = gsi_stmt (incr_gsi);
6053 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6055 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6056 if (stmts)
6057 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6059 prev_stmt_info = NULL;
6060 running_off = offvar;
6061 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
6062 for (j = 0; j < ncopies; j++)
6064 tree vec_inv;
6066 vec_alloc (v, nunits);
6067 for (i = 0; i < nunits; i++)
6069 tree newref, newoff;
6070 gimple incr;
6071 newref = build2 (MEM_REF, TREE_TYPE (vectype),
6072 running_off, alias_off);
6074 newref = force_gimple_operand_gsi (gsi, newref, true,
6075 NULL_TREE, true,
6076 GSI_SAME_STMT);
6077 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6078 newoff = copy_ssa_name (running_off, NULL);
6079 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
6080 running_off, stride_step);
6081 vect_finish_stmt_generation (stmt, incr, gsi);
6083 running_off = newoff;
6086 vec_inv = build_constructor (vectype, v);
6087 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6088 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6090 if (j == 0)
6091 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6092 else
6093 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6094 prev_stmt_info = vinfo_for_stmt (new_stmt);
6096 return true;
6099 if (grouped_load)
6101 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6102 if (slp
6103 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6104 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6105 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6107 /* Check if the chain of loads is already vectorized. */
6108 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6109 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6110 ??? But we can only do so if there is exactly one
6111 as we have no way to get at the rest. Leave the CSE
6112 opportunity alone.
6113 ??? With the group load eventually participating
6114 in multiple different permutations (having multiple
6115 slp nodes which refer to the same group) the CSE
6116 would even produce wrong code. See PR56270. */
6117 && !slp)
6119 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6120 return true;
6122 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6123 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6125 /* VEC_NUM is the number of vect stmts to be created for this group. */
6126 if (slp)
6128 grouped_load = false;
6129 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6130 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6131 slp_perm = true;
6132 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6134 else
6136 vec_num = group_size;
6137 group_gap = 0;
6140 else
6142 first_stmt = stmt;
6143 first_dr = dr;
6144 group_size = vec_num = 1;
6145 group_gap = 0;
6148 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6149 gcc_assert (alignment_support_scheme);
6150 /* Targets with load-lane instructions must not require explicit
6151 realignment. */
6152 gcc_assert (!load_lanes_p
6153 || alignment_support_scheme == dr_aligned
6154 || alignment_support_scheme == dr_unaligned_supported);
6156 /* In case the vectorization factor (VF) is bigger than the number
6157 of elements that we can fit in a vectype (nunits), we have to generate
6158 more than one vector stmt - i.e - we need to "unroll" the
6159 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6160 from one copy of the vector stmt to the next, in the field
6161 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6162 stages to find the correct vector defs to be used when vectorizing
6163 stmts that use the defs of the current stmt. The example below
6164 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6165 need to create 4 vectorized stmts):
6167 before vectorization:
6168 RELATED_STMT VEC_STMT
6169 S1: x = memref - -
6170 S2: z = x + 1 - -
6172 step 1: vectorize stmt S1:
6173 We first create the vector stmt VS1_0, and, as usual, record a
6174 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6175 Next, we create the vector stmt VS1_1, and record a pointer to
6176 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6177 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6178 stmts and pointers:
6179 RELATED_STMT VEC_STMT
6180 VS1_0: vx0 = memref0 VS1_1 -
6181 VS1_1: vx1 = memref1 VS1_2 -
6182 VS1_2: vx2 = memref2 VS1_3 -
6183 VS1_3: vx3 = memref3 - -
6184 S1: x = load - VS1_0
6185 S2: z = x + 1 - -
6187 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6188 information we recorded in RELATED_STMT field is used to vectorize
6189 stmt S2. */
6191 /* In case of interleaving (non-unit grouped access):
6193 S1: x2 = &base + 2
6194 S2: x0 = &base
6195 S3: x1 = &base + 1
6196 S4: x3 = &base + 3
6198 Vectorized loads are created in the order of memory accesses
6199 starting from the access of the first stmt of the chain:
6201 VS1: vx0 = &base
6202 VS2: vx1 = &base + vec_size*1
6203 VS3: vx3 = &base + vec_size*2
6204 VS4: vx4 = &base + vec_size*3
6206 Then permutation statements are generated:
6208 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6209 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6212 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6213 (the order of the data-refs in the output of vect_permute_load_chain
6214 corresponds to the order of scalar stmts in the interleaving chain - see
6215 the documentation of vect_permute_load_chain()).
6216 The generation of permutation stmts and recording them in
6217 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6219 In case of both multiple types and interleaving, the vector loads and
6220 permutation stmts above are created for every copy. The result vector
6221 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6222 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6224 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6225 on a target that supports unaligned accesses (dr_unaligned_supported)
6226 we generate the following code:
6227 p = initial_addr;
6228 indx = 0;
6229 loop {
6230 p = p + indx * vectype_size;
6231 vec_dest = *(p);
6232 indx = indx + 1;
6235 Otherwise, the data reference is potentially unaligned on a target that
6236 does not support unaligned accesses (dr_explicit_realign_optimized) -
6237 then generate the following code, in which the data in each iteration is
6238 obtained by two vector loads, one from the previous iteration, and one
6239 from the current iteration:
6240 p1 = initial_addr;
6241 msq_init = *(floor(p1))
6242 p2 = initial_addr + VS - 1;
6243 realignment_token = call target_builtin;
6244 indx = 0;
6245 loop {
6246 p2 = p2 + indx * vectype_size
6247 lsq = *(floor(p2))
6248 vec_dest = realign_load (msq, lsq, realignment_token)
6249 indx = indx + 1;
6250 msq = lsq;
6251 } */
6253 /* If the misalignment remains the same throughout the execution of the
6254 loop, we can create the init_addr and permutation mask at the loop
6255 preheader. Otherwise, it needs to be created inside the loop.
6256 This can only occur when vectorizing memory accesses in the inner-loop
6257 nested within an outer-loop that is being vectorized. */
6259 if (nested_in_vect_loop
6260 && (TREE_INT_CST_LOW (DR_STEP (dr))
6261 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6263 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6264 compute_in_loop = true;
6267 if ((alignment_support_scheme == dr_explicit_realign_optimized
6268 || alignment_support_scheme == dr_explicit_realign)
6269 && !compute_in_loop)
6271 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6272 alignment_support_scheme, NULL_TREE,
6273 &at_loop);
6274 if (alignment_support_scheme == dr_explicit_realign_optimized)
6276 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
6277 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6278 size_one_node);
6281 else
6282 at_loop = loop;
6284 if (negative)
6285 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
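  /* For a negative step the data-ref pointer is biased by an offset of
     -(nunits - 1) elements so that each vector load still covers a full
     vector's worth of data; the loaded vector is reversed afterwards
     (see the perm_mask_for_reverse handling below).  */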
6287 if (load_lanes_p)
6288 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6289 else
6290 aggr_type = vectype;
6292 prev_stmt_info = NULL;
6293 for (j = 0; j < ncopies; j++)
6295 /* 1. Create the vector or array pointer update chain. */
6296 if (j == 0)
6298 bool simd_lane_access_p
6299 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6300 if (simd_lane_access_p
6301 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6302 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6303 && integer_zerop (DR_OFFSET (first_dr))
6304 && integer_zerop (DR_INIT (first_dr))
6305 && alias_sets_conflict_p (get_alias_set (aggr_type),
6306 get_alias_set (DR_REF (first_dr)))
6307 && (alignment_support_scheme == dr_aligned
6308 || alignment_support_scheme == dr_unaligned_supported))
6310 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6311 dataref_offset = build_int_cst (reference_alias_ptr_type
6312 (DR_REF (first_dr)), 0);
6313 inv_p = false;
6315 else
6316 dataref_ptr
6317 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6318 offset, &dummy, gsi, &ptr_incr,
6319 simd_lane_access_p, &inv_p,
6320 byte_offset);
6322 else if (dataref_offset)
6323 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6324 TYPE_SIZE_UNIT (aggr_type));
6325 else
6326 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6327 TYPE_SIZE_UNIT (aggr_type));
6329 if (grouped_load || slp_perm)
6330 dr_chain.create (vec_num);
6332 if (load_lanes_p)
6334 tree vec_array;
6336 vec_array = create_vector_array (vectype, vec_num);
6338 /* Emit:
6339 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6340 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6341 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6342 gimple_call_set_lhs (new_stmt, vec_array);
6343 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6345 /* Extract each vector into an SSA_NAME. */
6346 for (i = 0; i < vec_num; i++)
6348 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6349 vec_array, i);
6350 dr_chain.quick_push (new_temp);
6353 /* Record the mapping between SSA_NAMEs and statements. */
6354 vect_record_grouped_load_vectors (stmt, dr_chain);
6356 else
6358 for (i = 0; i < vec_num; i++)
6360 if (i > 0)
6361 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6362 stmt, NULL_TREE);
6364 /* 2. Create the vector-load in the loop. */
6365 switch (alignment_support_scheme)
6367 case dr_aligned:
6368 case dr_unaligned_supported:
6370 unsigned int align, misalign;
6372 data_ref
6373 = build2 (MEM_REF, vectype, dataref_ptr,
6374 dataref_offset
6375 ? dataref_offset
6376 : build_int_cst (reference_alias_ptr_type
6377 (DR_REF (first_dr)), 0));
6378 align = TYPE_ALIGN_UNIT (vectype);
6379 if (alignment_support_scheme == dr_aligned)
6381 gcc_assert (aligned_access_p (first_dr));
6382 misalign = 0;
6384 else if (DR_MISALIGNMENT (first_dr) == -1)
6386 TREE_TYPE (data_ref)
6387 = build_aligned_type (TREE_TYPE (data_ref),
6388 TYPE_ALIGN (elem_type));
6389 align = TYPE_ALIGN_UNIT (elem_type);
6390 misalign = 0;
6392 else
6394 TREE_TYPE (data_ref)
6395 = build_aligned_type (TREE_TYPE (data_ref),
6396 TYPE_ALIGN (elem_type));
6397 misalign = DR_MISALIGNMENT (first_dr);
6399 if (dataref_offset == NULL_TREE)
6400 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6401 align, misalign);
6402 break;
6404 case dr_explicit_realign:
6406 tree ptr, bump;
6407 tree vs_minus_1;
6409 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6411 if (compute_in_loop)
6412 msq = vect_setup_realignment (first_stmt, gsi,
6413 &realignment_token,
6414 dr_explicit_realign,
6415 dataref_ptr, NULL);
6417 ptr = copy_ssa_name (dataref_ptr, NULL);
6418 new_stmt = gimple_build_assign_with_ops
6419 (BIT_AND_EXPR, ptr, dataref_ptr,
6420 build_int_cst
6421 (TREE_TYPE (dataref_ptr),
6422 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6423 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6424 data_ref
6425 = build2 (MEM_REF, vectype, ptr,
6426 build_int_cst (reference_alias_ptr_type
6427 (DR_REF (first_dr)), 0));
6428 vec_dest = vect_create_destination_var (scalar_dest,
6429 vectype);
6430 new_stmt = gimple_build_assign (vec_dest, data_ref);
6431 new_temp = make_ssa_name (vec_dest, new_stmt);
6432 gimple_assign_set_lhs (new_stmt, new_temp);
6433 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6434 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6435 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6436 msq = new_temp;
6438 bump = size_binop (MULT_EXPR, vs_minus_1,
6439 TYPE_SIZE_UNIT (elem_type));
6440 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6441 new_stmt = gimple_build_assign_with_ops
6442 (BIT_AND_EXPR, NULL_TREE, ptr,
6443 build_int_cst
6444 (TREE_TYPE (ptr),
6445 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6446 ptr = copy_ssa_name (dataref_ptr, new_stmt);
6447 gimple_assign_set_lhs (new_stmt, ptr);
6448 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6449 data_ref
6450 = build2 (MEM_REF, vectype, ptr,
6451 build_int_cst (reference_alias_ptr_type
6452 (DR_REF (first_dr)), 0));
6453 break;
6455 case dr_explicit_realign_optimized:
6456 new_temp = copy_ssa_name (dataref_ptr, NULL);
6457 new_stmt = gimple_build_assign_with_ops
6458 (BIT_AND_EXPR, new_temp, dataref_ptr,
6459 build_int_cst
6460 (TREE_TYPE (dataref_ptr),
6461 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6462 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6463 data_ref
6464 = build2 (MEM_REF, vectype, new_temp,
6465 build_int_cst (reference_alias_ptr_type
6466 (DR_REF (first_dr)), 0));
6467 break;
6468 default:
6469 gcc_unreachable ();
6471 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6472 new_stmt = gimple_build_assign (vec_dest, data_ref);
6473 new_temp = make_ssa_name (vec_dest, new_stmt);
6474 gimple_assign_set_lhs (new_stmt, new_temp);
6475 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6477 /* 3. Handle explicit realignment if necessary/supported.
6478 Create in loop:
6479 vec_dest = realign_load (msq, lsq, realignment_token) */
6480 if (alignment_support_scheme == dr_explicit_realign_optimized
6481 || alignment_support_scheme == dr_explicit_realign)
6483 lsq = gimple_assign_lhs (new_stmt);
6484 if (!realignment_token)
6485 realignment_token = dataref_ptr;
6486 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6487 new_stmt
6488 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
6489 vec_dest, msq, lsq,
6490 realignment_token);
6491 new_temp = make_ssa_name (vec_dest, new_stmt);
6492 gimple_assign_set_lhs (new_stmt, new_temp);
6493 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6495 if (alignment_support_scheme == dr_explicit_realign_optimized)
6497 gcc_assert (phi);
6498 if (i == vec_num - 1 && j == ncopies - 1)
6499 add_phi_arg (phi, lsq,
6500 loop_latch_edge (containing_loop),
6501 UNKNOWN_LOCATION);
6502 msq = lsq;
6506 /* 4. Handle invariant-load. */
6507 if (inv_p && !bb_vinfo)
6509 gcc_assert (!grouped_load);
6510 /* If we have versioned for aliasing or the loop doesn't
6511 have any data dependencies that would preclude this,
6512 then we are sure this is a loop invariant load and
6513 thus we can insert it on the preheader edge. */
6514 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6515 && !nested_in_vect_loop
6516 && hoist_defs_of_uses (stmt, loop))
6518 if (dump_enabled_p ())
6520 dump_printf_loc (MSG_NOTE, vect_location,
6521 "hoisting out of the vectorized "
6522 "loop: ");
6523 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6524 dump_printf (MSG_NOTE, "\n");
6526 tree tem = copy_ssa_name (scalar_dest, NULL);
6527 gsi_insert_on_edge_immediate
6528 (loop_preheader_edge (loop),
6529 gimple_build_assign (tem,
6530 unshare_expr
6531 (gimple_assign_rhs1 (stmt))));
6532 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6534 else
6536 gimple_stmt_iterator gsi2 = *gsi;
6537 gsi_next (&gsi2);
6538 new_temp = vect_init_vector (stmt, scalar_dest,
6539 vectype, &gsi2);
6541 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6542 set_vinfo_for_stmt (new_stmt,
6543 new_stmt_vec_info (new_stmt, loop_vinfo,
6544 bb_vinfo));
6547 if (negative)
6549 tree perm_mask = perm_mask_for_reverse (vectype);
6550 new_temp = permute_vec_elements (new_temp, new_temp,
6551 perm_mask, stmt, gsi);
6552 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6555 /* Collect vector loads and later create their permutation in
6556 vect_transform_grouped_load (). */
6557 if (grouped_load || slp_perm)
6558 dr_chain.quick_push (new_temp);
6560 /* Store vector loads in the corresponding SLP_NODE. */
6561 if (slp && !slp_perm)
6562 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6564 /* Bump the vector pointer to account for a gap. */
6565 if (slp && group_gap != 0)
6567 tree bump = size_binop (MULT_EXPR,
6568 TYPE_SIZE_UNIT (elem_type),
6569 size_int (group_gap));
6570 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6571 stmt, bump);
6575 if (slp && !slp_perm)
6576 continue;
6578 if (slp_perm)
6580 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6581 slp_node_instance, false))
6583 dr_chain.release ();
6584 return false;
6587 else
6589 if (grouped_load)
6591 if (!load_lanes_p)
6592 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6593 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6595 else
6597 if (j == 0)
6598 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6599 else
6600 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6601 prev_stmt_info = vinfo_for_stmt (new_stmt);
6604 dr_chain.release ();
6607 return true;
6610 /* Function vect_is_simple_cond.
6612 Input:
6613 LOOP - the loop that is being vectorized.
6614 COND - Condition that is checked for simple use.
6616 Output:
6617 *COMP_VECTYPE - the vector type for the comparison.
6619 Returns whether a COND can be vectorized. Checks whether
6620 condition operands are supportable using vect_is_simple_use. */
6622 static bool
6623 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6624 bb_vec_info bb_vinfo, tree *comp_vectype)
6626 tree lhs, rhs;
6627 tree def;
6628 enum vect_def_type dt;
6629 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6631 if (!COMPARISON_CLASS_P (cond))
6632 return false;
6634 lhs = TREE_OPERAND (cond, 0);
6635 rhs = TREE_OPERAND (cond, 1);
6637 if (TREE_CODE (lhs) == SSA_NAME)
6639 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6640 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6641 &lhs_def_stmt, &def, &dt, &vectype1))
6642 return false;
6644 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6645 && TREE_CODE (lhs) != FIXED_CST)
6646 return false;
6648 if (TREE_CODE (rhs) == SSA_NAME)
6650 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6651 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6652 &rhs_def_stmt, &def, &dt, &vectype2))
6653 return false;
6655 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6656 && TREE_CODE (rhs) != FIXED_CST)
6657 return false;
6659 *comp_vectype = vectype1 ? vectype1 : vectype2;
6660 return true;
6663 /* vectorizable_condition.
6665 Check if STMT is conditional modify expression that can be vectorized.
6666 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6667 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6668 at GSI.
6670 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
6671 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
6672 the else clause if it is 2).
6674 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6676 bool
6677 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6678 gimple *vec_stmt, tree reduc_def, int reduc_index,
6679 slp_tree slp_node)
6681 tree scalar_dest = NULL_TREE;
6682 tree vec_dest = NULL_TREE;
6683 tree cond_expr, then_clause, else_clause;
6684 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6685 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6686 tree comp_vectype = NULL_TREE;
6687 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6688 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6689 tree vec_compare, vec_cond_expr;
6690 tree new_temp;
6691 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6692 tree def;
6693 enum vect_def_type dt, dts[4];
6694 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6695 int ncopies;
6696 enum tree_code code;
6697 stmt_vec_info prev_stmt_info = NULL;
6698 int i, j;
6699 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6700 vec<tree> vec_oprnds0 = vNULL;
6701 vec<tree> vec_oprnds1 = vNULL;
6702 vec<tree> vec_oprnds2 = vNULL;
6703 vec<tree> vec_oprnds3 = vNULL;
6704 tree vec_cmp_type;
6706 if (slp_node || PURE_SLP_STMT (stmt_info))
6707 ncopies = 1;
6708 else
6709 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6711 gcc_assert (ncopies >= 1);
6712 if (reduc_index && ncopies > 1)
6713 return false; /* FORNOW */
6715 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6716 return false;
6718 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6719 return false;
6721 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6722 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6723 && reduc_def))
6724 return false;
6726 /* FORNOW: not yet supported. */
6727 if (STMT_VINFO_LIVE_P (stmt_info))
6729 if (dump_enabled_p ())
6730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6731 "value used after loop.\n");
6732 return false;
6735 /* Is vectorizable conditional operation? */
6736 if (!is_gimple_assign (stmt))
6737 return false;
6739 code = gimple_assign_rhs_code (stmt);
6741 if (code != COND_EXPR)
6742 return false;
6744 cond_expr = gimple_assign_rhs1 (stmt);
6745 then_clause = gimple_assign_rhs2 (stmt);
6746 else_clause = gimple_assign_rhs3 (stmt);
6748 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6749 &comp_vectype)
6750 || !comp_vectype)
6751 return false;
6753 if (TREE_CODE (then_clause) == SSA_NAME)
6755 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6756 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
6757 &then_def_stmt, &def, &dt))
6758 return false;
6760 else if (TREE_CODE (then_clause) != INTEGER_CST
6761 && TREE_CODE (then_clause) != REAL_CST
6762 && TREE_CODE (then_clause) != FIXED_CST)
6763 return false;
6765 if (TREE_CODE (else_clause) == SSA_NAME)
6767 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6768 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
6769 &else_def_stmt, &def, &dt))
6770 return false;
6772 else if (TREE_CODE (else_clause) != INTEGER_CST
6773 && TREE_CODE (else_clause) != REAL_CST
6774 && TREE_CODE (else_clause) != FIXED_CST)
6775 return false;
6777 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
6778 /* The result of a vector comparison should be of signed integer type. */
6779 tree cmp_type = build_nonstandard_integer_type (prec, 0);
6780 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
6781 if (vec_cmp_type == NULL_TREE)
6782 return false;
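  /* E.g. (illustrative): for a vectype of four 32-bit floats, prec is 32,
     cmp_type is a 32-bit signed integer type and vec_cmp_type the
     corresponding four-element signed integer vector type.  */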
6784 if (!vec_stmt)
6786 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6787 return expand_vec_cond_expr_p (vectype, comp_vectype);
6790 /* Transform. */
6792 if (!slp_node)
6794 vec_oprnds0.create (1);
6795 vec_oprnds1.create (1);
6796 vec_oprnds2.create (1);
6797 vec_oprnds3.create (1);
6800 /* Handle def. */
6801 scalar_dest = gimple_assign_lhs (stmt);
6802 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6804 /* Handle cond expr. */
6805 for (j = 0; j < ncopies; j++)
6807 gassign *new_stmt = NULL;
6808 if (j == 0)
6810 if (slp_node)
6812 auto_vec<tree, 4> ops;
6813 auto_vec<vec<tree>, 4> vec_defs;
6815 ops.safe_push (TREE_OPERAND (cond_expr, 0));
6816 ops.safe_push (TREE_OPERAND (cond_expr, 1));
6817 ops.safe_push (then_clause);
6818 ops.safe_push (else_clause);
6819 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
6820 vec_oprnds3 = vec_defs.pop ();
6821 vec_oprnds2 = vec_defs.pop ();
6822 vec_oprnds1 = vec_defs.pop ();
6823 vec_oprnds0 = vec_defs.pop ();
6825 ops.release ();
6826 vec_defs.release ();
6828 else
6830 gimple gtemp;
6831 vec_cond_lhs =
6832 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
6833 stmt, NULL);
6834 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6835 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
6837 vec_cond_rhs =
6838 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
6839 stmt, NULL);
6840 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6841 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
6842 if (reduc_index == 1)
6843 vec_then_clause = reduc_def;
6844 else
6846 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
6847 stmt, NULL);
6848 vect_is_simple_use (then_clause, stmt, loop_vinfo,
6849 NULL, &gtemp, &def, &dts[2]);
6851 if (reduc_index == 2)
6852 vec_else_clause = reduc_def;
6853 else
6855 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
6856 stmt, NULL);
6857 vect_is_simple_use (else_clause, stmt, loop_vinfo,
6858 NULL, &gtemp, &def, &dts[3]);
6862 else
6864 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
6865 vec_oprnds0.pop ());
6866 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
6867 vec_oprnds1.pop ());
6868 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
6869 vec_oprnds2.pop ());
6870 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
6871 vec_oprnds3.pop ());
6874 if (!slp_node)
6876 vec_oprnds0.quick_push (vec_cond_lhs);
6877 vec_oprnds1.quick_push (vec_cond_rhs);
6878 vec_oprnds2.quick_push (vec_then_clause);
6879 vec_oprnds3.quick_push (vec_else_clause);
6882 /* Arguments are ready. Create the new vector stmt. */
6883 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
6885 vec_cond_rhs = vec_oprnds1[i];
6886 vec_then_clause = vec_oprnds2[i];
6887 vec_else_clause = vec_oprnds3[i];
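	  /* The scalar COND_EXPR, e.g. x = a < b ? c : d, is rewritten below
	     as x_v = VEC_COND_EXPR <a_v < b_v, c_v, d_v> using the vector
	     operands gathered above.  */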
6889 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6890 vec_cond_lhs, vec_cond_rhs);
6891 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6892 vec_compare, vec_then_clause, vec_else_clause);
6894 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6895 new_temp = make_ssa_name (vec_dest, new_stmt);
6896 gimple_assign_set_lhs (new_stmt, new_temp);
6897 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6898 if (slp_node)
6899 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6902 if (slp_node)
6903 continue;
6905 if (j == 0)
6906 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6907 else
6908 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6910 prev_stmt_info = vinfo_for_stmt (new_stmt);
6913 vec_oprnds0.release ();
6914 vec_oprnds1.release ();
6915 vec_oprnds2.release ();
6916 vec_oprnds3.release ();
6918 return true;
6922 /* Make sure the statement is vectorizable. */
6924 bool
6925 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
6927 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6928 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6929 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
6930 bool ok;
6931 tree scalar_type, vectype;
6932 gimple pattern_stmt;
6933 gimple_seq pattern_def_seq;
6935 if (dump_enabled_p ())
6937 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
6938 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6939 dump_printf (MSG_NOTE, "\n");
6942 if (gimple_has_volatile_ops (stmt))
6944 if (dump_enabled_p ())
6945 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6946 "not vectorized: stmt has volatile operands\n");
6948 return false;
6951 /* Skip stmts that do not need to be vectorized. In loops this is expected
6952 to include:
6953 - the COND_EXPR which is the loop exit condition
6954 - any LABEL_EXPRs in the loop
6955 - computations that are used only for array indexing or loop control.
6956 In basic blocks we only analyze statements that are a part of some SLP
6957 instance, therefore, all the statements are relevant.
6959 A pattern statement needs to be analyzed instead of the original statement
6960 if the original statement is not relevant. Otherwise, we analyze both
6961 statements. In basic blocks we are called from some SLP instance
6962 traversal, so we do not analyze pattern stmts here; the pattern stmts
6963 are already part of the SLP instance. */
6965 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
6966 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6967 && !STMT_VINFO_LIVE_P (stmt_info))
6969 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6970 && pattern_stmt
6971 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6972 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6974 /* Analyze PATTERN_STMT instead of the original stmt. */
6975 stmt = pattern_stmt;
6976 stmt_info = vinfo_for_stmt (pattern_stmt);
6977 if (dump_enabled_p ())
6979 dump_printf_loc (MSG_NOTE, vect_location,
6980 "==> examining pattern statement: ");
6981 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6982 dump_printf (MSG_NOTE, "\n");
6985 else
6987 if (dump_enabled_p ())
6988 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
6990 return true;
6993 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6994 && node == NULL
6995 && pattern_stmt
6996 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6997 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6999 /* Analyze PATTERN_STMT too. */
7000 if (dump_enabled_p ())
7002 dump_printf_loc (MSG_NOTE, vect_location,
7003 "==> examining pattern statement: ");
7004 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7005 dump_printf (MSG_NOTE, "\n");
7008 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7009 return false;
7012 if (is_pattern_stmt_p (stmt_info)
7013 && node == NULL
7014 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7016 gimple_stmt_iterator si;
7018 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7020 gimple pattern_def_stmt = gsi_stmt (si);
7021 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7022 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7024 /* Analyze def stmt of STMT if it's a pattern stmt. */
7025 if (dump_enabled_p ())
7027 dump_printf_loc (MSG_NOTE, vect_location,
7028 "==> examining pattern def statement: ");
7029 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7030 dump_printf (MSG_NOTE, "\n");
7033 if (!vect_analyze_stmt (pattern_def_stmt,
7034 need_to_vectorize, node))
7035 return false;
7040 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7042 case vect_internal_def:
7043 break;
7045 case vect_reduction_def:
7046 case vect_nested_cycle:
7047 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
7048 || relevance == vect_used_in_outer_by_reduction
7049 || relevance == vect_unused_in_scope));
7050 break;
7052 case vect_induction_def:
7053 case vect_constant_def:
7054 case vect_external_def:
7055 case vect_unknown_def_type:
7056 default:
7057 gcc_unreachable ();
7060 if (bb_vinfo)
7062 gcc_assert (PURE_SLP_STMT (stmt_info));
7064 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7065 if (dump_enabled_p ())
7067 dump_printf_loc (MSG_NOTE, vect_location,
7068 "get vectype for scalar type: ");
7069 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7070 dump_printf (MSG_NOTE, "\n");
7073 vectype = get_vectype_for_scalar_type (scalar_type);
7074 if (!vectype)
7076 if (dump_enabled_p ())
7078 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7079 "not SLPed: unsupported data-type ");
7080 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7081 scalar_type);
7082 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7084 return false;
7087 if (dump_enabled_p ())
7089 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7090 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7091 dump_printf (MSG_NOTE, "\n");
7094 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7097 if (STMT_VINFO_RELEVANT_P (stmt_info))
7099 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7100 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7101 || (is_gimple_call (stmt)
7102 && gimple_call_lhs (stmt) == NULL_TREE));
7103 *need_to_vectorize = true;
7106 ok = true;
7107 if (!bb_vinfo
7108 && (STMT_VINFO_RELEVANT_P (stmt_info)
7109 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7110 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7111 || vectorizable_conversion (stmt, NULL, NULL, NULL)
7112 || vectorizable_shift (stmt, NULL, NULL, NULL)
7113 || vectorizable_operation (stmt, NULL, NULL, NULL)
7114 || vectorizable_assignment (stmt, NULL, NULL, NULL)
7115 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
7116 || vectorizable_call (stmt, NULL, NULL, NULL)
7117 || vectorizable_store (stmt, NULL, NULL, NULL)
7118 || vectorizable_reduction (stmt, NULL, NULL, NULL)
7119 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
7120 else
7122 if (bb_vinfo)
7123 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7124 || vectorizable_conversion (stmt, NULL, NULL, node)
7125 || vectorizable_shift (stmt, NULL, NULL, node)
7126 || vectorizable_operation (stmt, NULL, NULL, node)
7127 || vectorizable_assignment (stmt, NULL, NULL, node)
7128 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7129 || vectorizable_call (stmt, NULL, NULL, node)
7130 || vectorizable_store (stmt, NULL, NULL, node)
7131 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7134 if (!ok)
7136 if (dump_enabled_p ())
7138 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7139 "not vectorized: relevant stmt not ");
7140 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7141 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7142 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7145 return false;
7148 if (bb_vinfo)
7149 return true;
7151 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7152 need extra handling, except for vectorizable reductions. */
7153 if (STMT_VINFO_LIVE_P (stmt_info)
7154 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7155 ok = vectorizable_live_operation (stmt, NULL, NULL);
7157 if (!ok)
7159 if (dump_enabled_p ())
7161 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7162 "not vectorized: live stmt not ");
7163 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7164 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7165 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7168 return false;
7171 return true;
7175 /* Function vect_transform_stmt.
7177 Create a vectorized stmt to replace STMT, and insert it at GSI. */
7179 bool
7180 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7181 bool *grouped_store, slp_tree slp_node,
7182 slp_instance slp_node_instance)
7184 bool is_store = false;
7185 gimple vec_stmt = NULL;
7186 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7187 bool done;
7189 switch (STMT_VINFO_TYPE (stmt_info))
7191 case type_demotion_vec_info_type:
7192 case type_promotion_vec_info_type:
7193 case type_conversion_vec_info_type:
7194 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7195 gcc_assert (done);
7196 break;
7198 case induc_vec_info_type:
7199 gcc_assert (!slp_node);
7200 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7201 gcc_assert (done);
7202 break;
7204 case shift_vec_info_type:
7205 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7206 gcc_assert (done);
7207 break;
7209 case op_vec_info_type:
7210 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7211 gcc_assert (done);
7212 break;
7214 case assignment_vec_info_type:
7215 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7216 gcc_assert (done);
7217 break;
7219 case load_vec_info_type:
7220 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7221 slp_node_instance);
7222 gcc_assert (done);
7223 break;
7225 case store_vec_info_type:
7226 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7227 gcc_assert (done);
7228 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7230 /* In case of interleaving, the whole chain is vectorized when the
7231 last store in the chain is reached. Store stmts before the last
7232 one are skipped, and there vec_stmt_info shouldn't be freed
7233 meanwhile. */
7234 *grouped_store = true;
7235 if (STMT_VINFO_VEC_STMT (stmt_info))
7236 is_store = true;
7238 else
7239 is_store = true;
7240 break;
7242 case condition_vec_info_type:
7243 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7244 gcc_assert (done);
7245 break;
7247 case call_vec_info_type:
7248 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7249 stmt = gsi_stmt (*gsi);
7250 if (is_gimple_call (stmt)
7251 && gimple_call_internal_p (stmt)
7252 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7253 is_store = true;
7254 break;
7256 case call_simd_clone_vec_info_type:
7257 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7258 stmt = gsi_stmt (*gsi);
7259 break;
7261 case reduc_vec_info_type:
7262 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7263 gcc_assert (done);
7264 break;
7266 default:
7267 if (!STMT_VINFO_LIVE_P (stmt_info))
7269 if (dump_enabled_p ())
7270 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7271 "stmt not supported.\n");
7272 gcc_unreachable ();
7276 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7277 is being vectorized, but outside the immediately enclosing loop. */
7278 if (vec_stmt
7279 && STMT_VINFO_LOOP_VINFO (stmt_info)
7280 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7281 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7282 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7283 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7284 || STMT_VINFO_RELEVANT (stmt_info) ==
7285 vect_used_in_outer_by_reduction))
7287 struct loop *innerloop = LOOP_VINFO_LOOP (
7288 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7289 imm_use_iterator imm_iter;
7290 use_operand_p use_p;
7291 tree scalar_dest;
7292 gimple exit_phi;
7294 if (dump_enabled_p ())
7295 dump_printf_loc (MSG_NOTE, vect_location,
7296 "Record the vdef for outer-loop vectorization.\n");
7298 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7299 (to be used when vectorizing outer-loop stmts that use the DEF of
7300 STMT). */
7301 if (gimple_code (stmt) == GIMPLE_PHI)
7302 scalar_dest = PHI_RESULT (stmt);
7303 else
7304 scalar_dest = gimple_assign_lhs (stmt);
7306 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7308 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7310 exit_phi = USE_STMT (use_p);
7311 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7316 /* Handle stmts whose DEF is used outside the loop-nest that is
7317 being vectorized. */
7318 if (STMT_VINFO_LIVE_P (stmt_info)
7319 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7321 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7322 gcc_assert (done);
7325 if (vec_stmt)
7326 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7328 return is_store;
7332 /* Remove a group of stores (for SLP or interleaving), free their
7333 stmt_vec_info. */
7335 void
7336 vect_remove_stores (gimple first_stmt)
7338 gimple next = first_stmt;
7339 gimple tmp;
7340 gimple_stmt_iterator next_si;
7342 while (next)
7344 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7346 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7347 if (is_pattern_stmt_p (stmt_info))
7348 next = STMT_VINFO_RELATED_STMT (stmt_info);
7349 /* Free the attached stmt_vec_info and remove the stmt. */
7350 next_si = gsi_for_stmt (next);
7351 unlink_stmt_vdef (next);
7352 gsi_remove (&next_si, true);
7353 release_defs (next);
7354 free_stmt_vec_info (next);
7355 next = tmp;
7360 /* Function new_stmt_vec_info.
7362 Create and initialize a new stmt_vec_info struct for STMT. */
7364 stmt_vec_info
7365 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7366 bb_vec_info bb_vinfo)
7368 stmt_vec_info res;
7369 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7371 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7372 STMT_VINFO_STMT (res) = stmt;
7373 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7374 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7375 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7376 STMT_VINFO_LIVE_P (res) = false;
7377 STMT_VINFO_VECTYPE (res) = NULL;
7378 STMT_VINFO_VEC_STMT (res) = NULL;
7379 STMT_VINFO_VECTORIZABLE (res) = true;
7380 STMT_VINFO_IN_PATTERN_P (res) = false;
7381 STMT_VINFO_RELATED_STMT (res) = NULL;
7382 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7383 STMT_VINFO_DATA_REF (res) = NULL;
7385 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7386 STMT_VINFO_DR_OFFSET (res) = NULL;
7387 STMT_VINFO_DR_INIT (res) = NULL;
7388 STMT_VINFO_DR_STEP (res) = NULL;
7389 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7391 if (gimple_code (stmt) == GIMPLE_PHI
7392 && is_loop_header_bb_p (gimple_bb (stmt)))
7393 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7394 else
7395 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7397 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7398 STMT_SLP_TYPE (res) = loop_vect;
7399 GROUP_FIRST_ELEMENT (res) = NULL;
7400 GROUP_NEXT_ELEMENT (res) = NULL;
7401 GROUP_SIZE (res) = 0;
7402 GROUP_STORE_COUNT (res) = 0;
7403 GROUP_GAP (res) = 0;
7404 GROUP_SAME_DR_STMT (res) = NULL;
7406 return res;
7410 /* Create a hash table for stmt_vec_info. */
7412 void
7413 init_stmt_vec_info_vec (void)
7415 gcc_assert (!stmt_vec_info_vec.exists ());
7416 stmt_vec_info_vec.create (50);
7420 /* Free hash table for stmt_vec_info. */
7422 void
7423 free_stmt_vec_info_vec (void)
7425 unsigned int i;
7426 vec_void_p info;
7427 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7428 if (info != NULL)
7429 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7430 gcc_assert (stmt_vec_info_vec.exists ());
7431 stmt_vec_info_vec.release ();
7435 /* Free stmt vectorization related info. */
7437 void
7438 free_stmt_vec_info (gimple stmt)
7440 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7442 if (!stmt_info)
7443 return;
7445 /* Check if this statement has a related "pattern stmt"
7446 (introduced by the vectorizer during the pattern recognition
7447 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7448 too. */
7449 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7451 stmt_vec_info patt_info
7452 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7453 if (patt_info)
7455 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7456 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7457 gimple_set_bb (patt_stmt, NULL);
7458 tree lhs = gimple_get_lhs (patt_stmt);
7459 if (TREE_CODE (lhs) == SSA_NAME)
7460 release_ssa_name (lhs);
7461 if (seq)
7463 gimple_stmt_iterator si;
7464 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7466 gimple seq_stmt = gsi_stmt (si);
7467 gimple_set_bb (seq_stmt, NULL);
7468 lhs = gimple_get_lhs (patt_stmt);
7469 if (TREE_CODE (lhs) == SSA_NAME)
7470 release_ssa_name (lhs);
7471 free_stmt_vec_info (seq_stmt);
7474 free_stmt_vec_info (patt_stmt);
7478 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7479 set_vinfo_for_stmt (stmt, NULL);
7480 free (stmt_info);
7484 /* Function get_vectype_for_scalar_type_and_size.
7486 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7487 by the target. */
7489 static tree
7490 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7492 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
7493 enum machine_mode simd_mode;
7494 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7495 int nunits;
7496 tree vectype;
7498 if (nbytes == 0)
7499 return NULL_TREE;
7501 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7502 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7503 return NULL_TREE;
7505 /* For vector types of elements whose mode precision doesn't
7506 match their type's precision we use an element type of mode
7507 precision. The vectorization routines will have to make sure
7508 they support the proper result truncation/extension.
7509 We also make sure to build vector types with INTEGER_TYPE
7510 component type only. */
7511 if (INTEGRAL_TYPE_P (scalar_type)
7512 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7513 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7514 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7515 TYPE_UNSIGNED (scalar_type));
7517 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7518 When the component mode passes the above test simply use a type
7519 corresponding to that mode. The theory is that any use that
7520 would cause problems with this will disable vectorization anyway. */
7521 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7522 && !INTEGRAL_TYPE_P (scalar_type))
7523 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7525 /* We can't build a vector type of elements with alignment bigger than
7526 their size. */
7527 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7528 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7529 TYPE_UNSIGNED (scalar_type));
7531 /* If we fell back to using the mode, fail if there was
7532 no scalar type for it. */
7533 if (scalar_type == NULL_TREE)
7534 return NULL_TREE;
7536 /* If no size was supplied, use the mode the target prefers. Otherwise
7537 look up a vector mode of the specified size. */
7538 if (size == 0)
7539 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7540 else
7541 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7542 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
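  /* E.g. (illustrative): for a 4-byte integer scalar type and a 16-byte
     vector mode this gives nunits == 4, so a four-element integer vector
     type is built below.  */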
7543 if (nunits <= 1)
7544 return NULL_TREE;
7546 vectype = build_vector_type (scalar_type, nunits);
7548 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7549 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7550 return NULL_TREE;
7552 return vectype;
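/* A hedged illustration (not from the original source) of what the routine
   above computes: on a target whose SIMD support includes 16-byte integer
   vectors, requesting a 16-byte vector of 'int' yields a 4-unit vector
   type.  The concrete mode and unit count are target assumptions.  */
#if 0
static void
example_vectype_query (void)
{
  tree vectype
    = get_vectype_for_scalar_type_and_size (integer_type_node, 16);
  /* Assuming a 4-byte 'int', a successful 16-byte query yields 4 units.  */
  if (vectype)
    gcc_assert (TYPE_VECTOR_SUBPARTS (vectype) == 4);
}
#endif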
7555 unsigned int current_vector_size;
7557 /* Function get_vectype_for_scalar_type.
7559 Returns the vector type corresponding to SCALAR_TYPE as supported
7560 by the target. */
7562 tree
7563 get_vectype_for_scalar_type (tree scalar_type)
7565 tree vectype;
7566 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7567 current_vector_size);
7568 if (vectype
7569 && current_vector_size == 0)
7570 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7571 return vectype;
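/* Usage sketch (illustrative only): the first successful call latches
   current_vector_size, so subsequent queries for other scalar types are
   constrained to vectors of that same byte size; clearing it to zero lets
   a different size be chosen again.  */
#if 0
static void
example_latch_vector_size (void)
{
  current_vector_size = 0;      /* No size chosen yet.  */
  tree vi = get_vectype_for_scalar_type (integer_type_node);
  /* If VI is non-NULL, current_vector_size is now its size in bytes and
     the following query must fit the same size.  */
  tree vs = get_vectype_for_scalar_type (short_integer_type_node);
  (void) vi; (void) vs;
}
#endif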
7574 /* Function get_same_sized_vectype
7576 Returns a vector type corresponding to SCALAR_TYPE of size
7577 VECTOR_TYPE if supported by the target. */
7579 tree
7580 get_same_sized_vectype (tree scalar_type, tree vector_type)
7582 return get_vectype_for_scalar_type_and_size
7583 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
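/* A hedged example of the intent: given a 16-byte vector of 'int'
   (4 units), a same-sized vector of 'short' has 8 units, which is the
   pairing the widening/narrowing support code relies on.  Vector modes
   and unit counts are target assumptions.  */
#if 0
static tree
example_same_sized_vectype (tree int_vectype)
{
  tree short_vectype
    = get_same_sized_vectype (short_integer_type_node, int_vectype);
  /* When non-NULL, SHORT_VECTYPE occupies the same number of bytes as
     INT_VECTYPE, just with twice as many (narrower) elements.  */
  return short_vectype;
}
#endif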
7586 /* Function vect_is_simple_use.
7588 Input:
7589 LOOP_VINFO - the vect info of the loop that is being vectorized.
7590 BB_VINFO - the vect info of the basic block that is being vectorized.
7591 OPERAND - operand of STMT in the loop or bb.
7592 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7594 Returns whether a stmt with OPERAND can be vectorized.
7595 For loops, supportable operands are constants, loop invariants, and operands
7596 that are defined by the current iteration of the loop. Unsupportable
7597 operands are those that are defined by a previous iteration of the loop (as
7598 is the case in reduction/induction computations).
7599 For basic blocks, supportable operands are constants and bb invariants.
7600 For now, operands defined outside the basic block are not supported. */
7602 bool
7603 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7604 bb_vec_info bb_vinfo, gimple *def_stmt,
7605 tree *def, enum vect_def_type *dt)
7607 basic_block bb;
7608 stmt_vec_info stmt_vinfo;
7609 struct loop *loop = NULL;
7611 if (loop_vinfo)
7612 loop = LOOP_VINFO_LOOP (loop_vinfo);
7614 *def_stmt = NULL;
7615 *def = NULL_TREE;
7617 if (dump_enabled_p ())
7619 dump_printf_loc (MSG_NOTE, vect_location,
7620 "vect_is_simple_use: operand ");
7621 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7622 dump_printf (MSG_NOTE, "\n");
7625 if (CONSTANT_CLASS_P (operand))
7627 *dt = vect_constant_def;
7628 return true;
7631 if (is_gimple_min_invariant (operand))
7633 *def = operand;
7634 *dt = vect_external_def;
7635 return true;
7638 if (TREE_CODE (operand) == PAREN_EXPR)
7640 if (dump_enabled_p ())
7641 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7642 operand = TREE_OPERAND (operand, 0);
7645 if (TREE_CODE (operand) != SSA_NAME)
7647 if (dump_enabled_p ())
7648 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7649 "not ssa-name.\n");
7650 return false;
7653 *def_stmt = SSA_NAME_DEF_STMT (operand);
7654 if (*def_stmt == NULL)
7656 if (dump_enabled_p ())
7657 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7658 "no def_stmt.\n");
7659 return false;
7662 if (dump_enabled_p ())
7664 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7665 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7666 dump_printf (MSG_NOTE, "\n");
7669 /* An empty stmt is expected only in the case of a function argument
7670 (otherwise we expect a phi_node or a GIMPLE_ASSIGN). */
7671 if (gimple_nop_p (*def_stmt))
7673 *def = operand;
7674 *dt = vect_external_def;
7675 return true;
7678 bb = gimple_bb (*def_stmt);
7680 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7681 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7682 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7683 *dt = vect_external_def;
7684 else
7686 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7687 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7690 if (*dt == vect_unknown_def_type
7691 || (stmt
7692 && *dt == vect_double_reduction_def
7693 && gimple_code (stmt) != GIMPLE_PHI))
7695 if (dump_enabled_p ())
7696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7697 "Unsupported pattern.\n");
7698 return false;
7701 if (dump_enabled_p ())
7702 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
7704 switch (gimple_code (*def_stmt))
7706 case GIMPLE_PHI:
7707 *def = gimple_phi_result (*def_stmt);
7708 break;
7710 case GIMPLE_ASSIGN:
7711 *def = gimple_assign_lhs (*def_stmt);
7712 break;
7714 case GIMPLE_CALL:
7715 *def = gimple_call_lhs (*def_stmt);
7716 if (*def != NULL)
7717 break;
7718 /* FALLTHRU */
7719 default:
7720 if (dump_enabled_p ())
7721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7722 "unsupported defining stmt:\n");
7723 return false;
7726 return true;
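/* Illustrative sketch of the classification above (the loop and names are
   hypothetical): for  s[i] = a[i] * 4 + n  inside the loop being
   vectorized, the literal 4 classifies as vect_constant_def, the
   loop-invariant N as vect_external_def, and the SSA name holding a[i]
   (defined by a load in the current iteration) as vect_internal_def.  */
#if 0
static bool
example_classify_operand (tree op, gimple use_stmt, loop_vec_info loop_vinfo)
{
  gimple def_stmt;
  tree def;
  enum vect_def_type dt;
  if (!vect_is_simple_use (op, use_stmt, loop_vinfo, NULL,
			   &def_stmt, &def, &dt))
    return false;   /* E.g. defined by a previous iteration (reduction).  */
  return (dt == vect_constant_def
	  || dt == vect_external_def
	  || dt == vect_internal_def);
}
#endif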
7729 /* Function vect_is_simple_use_1.
7731 Same as vect_is_simple_use but also determines the vector operand
7732 type of OPERAND and stores it to *VECTYPE. If the definition of
7733 OPERAND is vect_uninitialized_def, vect_constant_def or
7734 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
7735 is responsible for computing the best suited vector type for the
7736 scalar operand. */
7738 bool
7739 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7740 bb_vec_info bb_vinfo, gimple *def_stmt,
7741 tree *def, enum vect_def_type *dt, tree *vectype)
7743 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7744 def, dt))
7745 return false;
7747 /* Now get a vector type if the def is internal, otherwise supply
7748 NULL_TREE and leave it up to the caller to figure out a proper
7749 type for the use stmt. */
7750 if (*dt == vect_internal_def
7751 || *dt == vect_induction_def
7752 || *dt == vect_reduction_def
7753 || *dt == vect_double_reduction_def
7754 || *dt == vect_nested_cycle)
7756 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
7758 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7759 && !STMT_VINFO_RELEVANT (stmt_info)
7760 && !STMT_VINFO_LIVE_P (stmt_info))
7761 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7763 *vectype = STMT_VINFO_VECTYPE (stmt_info);
7764 gcc_assert (*vectype != NULL_TREE);
7766 else if (*dt == vect_uninitialized_def
7767 || *dt == vect_constant_def
7768 || *dt == vect_external_def)
7769 *vectype = NULL_TREE;
7770 else
7771 gcc_unreachable ();
7773 return true;
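/* A minimal sketch of the usual calling pattern for the wrapper above
   (names are hypothetical): internal defs come back with a vector type,
   while constants and external defs leave it to the caller.  */
#if 0
static tree
example_operand_vectype (tree op, gimple stmt, loop_vec_info loop_vinfo,
			 tree default_vectype)
{
  gimple def_stmt;
  tree def, vectype;
  enum vect_def_type dt;
  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, NULL,
			     &def_stmt, &def, &dt, &vectype))
    return NULL_TREE;
  /* VECTYPE is NULL_TREE for constant/external/uninitialized defs.  */
  return vectype ? vectype : default_vectype;
}
#endif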
7777 /* Function supportable_widening_operation
7779 Check whether an operation represented by the code CODE is a
7780 widening operation that is supported by the target platform in
7781 vector form (i.e., when operating on arguments of type VECTYPE_IN
7782 producing a result of type VECTYPE_OUT).
7784 Widening operations we currently support are NOP (CONVERT), FLOAT
7785 and WIDEN_MULT. This function checks if these operations are supported
7786 by the target platform either directly (via vector tree-codes), or via
7787 target builtins.
7789 Output:
7790 - CODE1 and CODE2 are codes of vector operations to be used when
7791 vectorizing the operation, if available.
7792 - MULTI_STEP_CVT determines the number of required intermediate steps in
7793 case of multi-step conversion (like char->short->int - in that case
7794 MULTI_STEP_CVT will be 1).
7795 - INTERM_TYPES contains the intermediate type required to perform the
7796 widening operation (short in the above example). */
7798 bool
7799 supportable_widening_operation (enum tree_code code, gimple stmt,
7800 tree vectype_out, tree vectype_in,
7801 enum tree_code *code1, enum tree_code *code2,
7802 int *multi_step_cvt,
7803 vec<tree> *interm_types)
7805 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7806 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7807 struct loop *vect_loop = NULL;
7808 enum machine_mode vec_mode;
7809 enum insn_code icode1, icode2;
7810 optab optab1, optab2;
7811 tree vectype = vectype_in;
7812 tree wide_vectype = vectype_out;
7813 enum tree_code c1, c2;
7814 int i;
7815 tree prev_type, intermediate_type;
7816 enum machine_mode intermediate_mode, prev_mode;
7817 optab optab3, optab4;
7819 *multi_step_cvt = 0;
7820 if (loop_info)
7821 vect_loop = LOOP_VINFO_LOOP (loop_info);
7823 switch (code)
7825 case WIDEN_MULT_EXPR:
7826 /* The result of a vectorized widening operation usually requires
7827 two vectors (because the widened results do not fit into one vector).
7828 The generated vector results would normally be expected to be
7829 generated in the same order as in the original scalar computation,
7830 i.e. if 8 results are generated in each vector iteration, they are
7831 to be organized as follows:
7832 vect1: [res1,res2,res3,res4],
7833 vect2: [res5,res6,res7,res8].
7835 However, in the special case that the result of the widening
7836 operation is used in a reduction computation only, the order doesn't
7837 matter (because when vectorizing a reduction we change the order of
7838 the computation). Some targets can take advantage of this and
7839 generate more efficient code. For example, targets like Altivec,
7840 that support widen_mult using a sequence of {mult_even,mult_odd}
7841 generate the following vectors:
7842 vect1: [res1,res3,res5,res7],
7843 vect2: [res2,res4,res6,res8].
7845 When vectorizing outer-loops, we execute the inner-loop sequentially
7846 (each vectorized inner-loop iteration contributes to VF outer-loop
7847 iterations in parallel). We therefore don't allow changing the
7848 order of the computation in the inner-loop during outer-loop
7849 vectorization. */
7850 /* TODO: Another case in which order doesn't *really* matter is when we
7851 widen and then contract again, e.g. (short)((int)x * y >> 8).
7852 Normally, pack_trunc performs an even/odd permute, whereas the
7853 repack from an even/odd expansion would be an interleave, which
7854 would be significantly simpler for e.g. AVX2. */
7855 /* In any case, in order to avoid duplicating the code below, recurse
7856 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7857 are properly set up for the caller. If we fail, we'll continue with
7858 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7859 if (vect_loop
7860 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
7861 && !nested_in_vect_loop_p (vect_loop, stmt)
7862 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
7863 stmt, vectype_out, vectype_in,
7864 code1, code2, multi_step_cvt,
7865 interm_types))
7867 /* Elements in a vector with vect_used_by_reduction property cannot
7868 be reordered if the use chain with this property does not have the
7869 same operation. One such example is s += a * b, where elements
7870 in a and b cannot be reordered. Here we check if the vector defined
7871 by STMT is only directly used in the reduction statement. */
7872 tree lhs = gimple_assign_lhs (stmt);
7873 use_operand_p dummy;
7874 gimple use_stmt;
7875 stmt_vec_info use_stmt_info = NULL;
7876 if (single_imm_use (lhs, &dummy, &use_stmt)
7877 && (use_stmt_info = vinfo_for_stmt (use_stmt))
7878 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
7879 return true;
7881 c1 = VEC_WIDEN_MULT_LO_EXPR;
7882 c2 = VEC_WIDEN_MULT_HI_EXPR;
7883 break;
7885 case VEC_WIDEN_MULT_EVEN_EXPR:
7886 /* Support the recursion induced just above. */
7887 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
7888 c2 = VEC_WIDEN_MULT_ODD_EXPR;
7889 break;
7891 case WIDEN_LSHIFT_EXPR:
7892 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
7893 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
7894 break;
7896 CASE_CONVERT:
7897 c1 = VEC_UNPACK_LO_EXPR;
7898 c2 = VEC_UNPACK_HI_EXPR;
7899 break;
7901 case FLOAT_EXPR:
7902 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
7903 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
7904 break;
7906 case FIX_TRUNC_EXPR:
7907 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7908 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7909 computing the operation. */
7910 return false;
7912 default:
7913 gcc_unreachable ();
7916 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
7918 enum tree_code ctmp = c1;
7919 c1 = c2;
7920 c2 = ctmp;
7923 if (code == FIX_TRUNC_EXPR)
7925 /* The signedness is determined from output operand. */
7926 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
7927 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
7929 else
7931 optab1 = optab_for_tree_code (c1, vectype, optab_default);
7932 optab2 = optab_for_tree_code (c2, vectype, optab_default);
7935 if (!optab1 || !optab2)
7936 return false;
7938 vec_mode = TYPE_MODE (vectype);
7939 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
7940 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
7941 return false;
7943 *code1 = c1;
7944 *code2 = c2;
7946 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7947 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7948 return true;
7950 /* Check if it's a multi-step conversion that can be done using intermediate
7951 types. */
7953 prev_type = vectype;
7954 prev_mode = vec_mode;
7956 if (!CONVERT_EXPR_CODE_P (code))
7957 return false;
7959 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
7960 intermediate steps in the promotion sequence. We try
7961 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
7962 not. */
7963 interm_types->create (MAX_INTERM_CVT_STEPS);
7964 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
7966 intermediate_mode = insn_data[icode1].operand[0].mode;
7967 intermediate_type
7968 = lang_hooks.types.type_for_mode (intermediate_mode,
7969 TYPE_UNSIGNED (prev_type));
7970 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
7971 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
7973 if (!optab3 || !optab4
7974 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
7975 || insn_data[icode1].operand[0].mode != intermediate_mode
7976 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
7977 || insn_data[icode2].operand[0].mode != intermediate_mode
7978 || ((icode1 = optab_handler (optab3, intermediate_mode))
7979 == CODE_FOR_nothing)
7980 || ((icode2 = optab_handler (optab4, intermediate_mode))
7981 == CODE_FOR_nothing))
7982 break;
7984 interm_types->quick_push (intermediate_type);
7985 (*multi_step_cvt)++;
7987 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7988 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7989 return true;
7991 prev_type = intermediate_type;
7992 prev_mode = intermediate_mode;
7995 interm_types->release ();
7996 return false;
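/* A hedged illustration of the multi-step case handled above: widening a
   vector of chars all the way to vectors of ints requires one
   intermediate type (a vector of shorts), i.e. *MULTI_STEP_CVT == 1 with
   that single type recorded in INTERM_TYPES.  The vector types passed in
   and the target support are assumptions.  */
#if 0
static bool
example_widen_char_to_int (gimple conv_stmt, tree int_vectype,
			   tree char_vectype)
{
  enum tree_code code1, code2;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;
  bool ok = supportable_widening_operation (NOP_EXPR, conv_stmt,
					    int_vectype, char_vectype,
					    &code1, &code2, &multi_step_cvt,
					    &interm_types);
  /* On success for this shape, CODE1/CODE2 would be the VEC_UNPACK
     LO/HI codes (possibly swapped on big-endian targets) and
     MULTI_STEP_CVT would be 1.  */
  interm_types.release ();
  return ok;
}
#endif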
8000 /* Function supportable_narrowing_operation
8002 Check whether an operation represented by the code CODE is a
8003 narrowing operation that is supported by the target platform in
8004 vector form (i.e., when operating on arguments of type VECTYPE_IN
8005 and producing a result of type VECTYPE_OUT).
8007 Narrowing operations we currently support are NOP (CONVERT) and
8008 FIX_TRUNC. This function checks if these operations are supported by
8009 the target platform directly via vector tree-codes.
8011 Output:
8012 - CODE1 is the code of a vector operation to be used when
8013 vectorizing the operation, if available.
8014 - MULTI_STEP_CVT determines the number of required intermediate steps in
8015 case of multi-step conversion (like int->short->char - in that case
8016 MULTI_STEP_CVT will be 1).
8017 - INTERM_TYPES contains the intermediate type required to perform the
8018 narrowing operation (short in the above example). */
8020 bool
8021 supportable_narrowing_operation (enum tree_code code,
8022 tree vectype_out, tree vectype_in,
8023 enum tree_code *code1, int *multi_step_cvt,
8024 vec<tree> *interm_types)
8026 enum machine_mode vec_mode;
8027 enum insn_code icode1;
8028 optab optab1, interm_optab;
8029 tree vectype = vectype_in;
8030 tree narrow_vectype = vectype_out;
8031 enum tree_code c1;
8032 tree intermediate_type;
8033 enum machine_mode intermediate_mode, prev_mode;
8034 int i;
8035 bool uns;
8037 *multi_step_cvt = 0;
8038 switch (code)
8040 CASE_CONVERT:
8041 c1 = VEC_PACK_TRUNC_EXPR;
8042 break;
8044 case FIX_TRUNC_EXPR:
8045 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8046 break;
8048 case FLOAT_EXPR:
8049 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8050 tree code and optabs used for computing the operation. */
8051 return false;
8053 default:
8054 gcc_unreachable ();
8057 if (code == FIX_TRUNC_EXPR)
8058 /* The signedness is determined from output operand. */
8059 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8060 else
8061 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8063 if (!optab1)
8064 return false;
8066 vec_mode = TYPE_MODE (vectype);
8067 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8068 return false;
8070 *code1 = c1;
8072 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8073 return true;
8075 /* Check if it's a multi-step conversion that can be done using intermediate
8076 types. */
8077 prev_mode = vec_mode;
8078 if (code == FIX_TRUNC_EXPR)
8079 uns = TYPE_UNSIGNED (vectype_out);
8080 else
8081 uns = TYPE_UNSIGNED (vectype);
8083 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8084 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8085 costly than signed. */
8086 if (code == FIX_TRUNC_EXPR && uns)
8088 enum insn_code icode2;
8090 intermediate_type
8091 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8092 interm_optab
8093 = optab_for_tree_code (c1, intermediate_type, optab_default);
8094 if (interm_optab != unknown_optab
8095 && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
8096 && insn_data[icode1].operand[0].mode
8097 == insn_data[icode2].operand[0].mode)
8099 uns = false;
8100 optab1 = interm_optab;
8101 icode1 = icode2;
8105 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8106 intermediate steps in the narrowing sequence. We try
8107 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8108 interm_types->create (MAX_INTERM_CVT_STEPS);
8109 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8111 intermediate_mode = insn_data[icode1].operand[0].mode;
8112 intermediate_type
8113 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8114 interm_optab
8115 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8116 optab_default);
8117 if (!interm_optab
8118 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8119 || insn_data[icode1].operand[0].mode != intermediate_mode
8120 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8121 == CODE_FOR_nothing))
8122 break;
8124 interm_types->quick_push (intermediate_type);
8125 (*multi_step_cvt)++;
8127 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8128 return true;
8130 prev_mode = intermediate_mode;
8131 optab1 = interm_optab;
8134 interm_types->release ();
8135 return false;
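/* A hedged illustration mirroring the widening example: narrowing vectors
   of ints down to a vector of chars goes through one intermediate step
   (vectors of shorts), so CODE1 == VEC_PACK_TRUNC_EXPR and
   *MULTI_STEP_CVT == 1, with the short vector type in INTERM_TYPES.
   Concrete vector types and support are target assumptions.  */
#if 0
static bool
example_narrow_int_to_char (tree char_vectype, tree int_vectype)
{
  enum tree_code code1;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;
  bool ok = supportable_narrowing_operation (NOP_EXPR, char_vectype,
					     int_vectype, &code1,
					     &multi_step_cvt, &interm_types);
  interm_types.release ();
  return ok && multi_step_cvt == 1;
}
#endif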