2014-10-31  Richard Biener  <rguenther@suse.de>
[official-gcc.git] / gcc / tree-vect-stmts.c
blob 02559481f13c0585dfccc424f6ad85b4b6909f6c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "stor-layout.h"
29 #include "target.h"
30 #include "predict.h"
31 #include "vec.h"
32 #include "hashtab.h"
33 #include "hash-set.h"
34 #include "machmode.h"
35 #include "hard-reg-set.h"
36 #include "input.h"
37 #include "function.h"
38 #include "dominance.h"
39 #include "cfg.h"
40 #include "basic-block.h"
41 #include "gimple-pretty-print.h"
42 #include "tree-ssa-alias.h"
43 #include "internal-fn.h"
44 #include "tree-eh.h"
45 #include "gimple-expr.h"
46 #include "is-a.h"
47 #include "gimple.h"
48 #include "gimplify.h"
49 #include "gimple-iterator.h"
50 #include "gimplify-me.h"
51 #include "gimple-ssa.h"
52 #include "tree-cfg.h"
53 #include "tree-phinodes.h"
54 #include "ssa-iterators.h"
55 #include "stringpool.h"
56 #include "tree-ssanames.h"
57 #include "tree-ssa-loop-manip.h"
58 #include "cfgloop.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-scalar-evolution.h"
61 #include "expr.h"
62 #include "recog.h" /* FIXME: for insn_data */
63 #include "optabs.h"
64 #include "diagnostic-core.h"
65 #include "tree-vectorizer.h"
66 #include "dumpfile.h"
67 #include "hash-map.h"
68 #include "plugin-api.h"
69 #include "ipa-ref.h"
70 #include "cgraph.h"
71 #include "builtins.h"
73 /* For lang_hooks.types.type_for_mode. */
74 #include "langhooks.h"
76 /* Return the vectorized type for the given statement. */
78 tree
79 stmt_vectype (struct _stmt_vec_info *stmt_info)
81 return STMT_VINFO_VECTYPE (stmt_info);
84 /* Return TRUE iff the given statement is in an inner loop relative to
85 the loop being vectorized. */
86 bool
87 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
89 gimple stmt = STMT_VINFO_STMT (stmt_info);
90 basic_block bb = gimple_bb (stmt);
91 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
92 struct loop* loop;
94 if (!loop_vinfo)
95 return false;
97 loop = LOOP_VINFO_LOOP (loop_vinfo);
99 return (bb->loop_father == loop->inner);
102 /* Record the cost of a statement, either by directly informing the
103 target model or by saving it in a vector for later processing.
104 Return a preliminary estimate of the statement's cost. */
106 unsigned
107 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
108 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
109 int misalign, enum vect_cost_model_location where)
111 if (body_cost_vec)
113 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
114 add_stmt_info_to_vec (body_cost_vec, count, kind,
115 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
116 misalign);
117 return (unsigned)
118 (builtin_vectorization_cost (kind, vectype, misalign) * count);
121 else
123 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
124 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
125 void *target_cost_data;
127 if (loop_vinfo)
128 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
129 else
130 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
132 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
133 misalign, where);
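/* For illustration (a sketch; the surrounding variables are hypothetical):
   during analysis a caller typically queues one or more vector statements
   in the body cost vector and receives a preliminary estimate back:

       unsigned est = record_stmt_cost (&body_cost_vec, ncopies, vector_stmt,
                                        stmt_info, 0, vect_body);

   Passing a NULL cost vector instead hands the cost directly to the
   target's cost data via add_stmt_cost.  */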
137 /* Return a variable of type ELEM_TYPE[NELEMS]. */
139 static tree
140 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
142 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
143 "vect_array");
146 /* ARRAY is an array of vectors created by create_vector_array.
147 Return an SSA_NAME for the vector in index N. The reference
148 is part of the vectorization of STMT and the vector is associated
149 with scalar destination SCALAR_DEST. */
151 static tree
152 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
153 tree array, unsigned HOST_WIDE_INT n)
155 tree vect_type, vect, vect_name, array_ref;
156 gimple new_stmt;
158 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
159 vect_type = TREE_TYPE (TREE_TYPE (array));
160 vect = vect_create_destination_var (scalar_dest, vect_type);
161 array_ref = build4 (ARRAY_REF, vect_type, array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
165 new_stmt = gimple_build_assign (vect, array_ref);
166 vect_name = make_ssa_name (vect, new_stmt);
167 gimple_assign_set_lhs (new_stmt, vect_name);
168 vect_finish_stmt_generation (stmt, new_stmt, gsi);
170 return vect_name;
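/* For example, reading element 2 of the array produces GIMPLE along the
   lines of (a sketch; the SSA names are illustrative):

       vect_x.17_5 = vect_array[2];

   and the new SSA name on the left-hand side is returned.  */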
173 /* ARRAY is an array of vectors created by create_vector_array.
174 Emit code to store SSA_NAME VECT in index N of the array.
175 The store is part of the vectorization of STMT. */
177 static void
178 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
179 tree array, unsigned HOST_WIDE_INT n)
181 tree array_ref;
182 gimple new_stmt;
184 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
185 build_int_cst (size_type_node, n),
186 NULL_TREE, NULL_TREE);
188 new_stmt = gimple_build_assign (array_ref, vect);
189 vect_finish_stmt_generation (stmt, new_stmt, gsi);
192 /* PTR is a pointer to an array of type TYPE. Return a representation
193 of *PTR. The memory reference replaces those in FIRST_DR
194 (and its group). */
196 static tree
197 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
199 tree mem_ref, alias_ptr_type;
201 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
202 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
203 /* Arrays have the same alignment as their type. */
204 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
205 return mem_ref;
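/* For example (a sketch): for TYPE being an array of four vectors this
   amounts to building

       MEM_REF <TYPE> (ptr, (alias_ptr_type) 0)

   i.e. *PTR typed as the array, with the zero offset carrying the alias
   pointer type of FIRST_DR so the original alias information is kept.  */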
208 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
210 /* Function vect_mark_relevant.
212 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
214 static void
215 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
216 enum vect_relevant relevant, bool live_p,
217 bool used_in_pattern)
219 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
220 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
221 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
222 gimple pattern_stmt;
224 if (dump_enabled_p ())
225 dump_printf_loc (MSG_NOTE, vect_location,
226 "mark relevant %d, live %d.\n", relevant, live_p);
228 /* If this stmt is an original stmt in a pattern, we might need to mark its
229 related pattern stmt instead of the original stmt. However, such stmts
230 may have their own uses that are not in any pattern; in such cases the
231 stmt itself should be marked. */
232 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
234 bool found = false;
235 if (!used_in_pattern)
237 imm_use_iterator imm_iter;
238 use_operand_p use_p;
239 gimple use_stmt;
240 tree lhs;
241 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
242 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
244 if (is_gimple_assign (stmt))
245 lhs = gimple_assign_lhs (stmt);
246 else
247 lhs = gimple_call_lhs (stmt);
249 /* This use is outside the pattern; if LHS has other uses that are
250 pattern uses, we should mark the stmt itself, and not the pattern
251 stmt. */
252 if (lhs && TREE_CODE (lhs) == SSA_NAME)
253 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
255 if (is_gimple_debug (USE_STMT (use_p)))
256 continue;
257 use_stmt = USE_STMT (use_p);
259 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
260 continue;
262 if (vinfo_for_stmt (use_stmt)
263 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
265 found = true;
266 break;
271 if (!found)
273 /* This is the last stmt in a sequence that was detected as a
274 pattern that can potentially be vectorized. Don't mark the stmt
275 as relevant/live because it's not going to be vectorized.
276 Instead mark the pattern-stmt that replaces it. */
278 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_NOTE, vect_location,
282 "last stmt in pattern. don't mark"
283 " relevant/live.\n");
284 stmt_info = vinfo_for_stmt (pattern_stmt);
285 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
286 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
287 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
288 stmt = pattern_stmt;
292 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
293 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
294 STMT_VINFO_RELEVANT (stmt_info) = relevant;
296 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
297 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
299 if (dump_enabled_p ())
300 dump_printf_loc (MSG_NOTE, vect_location,
301 "already marked relevant/live.\n");
302 return;
305 worklist->safe_push (stmt);
309 /* Function vect_stmt_relevant_p.
311 Return true if STMT in loop that is represented by LOOP_VINFO is
312 "relevant for vectorization".
314 A stmt is considered "relevant for vectorization" if:
315 - it has uses outside the loop.
316 - it has vdefs (it alters memory).
317 - control stmts in the loop (except for the exit condition).
319 CHECKME: what other side effects would the vectorizer allow? */
321 static bool
322 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
323 enum vect_relevant *relevant, bool *live_p)
325 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
326 ssa_op_iter op_iter;
327 imm_use_iterator imm_iter;
328 use_operand_p use_p;
329 def_operand_p def_p;
331 *relevant = vect_unused_in_scope;
332 *live_p = false;
334 /* cond stmt other than loop exit cond. */
335 if (is_ctrl_stmt (stmt)
336 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
337 != loop_exit_ctrl_vec_info_type)
338 *relevant = vect_used_in_scope;
340 /* changing memory. */
341 if (gimple_code (stmt) != GIMPLE_PHI)
342 if (gimple_vdef (stmt))
344 if (dump_enabled_p ())
345 dump_printf_loc (MSG_NOTE, vect_location,
346 "vec_stmt_relevant_p: stmt has vdefs.\n");
347 *relevant = vect_used_in_scope;
350 /* uses outside the loop. */
351 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
353 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
355 basic_block bb = gimple_bb (USE_STMT (use_p));
356 if (!flow_bb_inside_loop_p (loop, bb))
358 if (dump_enabled_p ())
359 dump_printf_loc (MSG_NOTE, vect_location,
360 "vec_stmt_relevant_p: used out of loop.\n");
362 if (is_gimple_debug (USE_STMT (use_p)))
363 continue;
365 /* We expect all such uses to be in the loop exit phis
366 (because of loop closed form).  */
367 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
368 gcc_assert (bb == single_exit (loop)->dest);
370 *live_p = true;
375 return (*live_p || *relevant);
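/* For example, in the loop (a sketch):

       for (i = 0; i < n; i++)
         {
           a[i] = b[i] + 1;   <-- has a vdef, so relevant in the scope
           s = s + b[i];      <-- s is used after the loop, so live
         }
       ... = s;

   the store is marked vect_used_in_scope because it alters memory, and the
   second statement gets *live_p set because its result is used outside the
   loop (through the loop exit phi).  */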
379 /* Function exist_non_indexing_operands_for_use_p
381 USE is one of the uses attached to STMT. Check if USE is
382 used in STMT for anything other than indexing an array. */
384 static bool
385 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
387 tree operand;
388 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
390 /* USE corresponds to some operand in STMT. If there is no data
391 reference in STMT, then any operand that corresponds to USE
392 is not indexing an array. */
393 if (!STMT_VINFO_DATA_REF (stmt_info))
394 return true;
396 /* STMT has a data_ref. FORNOW this means that it's one of
397 the following forms:
398 -1- ARRAY_REF = var
399 -2- var = ARRAY_REF
400 (This should have been verified in analyze_data_refs).
402 'var' in the second case corresponds to a def, not a use,
403 so USE cannot correspond to any operands that are not used
404 for array indexing.
406 Therefore, all we need to check is if STMT falls into the
407 first case, and whether var corresponds to USE. */
409 if (!gimple_assign_copy_p (stmt))
411 if (is_gimple_call (stmt)
412 && gimple_call_internal_p (stmt))
413 switch (gimple_call_internal_fn (stmt))
415 case IFN_MASK_STORE:
416 operand = gimple_call_arg (stmt, 3);
417 if (operand == use)
418 return true;
419 /* FALLTHRU */
420 case IFN_MASK_LOAD:
421 operand = gimple_call_arg (stmt, 2);
422 if (operand == use)
423 return true;
424 break;
425 default:
426 break;
428 return false;
431 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
432 return false;
433 operand = gimple_assign_rhs1 (stmt);
434 if (TREE_CODE (operand) != SSA_NAME)
435 return false;
437 if (operand == use)
438 return true;
440 return false;
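/* For example, for the statement "a[i_7] = x_5" (a sketch): the use of x_5
   provides the stored value, so it is a non-indexing operand and this
   predicate returns true for it; i_7 only appears inside the array
   reference, i.e. it is used for address computation alone, so the
   predicate returns false for it.  */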
445 /* Function process_use.
447 Inputs:
448 - a USE in STMT in a loop represented by LOOP_VINFO
449 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
450 that defined USE. This is done by calling mark_relevant and passing it
451 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
452 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
453 be performed.
455 Outputs:
456 Generally, LIVE_P and RELEVANT are used to define the liveness and
457 relevance info of the DEF_STMT of this USE:
458 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
459 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
460 Exceptions:
461 - case 1: If USE is used only for address computations (e.g. array indexing),
462 which does not need to be directly vectorized, then the liveness/relevance
463 of the respective DEF_STMT is left unchanged.
464 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
465 skip DEF_STMT because it has already been processed.
466 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
467 be modified accordingly.
469 Return true if everything is as expected. Return false otherwise. */
471 static bool
472 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
473 enum vect_relevant relevant, vec<gimple> *worklist,
474 bool force)
476 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
477 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
478 stmt_vec_info dstmt_vinfo;
479 basic_block bb, def_bb;
480 tree def;
481 gimple def_stmt;
482 enum vect_def_type dt;
484 /* case 1: we are only interested in uses that need to be vectorized. Uses
485 that are used for address computation are not considered relevant. */
486 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
487 return true;
489 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
491 if (dump_enabled_p ())
492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
493 "not vectorized: unsupported use in stmt.\n");
494 return false;
497 if (!def_stmt || gimple_nop_p (def_stmt))
498 return true;
500 def_bb = gimple_bb (def_stmt);
501 if (!flow_bb_inside_loop_p (loop, def_bb))
503 if (dump_enabled_p ())
504 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
505 return true;
508 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
509 DEF_STMT must have already been processed, because this should be the
510 only way that STMT, which is a reduction-phi, was put in the worklist,
511 as there should be no other uses for DEF_STMT in the loop. So we just
512 check that everything is as expected, and we are done. */
513 dstmt_vinfo = vinfo_for_stmt (def_stmt);
514 bb = gimple_bb (stmt);
515 if (gimple_code (stmt) == GIMPLE_PHI
516 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
517 && gimple_code (def_stmt) != GIMPLE_PHI
518 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
519 && bb->loop_father == def_bb->loop_father)
521 if (dump_enabled_p ())
522 dump_printf_loc (MSG_NOTE, vect_location,
523 "reduc-stmt defining reduc-phi in the same nest.\n");
524 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
525 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
526 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
527 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
528 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
529 return true;
532 /* case 3a: outer-loop stmt defining an inner-loop stmt:
533 outer-loop-header-bb:
534 d = def_stmt
535 inner-loop:
536 stmt # use (d)
537 outer-loop-tail-bb:
538 ... */
539 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
541 if (dump_enabled_p ())
542 dump_printf_loc (MSG_NOTE, vect_location,
543 "outer-loop def-stmt defining inner-loop stmt.\n");
545 switch (relevant)
547 case vect_unused_in_scope:
548 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
549 vect_used_in_scope : vect_unused_in_scope;
550 break;
552 case vect_used_in_outer_by_reduction:
553 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
554 relevant = vect_used_by_reduction;
555 break;
557 case vect_used_in_outer:
558 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
559 relevant = vect_used_in_scope;
560 break;
562 case vect_used_in_scope:
563 break;
565 default:
566 gcc_unreachable ();
570 /* case 3b: inner-loop stmt defining an outer-loop stmt:
571 outer-loop-header-bb:
573 inner-loop:
574 d = def_stmt
575 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
576 stmt # use (d) */
577 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "inner-loop def-stmt defining outer-loop stmt.\n");
583 switch (relevant)
585 case vect_unused_in_scope:
586 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
587 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
588 vect_used_in_outer_by_reduction : vect_unused_in_scope;
589 break;
591 case vect_used_by_reduction:
592 relevant = vect_used_in_outer_by_reduction;
593 break;
595 case vect_used_in_scope:
596 relevant = vect_used_in_outer;
597 break;
599 default:
600 gcc_unreachable ();
604 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
605 is_pattern_stmt_p (stmt_vinfo));
606 return true;
610 /* Function vect_mark_stmts_to_be_vectorized.
612 Not all stmts in the loop need to be vectorized. For example:
614 for i...
615 for j...
616 1. T0 = i + j
617 2. T1 = a[T0]
619 3. j = j + 1
621 Stmts 1 and 3 do not need to be vectorized, because loop control and
622 addressing of vectorized data-refs are handled differently.
624 This pass detects such stmts.  */
626 bool
627 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
629 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
630 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
631 unsigned int nbbs = loop->num_nodes;
632 gimple_stmt_iterator si;
633 gimple stmt;
634 unsigned int i;
635 stmt_vec_info stmt_vinfo;
636 basic_block bb;
637 gimple phi;
638 bool live_p;
639 enum vect_relevant relevant, tmp_relevant;
640 enum vect_def_type def_type;
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "=== vect_mark_stmts_to_be_vectorized ===\n");
646 auto_vec<gimple, 64> worklist;
648 /* 1. Init worklist. */
649 for (i = 0; i < nbbs; i++)
651 bb = bbs[i];
652 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
654 phi = gsi_stmt (si);
655 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
658 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
659 dump_printf (MSG_NOTE, "\n");
662 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
663 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
665 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
667 stmt = gsi_stmt (si);
668 if (dump_enabled_p ())
670 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
671 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
672 dump_printf (MSG_NOTE, "\n");
675 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
676 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
680 /* 2. Process_worklist */
681 while (worklist.length () > 0)
683 use_operand_p use_p;
684 ssa_op_iter iter;
686 stmt = worklist.pop ();
687 if (dump_enabled_p ())
689 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
690 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
691 dump_printf (MSG_NOTE, "\n");
694 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
695 (DEF_STMT) as relevant/irrelevant and live/dead according to the
696 liveness and relevance properties of STMT. */
697 stmt_vinfo = vinfo_for_stmt (stmt);
698 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
699 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
701 /* Generally, the liveness and relevance properties of STMT are
702 propagated as is to the DEF_STMTs of its USEs:
703 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
704 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
706 One exception is when STMT has been identified as defining a reduction
707 variable; in this case we set the liveness/relevance as follows:
708 live_p = false
709 relevant = vect_used_by_reduction
710 This is because we distinguish between two kinds of relevant stmts -
711 those that are used by a reduction computation, and those that are
712 (also) used by a regular computation. This allows us later on to
713 identify stmts that are used solely by a reduction, and therefore the
714 order of the results that they produce does not have to be kept. */
716 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
717 tmp_relevant = relevant;
718 switch (def_type)
720 case vect_reduction_def:
721 switch (tmp_relevant)
723 case vect_unused_in_scope:
724 relevant = vect_used_by_reduction;
725 break;
727 case vect_used_by_reduction:
728 if (gimple_code (stmt) == GIMPLE_PHI)
729 break;
730 /* fall through */
732 default:
733 if (dump_enabled_p ())
734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
735 "unsupported use of reduction.\n");
736 return false;
739 live_p = false;
740 break;
742 case vect_nested_cycle:
743 if (tmp_relevant != vect_unused_in_scope
744 && tmp_relevant != vect_used_in_outer_by_reduction
745 && tmp_relevant != vect_used_in_outer)
747 if (dump_enabled_p ())
748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
749 "unsupported use of nested cycle.\n");
751 return false;
754 live_p = false;
755 break;
757 case vect_double_reduction_def:
758 if (tmp_relevant != vect_unused_in_scope
759 && tmp_relevant != vect_used_by_reduction)
761 if (dump_enabled_p ())
762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
763 "unsupported use of double reduction.\n");
765 return false;
768 live_p = false;
769 break;
771 default:
772 break;
775 if (is_pattern_stmt_p (stmt_vinfo))
777 /* Pattern statements are not inserted into the code, so
778 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
779 have to scan the RHS or function arguments instead. */
780 if (is_gimple_assign (stmt))
782 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
783 tree op = gimple_assign_rhs1 (stmt);
785 i = 1;
786 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
788 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
789 live_p, relevant, &worklist, false)
790 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
791 live_p, relevant, &worklist, false))
792 return false;
793 i = 2;
795 for (; i < gimple_num_ops (stmt); i++)
797 op = gimple_op (stmt, i);
798 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
799 &worklist, false))
800 return false;
803 else if (is_gimple_call (stmt))
805 for (i = 0; i < gimple_call_num_args (stmt); i++)
807 tree arg = gimple_call_arg (stmt, i);
808 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
809 &worklist, false))
810 return false;
814 else
815 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
817 tree op = USE_FROM_PTR (use_p);
818 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
819 &worklist, false))
820 return false;
823 if (STMT_VINFO_GATHER_P (stmt_vinfo))
825 tree off;
826 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
827 gcc_assert (decl);
828 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
829 &worklist, true))
830 return false;
832 } /* while worklist */
834 return true;
838 /* Function vect_model_simple_cost.
840 Models cost for simple operations, i.e. those that only emit ncopies of a
841 single op. Right now, this does not account for multiple insns that could
842 be generated for the single vector op. We will handle that shortly. */
844 void
845 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
846 enum vect_def_type *dt,
847 stmt_vector_for_cost *prologue_cost_vec,
848 stmt_vector_for_cost *body_cost_vec)
850 int i;
851 int inside_cost = 0, prologue_cost = 0;
853 /* The SLP costs were already calculated during SLP tree build. */
854 if (PURE_SLP_STMT (stmt_info))
855 return;
857 /* FORNOW: Assuming maximum 2 args per stmt.  */
858 for (i = 0; i < 2; i++)
859 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
860 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
861 stmt_info, 0, vect_prologue);
863 /* Pass the inside-of-loop statements to the target-specific cost model. */
864 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
865 stmt_info, 0, vect_body);
867 if (dump_enabled_p ())
868 dump_printf_loc (MSG_NOTE, vect_location,
869 "vect_model_simple_cost: inside_cost = %d, "
870 "prologue_cost = %d .\n", inside_cost, prologue_cost);
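/* As a worked example (a sketch with made-up numbers): for ncopies == 2 and
   one constant operand, the prologue is charged one vector_stmt for
   building the vector constant and the body is charged two vector_stmt
   entries; with the default cost of 1 per vector statement this dumps
   inside_cost = 2 and prologue_cost = 1.  */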
874 /* Model cost for type demotion and promotion operations. PWR is normally
875 zero for single-step promotions and demotions. It will be one if
876 two-step promotion/demotion is required, and so on. Each additional
877 step doubles the number of instructions required. */
879 static void
880 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
881 enum vect_def_type *dt, int pwr)
883 int i, tmp;
884 int inside_cost = 0, prologue_cost = 0;
885 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
886 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
887 void *target_cost_data;
889 /* The SLP costs were already calculated during SLP tree build. */
890 if (PURE_SLP_STMT (stmt_info))
891 return;
893 if (loop_vinfo)
894 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
895 else
896 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
898 for (i = 0; i < pwr + 1; i++)
900 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
901 (i + 1) : i;
902 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
903 vec_promote_demote, stmt_info, 0,
904 vect_body);
907 /* FORNOW: Assuming maximum 2 args per stmt.  */
908 for (i = 0; i < 2; i++)
909 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
910 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
911 stmt_info, 0, vect_prologue);
913 if (dump_enabled_p ())
914 dump_printf_loc (MSG_NOTE, vect_location,
915 "vect_model_promotion_demotion_cost: inside_cost = %d, "
916 "prologue_cost = %d .\n", inside_cost, prologue_cost);
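/* For example (a sketch): a single-step promotion (PWR == 0) is charged
   vect_pow2 (1) == 2 vec_promote_demote statements, since widening one
   vector yields a low and a high part, while a single-step demotion is
   charged vect_pow2 (0) == 1 statement for the pack.  */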
919 /* Function vect_cost_group_size
921 For grouped load or store, return the group_size only if it is the first
922 load or store of a group, else return 1. This ensures that group size is
923 only returned once per group. */
925 static int
926 vect_cost_group_size (stmt_vec_info stmt_info)
928 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
930 if (first_stmt == STMT_VINFO_STMT (stmt_info))
931 return GROUP_SIZE (stmt_info);
933 return 1;
937 /* Function vect_model_store_cost
939 Models cost for stores. In the case of grouped accesses, one access
940 has the overhead of the grouped access attributed to it. */
942 void
943 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
944 bool store_lanes_p, enum vect_def_type dt,
945 slp_tree slp_node,
946 stmt_vector_for_cost *prologue_cost_vec,
947 stmt_vector_for_cost *body_cost_vec)
949 int group_size;
950 unsigned int inside_cost = 0, prologue_cost = 0;
951 struct data_reference *first_dr;
952 gimple first_stmt;
954 /* The SLP costs were already calculated during SLP tree build. */
955 if (PURE_SLP_STMT (stmt_info))
956 return;
958 if (dt == vect_constant_def || dt == vect_external_def)
959 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
960 stmt_info, 0, vect_prologue);
962 /* Grouped access? */
963 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
965 if (slp_node)
967 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
968 group_size = 1;
970 else
972 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
973 group_size = vect_cost_group_size (stmt_info);
976 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
978 /* Not a grouped access. */
979 else
981 group_size = 1;
982 first_dr = STMT_VINFO_DATA_REF (stmt_info);
985 /* We assume that the cost of a single store-lanes instruction is
986 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
987 access is instead being provided by a permute-and-store operation,
988 include the cost of the permutes. */
989 if (!store_lanes_p && group_size > 1)
991 /* Uses high and low interleave or shuffle operations for each
992 needed permute. */
993 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
994 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
995 stmt_info, 0, vect_body);
997 if (dump_enabled_p ())
998 dump_printf_loc (MSG_NOTE, vect_location,
999 "vect_model_store_cost: strided group_size = %d .\n",
1000 group_size);
1003 /* Costs of the stores. */
1004 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1006 if (dump_enabled_p ())
1007 dump_printf_loc (MSG_NOTE, vect_location,
1008 "vect_model_store_cost: inside_cost = %d, "
1009 "prologue_cost = %d .\n", inside_cost, prologue_cost);
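/* As a concrete example (a sketch): a grouped store with group_size == 4
   that does not use store-lanes and has ncopies == 1 is charged
   1 * ceil_log2 (4) * 4 == 8 vec_perm statements for the interleaving,
   on top of the cost of the stores themselves computed above.  */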
1013 /* Calculate cost of DR's memory access. */
1014 void
1015 vect_get_store_cost (struct data_reference *dr, int ncopies,
1016 unsigned int *inside_cost,
1017 stmt_vector_for_cost *body_cost_vec)
1019 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1020 gimple stmt = DR_STMT (dr);
1021 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1023 switch (alignment_support_scheme)
1025 case dr_aligned:
1027 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1028 vector_store, stmt_info, 0,
1029 vect_body);
1031 if (dump_enabled_p ())
1032 dump_printf_loc (MSG_NOTE, vect_location,
1033 "vect_model_store_cost: aligned.\n");
1034 break;
1037 case dr_unaligned_supported:
1039 /* Here, we assign an additional cost for the unaligned store. */
1040 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1041 unaligned_store, stmt_info,
1042 DR_MISALIGNMENT (dr), vect_body);
1043 if (dump_enabled_p ())
1044 dump_printf_loc (MSG_NOTE, vect_location,
1045 "vect_model_store_cost: unaligned supported by "
1046 "hardware.\n");
1047 break;
1050 case dr_unaligned_unsupported:
1052 *inside_cost = VECT_MAX_COST;
1054 if (dump_enabled_p ())
1055 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1056 "vect_model_store_cost: unsupported access.\n");
1057 break;
1060 default:
1061 gcc_unreachable ();
1066 /* Function vect_model_load_cost
1068 Models cost for loads. In the case of grouped accesses, the last access
1069 has the overhead of the grouped access attributed to it. Since unaligned
1070 accesses are supported for loads, we also account for the costs of the
1071 access scheme chosen. */
1073 void
1074 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1075 bool load_lanes_p, slp_tree slp_node,
1076 stmt_vector_for_cost *prologue_cost_vec,
1077 stmt_vector_for_cost *body_cost_vec)
1079 int group_size;
1080 gimple first_stmt;
1081 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1082 unsigned int inside_cost = 0, prologue_cost = 0;
1084 /* The SLP costs were already calculated during SLP tree build. */
1085 if (PURE_SLP_STMT (stmt_info))
1086 return;
1088 /* Grouped accesses? */
1089 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1090 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1092 group_size = vect_cost_group_size (stmt_info);
1093 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1095 /* Not a grouped access. */
1096 else
1098 group_size = 1;
1099 first_dr = dr;
1102 /* We assume that the cost of a single load-lanes instruction is
1103 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1104 access is instead being provided by a load-and-permute operation,
1105 include the cost of the permutes. */
1106 if (!load_lanes_p && group_size > 1)
1108 /* Uses even and odd extract operations or shuffle operations
1109 for each needed permute. */
1110 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1111 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1112 stmt_info, 0, vect_body);
1114 if (dump_enabled_p ())
1115 dump_printf_loc (MSG_NOTE, vect_location,
1116 "vect_model_load_cost: strided group_size = %d .\n",
1117 group_size);
1120 /* The loads themselves. */
1121 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1123 /* N scalar loads plus gathering them into a vector. */
1124 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1125 inside_cost += record_stmt_cost (body_cost_vec,
1126 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1127 scalar_load, stmt_info, 0, vect_body);
1128 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1129 stmt_info, 0, vect_body);
1131 else
1132 vect_get_load_cost (first_dr, ncopies,
1133 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1134 || group_size > 1 || slp_node),
1135 &inside_cost, &prologue_cost,
1136 prologue_cost_vec, body_cost_vec, true);
1138 if (dump_enabled_p ())
1139 dump_printf_loc (MSG_NOTE, vect_location,
1140 "vect_model_load_cost: inside_cost = %d, "
1141 "prologue_cost = %d .\n", inside_cost, prologue_cost);
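/* For example (a sketch): a strided load of a four-element vector with
   ncopies == 2 is modelled as 2 * 4 == 8 scalar_load statements plus 2
   vec_construct statements to assemble the loaded elements into vectors.  */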
1145 /* Calculate cost of DR's memory access. */
1146 void
1147 vect_get_load_cost (struct data_reference *dr, int ncopies,
1148 bool add_realign_cost, unsigned int *inside_cost,
1149 unsigned int *prologue_cost,
1150 stmt_vector_for_cost *prologue_cost_vec,
1151 stmt_vector_for_cost *body_cost_vec,
1152 bool record_prologue_costs)
1154 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1155 gimple stmt = DR_STMT (dr);
1156 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1158 switch (alignment_support_scheme)
1160 case dr_aligned:
1162 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1163 stmt_info, 0, vect_body);
1165 if (dump_enabled_p ())
1166 dump_printf_loc (MSG_NOTE, vect_location,
1167 "vect_model_load_cost: aligned.\n");
1169 break;
1171 case dr_unaligned_supported:
1173 /* Here, we assign an additional cost for the unaligned load. */
1174 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1175 unaligned_load, stmt_info,
1176 DR_MISALIGNMENT (dr), vect_body);
1178 if (dump_enabled_p ())
1179 dump_printf_loc (MSG_NOTE, vect_location,
1180 "vect_model_load_cost: unaligned supported by "
1181 "hardware.\n");
1183 break;
1185 case dr_explicit_realign:
1187 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1188 vector_load, stmt_info, 0, vect_body);
1189 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1190 vec_perm, stmt_info, 0, vect_body);
1192 /* FIXME: If the misalignment remains fixed across the iterations of
1193 the containing loop, the following cost should be added to the
1194 prologue costs. */
1195 if (targetm.vectorize.builtin_mask_for_load)
1196 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1197 stmt_info, 0, vect_body);
1199 if (dump_enabled_p ())
1200 dump_printf_loc (MSG_NOTE, vect_location,
1201 "vect_model_load_cost: explicit realign\n");
1203 break;
1205 case dr_explicit_realign_optimized:
1207 if (dump_enabled_p ())
1208 dump_printf_loc (MSG_NOTE, vect_location,
1209 "vect_model_load_cost: unaligned software "
1210 "pipelined.\n");
1212 /* Unaligned software pipeline has a load of an address, an initial
1213 load, and possibly a mask operation to "prime" the loop. However,
1214 if this is an access in a group of loads, which provide grouped
1215 access, then the above cost should only be considered for one
1216 access in the group. Inside the loop, there is a load op
1217 and a realignment op. */
1219 if (add_realign_cost && record_prologue_costs)
1221 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1222 vector_stmt, stmt_info,
1223 0, vect_prologue);
1224 if (targetm.vectorize.builtin_mask_for_load)
1225 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1226 vector_stmt, stmt_info,
1227 0, vect_prologue);
1230 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1231 stmt_info, 0, vect_body);
1232 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1233 stmt_info, 0, vect_body);
1235 if (dump_enabled_p ())
1236 dump_printf_loc (MSG_NOTE, vect_location,
1237 "vect_model_load_cost: explicit realign optimized"
1238 "\n");
1240 break;
1243 case dr_unaligned_unsupported:
1245 *inside_cost = VECT_MAX_COST;
1247 if (dump_enabled_p ())
1248 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1249 "vect_model_load_cost: unsupported access.\n");
1250 break;
1253 default:
1254 gcc_unreachable ();
1258 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1259 the loop preheader for the vectorized stmt STMT. */
1261 static void
1262 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1264 if (gsi)
1265 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1266 else
1268 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1269 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1271 if (loop_vinfo)
1273 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1274 basic_block new_bb;
1275 edge pe;
1277 if (nested_in_vect_loop_p (loop, stmt))
1278 loop = loop->inner;
1280 pe = loop_preheader_edge (loop);
1281 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1282 gcc_assert (!new_bb);
1284 else
1286 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1287 basic_block bb;
1288 gimple_stmt_iterator gsi_bb_start;
1290 gcc_assert (bb_vinfo);
1291 bb = BB_VINFO_BB (bb_vinfo);
1292 gsi_bb_start = gsi_after_labels (bb);
1293 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1297 if (dump_enabled_p ())
1299 dump_printf_loc (MSG_NOTE, vect_location,
1300 "created new init_stmt: ");
1301 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1302 dump_printf (MSG_NOTE, "\n");
1306 /* Function vect_init_vector.
1308 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1309 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1310 vector type, a vector with all elements equal to VAL is created first.
1311 Place the initialization at GSI if it is not NULL. Otherwise, place the
1312 initialization at the loop preheader.
1313 Return the DEF of INIT_STMT.
1314 It will be used in the vectorization of STMT. */
1316 tree
1317 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1319 tree new_var;
1320 gimple init_stmt;
1321 tree vec_oprnd;
1322 tree new_temp;
1324 if (TREE_CODE (type) == VECTOR_TYPE
1325 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1327 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1329 if (CONSTANT_CLASS_P (val))
1330 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1331 else
1333 new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1334 init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1335 new_temp, val,
1336 NULL_TREE);
1337 vect_init_vector_1 (stmt, init_stmt, gsi);
1338 val = new_temp;
1341 val = build_vector_from_val (type, val);
1344 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1345 init_stmt = gimple_build_assign (new_var, val);
1346 new_temp = make_ssa_name (new_var, init_stmt);
1347 gimple_assign_set_lhs (init_stmt, new_temp);
1348 vect_init_vector_1 (stmt, init_stmt, gsi);
1349 vec_oprnd = gimple_assign_lhs (init_stmt);
1350 return vec_oprnd;
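/* For example, initializing a four-element integer vector from the scalar
   constant 3 emits, at GSI or in the loop preheader (a sketch; names are
   illustrative):

       cst_.8 = { 3, 3, 3, 3 };

   and the SSA name on the left-hand side is returned as the vector def.  */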
1354 /* Function vect_get_vec_def_for_operand.
1356 OP is an operand in STMT. This function returns a (vector) def that will be
1357 used in the vectorized stmt for STMT.
1359 In the case that OP is an SSA_NAME which is defined in the loop, then
1360 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1362 In case OP is an invariant or constant, a new stmt that creates a vector def
1363 needs to be introduced. */
1365 tree
1366 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1368 tree vec_oprnd;
1369 gimple vec_stmt;
1370 gimple def_stmt;
1371 stmt_vec_info def_stmt_info = NULL;
1372 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1373 unsigned int nunits;
1374 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1375 tree def;
1376 enum vect_def_type dt;
1377 bool is_simple_use;
1378 tree vector_type;
1380 if (dump_enabled_p ())
1382 dump_printf_loc (MSG_NOTE, vect_location,
1383 "vect_get_vec_def_for_operand: ");
1384 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1385 dump_printf (MSG_NOTE, "\n");
1388 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1389 &def_stmt, &def, &dt);
1390 gcc_assert (is_simple_use);
1391 if (dump_enabled_p ())
1393 int loc_printed = 0;
1394 if (def)
1396 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1397 loc_printed = 1;
1398 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1399 dump_printf (MSG_NOTE, "\n");
1401 if (def_stmt)
1403 if (loc_printed)
1404 dump_printf (MSG_NOTE, " def_stmt = ");
1405 else
1406 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1407 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1408 dump_printf (MSG_NOTE, "\n");
1412 switch (dt)
1414 /* Case 1: operand is a constant. */
1415 case vect_constant_def:
1417 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1418 gcc_assert (vector_type);
1419 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1421 if (scalar_def)
1422 *scalar_def = op;
1424 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1425 if (dump_enabled_p ())
1426 dump_printf_loc (MSG_NOTE, vect_location,
1427 "Create vector_cst. nunits = %d\n", nunits);
1429 return vect_init_vector (stmt, op, vector_type, NULL);
1432 /* Case 2: operand is defined outside the loop - loop invariant. */
1433 case vect_external_def:
1435 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1436 gcc_assert (vector_type);
1438 if (scalar_def)
1439 *scalar_def = def;
1441 /* Create 'vec_inv = {inv,inv,..,inv}' */
1442 if (dump_enabled_p ())
1443 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1445 return vect_init_vector (stmt, def, vector_type, NULL);
1448 /* Case 3: operand is defined inside the loop. */
1449 case vect_internal_def:
1451 if (scalar_def)
1452 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1454 /* Get the def from the vectorized stmt. */
1455 def_stmt_info = vinfo_for_stmt (def_stmt);
1457 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1458 /* Get vectorized pattern statement. */
1459 if (!vec_stmt
1460 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1461 && !STMT_VINFO_RELEVANT (def_stmt_info))
1462 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1463 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1464 gcc_assert (vec_stmt);
1465 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1466 vec_oprnd = PHI_RESULT (vec_stmt);
1467 else if (is_gimple_call (vec_stmt))
1468 vec_oprnd = gimple_call_lhs (vec_stmt);
1469 else
1470 vec_oprnd = gimple_assign_lhs (vec_stmt);
1471 return vec_oprnd;
1474 /* Case 4: operand is defined by a loop header phi - reduction */
1475 case vect_reduction_def:
1476 case vect_double_reduction_def:
1477 case vect_nested_cycle:
1479 struct loop *loop;
1481 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1482 loop = (gimple_bb (def_stmt))->loop_father;
1484 /* Get the def before the loop */
1485 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1486 return get_initial_def_for_reduction (stmt, op, scalar_def);
1489 /* Case 5: operand is defined by loop-header phi - induction. */
1490 case vect_induction_def:
1492 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1494 /* Get the def from the vectorized stmt. */
1495 def_stmt_info = vinfo_for_stmt (def_stmt);
1496 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1497 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1498 vec_oprnd = PHI_RESULT (vec_stmt);
1499 else
1500 vec_oprnd = gimple_get_lhs (vec_stmt);
1501 return vec_oprnd;
1504 default:
1505 gcc_unreachable ();
1510 /* Function vect_get_vec_def_for_stmt_copy
1512 Return a vector-def for an operand. This function is used when the
1513 vectorized stmt to be created (by the caller to this function) is a "copy"
1514 created in case the vectorized result cannot fit in one vector, and several
1515 copies of the vector-stmt are required. In this case the vector-def is
1516 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1517 of the stmt that defines VEC_OPRND.
1518 DT is the type of the vector def VEC_OPRND.
1520 Context:
1521 In case the vectorization factor (VF) is bigger than the number
1522 of elements that can fit in a vectype (nunits), we have to generate
1523 more than one vector stmt to vectorize the scalar stmt. This situation
1524 arises when there are multiple data-types operated upon in the loop; the
1525 smallest data-type determines the VF, and as a result, when vectorizing
1526 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1527 vector stmt (each computing a vector of 'nunits' results, and together
1528 computing 'VF' results in each iteration). This function is called when
1529 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1530 which VF=16 and nunits=4, so the number of copies required is 4):
1532 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1534 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1535 VS1.1: vx.1 = memref1 VS1.2
1536 VS1.2: vx.2 = memref2 VS1.3
1537 VS1.3: vx.3 = memref3
1539 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1540 VSnew.1: vz1 = vx.1 + ... VSnew.2
1541 VSnew.2: vz2 = vx.2 + ... VSnew.3
1542 VSnew.3: vz3 = vx.3 + ...
1544 The vectorization of S1 is explained in vectorizable_load.
1545 The vectorization of S2:
1546 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1547 the function 'vect_get_vec_def_for_operand' is called to
1548 get the relevant vector-def for each operand of S2. For operand x it
1549 returns the vector-def 'vx.0'.
1551 To create the remaining copies of the vector-stmt (VSnew.j), this
1552 function is called to get the relevant vector-def for each operand. It is
1553 obtained from the respective VS1.j stmt, which is recorded in the
1554 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1556 For example, to obtain the vector-def 'vx.1' in order to create the
1557 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1558 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1559 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1560 and return its def ('vx.1').
1561 Overall, to create the above sequence this function will be called 3 times:
1562 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1563 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1564 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1566 tree
1567 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1569 gimple vec_stmt_for_operand;
1570 stmt_vec_info def_stmt_info;
1572 /* Do nothing; can reuse same def. */
1573 if (dt == vect_external_def || dt == vect_constant_def )
1574 return vec_oprnd;
1576 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1577 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1578 gcc_assert (def_stmt_info);
1579 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1580 gcc_assert (vec_stmt_for_operand);
1581 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1582 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1583 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1584 else
1585 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1586 return vec_oprnd;
1590 /* Get vectorized definitions for the operands to create a copy of an original
1591 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1593 static void
1594 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1595 vec<tree> *vec_oprnds0,
1596 vec<tree> *vec_oprnds1)
1598 tree vec_oprnd = vec_oprnds0->pop ();
1600 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1601 vec_oprnds0->quick_push (vec_oprnd);
1603 if (vec_oprnds1 && vec_oprnds1->length ())
1605 vec_oprnd = vec_oprnds1->pop ();
1606 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1607 vec_oprnds1->quick_push (vec_oprnd);
1612 /* Get vectorized definitions for OP0 and OP1.
1613 REDUC_INDEX is the index of reduction operand in case of reduction,
1614 and -1 otherwise. */
1616 void
1617 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1618 vec<tree> *vec_oprnds0,
1619 vec<tree> *vec_oprnds1,
1620 slp_tree slp_node, int reduc_index)
1622 if (slp_node)
1624 int nops = (op1 == NULL_TREE) ? 1 : 2;
1625 auto_vec<tree> ops (nops);
1626 auto_vec<vec<tree> > vec_defs (nops);
1628 ops.quick_push (op0);
1629 if (op1)
1630 ops.quick_push (op1);
1632 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1634 *vec_oprnds0 = vec_defs[0];
1635 if (op1)
1636 *vec_oprnds1 = vec_defs[1];
1638 else
1640 tree vec_oprnd;
1642 vec_oprnds0->create (1);
1643 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1644 vec_oprnds0->quick_push (vec_oprnd);
1646 if (op1)
1648 vec_oprnds1->create (1);
1649 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1650 vec_oprnds1->quick_push (vec_oprnd);
1656 /* Function vect_finish_stmt_generation.
1658 Insert a new stmt. */
1660 void
1661 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1662 gimple_stmt_iterator *gsi)
1664 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1665 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1666 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1668 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1670 if (!gsi_end_p (*gsi)
1671 && gimple_has_mem_ops (vec_stmt))
1673 gimple at_stmt = gsi_stmt (*gsi);
1674 tree vuse = gimple_vuse (at_stmt);
1675 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1677 tree vdef = gimple_vdef (at_stmt);
1678 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1679 /* If we have an SSA vuse and insert a store, update virtual
1680 SSA form to avoid triggering the renamer. Do so only
1681 if we can easily see all uses - which is what almost always
1682 happens with the way vectorized stmts are inserted. */
1683 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1684 && ((is_gimple_assign (vec_stmt)
1685 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1686 || (is_gimple_call (vec_stmt)
1687 && !(gimple_call_flags (vec_stmt)
1688 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1690 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1691 gimple_set_vdef (vec_stmt, new_vdef);
1692 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1696 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1698 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1699 bb_vinfo));
1701 if (dump_enabled_p ())
1703 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1704 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1705 dump_printf (MSG_NOTE, "\n");
1708 gimple_set_location (vec_stmt, gimple_location (stmt));
1710 /* While EH edges will generally prevent vectorization, stmt might
1711 e.g. be in a must-not-throw region. Ensure newly created stmts
1712 that could throw are part of the same region. */
1713 int lp_nr = lookup_stmt_eh_lp (stmt);
1714 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1715 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1718 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1719 a function declaration if the target has a vectorized version
1720 of the function, or NULL_TREE if the function cannot be vectorized. */
1722 tree
1723 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1725 tree fndecl = gimple_call_fndecl (call);
1727 /* We only handle functions that do not read or clobber memory -- i.e.
1728 const or novops ones. */
1729 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1730 return NULL_TREE;
1732 if (!fndecl
1733 || TREE_CODE (fndecl) != FUNCTION_DECL
1734 || !DECL_BUILT_IN (fndecl))
1735 return NULL_TREE;
1737 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1738 vectype_in);
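/* For example, a call to the sqrt builtin on double may be mapped by the
   target hook to a variant operating on a vector of two doubles (on x86
   this would be the sqrtpd builtin); if the target provides no such
   variant, NULL_TREE is returned and the call is not vectorized.  */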
1742 static tree permute_vec_elements (tree, tree, tree, gimple,
1743 gimple_stmt_iterator *);
1746 /* Function vectorizable_mask_load_store.
1748 Check if STMT performs a conditional load or store that can be vectorized.
1749 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1750 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1751 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
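/* The calls handled here are internal-function calls of the form
   (a sketch):

       lhs_1 = MASK_LOAD (ptr, align, mask);
       MASK_STORE (ptr, align, mask, value);

   i.e. the mask is argument 2 and, for stores, the stored value is
   argument 3, matching the argument indices used in
   exist_non_indexing_operands_for_use_p above.  */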
1753 static bool
1754 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1755 gimple *vec_stmt, slp_tree slp_node)
1757 tree vec_dest = NULL;
1758 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1759 stmt_vec_info prev_stmt_info;
1760 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1761 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1762 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1763 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1764 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1765 tree elem_type;
1766 gimple new_stmt;
1767 tree dummy;
1768 tree dataref_ptr = NULL_TREE;
1769 gimple ptr_incr;
1770 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1771 int ncopies;
1772 int i, j;
1773 bool inv_p;
1774 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1775 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1776 int gather_scale = 1;
1777 enum vect_def_type gather_dt = vect_unknown_def_type;
1778 bool is_store;
1779 tree mask;
1780 gimple def_stmt;
1781 tree def;
1782 enum vect_def_type dt;
1784 if (slp_node != NULL)
1785 return false;
1787 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1788 gcc_assert (ncopies >= 1);
1790 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1791 mask = gimple_call_arg (stmt, 2);
1792 if (TYPE_PRECISION (TREE_TYPE (mask))
1793 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1794 return false;
1796 /* FORNOW. This restriction should be relaxed. */
1797 if (nested_in_vect_loop && ncopies > 1)
1799 if (dump_enabled_p ())
1800 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1801 "multiple types in nested loop.");
1802 return false;
1805 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1806 return false;
1808 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1809 return false;
1811 if (!STMT_VINFO_DATA_REF (stmt_info))
1812 return false;
1814 elem_type = TREE_TYPE (vectype);
1816 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1817 return false;
1819 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1820 return false;
1822 if (STMT_VINFO_GATHER_P (stmt_info))
1824 gimple def_stmt;
1825 tree def;
1826 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1827 &gather_off, &gather_scale);
1828 gcc_assert (gather_decl);
1829 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1830 &def_stmt, &def, &gather_dt,
1831 &gather_off_vectype))
1833 if (dump_enabled_p ())
1834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1835 "gather index use not simple.");
1836 return false;
1839 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1840 tree masktype
1841 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1842 if (TREE_CODE (masktype) == INTEGER_TYPE)
1844 if (dump_enabled_p ())
1845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1846 "masked gather with integer mask not supported.");
1847 return false;
1850 else if (tree_int_cst_compare (nested_in_vect_loop
1851 ? STMT_VINFO_DR_STEP (stmt_info)
1852 : DR_STEP (dr), size_zero_node) <= 0)
1853 return false;
1854 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1855 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1856 return false;
1858 if (TREE_CODE (mask) != SSA_NAME)
1859 return false;
1861 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1862 &def_stmt, &def, &dt))
1863 return false;
1865 if (is_store)
1867 tree rhs = gimple_call_arg (stmt, 3);
1868 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1869 &def_stmt, &def, &dt))
1870 return false;
1873 if (!vec_stmt) /* transformation not required. */
1875 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1876 if (is_store)
1877 vect_model_store_cost (stmt_info, ncopies, false, dt,
1878 NULL, NULL, NULL);
1879 else
1880 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1881 return true;
1884 /** Transform. **/
1886 if (STMT_VINFO_GATHER_P (stmt_info))
1888 tree vec_oprnd0 = NULL_TREE, op;
1889 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1890 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1891 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1892 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1893 tree mask_perm_mask = NULL_TREE;
1894 edge pe = loop_preheader_edge (loop);
1895 gimple_seq seq;
1896 basic_block new_bb;
1897 enum { NARROW, NONE, WIDEN } modifier;
1898 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1900 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1901 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1902 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1903 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1904 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1905 scaletype = TREE_VALUE (arglist);
1906 gcc_checking_assert (types_compatible_p (srctype, rettype)
1907 && types_compatible_p (srctype, masktype));
1909 if (nunits == gather_off_nunits)
1910 modifier = NONE;
1911 else if (nunits == gather_off_nunits / 2)
1913 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1914 modifier = WIDEN;
1916 for (i = 0; i < gather_off_nunits; ++i)
1917 sel[i] = i | nunits;
1919 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
1920 gcc_assert (perm_mask != NULL_TREE);
1922 else if (nunits == gather_off_nunits * 2)
1924 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1925 modifier = NARROW;
1927 for (i = 0; i < nunits; ++i)
1928 sel[i] = i < gather_off_nunits
1929 ? i : i + nunits - gather_off_nunits;
1931 perm_mask = vect_gen_perm_mask (vectype, sel);
1932 gcc_assert (perm_mask != NULL_TREE);
1933 ncopies *= 2;
1934 for (i = 0; i < nunits; ++i)
1935 sel[i] = i | gather_off_nunits;
1936 mask_perm_mask = vect_gen_perm_mask (masktype, sel);
1937 gcc_assert (mask_perm_mask != NULL_TREE);
1939 else
1940 gcc_unreachable ();
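/* Example of the selectors built above: with nunits == 8 and
   gather_off_nunits == 4 (NARROW), perm_mask comes from
   sel = { 0, 1, 2, 3, 8, 9, 10, 11 } and mask_perm_mask from
   sel = { 4, 5, 6, 7, 4, 5, 6, 7 }; ncopies is doubled because each data
   vector needs the results of two gather calls.  */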
1942 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1944 ptr = fold_convert (ptrtype, gather_base);
1945 if (!is_gimple_min_invariant (ptr))
1947 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1948 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1949 gcc_assert (!new_bb);
1952 scale = build_int_cst (scaletype, gather_scale);
1954 prev_stmt_info = NULL;
1955 for (j = 0; j < ncopies; ++j)
1957 if (modifier == WIDEN && (j & 1))
1958 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1959 perm_mask, stmt, gsi);
1960 else if (j == 0)
1961 op = vec_oprnd0
1962 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1963 else
1964 op = vec_oprnd0
1965 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1967 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1969 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1970 == TYPE_VECTOR_SUBPARTS (idxtype));
1971 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1972 var = make_ssa_name (var, NULL);
1973 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1974 new_stmt
1975 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
1976 op, NULL_TREE);
1977 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1978 op = var;
1981 if (mask_perm_mask && (j & 1))
1982 mask_op = permute_vec_elements (mask_op, mask_op,
1983 mask_perm_mask, stmt, gsi);
1984 else
1986 if (j == 0)
1987 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1988 else
1990 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1991 &def_stmt, &def, &dt);
1992 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1995 mask_op = vec_mask;
1996 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1998 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1999 == TYPE_VECTOR_SUBPARTS (masktype));
2000 var = vect_get_new_vect_var (masktype, vect_simple_var,
2001 NULL);
2002 var = make_ssa_name (var, NULL);
2003 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2004 new_stmt
2005 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
2006 mask_op, NULL_TREE);
2007 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2008 mask_op = var;
2012 new_stmt
2013 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2014 scale);
2016 if (!useless_type_conversion_p (vectype, rettype))
2018 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2019 == TYPE_VECTOR_SUBPARTS (rettype));
2020 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2021 op = make_ssa_name (var, new_stmt);
2022 gimple_call_set_lhs (new_stmt, op);
2023 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2024 var = make_ssa_name (vec_dest, NULL);
2025 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2026 new_stmt
2027 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
2028 NULL_TREE);
2030 else
2032 var = make_ssa_name (vec_dest, new_stmt);
2033 gimple_call_set_lhs (new_stmt, var);
2036 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2038 if (modifier == NARROW)
2040 if ((j & 1) == 0)
2042 prev_res = var;
2043 continue;
2045 var = permute_vec_elements (prev_res, var,
2046 perm_mask, stmt, gsi);
2047 new_stmt = SSA_NAME_DEF_STMT (var);
2050 if (prev_stmt_info == NULL)
2051 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2052 else
2053 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2054 prev_stmt_info = vinfo_for_stmt (new_stmt);
2057 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2058 from the IL. */
2059 tree lhs = gimple_call_lhs (stmt);
2060 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2061 set_vinfo_for_stmt (new_stmt, stmt_info);
2062 set_vinfo_for_stmt (stmt, NULL);
2063 STMT_VINFO_STMT (stmt_info) = new_stmt;
2064 gsi_replace (gsi, new_stmt, true);
2065 return true;
2067 else if (is_store)
2069 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2070 prev_stmt_info = NULL;
2071 for (i = 0; i < ncopies; i++)
2073 unsigned align, misalign;
2075 if (i == 0)
2077 tree rhs = gimple_call_arg (stmt, 3);
2078 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2079 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2080 /* We should have caught mismatched types earlier. */
2081 gcc_assert (useless_type_conversion_p (vectype,
2082 TREE_TYPE (vec_rhs)));
2083 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2084 NULL_TREE, &dummy, gsi,
2085 &ptr_incr, false, &inv_p);
2086 gcc_assert (!inv_p);
2088 else
2090 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2091 &def, &dt);
2092 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2093 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2094 &def, &dt);
2095 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2096 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2097 TYPE_SIZE_UNIT (vectype));
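/* Record the alignment the target may assume for this access: the full
   vector alignment if the access is known to be aligned, only the element
   alignment if the misalignment is unknown (-1), otherwise the known
   misalignment.  */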
2100 align = TYPE_ALIGN_UNIT (vectype);
2101 if (aligned_access_p (dr))
2102 misalign = 0;
2103 else if (DR_MISALIGNMENT (dr) == -1)
2105 align = TYPE_ALIGN_UNIT (elem_type);
2106 misalign = 0;
2108 else
2109 misalign = DR_MISALIGNMENT (dr);
2110 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2111 misalign);
2112 new_stmt
2113 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2114 gimple_call_arg (stmt, 1),
2115 vec_mask, vec_rhs);
2116 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2117 if (i == 0)
2118 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2119 else
2120 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2121 prev_stmt_info = vinfo_for_stmt (new_stmt);
2124 else
2126 tree vec_mask = NULL_TREE;
2127 prev_stmt_info = NULL;
2128 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2129 for (i = 0; i < ncopies; i++)
2131 unsigned align, misalign;
2133 if (i == 0)
2135 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2136 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2137 NULL_TREE, &dummy, gsi,
2138 &ptr_incr, false, &inv_p);
2139 gcc_assert (!inv_p);
2141 else
2143 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2144 &def, &dt);
2145 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2146 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2147 TYPE_SIZE_UNIT (vectype));
2150 align = TYPE_ALIGN_UNIT (vectype);
2151 if (aligned_access_p (dr))
2152 misalign = 0;
2153 else if (DR_MISALIGNMENT (dr) == -1)
2155 align = TYPE_ALIGN_UNIT (elem_type);
2156 misalign = 0;
2158 else
2159 misalign = DR_MISALIGNMENT (dr);
2160 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2161 misalign);
2162 new_stmt
2163 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2164 gimple_call_arg (stmt, 1),
2165 vec_mask);
2166 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest, NULL));
2167 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2168 if (i == 0)
2169 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2170 else
2171 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2172 prev_stmt_info = vinfo_for_stmt (new_stmt);
2176 if (!is_store)
2178 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2179 from the IL. */
2180 tree lhs = gimple_call_lhs (stmt);
2181 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2182 set_vinfo_for_stmt (new_stmt, stmt_info);
2183 set_vinfo_for_stmt (stmt, NULL);
2184 STMT_VINFO_STMT (stmt_info) = new_stmt;
2185 gsi_replace (gsi, new_stmt, true);
2188 return true;
2192 /* Function vectorizable_call.
2194 Check if STMT performs a function call that can be vectorized.
2195 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2196 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2197 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2199 static bool
2200 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2201 slp_tree slp_node)
2203 tree vec_dest;
2204 tree scalar_dest;
2205 tree op, type;
2206 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2207 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2208 tree vectype_out, vectype_in;
2209 int nunits_in;
2210 int nunits_out;
2211 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2212 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2213 tree fndecl, new_temp, def, rhs_type;
2214 gimple def_stmt;
2215 enum vect_def_type dt[3]
2216 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2217 gimple new_stmt = NULL;
2218 int ncopies, j;
2219 vec<tree> vargs = vNULL;
2220 enum { NARROW, NONE, WIDEN } modifier;
2221 size_t i, nargs;
2222 tree lhs;
2224 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2225 return false;
2227 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2228 return false;
2230 /* Is STMT a vectorizable call? */
2231 if (!is_gimple_call (stmt))
2232 return false;
2234 if (gimple_call_internal_p (stmt)
2235 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2236 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2237 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2238 slp_node);
2240 if (gimple_call_lhs (stmt) == NULL_TREE
2241 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2242 return false;
2244 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2246 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2248 /* Process function arguments. */
2249 rhs_type = NULL_TREE;
2250 vectype_in = NULL_TREE;
2251 nargs = gimple_call_num_args (stmt);
2253 /* Bail out if the function has more than three arguments; we do not have
2254 interesting builtin functions to vectorize with more than two arguments
2255 except for fma. Having no arguments is also not good. */
2256 if (nargs == 0 || nargs > 3)
2257 return false;
2259 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2260 if (gimple_call_internal_p (stmt)
2261 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2263 nargs = 0;
2264 rhs_type = unsigned_type_node;
2267 for (i = 0; i < nargs; i++)
2269 tree opvectype;
2271 op = gimple_call_arg (stmt, i);
2273 /* We can only handle calls with arguments of the same type. */
2274 if (rhs_type
2275 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2277 if (dump_enabled_p ())
2278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2279 "argument types differ.\n");
2280 return false;
2282 if (!rhs_type)
2283 rhs_type = TREE_TYPE (op);
2285 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2286 &def_stmt, &def, &dt[i], &opvectype))
2288 if (dump_enabled_p ())
2289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2290 "use not simple.\n");
2291 return false;
2294 if (!vectype_in)
2295 vectype_in = opvectype;
2296 else if (opvectype
2297 && opvectype != vectype_in)
2299 if (dump_enabled_p ())
2300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2301 "argument vector types differ.\n");
2302 return false;
2305 /* If all arguments are external or constant defs, use a vector type with
2306 the same size as the output vector type. */
2307 if (!vectype_in)
2308 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2309 if (vec_stmt)
2310 gcc_assert (vectype_in);
2311 if (!vectype_in)
2313 if (dump_enabled_p ())
2315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2316 "no vectype for scalar type ");
2317 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2318 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2321 return false;
2324 /* FORNOW */
2325 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2326 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2327 if (nunits_in == nunits_out / 2)
2328 modifier = NARROW;
2329 else if (nunits_out == nunits_in)
2330 modifier = NONE;
2331 else if (nunits_out == nunits_in / 2)
2332 modifier = WIDEN;
2333 else
2334 return false;
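/* E.g. a call taking v4si operands and producing v8hi results is NARROW
   (one output vector consumes two input vectors), while v4si -> v2di is
   WIDEN; other element-count ratios are not handled.  */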
2336 /* For now, we only vectorize functions if a target specific builtin
2337 is available. TODO -- in some cases, it might be profitable to
2338 insert the calls for pieces of the vector, in order to be able
2339 to vectorize other operations in the loop. */
2340 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2341 if (fndecl == NULL_TREE)
2343 if (gimple_call_internal_p (stmt)
2344 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2345 && !slp_node
2346 && loop_vinfo
2347 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2348 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2349 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2350 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2352 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2353 { 0, 1, 2, ... vf - 1 } vector. */
2354 gcc_assert (nargs == 0);
2356 else
2358 if (dump_enabled_p ())
2359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2360 "function is not vectorizable.\n");
2361 return false;
2365 gcc_assert (!gimple_vuse (stmt));
2367 if (slp_node || PURE_SLP_STMT (stmt_info))
2368 ncopies = 1;
2369 else if (modifier == NARROW)
2370 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2371 else
2372 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2374 /* Sanity check: make sure that at least one copy of the vectorized stmt
2375 needs to be generated. */
2376 gcc_assert (ncopies >= 1);
2378 if (!vec_stmt) /* transformation not required. */
2380 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2381 if (dump_enabled_p ())
2382 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2383 "\n");
2384 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2385 return true;
2388 /** Transform. **/
2390 if (dump_enabled_p ())
2391 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2393 /* Handle def. */
2394 scalar_dest = gimple_call_lhs (stmt);
2395 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2397 prev_stmt_info = NULL;
2398 switch (modifier)
2400 case NONE:
2401 for (j = 0; j < ncopies; ++j)
2403 /* Build argument list for the vectorized call. */
2404 if (j == 0)
2405 vargs.create (nargs);
2406 else
2407 vargs.truncate (0);
2409 if (slp_node)
2411 auto_vec<vec<tree> > vec_defs (nargs);
2412 vec<tree> vec_oprnds0;
2414 for (i = 0; i < nargs; i++)
2415 vargs.quick_push (gimple_call_arg (stmt, i));
2416 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2417 vec_oprnds0 = vec_defs[0];
2419 /* Arguments are ready. Create the new vector stmt. */
2420 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2422 size_t k;
2423 for (k = 0; k < nargs; k++)
2425 vec<tree> vec_oprndsk = vec_defs[k];
2426 vargs[k] = vec_oprndsk[i];
2428 new_stmt = gimple_build_call_vec (fndecl, vargs);
2429 new_temp = make_ssa_name (vec_dest, new_stmt);
2430 gimple_call_set_lhs (new_stmt, new_temp);
2431 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2432 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2435 for (i = 0; i < nargs; i++)
2437 vec<tree> vec_oprndsi = vec_defs[i];
2438 vec_oprndsi.release ();
2440 continue;
2443 for (i = 0; i < nargs; i++)
2445 op = gimple_call_arg (stmt, i);
2446 if (j == 0)
2447 vec_oprnd0
2448 = vect_get_vec_def_for_operand (op, stmt, NULL);
2449 else
2451 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2452 vec_oprnd0
2453 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2456 vargs.quick_push (vec_oprnd0);
2459 if (gimple_call_internal_p (stmt)
2460 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2462 tree *v = XALLOCAVEC (tree, nunits_out);
2463 int k;
2464 for (k = 0; k < nunits_out; ++k)
2465 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2466 tree cst = build_vector (vectype_out, v);
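/* E.g. with a vectorization factor of 8 and 4-lane vectors, copy 0 gets the
   constant { 0, 1, 2, 3 } and copy 1 gets { 4, 5, 6, 7 }.  */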
2467 tree new_var
2468 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2469 gimple init_stmt = gimple_build_assign (new_var, cst);
2470 new_temp = make_ssa_name (new_var, init_stmt);
2471 gimple_assign_set_lhs (init_stmt, new_temp);
2472 vect_init_vector_1 (stmt, init_stmt, NULL);
2473 new_temp = make_ssa_name (vec_dest, NULL);
2474 new_stmt = gimple_build_assign (new_temp,
2475 gimple_assign_lhs (init_stmt));
2477 else
2479 new_stmt = gimple_build_call_vec (fndecl, vargs);
2480 new_temp = make_ssa_name (vec_dest, new_stmt);
2481 gimple_call_set_lhs (new_stmt, new_temp);
2483 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2485 if (j == 0)
2486 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2487 else
2488 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2490 prev_stmt_info = vinfo_for_stmt (new_stmt);
2493 break;
2495 case NARROW:
2496 for (j = 0; j < ncopies; ++j)
2498 /* Build argument list for the vectorized call. */
2499 if (j == 0)
2500 vargs.create (nargs * 2);
2501 else
2502 vargs.truncate (0);
2504 if (slp_node)
2506 auto_vec<vec<tree> > vec_defs (nargs);
2507 vec<tree> vec_oprnds0;
2509 for (i = 0; i < nargs; i++)
2510 vargs.quick_push (gimple_call_arg (stmt, i));
2511 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2512 vec_oprnds0 = vec_defs[0];
2514 /* Arguments are ready. Create the new vector stmt. */
2515 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2517 size_t k;
2518 vargs.truncate (0);
2519 for (k = 0; k < nargs; k++)
2521 vec<tree> vec_oprndsk = vec_defs[k];
2522 vargs.quick_push (vec_oprndsk[i]);
2523 vargs.quick_push (vec_oprndsk[i + 1]);
2525 new_stmt = gimple_build_call_vec (fndecl, vargs);
2526 new_temp = make_ssa_name (vec_dest, new_stmt);
2527 gimple_call_set_lhs (new_stmt, new_temp);
2528 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2529 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2532 for (i = 0; i < nargs; i++)
2534 vec<tree> vec_oprndsi = vec_defs[i];
2535 vec_oprndsi.release ();
2537 continue;
2540 for (i = 0; i < nargs; i++)
2542 op = gimple_call_arg (stmt, i);
2543 if (j == 0)
2545 vec_oprnd0
2546 = vect_get_vec_def_for_operand (op, stmt, NULL);
2547 vec_oprnd1
2548 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2550 else
2552 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2553 vec_oprnd0
2554 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2555 vec_oprnd1
2556 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2559 vargs.quick_push (vec_oprnd0);
2560 vargs.quick_push (vec_oprnd1);
2563 new_stmt = gimple_build_call_vec (fndecl, vargs);
2564 new_temp = make_ssa_name (vec_dest, new_stmt);
2565 gimple_call_set_lhs (new_stmt, new_temp);
2566 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2568 if (j == 0)
2569 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2570 else
2571 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2573 prev_stmt_info = vinfo_for_stmt (new_stmt);
2576 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2578 break;
2580 case WIDEN:
2581 /* No current target implements this case. */
2582 return false;
2585 vargs.release ();
2587 /* The call in STMT might prevent it from being removed in dce.
2588 We cannot remove it here, however, because of the way the ssa name
2589 it defines is mapped to the new definition. So just replace the
2590 rhs of the statement with something harmless. */
2592 if (slp_node)
2593 return true;
2595 type = TREE_TYPE (scalar_dest);
2596 if (is_pattern_stmt_p (stmt_info))
2597 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2598 else
2599 lhs = gimple_call_lhs (stmt);
2600 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2601 set_vinfo_for_stmt (new_stmt, stmt_info);
2602 set_vinfo_for_stmt (stmt, NULL);
2603 STMT_VINFO_STMT (stmt_info) = new_stmt;
2604 gsi_replace (gsi, new_stmt, false);
2606 return true;
2610 struct simd_call_arg_info
2612 tree vectype;
2613 tree op;
2614 enum vect_def_type dt;
2615 HOST_WIDE_INT linear_step;
2616 unsigned int align;
2619 /* Function vectorizable_simd_clone_call.
2621 Check if STMT performs a function call that can be vectorized
2622 by calling a simd clone of the function.
2623 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2624 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2625 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2627 static bool
2628 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2629 gimple *vec_stmt, slp_tree slp_node)
2631 tree vec_dest;
2632 tree scalar_dest;
2633 tree op, type;
2634 tree vec_oprnd0 = NULL_TREE;
2635 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2636 tree vectype;
2637 unsigned int nunits;
2638 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2639 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2640 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2641 tree fndecl, new_temp, def;
2642 gimple def_stmt;
2643 gimple new_stmt = NULL;
2644 int ncopies, j;
2645 vec<simd_call_arg_info> arginfo = vNULL;
2646 vec<tree> vargs = vNULL;
2647 size_t i, nargs;
2648 tree lhs, rtype, ratype;
2649 vec<constructor_elt, va_gc> *ret_ctor_elts;
2651 /* Is STMT a vectorizable call? */
2652 if (!is_gimple_call (stmt))
2653 return false;
2655 fndecl = gimple_call_fndecl (stmt);
2656 if (fndecl == NULL_TREE)
2657 return false;
2659 struct cgraph_node *node = cgraph_node::get (fndecl);
2660 if (node == NULL || node->simd_clones == NULL)
2661 return false;
2663 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2664 return false;
2666 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2667 return false;
2669 if (gimple_call_lhs (stmt)
2670 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2671 return false;
2673 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2675 vectype = STMT_VINFO_VECTYPE (stmt_info);
2677 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2678 return false;
2680 /* FORNOW */
2681 if (slp_node || PURE_SLP_STMT (stmt_info))
2682 return false;
2684 /* Process function arguments. */
2685 nargs = gimple_call_num_args (stmt);
2687 /* Bail out if the function has zero arguments. */
2688 if (nargs == 0)
2689 return false;
2691 arginfo.create (nargs);
2693 for (i = 0; i < nargs; i++)
2695 simd_call_arg_info thisarginfo;
2696 affine_iv iv;
2698 thisarginfo.linear_step = 0;
2699 thisarginfo.align = 0;
2700 thisarginfo.op = NULL_TREE;
2702 op = gimple_call_arg (stmt, i);
2703 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2704 &def_stmt, &def, &thisarginfo.dt,
2705 &thisarginfo.vectype)
2706 || thisarginfo.dt == vect_uninitialized_def)
2708 if (dump_enabled_p ())
2709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2710 "use not simple.\n");
2711 arginfo.release ();
2712 return false;
2715 if (thisarginfo.dt == vect_constant_def
2716 || thisarginfo.dt == vect_external_def)
2717 gcc_assert (thisarginfo.vectype == NULL_TREE);
2718 else
2719 gcc_assert (thisarginfo.vectype != NULL_TREE);
2721 if (thisarginfo.dt != vect_constant_def
2722 && thisarginfo.dt != vect_external_def
2723 && loop_vinfo
2724 && TREE_CODE (op) == SSA_NAME
2725 && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
2726 && tree_fits_shwi_p (iv.step))
2728 thisarginfo.linear_step = tree_to_shwi (iv.step);
2729 thisarginfo.op = iv.base;
2731 else if ((thisarginfo.dt == vect_constant_def
2732 || thisarginfo.dt == vect_external_def)
2733 && POINTER_TYPE_P (TREE_TYPE (op)))
2734 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2736 arginfo.quick_push (thisarginfo);
2739 unsigned int badness = 0;
2740 struct cgraph_node *bestn = NULL;
2741 if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
2742 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
2743 else
2744 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2745 n = n->simdclone->next_clone)
2747 unsigned int this_badness = 0;
2748 if (n->simdclone->simdlen
2749 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2750 || n->simdclone->nargs != nargs)
2751 continue;
2752 if (n->simdclone->simdlen
2753 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2754 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2755 - exact_log2 (n->simdclone->simdlen)) * 1024;
2756 if (n->simdclone->inbranch)
2757 this_badness += 2048;
2758 int target_badness = targetm.simd_clone.usable (n);
2759 if (target_badness < 0)
2760 continue;
2761 this_badness += target_badness * 512;
2762 /* FORNOW: Have to add code to add the mask argument. */
2763 if (n->simdclone->inbranch)
2764 continue;
2765 for (i = 0; i < nargs; i++)
2767 switch (n->simdclone->args[i].arg_type)
2769 case SIMD_CLONE_ARG_TYPE_VECTOR:
2770 if (!useless_type_conversion_p
2771 (n->simdclone->args[i].orig_type,
2772 TREE_TYPE (gimple_call_arg (stmt, i))))
2773 i = -1;
2774 else if (arginfo[i].dt == vect_constant_def
2775 || arginfo[i].dt == vect_external_def
2776 || arginfo[i].linear_step)
2777 this_badness += 64;
2778 break;
2779 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2780 if (arginfo[i].dt != vect_constant_def
2781 && arginfo[i].dt != vect_external_def)
2782 i = -1;
2783 break;
2784 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2785 if (arginfo[i].dt == vect_constant_def
2786 || arginfo[i].dt == vect_external_def
2787 || (arginfo[i].linear_step
2788 != n->simdclone->args[i].linear_step))
2789 i = -1;
2790 break;
2791 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2792 /* FORNOW */
2793 i = -1;
2794 break;
2795 case SIMD_CLONE_ARG_TYPE_MASK:
2796 gcc_unreachable ();
2798 if (i == (size_t) -1)
2799 break;
2800 if (n->simdclone->args[i].alignment > arginfo[i].align)
2802 i = -1;
2803 break;
2805 if (arginfo[i].align)
2806 this_badness += (exact_log2 (arginfo[i].align)
2807 - exact_log2 (n->simdclone->args[i].alignment));
2809 if (i == (size_t) -1)
2810 continue;
2811 if (bestn == NULL || this_badness < badness)
2813 bestn = n;
2814 badness = this_badness;
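/* Clone selection: clones whose simdlen is smaller than the vectorization
   factor are penalized, in-branch clones and clones the target rejects are
   skipped (as are clones whose argument kinds or alignment requirements do
   not match), and the usable clone with the lowest badness is kept in
   BESTN.  */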
2818 if (bestn == NULL)
2820 arginfo.release ();
2821 return false;
2824 for (i = 0; i < nargs; i++)
2825 if ((arginfo[i].dt == vect_constant_def
2826 || arginfo[i].dt == vect_external_def)
2827 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2829 arginfo[i].vectype
2830 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2831 i)));
2832 if (arginfo[i].vectype == NULL
2833 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2834 > bestn->simdclone->simdlen))
2836 arginfo.release ();
2837 return false;
2841 fndecl = bestn->decl;
2842 nunits = bestn->simdclone->simdlen;
2843 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2845 /* If the function isn't const, only allow it in simd loops where the
2846 user has asserted that at least nunits consecutive iterations can be
2847 performed using SIMD instructions. */
2848 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2849 && gimple_vuse (stmt))
2851 arginfo.release ();
2852 return false;
2855 /* Sanity check: make sure that at least one copy of the vectorized stmt
2856 needs to be generated. */
2857 gcc_assert (ncopies >= 1);
2859 if (!vec_stmt) /* transformation not required. */
2861 STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
2862 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2863 if (dump_enabled_p ())
2864 dump_printf_loc (MSG_NOTE, vect_location,
2865 "=== vectorizable_simd_clone_call ===\n");
2866 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2867 arginfo.release ();
2868 return true;
2871 /** Transform. **/
2873 if (dump_enabled_p ())
2874 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2876 /* Handle def. */
2877 scalar_dest = gimple_call_lhs (stmt);
2878 vec_dest = NULL_TREE;
2879 rtype = NULL_TREE;
2880 ratype = NULL_TREE;
2881 if (scalar_dest)
2883 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2884 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2885 if (TREE_CODE (rtype) == ARRAY_TYPE)
2887 ratype = rtype;
2888 rtype = TREE_TYPE (ratype);
2892 prev_stmt_info = NULL;
2893 for (j = 0; j < ncopies; ++j)
2895 /* Build argument list for the vectorized call. */
2896 if (j == 0)
2897 vargs.create (nargs);
2898 else
2899 vargs.truncate (0);
2901 for (i = 0; i < nargs; i++)
2903 unsigned int k, l, m, o;
2904 tree atype;
2905 op = gimple_call_arg (stmt, i);
2906 switch (bestn->simdclone->args[i].arg_type)
2908 case SIMD_CLONE_ARG_TYPE_VECTOR:
2909 atype = bestn->simdclone->args[i].vector_type;
2910 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
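/* A clone whose simdlen exceeds the lanes of its argument vector type
   receives this parameter split across O vector arguments per call.  */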
2911 for (m = j * o; m < (j + 1) * o; m++)
2913 if (TYPE_VECTOR_SUBPARTS (atype)
2914 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2916 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2917 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2918 / TYPE_VECTOR_SUBPARTS (atype));
2919 gcc_assert ((k & (k - 1)) == 0);
2920 if (m == 0)
2921 vec_oprnd0
2922 = vect_get_vec_def_for_operand (op, stmt, NULL);
2923 else
2925 vec_oprnd0 = arginfo[i].op;
2926 if ((m & (k - 1)) == 0)
2927 vec_oprnd0
2928 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2929 vec_oprnd0);
2931 arginfo[i].op = vec_oprnd0;
2932 vec_oprnd0
2933 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2934 size_int (prec),
2935 bitsize_int ((m & (k - 1)) * prec));
2936 new_stmt
2937 = gimple_build_assign (make_ssa_name (atype, NULL),
2938 vec_oprnd0);
2939 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2940 vargs.safe_push (gimple_assign_lhs (new_stmt));
2942 else
2944 k = (TYPE_VECTOR_SUBPARTS (atype)
2945 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2946 gcc_assert ((k & (k - 1)) == 0);
2947 vec<constructor_elt, va_gc> *ctor_elts;
2948 if (k != 1)
2949 vec_alloc (ctor_elts, k);
2950 else
2951 ctor_elts = NULL;
2952 for (l = 0; l < k; l++)
2954 if (m == 0 && l == 0)
2955 vec_oprnd0
2956 = vect_get_vec_def_for_operand (op, stmt, NULL);
2957 else
2958 vec_oprnd0
2959 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2960 arginfo[i].op);
2961 arginfo[i].op = vec_oprnd0;
2962 if (k == 1)
2963 break;
2964 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
2965 vec_oprnd0);
2967 if (k == 1)
2968 vargs.safe_push (vec_oprnd0);
2969 else
2971 vec_oprnd0 = build_constructor (atype, ctor_elts);
2972 new_stmt
2973 = gimple_build_assign (make_ssa_name (atype, NULL),
2974 vec_oprnd0);
2975 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2976 vargs.safe_push (gimple_assign_lhs (new_stmt));
2980 break;
2981 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2982 vargs.safe_push (op);
2983 break;
2984 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2985 if (j == 0)
2987 gimple_seq stmts;
2988 arginfo[i].op
2989 = force_gimple_operand (arginfo[i].op, &stmts, true,
2990 NULL_TREE);
2991 if (stmts != NULL)
2993 basic_block new_bb;
2994 edge pe = loop_preheader_edge (loop);
2995 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2996 gcc_assert (!new_bb);
2998 tree phi_res = copy_ssa_name (op, NULL);
2999 gimple new_phi = create_phi_node (phi_res, loop->header);
3000 set_vinfo_for_stmt (new_phi,
3001 new_stmt_vec_info (new_phi, loop_vinfo,
3002 NULL));
3003 add_phi_arg (new_phi, arginfo[i].op,
3004 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3005 enum tree_code code
3006 = POINTER_TYPE_P (TREE_TYPE (op))
3007 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3008 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3009 ? sizetype : TREE_TYPE (op);
3010 widest_int cst
3011 = wi::mul (bestn->simdclone->args[i].linear_step,
3012 ncopies * nunits);
3013 tree tcst = wide_int_to_tree (type, cst);
3014 tree phi_arg = copy_ssa_name (op, NULL);
3015 new_stmt = gimple_build_assign_with_ops (code, phi_arg,
3016 phi_res, tcst);
3017 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3018 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3019 set_vinfo_for_stmt (new_stmt,
3020 new_stmt_vec_info (new_stmt, loop_vinfo,
3021 NULL));
3022 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3023 UNKNOWN_LOCATION);
3024 arginfo[i].op = phi_res;
3025 vargs.safe_push (phi_res);
3027 else
3029 enum tree_code code
3030 = POINTER_TYPE_P (TREE_TYPE (op))
3031 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3032 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3033 ? sizetype : TREE_TYPE (op);
3034 widest_int cst
3035 = wi::mul (bestn->simdclone->args[i].linear_step,
3036 j * nunits);
3037 tree tcst = wide_int_to_tree (type, cst);
3038 new_temp = make_ssa_name (TREE_TYPE (op), NULL);
3039 new_stmt
3040 = gimple_build_assign_with_ops (code, new_temp,
3041 arginfo[i].op, tcst);
3042 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3043 vargs.safe_push (new_temp);
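/* Copy J therefore receives BASE + LINEAR_STEP * J * NUNITS, while the PHI
   created for the first copy advances the base by LINEAR_STEP * VF per
   vector iteration.  */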
3045 break;
3046 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3047 default:
3048 gcc_unreachable ();
3052 new_stmt = gimple_build_call_vec (fndecl, vargs);
3053 if (vec_dest)
3055 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3056 if (ratype)
3057 new_temp = create_tmp_var (ratype, NULL);
3058 else if (TYPE_VECTOR_SUBPARTS (vectype)
3059 == TYPE_VECTOR_SUBPARTS (rtype))
3060 new_temp = make_ssa_name (vec_dest, new_stmt);
3061 else
3062 new_temp = make_ssa_name (rtype, new_stmt);
3063 gimple_call_set_lhs (new_stmt, new_temp);
3065 vect_finish_stmt_generation (stmt, new_stmt, gsi);
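/* Distribute the clone's return value.  If the clone produces more lanes
   than VECTYPE holds, the result is split into several vector stmts below;
   if it produces fewer, results of consecutive calls are collected into a
   CONSTRUCTOR; an ARRAY_TYPE return (RATYPE) is read back piecewise and then
   clobbered.  */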
3067 if (vec_dest)
3069 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3071 unsigned int k, l;
3072 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3073 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3074 gcc_assert ((k & (k - 1)) == 0);
3075 for (l = 0; l < k; l++)
3077 tree t;
3078 if (ratype)
3080 t = build_fold_addr_expr (new_temp);
3081 t = build2 (MEM_REF, vectype, t,
3082 build_int_cst (TREE_TYPE (t),
3083 l * prec / BITS_PER_UNIT));
3085 else
3086 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3087 size_int (prec), bitsize_int (l * prec));
3088 new_stmt
3089 = gimple_build_assign (make_ssa_name (vectype, NULL), t);
3090 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3091 if (j == 0 && l == 0)
3092 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3093 else
3094 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3096 prev_stmt_info = vinfo_for_stmt (new_stmt);
3099 if (ratype)
3101 tree clobber = build_constructor (ratype, NULL);
3102 TREE_THIS_VOLATILE (clobber) = 1;
3103 new_stmt = gimple_build_assign (new_temp, clobber);
3104 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3106 continue;
3108 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3110 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3111 / TYPE_VECTOR_SUBPARTS (rtype));
3112 gcc_assert ((k & (k - 1)) == 0);
3113 if ((j & (k - 1)) == 0)
3114 vec_alloc (ret_ctor_elts, k);
3115 if (ratype)
3117 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3118 for (m = 0; m < o; m++)
3120 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3121 size_int (m), NULL_TREE, NULL_TREE);
3122 new_stmt
3123 = gimple_build_assign (make_ssa_name (rtype, NULL),
3124 tem);
3125 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3126 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3127 gimple_assign_lhs (new_stmt));
3129 tree clobber = build_constructor (ratype, NULL);
3130 TREE_THIS_VOLATILE (clobber) = 1;
3131 new_stmt = gimple_build_assign (new_temp, clobber);
3132 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3134 else
3135 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3136 if ((j & (k - 1)) != k - 1)
3137 continue;
3138 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3139 new_stmt
3140 = gimple_build_assign (make_ssa_name (vec_dest, NULL),
3141 vec_oprnd0);
3142 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3144 if ((unsigned) j == k - 1)
3145 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3146 else
3147 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3149 prev_stmt_info = vinfo_for_stmt (new_stmt);
3150 continue;
3152 else if (ratype)
3154 tree t = build_fold_addr_expr (new_temp);
3155 t = build2 (MEM_REF, vectype, t,
3156 build_int_cst (TREE_TYPE (t), 0));
3157 new_stmt
3158 = gimple_build_assign (make_ssa_name (vec_dest, NULL), t);
3159 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3160 tree clobber = build_constructor (ratype, NULL);
3161 TREE_THIS_VOLATILE (clobber) = 1;
3162 vect_finish_stmt_generation (stmt,
3163 gimple_build_assign (new_temp,
3164 clobber), gsi);
3168 if (j == 0)
3169 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3170 else
3171 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3173 prev_stmt_info = vinfo_for_stmt (new_stmt);
3176 vargs.release ();
3178 /* The call in STMT might prevent it from being removed in dce.
3179 We cannot remove it here, however, because of the way the ssa name
3180 it defines is mapped to the new definition. So just replace the
3181 rhs of the statement with something harmless. */
3183 if (slp_node)
3184 return true;
3186 if (scalar_dest)
3188 type = TREE_TYPE (scalar_dest);
3189 if (is_pattern_stmt_p (stmt_info))
3190 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3191 else
3192 lhs = gimple_call_lhs (stmt);
3193 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3195 else
3196 new_stmt = gimple_build_nop ();
3197 set_vinfo_for_stmt (new_stmt, stmt_info);
3198 set_vinfo_for_stmt (stmt, NULL);
3199 STMT_VINFO_STMT (stmt_info) = new_stmt;
3200 gsi_replace (gsi, new_stmt, false);
3201 unlink_stmt_vdef (stmt);
3203 return true;
3207 /* Function vect_gen_widened_results_half
3209 Create a vector stmt whose code, number of operands, and result
3210 variable are CODE, OP_TYPE, and VEC_DEST respectively, and whose operands
3211 are VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3212 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3213 needs to be created (DECL is a function-decl of a target-builtin).
3214 STMT is the original scalar stmt that we are vectorizing. */
3216 static gimple
3217 vect_gen_widened_results_half (enum tree_code code,
3218 tree decl,
3219 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3220 tree vec_dest, gimple_stmt_iterator *gsi,
3221 gimple stmt)
3223 gimple new_stmt;
3224 tree new_temp;
3226 /* Generate half of the widened result: */
3227 if (code == CALL_EXPR)
3229 /* Target specific support */
3230 if (op_type == binary_op)
3231 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3232 else
3233 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3234 new_temp = make_ssa_name (vec_dest, new_stmt);
3235 gimple_call_set_lhs (new_stmt, new_temp);
3237 else
3239 /* Generic support */
3240 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3241 if (op_type != binary_op)
3242 vec_oprnd1 = NULL;
3243 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
3244 vec_oprnd1);
3245 new_temp = make_ssa_name (vec_dest, new_stmt);
3246 gimple_assign_set_lhs (new_stmt, new_temp);
3248 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3250 return new_stmt;
3254 /* Get vectorized definitions for loop-based vectorization. For the first
3255 operand we call vect_get_vec_def_for_operand() (with OPRND containing the
3256 scalar operand), and for the rest we get a copy with
3257 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3258 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3259 The vectors are collected into VEC_OPRNDS. */
3261 static void
3262 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3263 vec<tree> *vec_oprnds, int multi_step_cvt)
3265 tree vec_oprnd;
3267 /* Get first vector operand. */
3268 /* All the vector operands except the very first one (that is the scalar
3269 oprnd) are stmt copies. */
3270 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3271 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3272 else
3273 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3275 vec_oprnds->quick_push (vec_oprnd);
3277 /* Get second vector operand. */
3278 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3279 vec_oprnds->quick_push (vec_oprnd);
3281 *oprnd = vec_oprnd;
3283 /* For conversion in multiple steps, continue to get operands
3284 recursively. */
3285 if (multi_step_cvt)
3286 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3290 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3291 For multi-step conversions store the resulting vectors and call the function
3292 recursively. */
3294 static void
3295 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3296 int multi_step_cvt, gimple stmt,
3297 vec<tree> vec_dsts,
3298 gimple_stmt_iterator *gsi,
3299 slp_tree slp_node, enum tree_code code,
3300 stmt_vec_info *prev_stmt_info)
3302 unsigned int i;
3303 tree vop0, vop1, new_tmp, vec_dest;
3304 gimple new_stmt;
3305 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3307 vec_dest = vec_dsts.pop ();
3309 for (i = 0; i < vec_oprnds->length (); i += 2)
3311 /* Create demotion operation. */
3312 vop0 = (*vec_oprnds)[i];
3313 vop1 = (*vec_oprnds)[i + 1];
3314 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3315 new_tmp = make_ssa_name (vec_dest, new_stmt);
3316 gimple_assign_set_lhs (new_stmt, new_tmp);
3317 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3319 if (multi_step_cvt)
3320 /* Store the resulting vector for next recursive call. */
3321 (*vec_oprnds)[i/2] = new_tmp;
3322 else
3324 /* This is the last step of the conversion sequence. Store the
3325 vectors in SLP_NODE or in vector info of the scalar statement
3326 (or in STMT_VINFO_RELATED_STMT chain). */
3327 if (slp_node)
3328 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3329 else
3331 if (!*prev_stmt_info)
3332 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3333 else
3334 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3336 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3341 /* For multi-step demotion operations we first generate demotion operations
3342 from the source type to the intermediate types, and then combine the
3343 results (stored in VEC_OPRNDS) with a demotion operation to the destination
3344 type. */
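/* E.g. a two-step int -> char demotion packs four int vectors pairwise into
   two short vectors in the loop above; the recursive call below then packs
   those into the final char vector.  */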
3345 if (multi_step_cvt)
3347 /* At each level of recursion we have half of the operands we had at the
3348 previous level. */
3349 vec_oprnds->truncate ((i+1)/2);
3350 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3351 stmt, vec_dsts, gsi, slp_node,
3352 VEC_PACK_TRUNC_EXPR,
3353 prev_stmt_info);
3356 vec_dsts.quick_push (vec_dest);
3360 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3361 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3362 the resulting vectors and call the function recursively. */
3364 static void
3365 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3366 vec<tree> *vec_oprnds1,
3367 gimple stmt, tree vec_dest,
3368 gimple_stmt_iterator *gsi,
3369 enum tree_code code1,
3370 enum tree_code code2, tree decl1,
3371 tree decl2, int op_type)
3373 int i;
3374 tree vop0, vop1, new_tmp1, new_tmp2;
3375 gimple new_stmt1, new_stmt2;
3376 vec<tree> vec_tmp = vNULL;
3378 vec_tmp.create (vec_oprnds0->length () * 2);
3379 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3381 if (op_type == binary_op)
3382 vop1 = (*vec_oprnds1)[i];
3383 else
3384 vop1 = NULL_TREE;
3386 /* Generate the two halves of promotion operation. */
3387 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3388 op_type, vec_dest, gsi, stmt);
3389 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3390 op_type, vec_dest, gsi, stmt);
3391 if (is_gimple_call (new_stmt1))
3393 new_tmp1 = gimple_call_lhs (new_stmt1);
3394 new_tmp2 = gimple_call_lhs (new_stmt2);
3396 else
3398 new_tmp1 = gimple_assign_lhs (new_stmt1);
3399 new_tmp2 = gimple_assign_lhs (new_stmt2);
3402 /* Store the results for the next step. */
3403 vec_tmp.quick_push (new_tmp1);
3404 vec_tmp.quick_push (new_tmp2);
3407 vec_oprnds0->release ();
3408 *vec_oprnds0 = vec_tmp;
3412 /* Check if STMT performs a conversion operation, that can be vectorized.
3413 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3414 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3415 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3417 static bool
3418 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3419 gimple *vec_stmt, slp_tree slp_node)
3421 tree vec_dest;
3422 tree scalar_dest;
3423 tree op0, op1 = NULL_TREE;
3424 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3425 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3426 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3427 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3428 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3429 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3430 tree new_temp;
3431 tree def;
3432 gimple def_stmt;
3433 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3434 gimple new_stmt = NULL;
3435 stmt_vec_info prev_stmt_info;
3436 int nunits_in;
3437 int nunits_out;
3438 tree vectype_out, vectype_in;
3439 int ncopies, i, j;
3440 tree lhs_type, rhs_type;
3441 enum { NARROW, NONE, WIDEN } modifier;
3442 vec<tree> vec_oprnds0 = vNULL;
3443 vec<tree> vec_oprnds1 = vNULL;
3444 tree vop0;
3445 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3446 int multi_step_cvt = 0;
3447 vec<tree> vec_dsts = vNULL;
3448 vec<tree> interm_types = vNULL;
3449 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3450 int op_type;
3451 machine_mode rhs_mode;
3452 unsigned short fltsz;
3454 /* Is STMT a vectorizable conversion? */
3456 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3457 return false;
3459 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3460 return false;
3462 if (!is_gimple_assign (stmt))
3463 return false;
3465 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3466 return false;
3468 code = gimple_assign_rhs_code (stmt);
3469 if (!CONVERT_EXPR_CODE_P (code)
3470 && code != FIX_TRUNC_EXPR
3471 && code != FLOAT_EXPR
3472 && code != WIDEN_MULT_EXPR
3473 && code != WIDEN_LSHIFT_EXPR)
3474 return false;
3476 op_type = TREE_CODE_LENGTH (code);
3478 /* Check types of lhs and rhs. */
3479 scalar_dest = gimple_assign_lhs (stmt);
3480 lhs_type = TREE_TYPE (scalar_dest);
3481 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3483 op0 = gimple_assign_rhs1 (stmt);
3484 rhs_type = TREE_TYPE (op0);
3486 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3487 && !((INTEGRAL_TYPE_P (lhs_type)
3488 && INTEGRAL_TYPE_P (rhs_type))
3489 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3490 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3491 return false;
3493 if ((INTEGRAL_TYPE_P (lhs_type)
3494 && (TYPE_PRECISION (lhs_type)
3495 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3496 || (INTEGRAL_TYPE_P (rhs_type)
3497 && (TYPE_PRECISION (rhs_type)
3498 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3500 if (dump_enabled_p ())
3501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3502 "type conversion to/from bit-precision unsupported."
3503 "\n");
3504 return false;
3507 /* Check the operands of the operation. */
3508 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3509 &def_stmt, &def, &dt[0], &vectype_in))
3511 if (dump_enabled_p ())
3512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3513 "use not simple.\n");
3514 return false;
3516 if (op_type == binary_op)
3518 bool ok;
3520 op1 = gimple_assign_rhs2 (stmt);
3521 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3522 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3523 OP1. */
3524 if (CONSTANT_CLASS_P (op0))
3525 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3526 &def_stmt, &def, &dt[1], &vectype_in);
3527 else
3528 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3529 &def, &dt[1]);
3531 if (!ok)
3533 if (dump_enabled_p ())
3534 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3535 "use not simple.\n");
3536 return false;
3540 /* If op0 is an external or constant def, use a vector type of
3541 the same size as the output vector type. */
3542 if (!vectype_in)
3543 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3544 if (vec_stmt)
3545 gcc_assert (vectype_in);
3546 if (!vectype_in)
3548 if (dump_enabled_p ())
3550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3551 "no vectype for scalar type ");
3552 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3553 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3556 return false;
3559 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3560 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3561 if (nunits_in < nunits_out)
3562 modifier = NARROW;
3563 else if (nunits_out == nunits_in)
3564 modifier = NONE;
3565 else
3566 modifier = WIDEN;
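/* E.g. converting v8hi to v4si is a WIDEN conversion (fewer but wider output
   elements), v4si to v8hi a NARROW one, and v4si to v4sf a NONE one.  */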
3568 /* Multiple types in SLP are handled by creating the appropriate number of
3569 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3570 case of SLP. */
3571 if (slp_node || PURE_SLP_STMT (stmt_info))
3572 ncopies = 1;
3573 else if (modifier == NARROW)
3574 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3575 else
3576 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3578 /* Sanity check: make sure that at least one copy of the vectorized stmt
3579 needs to be generated. */
3580 gcc_assert (ncopies >= 1);
3582 /* Supportable by target? */
3583 switch (modifier)
3585 case NONE:
3586 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3587 return false;
3588 if (supportable_convert_operation (code, vectype_out, vectype_in,
3589 &decl1, &code1))
3590 break;
3591 /* FALLTHRU */
3592 unsupported:
3593 if (dump_enabled_p ())
3594 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3595 "conversion not supported by target.\n");
3596 return false;
3598 case WIDEN:
3599 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3600 &code1, &code2, &multi_step_cvt,
3601 &interm_types))
3603 /* Binary widening operation can only be supported directly by the
3604 architecture. */
3605 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3606 break;
3609 if (code != FLOAT_EXPR
3610 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3611 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3612 goto unsupported;
3614 rhs_mode = TYPE_MODE (rhs_type);
3615 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3616 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3617 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3618 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3620 cvt_type
3621 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3622 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3623 if (cvt_type == NULL_TREE)
3624 goto unsupported;
3626 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3628 if (!supportable_convert_operation (code, vectype_out,
3629 cvt_type, &decl1, &codecvt1))
3630 goto unsupported;
3632 else if (!supportable_widening_operation (code, stmt, vectype_out,
3633 cvt_type, &codecvt1,
3634 &codecvt2, &multi_step_cvt,
3635 &interm_types))
3636 continue;
3637 else
3638 gcc_assert (multi_step_cvt == 0);
3640 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3641 vectype_in, &code1, &code2,
3642 &multi_step_cvt, &interm_types))
3643 break;
3646 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3647 goto unsupported;
3649 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3650 codecvt2 = ERROR_MARK;
3651 else
3653 multi_step_cvt++;
3654 interm_types.safe_push (cvt_type);
3655 cvt_type = NULL_TREE;
3657 break;
3659 case NARROW:
3660 gcc_assert (op_type == unary_op);
3661 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3662 &code1, &multi_step_cvt,
3663 &interm_types))
3664 break;
3666 if (code != FIX_TRUNC_EXPR
3667 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3668 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3669 goto unsupported;
3671 rhs_mode = TYPE_MODE (rhs_type);
3672 cvt_type
3673 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3674 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3675 if (cvt_type == NULL_TREE)
3676 goto unsupported;
3677 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3678 &decl1, &codecvt1))
3679 goto unsupported;
3680 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3681 &code1, &multi_step_cvt,
3682 &interm_types))
3683 break;
3684 goto unsupported;
3686 default:
3687 gcc_unreachable ();
3690 if (!vec_stmt) /* transformation not required. */
3692 if (dump_enabled_p ())
3693 dump_printf_loc (MSG_NOTE, vect_location,
3694 "=== vectorizable_conversion ===\n");
3695 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3697 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3698 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3700 else if (modifier == NARROW)
3702 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3703 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3705 else
3707 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3708 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3710 interm_types.release ();
3711 return true;
3714 /** Transform. **/
3715 if (dump_enabled_p ())
3716 dump_printf_loc (MSG_NOTE, vect_location,
3717 "transform conversion. ncopies = %d.\n", ncopies);
3719 if (op_type == binary_op)
3721 if (CONSTANT_CLASS_P (op0))
3722 op0 = fold_convert (TREE_TYPE (op1), op0);
3723 else if (CONSTANT_CLASS_P (op1))
3724 op1 = fold_convert (TREE_TYPE (op0), op1);
3727 /* In case of multi-step conversion, we first generate conversion operations
3728 to the intermediate types, and then from those types to the final one.
3729 We create vector destinations for the intermediate types (TYPES) received
3730 from supportable_*_operation, and store them in the correct order
3731 for future use in vect_create_vectorized_*_stmts (). */
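/* For example, a char -> float conversion (where the target supports the
   intermediate steps) becomes the widening chain char -> short -> int
   followed by an int -> float convert; INT plays the role of CVT_TYPE and
   SHORT is the single entry of INTERM_TYPES.  */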
3732 vec_dsts.create (multi_step_cvt + 1);
3733 vec_dest = vect_create_destination_var (scalar_dest,
3734 (cvt_type && modifier == WIDEN)
3735 ? cvt_type : vectype_out);
3736 vec_dsts.quick_push (vec_dest);
3738 if (multi_step_cvt)
3740 for (i = interm_types.length () - 1;
3741 interm_types.iterate (i, &intermediate_type); i--)
3743 vec_dest = vect_create_destination_var (scalar_dest,
3744 intermediate_type);
3745 vec_dsts.quick_push (vec_dest);
3749 if (cvt_type)
3750 vec_dest = vect_create_destination_var (scalar_dest,
3751 modifier == WIDEN
3752 ? vectype_out : cvt_type);
3754 if (!slp_node)
3756 if (modifier == WIDEN)
3758 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3759 if (op_type == binary_op)
3760 vec_oprnds1.create (1);
3762 else if (modifier == NARROW)
3763 vec_oprnds0.create (
3764 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3766 else if (code == WIDEN_LSHIFT_EXPR)
3767 vec_oprnds1.create (slp_node->vec_stmts_size);
3769 last_oprnd = op0;
3770 prev_stmt_info = NULL;
3771 switch (modifier)
3773 case NONE:
3774 for (j = 0; j < ncopies; j++)
3776 if (j == 0)
3777 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3778 -1);
3779 else
3780 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3782 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3784 /* Arguments are ready, create the new vector stmt. */
3785 if (code1 == CALL_EXPR)
3787 new_stmt = gimple_build_call (decl1, 1, vop0);
3788 new_temp = make_ssa_name (vec_dest, new_stmt);
3789 gimple_call_set_lhs (new_stmt, new_temp);
3791 else
3793 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3794 new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
3795 vop0, NULL);
3796 new_temp = make_ssa_name (vec_dest, new_stmt);
3797 gimple_assign_set_lhs (new_stmt, new_temp);
3800 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3801 if (slp_node)
3802 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3805 if (j == 0)
3806 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3807 else
3808 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3809 prev_stmt_info = vinfo_for_stmt (new_stmt);
3811 break;
3813 case WIDEN:
3814 /* In case the vectorization factor (VF) is bigger than the number
3815 of elements that we can fit in a vectype (nunits), we have to
3816 generate more than one vector stmt - i.e., we need to "unroll"
3817 the vector stmt by a factor VF/nunits. */
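/* For example, with VF = 8 and nunits = 4 the unroll factor is
   VF/nunits = 2, so two copies of each vector stmt are generated.  */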
3818 for (j = 0; j < ncopies; j++)
3820 /* Handle uses. */
3821 if (j == 0)
3823 if (slp_node)
3825 if (code == WIDEN_LSHIFT_EXPR)
3827 unsigned int k;
3829 vec_oprnd1 = op1;
3830 /* Store vec_oprnd1 for every vector stmt to be created
3831 for SLP_NODE. We check during the analysis that all
3832 the shift arguments are the same. */
3833 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3834 vec_oprnds1.quick_push (vec_oprnd1);
3836 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3837 slp_node, -1);
3839 else
3840 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3841 &vec_oprnds1, slp_node, -1);
3843 else
3845 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3846 vec_oprnds0.quick_push (vec_oprnd0);
3847 if (op_type == binary_op)
3849 if (code == WIDEN_LSHIFT_EXPR)
3850 vec_oprnd1 = op1;
3851 else
3852 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3853 NULL);
3854 vec_oprnds1.quick_push (vec_oprnd1);
3858 else
3860 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3861 vec_oprnds0.truncate (0);
3862 vec_oprnds0.quick_push (vec_oprnd0);
3863 if (op_type == binary_op)
3865 if (code == WIDEN_LSHIFT_EXPR)
3866 vec_oprnd1 = op1;
3867 else
3868 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3869 vec_oprnd1);
3870 vec_oprnds1.truncate (0);
3871 vec_oprnds1.quick_push (vec_oprnd1);
3875 /* Arguments are ready. Create the new vector stmts. */
3876 for (i = multi_step_cvt; i >= 0; i--)
3878 tree this_dest = vec_dsts[i];
3879 enum tree_code c1 = code1, c2 = code2;
3880 if (i == 0 && codecvt2 != ERROR_MARK)
3882 c1 = codecvt1;
3883 c2 = codecvt2;
3885 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3886 &vec_oprnds1,
3887 stmt, this_dest, gsi,
3888 c1, c2, decl1, decl2,
3889 op_type);
3892 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3894 if (cvt_type)
3896 if (codecvt1 == CALL_EXPR)
3898 new_stmt = gimple_build_call (decl1, 1, vop0);
3899 new_temp = make_ssa_name (vec_dest, new_stmt);
3900 gimple_call_set_lhs (new_stmt, new_temp);
3902 else
3904 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3905 new_temp = make_ssa_name (vec_dest, NULL);
3906 new_stmt = gimple_build_assign_with_ops (codecvt1,
3907 new_temp,
3908 vop0, NULL);
3911 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3913 else
3914 new_stmt = SSA_NAME_DEF_STMT (vop0);
3916 if (slp_node)
3917 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3918 else
3920 if (!prev_stmt_info)
3921 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3922 else
3923 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3924 prev_stmt_info = vinfo_for_stmt (new_stmt);
3929 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3930 break;
3932 case NARROW:
3933 /* In case the vectorization factor (VF) is bigger than the number
3934 of elements that we can fit in a vectype (nunits), we have to
3935 generate more than one vector stmt - i.e., we need to "unroll"
3936 the vector stmt by a factor VF/nunits. */
3937 for (j = 0; j < ncopies; j++)
3939 /* Handle uses. */
3940 if (slp_node)
3941 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3942 slp_node, -1);
3943 else
3945 vec_oprnds0.truncate (0);
3946 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3947 vect_pow2 (multi_step_cvt) - 1);
3950 /* Arguments are ready. Create the new vector stmts. */
3951 if (cvt_type)
3952 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3954 if (codecvt1 == CALL_EXPR)
3956 new_stmt = gimple_build_call (decl1, 1, vop0);
3957 new_temp = make_ssa_name (vec_dest, new_stmt);
3958 gimple_call_set_lhs (new_stmt, new_temp);
3960 else
3962 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3963 new_temp = make_ssa_name (vec_dest, NULL);
3964 new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
3965 vop0, NULL);
3968 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3969 vec_oprnds0[i] = new_temp;
3972 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
3973 stmt, vec_dsts, gsi,
3974 slp_node, code1,
3975 &prev_stmt_info);
3978 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3979 break;
3982 vec_oprnds0.release ();
3983 vec_oprnds1.release ();
3984 vec_dsts.release ();
3985 interm_types.release ();
3987 return true;
3991 /* Function vectorizable_assignment.
3993 Check if STMT performs an assignment (copy) that can be vectorized.
3994 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3995 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3996 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3998 static bool
3999 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4000 gimple *vec_stmt, slp_tree slp_node)
4002 tree vec_dest;
4003 tree scalar_dest;
4004 tree op;
4005 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4006 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4007 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4008 tree new_temp;
4009 tree def;
4010 gimple def_stmt;
4011 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4012 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4013 int ncopies;
4014 int i, j;
4015 vec<tree> vec_oprnds = vNULL;
4016 tree vop;
4017 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4018 gimple new_stmt = NULL;
4019 stmt_vec_info prev_stmt_info = NULL;
4020 enum tree_code code;
4021 tree vectype_in;
4023 /* Multiple types in SLP are handled by creating the appropriate number of
4024 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4025 case of SLP. */
4026 if (slp_node || PURE_SLP_STMT (stmt_info))
4027 ncopies = 1;
4028 else
4029 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4031 gcc_assert (ncopies >= 1);
4033 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4034 return false;
4036 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4037 return false;
4039 /* Is vectorizable assignment? */
4040 if (!is_gimple_assign (stmt))
4041 return false;
4043 scalar_dest = gimple_assign_lhs (stmt);
4044 if (TREE_CODE (scalar_dest) != SSA_NAME)
4045 return false;
4047 code = gimple_assign_rhs_code (stmt);
4048 if (gimple_assign_single_p (stmt)
4049 || code == PAREN_EXPR
4050 || CONVERT_EXPR_CODE_P (code))
4051 op = gimple_assign_rhs1 (stmt);
4052 else
4053 return false;
4055 if (code == VIEW_CONVERT_EXPR)
4056 op = TREE_OPERAND (op, 0);
4058 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4059 &def_stmt, &def, &dt[0], &vectype_in))
4061 if (dump_enabled_p ())
4062 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4063 "use not simple.\n");
4064 return false;
4067 /* We can handle NOP_EXPR conversions that do not change the number
4068 of elements or the vector size. */
4069 if ((CONVERT_EXPR_CODE_P (code)
4070 || code == VIEW_CONVERT_EXPR)
4071 && (!vectype_in
4072 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4073 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4074 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4075 return false;
4077 /* We do not handle bit-precision changes. */
4078 if ((CONVERT_EXPR_CODE_P (code)
4079 || code == VIEW_CONVERT_EXPR)
4080 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4081 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4082 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4083 || ((TYPE_PRECISION (TREE_TYPE (op))
4084 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4085 /* But a conversion that does not change the bit-pattern is ok. */
4086 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4087 > TYPE_PRECISION (TREE_TYPE (op)))
4088 && TYPE_UNSIGNED (TREE_TYPE (op))))
4090 if (dump_enabled_p ())
4091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4092 "type conversion to/from bit-precision "
4093 "unsupported.\n");
4094 return false;
4097 if (!vec_stmt) /* transformation not required. */
4099 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4100 if (dump_enabled_p ())
4101 dump_printf_loc (MSG_NOTE, vect_location,
4102 "=== vectorizable_assignment ===\n");
4103 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4104 return true;
4107 /** Transform. **/
4108 if (dump_enabled_p ())
4109 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4111 /* Handle def. */
4112 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4114 /* Handle use. */
4115 for (j = 0; j < ncopies; j++)
4117 /* Handle uses. */
4118 if (j == 0)
4119 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4120 else
4121 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4123 /* Arguments are ready. Create the new vector stmt. */
4124 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4126 if (CONVERT_EXPR_CODE_P (code)
4127 || code == VIEW_CONVERT_EXPR)
4128 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4129 new_stmt = gimple_build_assign (vec_dest, vop);
4130 new_temp = make_ssa_name (vec_dest, new_stmt);
4131 gimple_assign_set_lhs (new_stmt, new_temp);
4132 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4133 if (slp_node)
4134 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4137 if (slp_node)
4138 continue;
4140 if (j == 0)
4141 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4142 else
4143 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4145 prev_stmt_info = vinfo_for_stmt (new_stmt);
4148 vec_oprnds.release ();
4149 return true;
4153 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4154 either as shift by a scalar or by a vector. */
4156 bool
4157 vect_supportable_shift (enum tree_code code, tree scalar_type)
4160 machine_mode vec_mode;
4161 optab optab;
4162 int icode;
4163 tree vectype;
4165 vectype = get_vectype_for_scalar_type (scalar_type);
4166 if (!vectype)
4167 return false;
4169 optab = optab_for_tree_code (code, vectype, optab_scalar);
4170 if (!optab
4171 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4173 optab = optab_for_tree_code (code, vectype, optab_vector);
4174 if (!optab
4175 || (optab_handler (optab, TYPE_MODE (vectype))
4176 == CODE_FOR_nothing))
4177 return false;
4180 vec_mode = TYPE_MODE (vectype);
4181 icode = (int) optab_handler (optab, vec_mode);
4182 if (icode == CODE_FOR_nothing)
4183 return false;
4185 return true;
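/* A hypothetical caller (illustrative sketch, not from this file) would
   typically guard a shift-based rewrite with:

     if (vect_supportable_shift (RSHIFT_EXPR, TREE_TYPE (oprnd)))
       {
         ... emit the shift-based pattern ...
       }

   so the rewrite is only attempted when either the vector/scalar or the
   vector/vector shift optab exists for the corresponding vector mode.  */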
4189 /* Function vectorizable_shift.
4191 Check if STMT performs a shift operation that can be vectorized.
4192 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4193 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4194 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4196 static bool
4197 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4198 gimple *vec_stmt, slp_tree slp_node)
4200 tree vec_dest;
4201 tree scalar_dest;
4202 tree op0, op1 = NULL;
4203 tree vec_oprnd1 = NULL_TREE;
4204 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4205 tree vectype;
4206 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4207 enum tree_code code;
4208 machine_mode vec_mode;
4209 tree new_temp;
4210 optab optab;
4211 int icode;
4212 machine_mode optab_op2_mode;
4213 tree def;
4214 gimple def_stmt;
4215 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4216 gimple new_stmt = NULL;
4217 stmt_vec_info prev_stmt_info;
4218 int nunits_in;
4219 int nunits_out;
4220 tree vectype_out;
4221 tree op1_vectype;
4222 int ncopies;
4223 int j, i;
4224 vec<tree> vec_oprnds0 = vNULL;
4225 vec<tree> vec_oprnds1 = vNULL;
4226 tree vop0, vop1;
4227 unsigned int k;
4228 bool scalar_shift_arg = true;
4229 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4230 int vf;
4232 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4233 return false;
4235 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4236 return false;
4238 /* Is STMT a vectorizable binary/unary operation? */
4239 if (!is_gimple_assign (stmt))
4240 return false;
4242 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4243 return false;
4245 code = gimple_assign_rhs_code (stmt);
4247 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4248 || code == RROTATE_EXPR))
4249 return false;
4251 scalar_dest = gimple_assign_lhs (stmt);
4252 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4253 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4254 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4256 if (dump_enabled_p ())
4257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4258 "bit-precision shifts not supported.\n");
4259 return false;
4262 op0 = gimple_assign_rhs1 (stmt);
4263 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4264 &def_stmt, &def, &dt[0], &vectype))
4266 if (dump_enabled_p ())
4267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4268 "use not simple.\n");
4269 return false;
4271 /* If op0 is an external or constant def use a vector type with
4272 the same size as the output vector type. */
4273 if (!vectype)
4274 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4275 if (vec_stmt)
4276 gcc_assert (vectype);
4277 if (!vectype)
4279 if (dump_enabled_p ())
4280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4281 "no vectype for scalar type\n");
4282 return false;
4285 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4286 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4287 if (nunits_out != nunits_in)
4288 return false;
4290 op1 = gimple_assign_rhs2 (stmt);
4291 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4292 &def, &dt[1], &op1_vectype))
4294 if (dump_enabled_p ())
4295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4296 "use not simple.\n");
4297 return false;
4300 if (loop_vinfo)
4301 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4302 else
4303 vf = 1;
4305 /* Multiple types in SLP are handled by creating the appropriate number of
4306 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4307 case of SLP. */
4308 if (slp_node || PURE_SLP_STMT (stmt_info))
4309 ncopies = 1;
4310 else
4311 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4313 gcc_assert (ncopies >= 1);
4315 /* Determine whether the shift amount is a vector, or scalar. If the
4316 shift/rotate amount is a vector, use the vector/vector shift optabs. */
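/* For example, a[i] = b[i] << 3 or a shift by a loop-invariant N keeps a
   scalar shift argument, whereas a[i] = b[i] << c[i] defines the shift
   amount inside the loop and needs the vector/vector form.  */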
4318 if (dt[1] == vect_internal_def && !slp_node)
4319 scalar_shift_arg = false;
4320 else if (dt[1] == vect_constant_def
4321 || dt[1] == vect_external_def
4322 || dt[1] == vect_internal_def)
4324 /* In SLP, we need to check whether the shift count is the same for
4325 all the statements; in loops, if it is a constant or invariant, it
4326 is always a scalar shift. */
4327 if (slp_node)
4329 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4330 gimple slpstmt;
4332 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4333 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4334 scalar_shift_arg = false;
4337 else
4339 if (dump_enabled_p ())
4340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4341 "operand mode requires invariant argument.\n");
4342 return false;
4345 /* Vector shifted by vector. */
4346 if (!scalar_shift_arg)
4348 optab = optab_for_tree_code (code, vectype, optab_vector);
4349 if (dump_enabled_p ())
4350 dump_printf_loc (MSG_NOTE, vect_location,
4351 "vector/vector shift/rotate found.\n");
4353 if (!op1_vectype)
4354 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4355 if (op1_vectype == NULL_TREE
4356 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4358 if (dump_enabled_p ())
4359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4360 "unusable type for last operand in"
4361 " vector/vector shift/rotate.\n");
4362 return false;
4365 /* See if the machine has a vector shifted by scalar insn and if not
4366 then see if it has a vector shifted by vector insn. */
4367 else
4369 optab = optab_for_tree_code (code, vectype, optab_scalar);
4370 if (optab
4371 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4373 if (dump_enabled_p ())
4374 dump_printf_loc (MSG_NOTE, vect_location,
4375 "vector/scalar shift/rotate found.\n");
4377 else
4379 optab = optab_for_tree_code (code, vectype, optab_vector);
4380 if (optab
4381 && (optab_handler (optab, TYPE_MODE (vectype))
4382 != CODE_FOR_nothing))
4384 scalar_shift_arg = false;
4386 if (dump_enabled_p ())
4387 dump_printf_loc (MSG_NOTE, vect_location,
4388 "vector/vector shift/rotate found.\n");
4390 /* Unlike the other binary operators, shifts/rotates have
4391 an int rhs rather than one of the same type as the lhs,
4392 so make sure the scalar is the right type if we are
4393 dealing with vectors of long long/long/short/char. */
4394 if (dt[1] == vect_constant_def)
4395 op1 = fold_convert (TREE_TYPE (vectype), op1);
4396 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4397 TREE_TYPE (op1)))
4399 if (slp_node
4400 && TYPE_MODE (TREE_TYPE (vectype))
4401 != TYPE_MODE (TREE_TYPE (op1)))
4403 if (dump_enabled_p ())
4404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4405 "unusable type for last operand in"
4406 " vector/vector shift/rotate.\n");
4407 return false;
4409 if (vec_stmt && !slp_node)
4411 op1 = fold_convert (TREE_TYPE (vectype), op1);
4412 op1 = vect_init_vector (stmt, op1,
4413 TREE_TYPE (vectype), NULL);
4420 /* Supportable by target? */
4421 if (!optab)
4423 if (dump_enabled_p ())
4424 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4425 "no optab.\n");
4426 return false;
4428 vec_mode = TYPE_MODE (vectype);
4429 icode = (int) optab_handler (optab, vec_mode);
4430 if (icode == CODE_FOR_nothing)
4432 if (dump_enabled_p ())
4433 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4434 "op not supported by target.\n");
4435 /* Check only during analysis. */
4436 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4437 || (vf < vect_min_worthwhile_factor (code)
4438 && !vec_stmt))
4439 return false;
4440 if (dump_enabled_p ())
4441 dump_printf_loc (MSG_NOTE, vect_location,
4442 "proceeding using word mode.\n");
4445 /* Worthwhile without SIMD support? Check only during analysis. */
4446 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4447 && vf < vect_min_worthwhile_factor (code)
4448 && !vec_stmt)
4450 if (dump_enabled_p ())
4451 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4452 "not worthwhile without SIMD support.\n");
4453 return false;
4456 if (!vec_stmt) /* transformation not required. */
4458 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4459 if (dump_enabled_p ())
4460 dump_printf_loc (MSG_NOTE, vect_location,
4461 "=== vectorizable_shift ===\n");
4462 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4463 return true;
4466 /** Transform. **/
4468 if (dump_enabled_p ())
4469 dump_printf_loc (MSG_NOTE, vect_location,
4470 "transform binary/unary operation.\n");
4472 /* Handle def. */
4473 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4475 prev_stmt_info = NULL;
4476 for (j = 0; j < ncopies; j++)
4478 /* Handle uses. */
4479 if (j == 0)
4481 if (scalar_shift_arg)
4483 /* Vector shl and shr insn patterns can be defined with scalar
4484 operand 2 (shift operand). In this case, use constant or loop
4485 invariant op1 directly, without extending it to vector mode
4486 first. */
4487 optab_op2_mode = insn_data[icode].operand[2].mode;
4488 if (!VECTOR_MODE_P (optab_op2_mode))
4490 if (dump_enabled_p ())
4491 dump_printf_loc (MSG_NOTE, vect_location,
4492 "operand 1 using scalar mode.\n");
4493 vec_oprnd1 = op1;
4494 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4495 vec_oprnds1.quick_push (vec_oprnd1);
4496 if (slp_node)
4498 /* Store vec_oprnd1 for every vector stmt to be created
4499 for SLP_NODE. We check during the analysis that all
4500 the shift arguments are the same.
4501 TODO: Allow different constants for different vector
4502 stmts generated for an SLP instance. */
4503 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4504 vec_oprnds1.quick_push (vec_oprnd1);
4509 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4510 (a special case for certain kinds of vector shifts); otherwise,
4511 operand 1 should be of a vector type (the usual case). */
4512 if (vec_oprnd1)
4513 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4514 slp_node, -1);
4515 else
4516 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4517 slp_node, -1);
4519 else
4520 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4522 /* Arguments are ready. Create the new vector stmt. */
4523 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4525 vop1 = vec_oprnds1[i];
4526 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
4527 new_temp = make_ssa_name (vec_dest, new_stmt);
4528 gimple_assign_set_lhs (new_stmt, new_temp);
4529 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4530 if (slp_node)
4531 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4534 if (slp_node)
4535 continue;
4537 if (j == 0)
4538 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4539 else
4540 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4541 prev_stmt_info = vinfo_for_stmt (new_stmt);
4544 vec_oprnds0.release ();
4545 vec_oprnds1.release ();
4547 return true;
4551 /* Function vectorizable_operation.
4553 Check if STMT performs a binary, unary or ternary operation that can
4554 be vectorized.
4555 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4556 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4557 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4559 static bool
4560 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4561 gimple *vec_stmt, slp_tree slp_node)
4563 tree vec_dest;
4564 tree scalar_dest;
4565 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4566 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4567 tree vectype;
4568 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4569 enum tree_code code;
4570 machine_mode vec_mode;
4571 tree new_temp;
4572 int op_type;
4573 optab optab;
4574 int icode;
4575 tree def;
4576 gimple def_stmt;
4577 enum vect_def_type dt[3]
4578 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4579 gimple new_stmt = NULL;
4580 stmt_vec_info prev_stmt_info;
4581 int nunits_in;
4582 int nunits_out;
4583 tree vectype_out;
4584 int ncopies;
4585 int j, i;
4586 vec<tree> vec_oprnds0 = vNULL;
4587 vec<tree> vec_oprnds1 = vNULL;
4588 vec<tree> vec_oprnds2 = vNULL;
4589 tree vop0, vop1, vop2;
4590 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4591 int vf;
4593 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4594 return false;
4596 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4597 return false;
4599 /* Is STMT a vectorizable binary/unary operation? */
4600 if (!is_gimple_assign (stmt))
4601 return false;
4603 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4604 return false;
4606 code = gimple_assign_rhs_code (stmt);
4608 /* For pointer addition, we should use the normal plus for
4609 the vector addition. */
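/* For example (illustrative), q_1 = p_2 + 4 (a POINTER_PLUS_EXPR) is
   vectorized as an ordinary element-wise PLUS_EXPR on the vectors of
   pointer values.  */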
4610 if (code == POINTER_PLUS_EXPR)
4611 code = PLUS_EXPR;
4613 /* Support only unary or binary operations. */
4614 op_type = TREE_CODE_LENGTH (code);
4615 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4617 if (dump_enabled_p ())
4618 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4619 "num. args = %d (not unary/binary/ternary op).\n",
4620 op_type);
4621 return false;
4624 scalar_dest = gimple_assign_lhs (stmt);
4625 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4627 /* Most operations cannot handle bit-precision types without extra
4628 truncations. */
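/* For example (illustrative), adding two 3-bit bit-field values would need
   an extra truncation after every vector add to keep results in range, so
   only the bitwise IOR/XOR/AND cases below are allowed for such types.  */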
4629 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4630 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4631 /* Exceptions are bitwise binary operations. */
4632 && code != BIT_IOR_EXPR
4633 && code != BIT_XOR_EXPR
4634 && code != BIT_AND_EXPR)
4636 if (dump_enabled_p ())
4637 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4638 "bit-precision arithmetic not supported.\n");
4639 return false;
4642 op0 = gimple_assign_rhs1 (stmt);
4643 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4644 &def_stmt, &def, &dt[0], &vectype))
4646 if (dump_enabled_p ())
4647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4648 "use not simple.\n");
4649 return false;
4651 /* If op0 is an external or constant def use a vector type with
4652 the same size as the output vector type. */
4653 if (!vectype)
4654 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4655 if (vec_stmt)
4656 gcc_assert (vectype);
4657 if (!vectype)
4659 if (dump_enabled_p ())
4661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4662 "no vectype for scalar type ");
4663 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4664 TREE_TYPE (op0));
4665 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4668 return false;
4671 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4672 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4673 if (nunits_out != nunits_in)
4674 return false;
4676 if (op_type == binary_op || op_type == ternary_op)
4678 op1 = gimple_assign_rhs2 (stmt);
4679 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4680 &def, &dt[1]))
4682 if (dump_enabled_p ())
4683 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4684 "use not simple.\n");
4685 return false;
4688 if (op_type == ternary_op)
4690 op2 = gimple_assign_rhs3 (stmt);
4691 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4692 &def, &dt[2]))
4694 if (dump_enabled_p ())
4695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4696 "use not simple.\n");
4697 return false;
4701 if (loop_vinfo)
4702 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4703 else
4704 vf = 1;
4706 /* Multiple types in SLP are handled by creating the appropriate number of
4707 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4708 case of SLP. */
4709 if (slp_node || PURE_SLP_STMT (stmt_info))
4710 ncopies = 1;
4711 else
4712 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4714 gcc_assert (ncopies >= 1);
4716 /* Shifts are handled in vectorizable_shift (). */
4717 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4718 || code == RROTATE_EXPR)
4719 return false;
4721 /* Supportable by target? */
4723 vec_mode = TYPE_MODE (vectype);
4724 if (code == MULT_HIGHPART_EXPR)
4726 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4727 icode = LAST_INSN_CODE;
4728 else
4729 icode = CODE_FOR_nothing;
4731 else
4733 optab = optab_for_tree_code (code, vectype, optab_default);
4734 if (!optab)
4736 if (dump_enabled_p ())
4737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4738 "no optab.\n");
4739 return false;
4741 icode = (int) optab_handler (optab, vec_mode);
4744 if (icode == CODE_FOR_nothing)
4746 if (dump_enabled_p ())
4747 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4748 "op not supported by target.\n");
4749 /* Check only during analysis. */
4750 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4751 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4752 return false;
4753 if (dump_enabled_p ())
4754 dump_printf_loc (MSG_NOTE, vect_location,
4755 "proceeding using word mode.\n");
4758 /* Worthwhile without SIMD support? Check only during analysis. */
4759 if (!VECTOR_MODE_P (vec_mode)
4760 && !vec_stmt
4761 && vf < vect_min_worthwhile_factor (code))
4763 if (dump_enabled_p ())
4764 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4765 "not worthwhile without SIMD support.\n");
4766 return false;
4769 if (!vec_stmt) /* transformation not required. */
4771 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4772 if (dump_enabled_p ())
4773 dump_printf_loc (MSG_NOTE, vect_location,
4774 "=== vectorizable_operation ===\n");
4775 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4776 return true;
4779 /** Transform. **/
4781 if (dump_enabled_p ())
4782 dump_printf_loc (MSG_NOTE, vect_location,
4783 "transform binary/unary operation.\n");
4785 /* Handle def. */
4786 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4788 /* In case the vectorization factor (VF) is bigger than the number
4789 of elements that we can fit in a vectype (nunits), we have to generate
4790 more than one vector stmt - i.e., we need to "unroll" the
4791 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4792 from one copy of the vector stmt to the next, in the field
4793 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4794 stages to find the correct vector defs to be used when vectorizing
4795 stmts that use the defs of the current stmt. The example below
4796 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4797 we need to create 4 vectorized stmts):
4799 before vectorization:
4800 RELATED_STMT VEC_STMT
4801 S1: x = memref - -
4802 S2: z = x + 1 - -
4804 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4805 there):
4806 RELATED_STMT VEC_STMT
4807 VS1_0: vx0 = memref0 VS1_1 -
4808 VS1_1: vx1 = memref1 VS1_2 -
4809 VS1_2: vx2 = memref2 VS1_3 -
4810 VS1_3: vx3 = memref3 - -
4811 S1: x = load - VS1_0
4812 S2: z = x + 1 - -
4814 step2: vectorize stmt S2 (done here):
4815 To vectorize stmt S2 we first need to find the relevant vector
4816 def for the first operand 'x'. This is, as usual, obtained from
4817 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4818 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4819 relevant vector def 'vx0'. Having found 'vx0' we can generate
4820 the vector stmt VS2_0, and as usual, record it in the
4821 STMT_VINFO_VEC_STMT of stmt S2.
4822 When creating the second copy (VS2_1), we obtain the relevant vector
4823 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4824 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4825 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4826 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4827 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4828 chain of stmts and pointers:
4829 RELATED_STMT VEC_STMT
4830 VS1_0: vx0 = memref0 VS1_1 -
4831 VS1_1: vx1 = memref1 VS1_2 -
4832 VS1_2: vx2 = memref2 VS1_3 -
4833 VS1_3: vx3 = memref3 - -
4834 S1: x = load - VS1_0
4835 VS2_0: vz0 = vx0 + v1 VS2_1 -
4836 VS2_1: vz1 = vx1 + v1 VS2_2 -
4837 VS2_2: vz2 = vx2 + v1 VS2_3 -
4838 VS2_3: vz3 = vx3 + v1 - -
4839 S2: z = x + 1 - VS2_0 */
4841 prev_stmt_info = NULL;
4842 for (j = 0; j < ncopies; j++)
4844 /* Handle uses. */
4845 if (j == 0)
4847 if (op_type == binary_op || op_type == ternary_op)
4848 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4849 slp_node, -1);
4850 else
4851 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4852 slp_node, -1);
4853 if (op_type == ternary_op)
4855 vec_oprnds2.create (1);
4856 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4857 stmt,
4858 NULL));
4861 else
4863 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4864 if (op_type == ternary_op)
4866 tree vec_oprnd = vec_oprnds2.pop ();
4867 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4868 vec_oprnd));
4872 /* Arguments are ready. Create the new vector stmt. */
4873 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4875 vop1 = ((op_type == binary_op || op_type == ternary_op)
4876 ? vec_oprnds1[i] : NULL_TREE);
4877 vop2 = ((op_type == ternary_op)
4878 ? vec_oprnds2[i] : NULL_TREE);
4879 new_stmt = gimple_build_assign_with_ops (code, vec_dest,
4880 vop0, vop1, vop2);
4881 new_temp = make_ssa_name (vec_dest, new_stmt);
4882 gimple_assign_set_lhs (new_stmt, new_temp);
4883 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4884 if (slp_node)
4885 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4888 if (slp_node)
4889 continue;
4891 if (j == 0)
4892 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4893 else
4894 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4895 prev_stmt_info = vinfo_for_stmt (new_stmt);
4898 vec_oprnds0.release ();
4899 vec_oprnds1.release ();
4900 vec_oprnds2.release ();
4902 return true;
4905 /* A helper function to ensure data reference DR's base alignment
4906 for STMT_INFO. */
4908 static void
4909 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4911 if (!dr->aux)
4912 return;
4914 if (((dataref_aux *)dr->aux)->base_misaligned)
4916 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4917 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4919 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4920 DECL_USER_ALIGN (base_decl) = 1;
4921 ((dataref_aux *)dr->aux)->base_misaligned = false;
4926 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4927 reversal of the vector elements. If that is impossible to do,
4928 returns NULL. */
4930 static tree
4931 perm_mask_for_reverse (tree vectype)
4933 int i, nunits;
4934 unsigned char *sel;
4936 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4937 sel = XALLOCAVEC (unsigned char, nunits);
4939 for (i = 0; i < nunits; ++i)
4940 sel[i] = nunits - 1 - i;
4942 return vect_gen_perm_mask (vectype, sel);
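/* For example, for a four-element vector type the selector built above is
   {3, 2, 1, 0}, so the returned mask reverses the element order when used
   as the selector of a VEC_PERM_EXPR.  */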
4945 /* Function vectorizable_store.
4947 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4948 can be vectorized.
4949 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4950 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4951 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4953 static bool
4954 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4955 slp_tree slp_node)
4957 tree scalar_dest;
4958 tree data_ref;
4959 tree op;
4960 tree vec_oprnd = NULL_TREE;
4961 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4962 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4963 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4964 tree elem_type;
4965 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4966 struct loop *loop = NULL;
4967 machine_mode vec_mode;
4968 tree dummy;
4969 enum dr_alignment_support alignment_support_scheme;
4970 tree def;
4971 gimple def_stmt;
4972 enum vect_def_type dt;
4973 stmt_vec_info prev_stmt_info = NULL;
4974 tree dataref_ptr = NULL_TREE;
4975 tree dataref_offset = NULL_TREE;
4976 gimple ptr_incr = NULL;
4977 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4978 int ncopies;
4979 int j;
4980 gimple next_stmt, first_stmt = NULL;
4981 bool grouped_store = false;
4982 bool store_lanes_p = false;
4983 unsigned int group_size, i;
4984 vec<tree> dr_chain = vNULL;
4985 vec<tree> oprnds = vNULL;
4986 vec<tree> result_chain = vNULL;
4987 bool inv_p;
4988 bool negative = false;
4989 tree offset = NULL_TREE;
4990 vec<tree> vec_oprnds = vNULL;
4991 bool slp = (slp_node != NULL);
4992 unsigned int vec_num;
4993 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4994 tree aggr_type;
4996 if (loop_vinfo)
4997 loop = LOOP_VINFO_LOOP (loop_vinfo);
4999 /* Multiple types in SLP are handled by creating the appropriate number of
5000 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5001 case of SLP. */
5002 if (slp || PURE_SLP_STMT (stmt_info))
5003 ncopies = 1;
5004 else
5005 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5007 gcc_assert (ncopies >= 1);
5009 /* FORNOW. This restriction should be relaxed. */
5010 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5012 if (dump_enabled_p ())
5013 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5014 "multiple types in nested loop.\n");
5015 return false;
5018 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5019 return false;
5021 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5022 return false;
5024 /* Is vectorizable store? */
5026 if (!is_gimple_assign (stmt))
5027 return false;
5029 scalar_dest = gimple_assign_lhs (stmt);
5030 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5031 && is_pattern_stmt_p (stmt_info))
5032 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5033 if (TREE_CODE (scalar_dest) != ARRAY_REF
5034 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5035 && TREE_CODE (scalar_dest) != INDIRECT_REF
5036 && TREE_CODE (scalar_dest) != COMPONENT_REF
5037 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5038 && TREE_CODE (scalar_dest) != REALPART_EXPR
5039 && TREE_CODE (scalar_dest) != MEM_REF)
5040 return false;
5042 gcc_assert (gimple_assign_single_p (stmt));
5043 op = gimple_assign_rhs1 (stmt);
5044 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5045 &def, &dt))
5047 if (dump_enabled_p ())
5048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5049 "use not simple.\n");
5050 return false;
5053 elem_type = TREE_TYPE (vectype);
5054 vec_mode = TYPE_MODE (vectype);
5056 /* FORNOW. In some cases can vectorize even if data-type not supported
5057 (e.g. - array initialization with 0). */
5058 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5059 return false;
5061 if (!STMT_VINFO_DATA_REF (stmt_info))
5062 return false;
5064 negative =
5065 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5066 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5067 size_zero_node) < 0;
5068 if (negative && ncopies > 1)
5070 if (dump_enabled_p ())
5071 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5072 "multiple types with negative step.\n");
5073 return false;
5076 if (negative)
5078 gcc_assert (!grouped_store);
5079 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5080 if (alignment_support_scheme != dr_aligned
5081 && alignment_support_scheme != dr_unaligned_supported)
5083 if (dump_enabled_p ())
5084 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5085 "negative step but alignment required.\n");
5086 return false;
5088 if (dt != vect_constant_def
5089 && dt != vect_external_def
5090 && !perm_mask_for_reverse (vectype))
5092 if (dump_enabled_p ())
5093 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5094 "negative step and reversing not supported.\n");
5095 return false;
5099 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5101 grouped_store = true;
5102 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5103 if (!slp && !PURE_SLP_STMT (stmt_info))
5105 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5106 if (vect_store_lanes_supported (vectype, group_size))
5107 store_lanes_p = true;
5108 else if (!vect_grouped_store_supported (vectype, group_size))
5109 return false;
5112 if (first_stmt == stmt)
5114 /* STMT is the leader of the group. Check the operands of all the
5115 stmts of the group. */
5116 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5117 while (next_stmt)
5119 gcc_assert (gimple_assign_single_p (next_stmt));
5120 op = gimple_assign_rhs1 (next_stmt);
5121 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5122 &def_stmt, &def, &dt))
5124 if (dump_enabled_p ())
5125 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5126 "use not simple.\n");
5127 return false;
5129 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5134 if (!vec_stmt) /* transformation not required. */
5136 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5137 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5138 NULL, NULL, NULL);
5139 return true;
5142 /** Transform. **/
5144 ensure_base_align (stmt_info, dr);
5146 if (grouped_store)
5148 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5149 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5151 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5153 /* FORNOW */
5154 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5156 /* We vectorize all the stmts of the interleaving group when we
5157 reach the last stmt in the group. */
5158 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5159 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5160 && !slp)
5162 *vec_stmt = NULL;
5163 return true;
5166 if (slp)
5168 grouped_store = false;
5169 /* VEC_NUM is the number of vect stmts to be created for this
5170 group. */
5171 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5172 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5173 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5174 op = gimple_assign_rhs1 (first_stmt);
5176 else
5177 /* VEC_NUM is the number of vect stmts to be created for this
5178 group. */
5179 vec_num = group_size;
5181 else
5183 first_stmt = stmt;
5184 first_dr = dr;
5185 group_size = vec_num = 1;
5188 if (dump_enabled_p ())
5189 dump_printf_loc (MSG_NOTE, vect_location,
5190 "transform store. ncopies = %d\n", ncopies);
5192 dr_chain.create (group_size);
5193 oprnds.create (group_size);
5195 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5196 gcc_assert (alignment_support_scheme);
5197 /* Targets with store-lane instructions must not require explicit
5198 realignment. */
5199 gcc_assert (!store_lanes_p
5200 || alignment_support_scheme == dr_aligned
5201 || alignment_support_scheme == dr_unaligned_supported);
5203 if (negative)
5204 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5206 if (store_lanes_p)
5207 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5208 else
5209 aggr_type = vectype;
5211 /* In case the vectorization factor (VF) is bigger than the number
5212 of elements that we can fit in a vectype (nunits), we have to generate
5213 more than one vector stmt - i.e., we need to "unroll" the
5214 vector stmt by a factor VF/nunits. For more details see documentation in
5215 vect_get_vec_def_for_copy_stmt. */
5217 /* In case of interleaving (non-unit grouped access):
5219 S1: &base + 2 = x2
5220 S2: &base = x0
5221 S3: &base + 1 = x1
5222 S4: &base + 3 = x3
5224 We create vectorized stores starting from base address (the access of the
5225 first stmt in the chain (S2 in the above example), when the last store stmt
5226 of the chain (S4) is reached:
5228 VS1: &base = vx2
5229 VS2: &base + vec_size*1 = vx0
5230 VS3: &base + vec_size*2 = vx1
5231 VS4: &base + vec_size*3 = vx3
5233 Then permutation statements are generated:
5235 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5236 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5239 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5240 (the order of the data-refs in the output of vect_permute_store_chain
5241 corresponds to the order of scalar stmts in the interleaving chain - see
5242 the documentation of vect_permute_store_chain()).
5244 In case of both multiple types and interleaving, above vector stores and
5245 permutation stmts are created for every copy. The result vector stmts are
5246 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5247 STMT_VINFO_RELATED_STMT for the next copies.
5250 prev_stmt_info = NULL;
5251 for (j = 0; j < ncopies; j++)
5253 gimple new_stmt;
5255 if (j == 0)
5257 if (slp)
5259 /* Get vectorized arguments for SLP_NODE. */
5260 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5261 NULL, slp_node, -1);
5263 vec_oprnd = vec_oprnds[0];
5265 else
5267 /* For interleaved stores we collect vectorized defs for all the
5268 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5269 used as an input to vect_permute_store_chain(), and OPRNDS as
5270 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5272 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5273 OPRNDS are of size 1. */
5274 next_stmt = first_stmt;
5275 for (i = 0; i < group_size; i++)
5277 /* Since gaps are not supported for interleaved stores,
5278 GROUP_SIZE is the exact number of stmts in the chain.
5279 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5280 there is no interleaving, GROUP_SIZE is 1, and only one
5281 iteration of the loop will be executed. */
5282 gcc_assert (next_stmt
5283 && gimple_assign_single_p (next_stmt));
5284 op = gimple_assign_rhs1 (next_stmt);
5286 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5287 NULL);
5288 dr_chain.quick_push (vec_oprnd);
5289 oprnds.quick_push (vec_oprnd);
5290 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5294 /* We should have caught mismatched types earlier. */
5295 gcc_assert (useless_type_conversion_p (vectype,
5296 TREE_TYPE (vec_oprnd)));
5297 bool simd_lane_access_p
5298 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5299 if (simd_lane_access_p
5300 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5301 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5302 && integer_zerop (DR_OFFSET (first_dr))
5303 && integer_zerop (DR_INIT (first_dr))
5304 && alias_sets_conflict_p (get_alias_set (aggr_type),
5305 get_alias_set (DR_REF (first_dr))))
5307 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5308 dataref_offset = build_int_cst (reference_alias_ptr_type
5309 (DR_REF (first_dr)), 0);
5310 inv_p = false;
5312 else
5313 dataref_ptr
5314 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5315 simd_lane_access_p ? loop : NULL,
5316 offset, &dummy, gsi, &ptr_incr,
5317 simd_lane_access_p, &inv_p);
5318 gcc_assert (bb_vinfo || !inv_p);
5320 else
5322 /* For interleaved stores we created vectorized defs for all the
5323 defs stored in OPRNDS in the previous iteration (previous copy).
5324 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5325 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5326 next copy.
5327 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5328 OPRNDS are of size 1. */
5329 for (i = 0; i < group_size; i++)
5331 op = oprnds[i];
5332 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5333 &def, &dt);
5334 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5335 dr_chain[i] = vec_oprnd;
5336 oprnds[i] = vec_oprnd;
5338 if (dataref_offset)
5339 dataref_offset
5340 = int_const_binop (PLUS_EXPR, dataref_offset,
5341 TYPE_SIZE_UNIT (aggr_type));
5342 else
5343 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5344 TYPE_SIZE_UNIT (aggr_type));
5347 if (store_lanes_p)
5349 tree vec_array;
5351 /* Combine all the vectors into an array. */
5352 vec_array = create_vector_array (vectype, vec_num);
5353 for (i = 0; i < vec_num; i++)
5355 vec_oprnd = dr_chain[i];
5356 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5359 /* Emit:
5360 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5361 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5362 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5363 gimple_call_set_lhs (new_stmt, data_ref);
5364 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5366 else
5368 new_stmt = NULL;
5369 if (grouped_store)
5371 if (j == 0)
5372 result_chain.create (group_size);
5373 /* Permute. */
5374 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5375 &result_chain);
5378 next_stmt = first_stmt;
5379 for (i = 0; i < vec_num; i++)
5381 unsigned align, misalign;
5383 if (i > 0)
5384 /* Bump the vector pointer. */
5385 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5386 stmt, NULL_TREE);
5388 if (slp)
5389 vec_oprnd = vec_oprnds[i];
5390 else if (grouped_store)
5391 /* For grouped stores vectorized defs are interleaved in
5392 vect_permute_store_chain(). */
5393 vec_oprnd = result_chain[i];
5395 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5396 dataref_offset
5397 ? dataref_offset
5398 : build_int_cst (reference_alias_ptr_type
5399 (DR_REF (first_dr)), 0));
5400 align = TYPE_ALIGN_UNIT (vectype);
5401 if (aligned_access_p (first_dr))
5402 misalign = 0;
5403 else if (DR_MISALIGNMENT (first_dr) == -1)
5405 TREE_TYPE (data_ref)
5406 = build_aligned_type (TREE_TYPE (data_ref),
5407 TYPE_ALIGN (elem_type));
5408 align = TYPE_ALIGN_UNIT (elem_type);
5409 misalign = 0;
5411 else
5413 TREE_TYPE (data_ref)
5414 = build_aligned_type (TREE_TYPE (data_ref),
5415 TYPE_ALIGN (elem_type));
5416 misalign = DR_MISALIGNMENT (first_dr);
5418 if (dataref_offset == NULL_TREE)
5419 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5420 misalign);
5422 if (negative
5423 && dt != vect_constant_def
5424 && dt != vect_external_def)
5426 tree perm_mask = perm_mask_for_reverse (vectype);
5427 tree perm_dest
5428 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5429 vectype);
5430 tree new_temp = make_ssa_name (perm_dest, NULL);
5432 /* Generate the permute statement. */
5433 gimple perm_stmt
5434 = gimple_build_assign_with_ops (VEC_PERM_EXPR, new_temp,
5435 vec_oprnd, vec_oprnd,
5436 perm_mask);
5437 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5439 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5440 vec_oprnd = new_temp;
5443 /* Arguments are ready. Create the new vector stmt. */
5444 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5445 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5447 if (slp)
5448 continue;
5450 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5451 if (!next_stmt)
5452 break;
5455 if (!slp)
5457 if (j == 0)
5458 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5459 else
5460 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5461 prev_stmt_info = vinfo_for_stmt (new_stmt);
5465 dr_chain.release ();
5466 oprnds.release ();
5467 result_chain.release ();
5468 vec_oprnds.release ();
5470 return true;
5473 /* Given a vector type VECTYPE and permutation SEL returns
5474 the VECTOR_CST mask that implements the permutation of the
5475 vector elements. If that is impossible to do, returns NULL. */
5477 tree
5478 vect_gen_perm_mask (tree vectype, unsigned char *sel)
5480 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5481 int i, nunits;
5483 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5485 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5486 return NULL;
5488 mask_elt_type = lang_hooks.types.type_for_mode
5489 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5490 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5492 mask_elts = XALLOCAVEC (tree, nunits);
5493 for (i = nunits - 1; i >= 0; i--)
5494 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5495 mask_vec = build_vector (mask_type, mask_elts);
5497 return mask_vec;
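/* For example (illustrative), with a four-element vector type the selector
   {0, 4, 1, 5} picks elements from the concatenation of the two
   VEC_PERM_EXPR inputs and interleaves their low halves; selectors the
   target cannot implement are rejected by can_vec_perm_p above and NULL
   is returned.  */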
5500 /* Given vector variables X and Y that were generated for the scalar
5501 STMT, generate instructions to permute the vector elements of X and Y
5502 using permutation mask MASK_VEC, insert them at *GSI and return the
5503 permuted vector variable. */
5505 static tree
5506 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5507 gimple_stmt_iterator *gsi)
5509 tree vectype = TREE_TYPE (x);
5510 tree perm_dest, data_ref;
5511 gimple perm_stmt;
5513 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5514 data_ref = make_ssa_name (perm_dest, NULL);
5516 /* Generate the permute statement. */
5517 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5518 x, y, mask_vec);
5519 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5521 return data_ref;
5524 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5525 inserting them on the loop's preheader edge. Returns true if we
5526 were successful in doing so (and thus STMT can then be moved),
5527 otherwise returns false. */
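/* Illustrative (hypothetical) example: for a load whose address uses
     off_1 = step_2 * 4;
   where step_2 is defined outside LOOP, the definition of off_1 is moved
   to the preheader edge so the load itself can then be hoisted; if off_1
   in turn used another value defined inside LOOP, the function would give
   up instead of recursing.  */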
5529 static bool
5530 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5532 ssa_op_iter i;
5533 tree op;
5534 bool any = false;
5536 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5538 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5539 if (!gimple_nop_p (def_stmt)
5540 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5542 /* Make sure we don't need to recurse. While we could do
5543 so in simple cases, when there are more complex use webs
5544 we don't have an easy way to preserve stmt order to fulfil
5545 dependencies within them. */
5546 tree op2;
5547 ssa_op_iter i2;
5548 if (gimple_code (def_stmt) == GIMPLE_PHI)
5549 return false;
5550 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5552 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5553 if (!gimple_nop_p (def_stmt2)
5554 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5555 return false;
5557 any = true;
5561 if (!any)
5562 return true;
5564 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5566 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5567 if (!gimple_nop_p (def_stmt)
5568 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5570 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5571 gsi_remove (&gsi, false);
5572 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5576 return true;
5579 /* vectorizable_load.
5581 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5582 can be vectorized.
5583 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5584 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5585 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5587 static bool
5588 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5589 slp_tree slp_node, slp_instance slp_node_instance)
5591 tree scalar_dest;
5592 tree vec_dest = NULL;
5593 tree data_ref = NULL;
5594 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5595 stmt_vec_info prev_stmt_info;
5596 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5597 struct loop *loop = NULL;
5598 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5599 bool nested_in_vect_loop = false;
5600 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5601 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5602 tree elem_type;
5603 tree new_temp;
5604 machine_mode mode;
5605 gimple new_stmt = NULL;
5606 tree dummy;
5607 enum dr_alignment_support alignment_support_scheme;
5608 tree dataref_ptr = NULL_TREE;
5609 tree dataref_offset = NULL_TREE;
5610 gimple ptr_incr = NULL;
5611 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5612 int ncopies;
5613 int i, j, group_size, group_gap;
5614 tree msq = NULL_TREE, lsq;
5615 tree offset = NULL_TREE;
5616 tree byte_offset = NULL_TREE;
5617 tree realignment_token = NULL_TREE;
5618 gimple phi = NULL;
5619 vec<tree> dr_chain = vNULL;
5620 bool grouped_load = false;
5621 bool load_lanes_p = false;
5622 gimple first_stmt;
5623 bool inv_p;
5624 bool negative = false;
5625 bool compute_in_loop = false;
5626 struct loop *at_loop;
5627 int vec_num;
5628 bool slp = (slp_node != NULL);
5629 bool slp_perm = false;
5630 enum tree_code code;
5631 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5632 int vf;
5633 tree aggr_type;
5634 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5635 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5636 int gather_scale = 1;
5637 enum vect_def_type gather_dt = vect_unknown_def_type;
5639 if (loop_vinfo)
5641 loop = LOOP_VINFO_LOOP (loop_vinfo);
5642 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5643 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5645 else
5646 vf = 1;
5648 /* Multiple types in SLP are handled by creating the appropriate number of
5649 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5650 case of SLP. */
5651 if (slp || PURE_SLP_STMT (stmt_info))
5652 ncopies = 1;
5653 else
5654 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5656 gcc_assert (ncopies >= 1);
5658 /* FORNOW. This restriction should be relaxed. */
5659 if (nested_in_vect_loop && ncopies > 1)
5661 if (dump_enabled_p ())
5662 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5663 "multiple types in nested loop.\n");
5664 return false;
5667 /* Invalidate assumptions made by dependence analysis when vectorization
5668 on the unrolled body effectively re-orders stmts. */
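/* For example (illustrative), if the minimum negative dependence distance
   is 2 and VF is 4, one unrolled vector body spans iterations that the
   scalar loop keeps only two apart, so reusing (CSEing) vector defs across
   the unrolled copies would be unsafe; bail out in that case.  */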
5669 if (ncopies > 1
5670 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5671 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5672 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5674 if (dump_enabled_p ())
5675 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5676 "cannot perform implicit CSE when unrolling "
5677 "with negative dependence distance\n");
5678 return false;
5681 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5682 return false;
5684 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5685 return false;
5687 /* Is vectorizable load? */
5688 if (!is_gimple_assign (stmt))
5689 return false;
5691 scalar_dest = gimple_assign_lhs (stmt);
5692 if (TREE_CODE (scalar_dest) != SSA_NAME)
5693 return false;
5695 code = gimple_assign_rhs_code (stmt);
5696 if (code != ARRAY_REF
5697 && code != BIT_FIELD_REF
5698 && code != INDIRECT_REF
5699 && code != COMPONENT_REF
5700 && code != IMAGPART_EXPR
5701 && code != REALPART_EXPR
5702 && code != MEM_REF
5703 && TREE_CODE_CLASS (code) != tcc_declaration)
5704 return false;
5706 if (!STMT_VINFO_DATA_REF (stmt_info))
5707 return false;
5709 elem_type = TREE_TYPE (vectype);
5710 mode = TYPE_MODE (vectype);
5712 /* FORNOW. In some cases can vectorize even if data-type not supported
5713 (e.g. - data copies). */
5714 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5716 if (dump_enabled_p ())
5717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5718 "Aligned load, but unsupported type.\n");
5719 return false;
5722 /* Check if the load is a part of an interleaving chain. */
5723 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5725 grouped_load = true;
5726 /* FORNOW */
5727 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5729 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5730 if (!slp && !PURE_SLP_STMT (stmt_info))
5732 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5733 if (vect_load_lanes_supported (vectype, group_size))
5734 load_lanes_p = true;
5735 else if (!vect_grouped_load_supported (vectype, group_size))
5736 return false;
5739 /* Invalidate assumptions made by dependence analysis when vectorization
5740 on the unrolled body effectively re-orders stmts. */
5741 if (!PURE_SLP_STMT (stmt_info)
5742 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5743 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5744 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5746 if (dump_enabled_p ())
5747 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5748 "cannot perform implicit CSE when performing "
5749 "group loads with negative dependence distance\n");
5750 return false;
5755 if (STMT_VINFO_GATHER_P (stmt_info))
5757 gimple def_stmt;
5758 tree def;
5759 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5760 &gather_off, &gather_scale);
5761 gcc_assert (gather_decl);
5762 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5763 &def_stmt, &def, &gather_dt,
5764 &gather_off_vectype))
5766 if (dump_enabled_p ())
5767 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5768 "gather index use not simple.\n");
5769 return false;
5772 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5774 else
5776 negative = tree_int_cst_compare (nested_in_vect_loop
5777 ? STMT_VINFO_DR_STEP (stmt_info)
5778 : DR_STEP (dr),
5779 size_zero_node) < 0;
5780 if (negative && ncopies > 1)
5782 if (dump_enabled_p ())
5783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5784 "multiple types with negative step.\n");
5785 return false;
5788 if (negative)
5790 if (grouped_load)
5792 if (dump_enabled_p ())
5793 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5794 "negative step for group load not supported"
5795 "\n");
5796 return false;
5798 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5799 if (alignment_support_scheme != dr_aligned
5800 && alignment_support_scheme != dr_unaligned_supported)
5802 if (dump_enabled_p ())
5803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5804 "negative step but alignment required.\n");
5805 return false;
5807 if (!perm_mask_for_reverse (vectype))
5809 if (dump_enabled_p ())
5810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5811 "negative step and reversing not supported."
5812 "\n");
5813 return false;
5818 if (!vec_stmt) /* transformation not required. */
5820 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5821 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
5822 return true;
5825 if (dump_enabled_p ())
5826 dump_printf_loc (MSG_NOTE, vect_location,
5827 "transform load. ncopies = %d\n", ncopies);
5829 /** Transform. **/
5831 ensure_base_align (stmt_info, dr);
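/* Illustrative sketch of the gather case handled below (the builtin
   and its exact operand types are target-specific): for a scalar
   loop such as

     for (i = 0; i < n; i++)
       sum += data[idx[i]];

   the load data[idx[i]] is replaced by calls to the target gather
   builtin gather_decl with operands (merge, ptr, vectorized index,
   all-ones mask, scale); when the index vector and VECTYPE have
   different element counts, either the index (WIDEN) or the gathered
   results (NARROW) are additionally permuted per copy using the
   permutation mask built below.  */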
5833 if (STMT_VINFO_GATHER_P (stmt_info))
5835 tree vec_oprnd0 = NULL_TREE, op;
5836 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5837 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5838 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
5839 edge pe = loop_preheader_edge (loop);
5840 gimple_seq seq;
5841 basic_block new_bb;
5842 enum { NARROW, NONE, WIDEN } modifier;
5843 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
5845 if (nunits == gather_off_nunits)
5846 modifier = NONE;
5847 else if (nunits == gather_off_nunits / 2)
5849 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5850 modifier = WIDEN;
5852 for (i = 0; i < gather_off_nunits; ++i)
5853 sel[i] = i | nunits;
5855 perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
5856 gcc_assert (perm_mask != NULL_TREE);
5858 else if (nunits == gather_off_nunits * 2)
5860 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5861 modifier = NARROW;
5863 for (i = 0; i < nunits; ++i)
5864 sel[i] = i < gather_off_nunits
5865 ? i : i + nunits - gather_off_nunits;
5867 perm_mask = vect_gen_perm_mask (vectype, sel);
5868 gcc_assert (perm_mask != NULL_TREE);
5869 ncopies *= 2;
5871 else
5872 gcc_unreachable ();
5874 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5875 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5876 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5877 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5878 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5879 scaletype = TREE_VALUE (arglist);
5880 gcc_checking_assert (types_compatible_p (srctype, rettype));
5882 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5884 ptr = fold_convert (ptrtype, gather_base);
5885 if (!is_gimple_min_invariant (ptr))
5887 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5888 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5889 gcc_assert (!new_bb);
5892 /* Currently we support only unconditional gather loads,
5893 so mask should be all ones. */
5894 if (TREE_CODE (masktype) == INTEGER_TYPE)
5895 mask = build_int_cst (masktype, -1);
5896 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5898 mask = build_int_cst (TREE_TYPE (masktype), -1);
5899 mask = build_vector_from_val (masktype, mask);
5900 mask = vect_init_vector (stmt, mask, masktype, NULL);
5902 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
5904 REAL_VALUE_TYPE r;
5905 long tmp[6];
5906 for (j = 0; j < 6; ++j)
5907 tmp[j] = -1;
5908 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5909 mask = build_real (TREE_TYPE (masktype), r);
5910 mask = build_vector_from_val (masktype, mask);
5911 mask = vect_init_vector (stmt, mask, masktype, NULL);
5913 else
5914 gcc_unreachable ();
5916 scale = build_int_cst (scaletype, gather_scale);
5918 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
5919 merge = build_int_cst (TREE_TYPE (rettype), 0);
5920 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
5922 REAL_VALUE_TYPE r;
5923 long tmp[6];
5924 for (j = 0; j < 6; ++j)
5925 tmp[j] = 0;
5926 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
5927 merge = build_real (TREE_TYPE (rettype), r);
5929 else
5930 gcc_unreachable ();
5931 merge = build_vector_from_val (rettype, merge);
5932 merge = vect_init_vector (stmt, merge, rettype, NULL);
5934 prev_stmt_info = NULL;
5935 for (j = 0; j < ncopies; ++j)
5937 if (modifier == WIDEN && (j & 1))
5938 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
5939 perm_mask, stmt, gsi);
5940 else if (j == 0)
5941 op = vec_oprnd0
5942 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
5943 else
5944 op = vec_oprnd0
5945 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
5947 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5949 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5950 == TYPE_VECTOR_SUBPARTS (idxtype));
5951 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
5952 var = make_ssa_name (var, NULL);
5953 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5954 new_stmt
5955 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
5956 op, NULL_TREE);
5957 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5958 op = var;
5961 new_stmt
5962 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
5964 if (!useless_type_conversion_p (vectype, rettype))
5966 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
5967 == TYPE_VECTOR_SUBPARTS (rettype));
5968 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
5969 op = make_ssa_name (var, new_stmt);
5970 gimple_call_set_lhs (new_stmt, op);
5971 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5972 var = make_ssa_name (vec_dest, NULL);
5973 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
5974 new_stmt
5975 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
5976 NULL_TREE);
5978 else
5980 var = make_ssa_name (vec_dest, new_stmt);
5981 gimple_call_set_lhs (new_stmt, var);
5984 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5986 if (modifier == NARROW)
5988 if ((j & 1) == 0)
5990 prev_res = var;
5991 continue;
5993 var = permute_vec_elements (prev_res, var,
5994 perm_mask, stmt, gsi);
5995 new_stmt = SSA_NAME_DEF_STMT (var);
5998 if (prev_stmt_info == NULL)
5999 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6000 else
6001 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6002 prev_stmt_info = vinfo_for_stmt (new_stmt);
6004 return true;
6006 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
6008 gimple_stmt_iterator incr_gsi;
6009 bool insert_after;
6010 gimple incr;
6011 tree offvar;
6012 tree ivstep;
6013 tree running_off;
6014 vec<constructor_elt, va_gc> *v = NULL;
6015 gimple_seq stmts = NULL;
6016 tree stride_base, stride_step, alias_off;
6018 gcc_assert (!nested_in_vect_loop);
6020 stride_base
6021 = fold_build_pointer_plus
6022 (unshare_expr (DR_BASE_ADDRESS (dr)),
6023 size_binop (PLUS_EXPR,
6024 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
6025 convert_to_ptrofftype (DR_INIT (dr))));
6026 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
6028 /* For a load with loop-invariant (but other than power-of-2)
6029 stride (i.e. not a grouped access) like so:
6031 for (i = 0; i < n; i += stride)
6032 ... = array[i];
6034 we generate a new induction variable and new accesses to
6035 form a new vector (or vectors, depending on ncopies):
6037 for (j = 0; ; j += VF*stride)
6038 tmp1 = array[j];
6039 tmp2 = array[j + stride];
6041 vectemp = {tmp1, tmp2, ...}
6044 ivstep = stride_step;
6045 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6046 build_int_cst (TREE_TYPE (ivstep), vf));
6048 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6050 create_iv (stride_base, ivstep, NULL,
6051 loop, &incr_gsi, insert_after,
6052 &offvar, NULL);
6053 incr = gsi_stmt (incr_gsi);
6054 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6056 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6057 if (stmts)
6058 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6060 prev_stmt_info = NULL;
6061 running_off = offvar;
6062 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
6063 for (j = 0; j < ncopies; j++)
6065 tree vec_inv;
6067 vec_alloc (v, nunits);
6068 for (i = 0; i < nunits; i++)
6070 tree newref, newoff;
6071 gimple incr;
6072 newref = build2 (MEM_REF, TREE_TYPE (vectype),
6073 running_off, alias_off);
6075 newref = force_gimple_operand_gsi (gsi, newref, true,
6076 NULL_TREE, true,
6077 GSI_SAME_STMT);
6078 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6079 newoff = copy_ssa_name (running_off, NULL);
6080 incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
6081 running_off, stride_step);
6082 vect_finish_stmt_generation (stmt, incr, gsi);
6084 running_off = newoff;
6087 vec_inv = build_constructor (vectype, v);
6088 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6089 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6091 if (j == 0)
6092 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6093 else
6094 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6095 prev_stmt_info = vinfo_for_stmt (new_stmt);
6097 return true;
6100 if (grouped_load)
6102 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6103 if (slp
6104 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6105 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6106 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6108 /* Check if the chain of loads is already vectorized. */
6109 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6110 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6111 ??? But we can only do so if there is exactly one
6112 as we have no way to get at the rest. Leave the CSE
6113 opportunity alone.
6114 ??? With the group load eventually participating
6115 in multiple different permutations (having multiple
6116 slp nodes which refer to the same group) the CSE
6117 would even be wrong code. See PR56270. */
6118 && !slp)
6120 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6121 return true;
6123 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6124 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6126 /* VEC_NUM is the number of vect stmts to be created for this group. */
6127 if (slp)
6129 grouped_load = false;
6130 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6131 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6132 slp_perm = true;
6133 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6135 else
6137 vec_num = group_size;
6138 group_gap = 0;
6141 else
6143 first_stmt = stmt;
6144 first_dr = dr;
6145 group_size = vec_num = 1;
6146 group_gap = 0;
6149 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6150 gcc_assert (alignment_support_scheme);
6151 /* Targets with load-lane instructions must not require explicit
6152 realignment. */
6153 gcc_assert (!load_lanes_p
6154 || alignment_support_scheme == dr_aligned
6155 || alignment_support_scheme == dr_unaligned_supported);
6157 /* In case the vectorization factor (VF) is bigger than the number
6158 of elements that we can fit in a vectype (nunits), we have to generate
6159 more than one vector stmt - i.e - we need to "unroll" the
6160 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6161 from one copy of the vector stmt to the next, in the field
6162 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6163 stages to find the correct vector defs to be used when vectorizing
6164 stmts that use the defs of the current stmt. The example below
6165 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6166 need to create 4 vectorized stmts):
6168 before vectorization:
6169 RELATED_STMT VEC_STMT
6170 S1: x = memref - -
6171 S2: z = x + 1 - -
6173 step 1: vectorize stmt S1:
6174 We first create the vector stmt VS1_0, and, as usual, record a
6175 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6176 Next, we create the vector stmt VS1_1, and record a pointer to
6177 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6178 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6179 stmts and pointers:
6180 RELATED_STMT VEC_STMT
6181 VS1_0: vx0 = memref0 VS1_1 -
6182 VS1_1: vx1 = memref1 VS1_2 -
6183 VS1_2: vx2 = memref2 VS1_3 -
6184 VS1_3: vx3 = memref3 - -
6185 S1: x = load - VS1_0
6186 S2: z = x + 1 - -
6188 See the documentation of vect_get_vec_def_for_stmt_copy for how the
6189 information recorded in the RELATED_STMT field is used to vectorize
6190 stmt S2. */
6192 /* In case of interleaving (non-unit grouped access):
6194 S1: x2 = &base + 2
6195 S2: x0 = &base
6196 S3: x1 = &base + 1
6197 S4: x3 = &base + 3
6199 Vectorized loads are created in the order of memory accesses
6200 starting from the access of the first stmt of the chain:
6202 VS1: vx0 = &base
6203 VS2: vx1 = &base + vec_size*1
6204 VS3: vx3 = &base + vec_size*2
6205 VS4: vx4 = &base + vec_size*3
6207 Then permutation statements are generated:
6209 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6210 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6213 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6214 (the order of the data-refs in the output of vect_permute_load_chain
6215 corresponds to the order of scalar stmts in the interleaving chain - see
6216 the documentation of vect_permute_load_chain()).
6217 The generation of permutation stmts and recording them in
6218 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6220 In case of both multiple types and interleaving, the vector loads and
6221 permutation stmts above are created for every copy. The result vector
6222 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6223 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6225 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6226 on a target that supports unaligned accesses (dr_unaligned_supported)
6227 we generate the following code:
6228 p = initial_addr;
6229 indx = 0;
6230 loop {
6231 p = p + indx * vectype_size;
6232 vec_dest = *(p);
6233 indx = indx + 1;
6236 Otherwise, the data reference is potentially unaligned on a target that
6237 does not support unaligned accesses (dr_explicit_realign_optimized) -
6238 then generate the following code, in which the data in each iteration is
6239 obtained by two vector loads, one from the previous iteration, and one
6240 from the current iteration:
6241 p1 = initial_addr;
6242 msq_init = *(floor(p1))
6243 p2 = initial_addr + VS - 1;
6244 realignment_token = call target_builtin;
6245 indx = 0;
6246 loop {
6247 p2 = p2 + indx * vectype_size
6248 lsq = *(floor(p2))
6249 vec_dest = realign_load (msq, lsq, realignment_token)
6250 indx = indx + 1;
6251 msq = lsq;
6252 } */
6254 /* If the misalignment remains the same throughout the execution of the
6255 loop, we can create the init_addr and permutation mask at the loop
6256 preheader. Otherwise, it needs to be created inside the loop.
6257 This can only occur when vectorizing memory accesses in the inner-loop
6258 nested within an outer-loop that is being vectorized. */
6260 if (nested_in_vect_loop
6261 && (TREE_INT_CST_LOW (DR_STEP (dr))
6262 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6264 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6265 compute_in_loop = true;
6268 if ((alignment_support_scheme == dr_explicit_realign_optimized
6269 || alignment_support_scheme == dr_explicit_realign)
6270 && !compute_in_loop)
6272 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6273 alignment_support_scheme, NULL_TREE,
6274 &at_loop);
6275 if (alignment_support_scheme == dr_explicit_realign_optimized)
6277 phi = SSA_NAME_DEF_STMT (msq);
6278 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6279 size_one_node);
6282 else
6283 at_loop = loop;
6285 if (negative)
6286 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
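/* Illustrative sketch: for a reversed scalar loop such as

     for (i = n - 1; i >= 0; i--)
       ... = a[i];

   the data-ref has a negative step, so each vector is loaded starting
   nunits - 1 elements below the current address (hence the offset of
   -(nunits - 1) just above) and its elements are afterwards reversed
   with the permutation from perm_mask_for_reverse.  */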
6288 if (load_lanes_p)
6289 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6290 else
6291 aggr_type = vectype;
6293 prev_stmt_info = NULL;
6294 for (j = 0; j < ncopies; j++)
6296 /* 1. Create the vector or array pointer update chain. */
6297 if (j == 0)
6299 bool simd_lane_access_p
6300 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6301 if (simd_lane_access_p
6302 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6303 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6304 && integer_zerop (DR_OFFSET (first_dr))
6305 && integer_zerop (DR_INIT (first_dr))
6306 && alias_sets_conflict_p (get_alias_set (aggr_type),
6307 get_alias_set (DR_REF (first_dr)))
6308 && (alignment_support_scheme == dr_aligned
6309 || alignment_support_scheme == dr_unaligned_supported))
6311 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6312 dataref_offset = build_int_cst (reference_alias_ptr_type
6313 (DR_REF (first_dr)), 0);
6314 inv_p = false;
6316 else
6317 dataref_ptr
6318 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6319 offset, &dummy, gsi, &ptr_incr,
6320 simd_lane_access_p, &inv_p,
6321 byte_offset);
6323 else if (dataref_offset)
6324 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6325 TYPE_SIZE_UNIT (aggr_type));
6326 else
6327 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6328 TYPE_SIZE_UNIT (aggr_type));
6330 if (grouped_load || slp_perm)
6331 dr_chain.create (vec_num);
6333 if (load_lanes_p)
6335 tree vec_array;
6337 vec_array = create_vector_array (vectype, vec_num);
6339 /* Emit:
6340 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6341 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6342 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6343 gimple_call_set_lhs (new_stmt, vec_array);
6344 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6346 /* Extract each vector into an SSA_NAME. */
6347 for (i = 0; i < vec_num; i++)
6349 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6350 vec_array, i);
6351 dr_chain.quick_push (new_temp);
6354 /* Record the mapping between SSA_NAMEs and statements. */
6355 vect_record_grouped_load_vectors (stmt, dr_chain);
6357 else
6359 for (i = 0; i < vec_num; i++)
6361 if (i > 0)
6362 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6363 stmt, NULL_TREE);
6365 /* 2. Create the vector-load in the loop. */
6366 switch (alignment_support_scheme)
6368 case dr_aligned:
6369 case dr_unaligned_supported:
6371 unsigned int align, misalign;
6373 data_ref
6374 = build2 (MEM_REF, vectype, dataref_ptr,
6375 dataref_offset
6376 ? dataref_offset
6377 : build_int_cst (reference_alias_ptr_type
6378 (DR_REF (first_dr)), 0));
6379 align = TYPE_ALIGN_UNIT (vectype);
6380 if (alignment_support_scheme == dr_aligned)
6382 gcc_assert (aligned_access_p (first_dr));
6383 misalign = 0;
6385 else if (DR_MISALIGNMENT (first_dr) == -1)
6387 TREE_TYPE (data_ref)
6388 = build_aligned_type (TREE_TYPE (data_ref),
6389 TYPE_ALIGN (elem_type));
6390 align = TYPE_ALIGN_UNIT (elem_type);
6391 misalign = 0;
6393 else
6395 TREE_TYPE (data_ref)
6396 = build_aligned_type (TREE_TYPE (data_ref),
6397 TYPE_ALIGN (elem_type));
6398 misalign = DR_MISALIGNMENT (first_dr);
6400 if (dataref_offset == NULL_TREE)
6401 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6402 align, misalign);
6403 break;
6405 case dr_explicit_realign:
6407 tree ptr, bump;
6408 tree vs_minus_1;
6410 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6412 if (compute_in_loop)
6413 msq = vect_setup_realignment (first_stmt, gsi,
6414 &realignment_token,
6415 dr_explicit_realign,
6416 dataref_ptr, NULL);
6418 ptr = copy_ssa_name (dataref_ptr, NULL);
6419 new_stmt = gimple_build_assign_with_ops
6420 (BIT_AND_EXPR, ptr, dataref_ptr,
6421 build_int_cst
6422 (TREE_TYPE (dataref_ptr),
6423 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6424 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6425 data_ref
6426 = build2 (MEM_REF, vectype, ptr,
6427 build_int_cst (reference_alias_ptr_type
6428 (DR_REF (first_dr)), 0));
6429 vec_dest = vect_create_destination_var (scalar_dest,
6430 vectype);
6431 new_stmt = gimple_build_assign (vec_dest, data_ref);
6432 new_temp = make_ssa_name (vec_dest, new_stmt);
6433 gimple_assign_set_lhs (new_stmt, new_temp);
6434 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6435 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6436 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6437 msq = new_temp;
6439 bump = size_binop (MULT_EXPR, vs_minus_1,
6440 TYPE_SIZE_UNIT (elem_type));
6441 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6442 new_stmt = gimple_build_assign_with_ops
6443 (BIT_AND_EXPR, NULL_TREE, ptr,
6444 build_int_cst
6445 (TREE_TYPE (ptr),
6446 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6447 ptr = copy_ssa_name (dataref_ptr, new_stmt);
6448 gimple_assign_set_lhs (new_stmt, ptr);
6449 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6450 data_ref
6451 = build2 (MEM_REF, vectype, ptr,
6452 build_int_cst (reference_alias_ptr_type
6453 (DR_REF (first_dr)), 0));
6454 break;
6456 case dr_explicit_realign_optimized:
6457 new_temp = copy_ssa_name (dataref_ptr, NULL);
6458 new_stmt = gimple_build_assign_with_ops
6459 (BIT_AND_EXPR, new_temp, dataref_ptr,
6460 build_int_cst
6461 (TREE_TYPE (dataref_ptr),
6462 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6463 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6464 data_ref
6465 = build2 (MEM_REF, vectype, new_temp,
6466 build_int_cst (reference_alias_ptr_type
6467 (DR_REF (first_dr)), 0));
6468 break;
6469 default:
6470 gcc_unreachable ();
6472 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6473 new_stmt = gimple_build_assign (vec_dest, data_ref);
6474 new_temp = make_ssa_name (vec_dest, new_stmt);
6475 gimple_assign_set_lhs (new_stmt, new_temp);
6476 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6478 /* 3. Handle explicit realignment if necessary/supported.
6479 Create in loop:
6480 vec_dest = realign_load (msq, lsq, realignment_token) */
6481 if (alignment_support_scheme == dr_explicit_realign_optimized
6482 || alignment_support_scheme == dr_explicit_realign)
6484 lsq = gimple_assign_lhs (new_stmt);
6485 if (!realignment_token)
6486 realignment_token = dataref_ptr;
6487 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6488 new_stmt
6489 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
6490 vec_dest, msq, lsq,
6491 realignment_token);
6492 new_temp = make_ssa_name (vec_dest, new_stmt);
6493 gimple_assign_set_lhs (new_stmt, new_temp);
6494 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6496 if (alignment_support_scheme == dr_explicit_realign_optimized)
6498 gcc_assert (phi);
6499 if (i == vec_num - 1 && j == ncopies - 1)
6500 add_phi_arg (phi, lsq,
6501 loop_latch_edge (containing_loop),
6502 UNKNOWN_LOCATION);
6503 msq = lsq;
6507 /* 4. Handle invariant-load. */
6508 if (inv_p && !bb_vinfo)
6510 gcc_assert (!grouped_load);
6511 /* If we have versioned for aliasing or the loop doesn't
6512 have any data dependencies that would preclude this,
6513 then we are sure this is a loop invariant load and
6514 thus we can insert it on the preheader edge. */
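/* For example, in

     for (i = 0; i < n; i++)
       a[i] = *p;

   where *p is not clobbered inside the loop, the scalar load can be
   moved to the preheader when the checks below succeed, and a single
   vector built from that value then feeds every iteration.  */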
6515 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6516 && !nested_in_vect_loop
6517 && hoist_defs_of_uses (stmt, loop))
6519 if (dump_enabled_p ())
6521 dump_printf_loc (MSG_NOTE, vect_location,
6522 "hoisting out of the vectorized "
6523 "loop: ");
6524 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6525 dump_printf (MSG_NOTE, "\n");
6527 tree tem = copy_ssa_name (scalar_dest, NULL);
6528 gsi_insert_on_edge_immediate
6529 (loop_preheader_edge (loop),
6530 gimple_build_assign (tem,
6531 unshare_expr
6532 (gimple_assign_rhs1 (stmt))));
6533 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6535 else
6537 gimple_stmt_iterator gsi2 = *gsi;
6538 gsi_next (&gsi2);
6539 new_temp = vect_init_vector (stmt, scalar_dest,
6540 vectype, &gsi2);
6542 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6543 set_vinfo_for_stmt (new_stmt,
6544 new_stmt_vec_info (new_stmt, loop_vinfo,
6545 bb_vinfo));
6548 if (negative)
6550 tree perm_mask = perm_mask_for_reverse (vectype);
6551 new_temp = permute_vec_elements (new_temp, new_temp,
6552 perm_mask, stmt, gsi);
6553 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6556 /* Collect vector loads and later create their permutation in
6557 vect_transform_grouped_load (). */
6558 if (grouped_load || slp_perm)
6559 dr_chain.quick_push (new_temp);
6561 /* Store vector loads in the corresponding SLP_NODE. */
6562 if (slp && !slp_perm)
6563 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6565 /* Bump the vector pointer to account for a gap. */
6566 if (slp && group_gap != 0)
6568 tree bump = size_binop (MULT_EXPR,
6569 TYPE_SIZE_UNIT (elem_type),
6570 size_int (group_gap));
6571 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6572 stmt, bump);
6576 if (slp && !slp_perm)
6577 continue;
6579 if (slp_perm)
6581 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6582 slp_node_instance, false))
6584 dr_chain.release ();
6585 return false;
6588 else
6590 if (grouped_load)
6592 if (!load_lanes_p)
6593 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6594 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6596 else
6598 if (j == 0)
6599 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6600 else
6601 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6602 prev_stmt_info = vinfo_for_stmt (new_stmt);
6605 dr_chain.release ();
6608 return true;
6611 /* Function vect_is_simple_cond.
6613 Input:
6614 LOOP - the loop that is being vectorized.
6615 COND - Condition that is checked for simple use.
6617 Output:
6618 *COMP_VECTYPE - the vector type for the comparison.
6620 Returns whether a COND can be vectorized. Checks whether
6621 condition operands are supportable using vect_is_simple_use. */
6623 static bool
6624 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6625 bb_vec_info bb_vinfo, tree *comp_vectype)
6627 tree lhs, rhs;
6628 tree def;
6629 enum vect_def_type dt;
6630 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6632 if (!COMPARISON_CLASS_P (cond))
6633 return false;
6635 lhs = TREE_OPERAND (cond, 0);
6636 rhs = TREE_OPERAND (cond, 1);
6638 if (TREE_CODE (lhs) == SSA_NAME)
6640 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6641 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6642 &lhs_def_stmt, &def, &dt, &vectype1))
6643 return false;
6645 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6646 && TREE_CODE (lhs) != FIXED_CST)
6647 return false;
6649 if (TREE_CODE (rhs) == SSA_NAME)
6651 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6652 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6653 &rhs_def_stmt, &def, &dt, &vectype2))
6654 return false;
6656 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6657 && TREE_CODE (rhs) != FIXED_CST)
6658 return false;
6660 *comp_vectype = vectype1 ? vectype1 : vectype2;
6661 return true;
6664 /* vectorizable_condition.
6666 Check if STMT is conditional modify expression that can be vectorized.
6667 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6668 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6669 at GSI.
6671 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6672 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
6673 else clause if it is 2).
6675 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
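/* For example, a scalar statement

     x = a < b ? c : d;

   is conceptually vectorized as

     vx = VEC_COND_EXPR <va < vb, vc, vd>;

   provided the target supports the vector condition for the
   comparison type (see expand_vec_cond_expr_p).  */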
6677 bool
6678 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6679 gimple *vec_stmt, tree reduc_def, int reduc_index,
6680 slp_tree slp_node)
6682 tree scalar_dest = NULL_TREE;
6683 tree vec_dest = NULL_TREE;
6684 tree cond_expr, then_clause, else_clause;
6685 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6686 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6687 tree comp_vectype = NULL_TREE;
6688 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6689 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6690 tree vec_compare, vec_cond_expr;
6691 tree new_temp;
6692 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6693 tree def;
6694 enum vect_def_type dt, dts[4];
6695 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6696 int ncopies;
6697 enum tree_code code;
6698 stmt_vec_info prev_stmt_info = NULL;
6699 int i, j;
6700 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6701 vec<tree> vec_oprnds0 = vNULL;
6702 vec<tree> vec_oprnds1 = vNULL;
6703 vec<tree> vec_oprnds2 = vNULL;
6704 vec<tree> vec_oprnds3 = vNULL;
6705 tree vec_cmp_type;
6707 if (slp_node || PURE_SLP_STMT (stmt_info))
6708 ncopies = 1;
6709 else
6710 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6712 gcc_assert (ncopies >= 1);
6713 if (reduc_index && ncopies > 1)
6714 return false; /* FORNOW */
6716 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6717 return false;
6719 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6720 return false;
6722 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6723 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6724 && reduc_def))
6725 return false;
6727 /* FORNOW: not yet supported. */
6728 if (STMT_VINFO_LIVE_P (stmt_info))
6730 if (dump_enabled_p ())
6731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6732 "value used after loop.\n");
6733 return false;
6736 /* Is vectorizable conditional operation? */
6737 if (!is_gimple_assign (stmt))
6738 return false;
6740 code = gimple_assign_rhs_code (stmt);
6742 if (code != COND_EXPR)
6743 return false;
6745 cond_expr = gimple_assign_rhs1 (stmt);
6746 then_clause = gimple_assign_rhs2 (stmt);
6747 else_clause = gimple_assign_rhs3 (stmt);
6749 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6750 &comp_vectype)
6751 || !comp_vectype)
6752 return false;
6754 if (TREE_CODE (then_clause) == SSA_NAME)
6756 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6757 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
6758 &then_def_stmt, &def, &dt))
6759 return false;
6761 else if (TREE_CODE (then_clause) != INTEGER_CST
6762 && TREE_CODE (then_clause) != REAL_CST
6763 && TREE_CODE (then_clause) != FIXED_CST)
6764 return false;
6766 if (TREE_CODE (else_clause) == SSA_NAME)
6768 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6769 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
6770 &else_def_stmt, &def, &dt))
6771 return false;
6773 else if (TREE_CODE (else_clause) != INTEGER_CST
6774 && TREE_CODE (else_clause) != REAL_CST
6775 && TREE_CODE (else_clause) != FIXED_CST)
6776 return false;
6778 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
6779 /* The result of a vector comparison should be a signed integer type. */
6780 tree cmp_type = build_nonstandard_integer_type (prec, 0);
6781 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
6782 if (vec_cmp_type == NULL_TREE)
6783 return false;
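/* For example, with a 4 x float VECTYPE the comparison will be
   carried out in a 4 x 32-bit signed integer vector type, assuming
   the target provides a vector mode of that size.  */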
6785 if (!vec_stmt)
6787 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6788 return expand_vec_cond_expr_p (vectype, comp_vectype);
6791 /* Transform. */
6793 if (!slp_node)
6795 vec_oprnds0.create (1);
6796 vec_oprnds1.create (1);
6797 vec_oprnds2.create (1);
6798 vec_oprnds3.create (1);
6801 /* Handle def. */
6802 scalar_dest = gimple_assign_lhs (stmt);
6803 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6805 /* Handle cond expr. */
6806 for (j = 0; j < ncopies; j++)
6808 gimple new_stmt = NULL;
6809 if (j == 0)
6811 if (slp_node)
6813 auto_vec<tree, 4> ops;
6814 auto_vec<vec<tree>, 4> vec_defs;
6816 ops.safe_push (TREE_OPERAND (cond_expr, 0));
6817 ops.safe_push (TREE_OPERAND (cond_expr, 1));
6818 ops.safe_push (then_clause);
6819 ops.safe_push (else_clause);
6820 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
6821 vec_oprnds3 = vec_defs.pop ();
6822 vec_oprnds2 = vec_defs.pop ();
6823 vec_oprnds1 = vec_defs.pop ();
6824 vec_oprnds0 = vec_defs.pop ();
6826 ops.release ();
6827 vec_defs.release ();
6829 else
6831 gimple gtemp;
6832 vec_cond_lhs =
6833 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
6834 stmt, NULL);
6835 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6836 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
6838 vec_cond_rhs =
6839 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
6840 stmt, NULL);
6841 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6842 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
6843 if (reduc_index == 1)
6844 vec_then_clause = reduc_def;
6845 else
6847 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
6848 stmt, NULL);
6849 vect_is_simple_use (then_clause, stmt, loop_vinfo,
6850 NULL, &gtemp, &def, &dts[2]);
6852 if (reduc_index == 2)
6853 vec_else_clause = reduc_def;
6854 else
6856 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
6857 stmt, NULL);
6858 vect_is_simple_use (else_clause, stmt, loop_vinfo,
6859 NULL, &gtemp, &def, &dts[3]);
6863 else
6865 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
6866 vec_oprnds0.pop ());
6867 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
6868 vec_oprnds1.pop ());
6869 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
6870 vec_oprnds2.pop ());
6871 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
6872 vec_oprnds3.pop ());
6875 if (!slp_node)
6877 vec_oprnds0.quick_push (vec_cond_lhs);
6878 vec_oprnds1.quick_push (vec_cond_rhs);
6879 vec_oprnds2.quick_push (vec_then_clause);
6880 vec_oprnds3.quick_push (vec_else_clause);
6883 /* Arguments are ready. Create the new vector stmt. */
6884 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
6886 vec_cond_rhs = vec_oprnds1[i];
6887 vec_then_clause = vec_oprnds2[i];
6888 vec_else_clause = vec_oprnds3[i];
6890 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6891 vec_cond_lhs, vec_cond_rhs);
6892 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6893 vec_compare, vec_then_clause, vec_else_clause);
6895 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6896 new_temp = make_ssa_name (vec_dest, new_stmt);
6897 gimple_assign_set_lhs (new_stmt, new_temp);
6898 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6899 if (slp_node)
6900 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6903 if (slp_node)
6904 continue;
6906 if (j == 0)
6907 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6908 else
6909 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6911 prev_stmt_info = vinfo_for_stmt (new_stmt);
6914 vec_oprnds0.release ();
6915 vec_oprnds1.release ();
6916 vec_oprnds2.release ();
6917 vec_oprnds3.release ();
6919 return true;
6923 /* Make sure the statement is vectorizable. */
6925 bool
6926 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
6928 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6929 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6930 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
6931 bool ok;
6932 tree scalar_type, vectype;
6933 gimple pattern_stmt;
6934 gimple_seq pattern_def_seq;
6936 if (dump_enabled_p ())
6938 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
6939 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6940 dump_printf (MSG_NOTE, "\n");
6943 if (gimple_has_volatile_ops (stmt))
6945 if (dump_enabled_p ())
6946 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6947 "not vectorized: stmt has volatile operands\n");
6949 return false;
6952 /* Skip stmts that do not need to be vectorized. In loops this is expected
6953 to include:
6954 - the COND_EXPR which is the loop exit condition
6955 - any LABEL_EXPRs in the loop
6956 - computations that are used only for array indexing or loop control.
6957 In basic blocks we only analyze statements that are a part of some SLP
6958 instance, therefore, all the statements are relevant.
6960 Pattern statement needs to be analyzed instead of the original statement
6961 if the original statement is not relevant. Otherwise, we analyze both
6962 statements. In basic blocks we are called from some SLP instance
6963 traversal, so don't analyze pattern stmts instead of the original;
6964 the pattern stmts will already be part of the SLP instance. */
6966 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
6967 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6968 && !STMT_VINFO_LIVE_P (stmt_info))
6970 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6971 && pattern_stmt
6972 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6973 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6975 /* Analyze PATTERN_STMT instead of the original stmt. */
6976 stmt = pattern_stmt;
6977 stmt_info = vinfo_for_stmt (pattern_stmt);
6978 if (dump_enabled_p ())
6980 dump_printf_loc (MSG_NOTE, vect_location,
6981 "==> examining pattern statement: ");
6982 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6983 dump_printf (MSG_NOTE, "\n");
6986 else
6988 if (dump_enabled_p ())
6989 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
6991 return true;
6994 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6995 && node == NULL
6996 && pattern_stmt
6997 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6998 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7000 /* Analyze PATTERN_STMT too. */
7001 if (dump_enabled_p ())
7003 dump_printf_loc (MSG_NOTE, vect_location,
7004 "==> examining pattern statement: ");
7005 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7006 dump_printf (MSG_NOTE, "\n");
7009 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7010 return false;
7013 if (is_pattern_stmt_p (stmt_info)
7014 && node == NULL
7015 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7017 gimple_stmt_iterator si;
7019 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7021 gimple pattern_def_stmt = gsi_stmt (si);
7022 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7023 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7025 /* Analyze def stmt of STMT if it's a pattern stmt. */
7026 if (dump_enabled_p ())
7028 dump_printf_loc (MSG_NOTE, vect_location,
7029 "==> examining pattern def statement: ");
7030 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7031 dump_printf (MSG_NOTE, "\n");
7034 if (!vect_analyze_stmt (pattern_def_stmt,
7035 need_to_vectorize, node))
7036 return false;
7041 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7043 case vect_internal_def:
7044 break;
7046 case vect_reduction_def:
7047 case vect_nested_cycle:
7048 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
7049 || relevance == vect_used_in_outer_by_reduction
7050 || relevance == vect_unused_in_scope));
7051 break;
7053 case vect_induction_def:
7054 case vect_constant_def:
7055 case vect_external_def:
7056 case vect_unknown_def_type:
7057 default:
7058 gcc_unreachable ();
7061 if (bb_vinfo)
7063 gcc_assert (PURE_SLP_STMT (stmt_info));
7065 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7066 if (dump_enabled_p ())
7068 dump_printf_loc (MSG_NOTE, vect_location,
7069 "get vectype for scalar type: ");
7070 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7071 dump_printf (MSG_NOTE, "\n");
7074 vectype = get_vectype_for_scalar_type (scalar_type);
7075 if (!vectype)
7077 if (dump_enabled_p ())
7079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7080 "not SLPed: unsupported data-type ");
7081 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7082 scalar_type);
7083 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7085 return false;
7088 if (dump_enabled_p ())
7090 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7091 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7092 dump_printf (MSG_NOTE, "\n");
7095 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7098 if (STMT_VINFO_RELEVANT_P (stmt_info))
7100 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7101 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7102 || (is_gimple_call (stmt)
7103 && gimple_call_lhs (stmt) == NULL_TREE));
7104 *need_to_vectorize = true;
7107 ok = true;
7108 if (!bb_vinfo
7109 && (STMT_VINFO_RELEVANT_P (stmt_info)
7110 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7111 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7112 || vectorizable_conversion (stmt, NULL, NULL, NULL)
7113 || vectorizable_shift (stmt, NULL, NULL, NULL)
7114 || vectorizable_operation (stmt, NULL, NULL, NULL)
7115 || vectorizable_assignment (stmt, NULL, NULL, NULL)
7116 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
7117 || vectorizable_call (stmt, NULL, NULL, NULL)
7118 || vectorizable_store (stmt, NULL, NULL, NULL)
7119 || vectorizable_reduction (stmt, NULL, NULL, NULL)
7120 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
7121 else
7123 if (bb_vinfo)
7124 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7125 || vectorizable_conversion (stmt, NULL, NULL, node)
7126 || vectorizable_shift (stmt, NULL, NULL, node)
7127 || vectorizable_operation (stmt, NULL, NULL, node)
7128 || vectorizable_assignment (stmt, NULL, NULL, node)
7129 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7130 || vectorizable_call (stmt, NULL, NULL, node)
7131 || vectorizable_store (stmt, NULL, NULL, node)
7132 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7135 if (!ok)
7137 if (dump_enabled_p ())
7139 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7140 "not vectorized: relevant stmt not ");
7141 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7142 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7143 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7146 return false;
7149 if (bb_vinfo)
7150 return true;
7152 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7153 need extra handling, except for vectorizable reductions. */
7154 if (STMT_VINFO_LIVE_P (stmt_info)
7155 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7156 ok = vectorizable_live_operation (stmt, NULL, NULL);
7158 if (!ok)
7160 if (dump_enabled_p ())
7162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7163 "not vectorized: live stmt not ");
7164 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7165 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7166 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7169 return false;
7172 return true;
7176 /* Function vect_transform_stmt.
7178 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7180 bool
7181 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7182 bool *grouped_store, slp_tree slp_node,
7183 slp_instance slp_node_instance)
7185 bool is_store = false;
7186 gimple vec_stmt = NULL;
7187 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7188 bool done;
7190 switch (STMT_VINFO_TYPE (stmt_info))
7192 case type_demotion_vec_info_type:
7193 case type_promotion_vec_info_type:
7194 case type_conversion_vec_info_type:
7195 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7196 gcc_assert (done);
7197 break;
7199 case induc_vec_info_type:
7200 gcc_assert (!slp_node);
7201 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7202 gcc_assert (done);
7203 break;
7205 case shift_vec_info_type:
7206 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7207 gcc_assert (done);
7208 break;
7210 case op_vec_info_type:
7211 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7212 gcc_assert (done);
7213 break;
7215 case assignment_vec_info_type:
7216 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7217 gcc_assert (done);
7218 break;
7220 case load_vec_info_type:
7221 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7222 slp_node_instance);
7223 gcc_assert (done);
7224 break;
7226 case store_vec_info_type:
7227 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7228 gcc_assert (done);
7229 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7231 /* In case of interleaving, the whole chain is vectorized when the
7232 last store in the chain is reached. Store stmts before the last
7233 one are skipped, and their vec_stmt_info shouldn't be freed
7234 meanwhile. */
7235 *grouped_store = true;
7236 if (STMT_VINFO_VEC_STMT (stmt_info))
7237 is_store = true;
7239 else
7240 is_store = true;
7241 break;
7243 case condition_vec_info_type:
7244 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7245 gcc_assert (done);
7246 break;
7248 case call_vec_info_type:
7249 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7250 stmt = gsi_stmt (*gsi);
7251 if (is_gimple_call (stmt)
7252 && gimple_call_internal_p (stmt)
7253 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7254 is_store = true;
7255 break;
7257 case call_simd_clone_vec_info_type:
7258 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7259 stmt = gsi_stmt (*gsi);
7260 break;
7262 case reduc_vec_info_type:
7263 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7264 gcc_assert (done);
7265 break;
7267 default:
7268 if (!STMT_VINFO_LIVE_P (stmt_info))
7270 if (dump_enabled_p ())
7271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7272 "stmt not supported.\n");
7273 gcc_unreachable ();
7277 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7278 is being vectorized, but outside the immediately enclosing loop. */
7279 if (vec_stmt
7280 && STMT_VINFO_LOOP_VINFO (stmt_info)
7281 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7282 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7283 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7284 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7285 || STMT_VINFO_RELEVANT (stmt_info) ==
7286 vect_used_in_outer_by_reduction))
7288 struct loop *innerloop = LOOP_VINFO_LOOP (
7289 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7290 imm_use_iterator imm_iter;
7291 use_operand_p use_p;
7292 tree scalar_dest;
7293 gimple exit_phi;
7295 if (dump_enabled_p ())
7296 dump_printf_loc (MSG_NOTE, vect_location,
7297 "Record the vdef for outer-loop vectorization.\n");
7299 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7300 (to be used when vectorizing outer-loop stmts that use the DEF of
7301 STMT). */
7302 if (gimple_code (stmt) == GIMPLE_PHI)
7303 scalar_dest = PHI_RESULT (stmt);
7304 else
7305 scalar_dest = gimple_assign_lhs (stmt);
7307 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7309 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7311 exit_phi = USE_STMT (use_p);
7312 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7317 /* Handle stmts whose DEF is used outside the loop-nest that is
7318 being vectorized. */
7319 if (STMT_VINFO_LIVE_P (stmt_info)
7320 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7322 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7323 gcc_assert (done);
7326 if (vec_stmt)
7327 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7329 return is_store;
7333 /* Remove a group of stores (for SLP or interleaving), free their
7334 stmt_vec_info. */
7336 void
7337 vect_remove_stores (gimple first_stmt)
7339 gimple next = first_stmt;
7340 gimple tmp;
7341 gimple_stmt_iterator next_si;
7343 while (next)
7345 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7347 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7348 if (is_pattern_stmt_p (stmt_info))
7349 next = STMT_VINFO_RELATED_STMT (stmt_info);
7350 /* Free the attached stmt_vec_info and remove the stmt. */
7351 next_si = gsi_for_stmt (next);
7352 unlink_stmt_vdef (next);
7353 gsi_remove (&next_si, true);
7354 release_defs (next);
7355 free_stmt_vec_info (next);
7356 next = tmp;
7361 /* Function new_stmt_vec_info.
7363 Create and initialize a new stmt_vec_info struct for STMT. */
7365 stmt_vec_info
7366 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7367 bb_vec_info bb_vinfo)
7369 stmt_vec_info res;
7370 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7372 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7373 STMT_VINFO_STMT (res) = stmt;
7374 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7375 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7376 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7377 STMT_VINFO_LIVE_P (res) = false;
7378 STMT_VINFO_VECTYPE (res) = NULL;
7379 STMT_VINFO_VEC_STMT (res) = NULL;
7380 STMT_VINFO_VECTORIZABLE (res) = true;
7381 STMT_VINFO_IN_PATTERN_P (res) = false;
7382 STMT_VINFO_RELATED_STMT (res) = NULL;
7383 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7384 STMT_VINFO_DATA_REF (res) = NULL;
7386 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7387 STMT_VINFO_DR_OFFSET (res) = NULL;
7388 STMT_VINFO_DR_INIT (res) = NULL;
7389 STMT_VINFO_DR_STEP (res) = NULL;
7390 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7392 if (gimple_code (stmt) == GIMPLE_PHI
7393 && is_loop_header_bb_p (gimple_bb (stmt)))
7394 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7395 else
7396 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7398 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7399 STMT_SLP_TYPE (res) = loop_vect;
7400 GROUP_FIRST_ELEMENT (res) = NULL;
7401 GROUP_NEXT_ELEMENT (res) = NULL;
7402 GROUP_SIZE (res) = 0;
7403 GROUP_STORE_COUNT (res) = 0;
7404 GROUP_GAP (res) = 0;
7405 GROUP_SAME_DR_STMT (res) = NULL;
7407 return res;
7411 /* Create a vector for stmt_vec_info structs. */
7413 void
7414 init_stmt_vec_info_vec (void)
7416 gcc_assert (!stmt_vec_info_vec.exists ());
7417 stmt_vec_info_vec.create (50);
7421 /* Free the vector of stmt_vec_info structs. */
7423 void
7424 free_stmt_vec_info_vec (void)
7426 unsigned int i;
7427 vec_void_p info;
7428 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7429 if (info != NULL)
7430 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7431 gcc_assert (stmt_vec_info_vec.exists ());
7432 stmt_vec_info_vec.release ();
7436 /* Free stmt vectorization related info. */
7438 void
7439 free_stmt_vec_info (gimple stmt)
7441 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7443 if (!stmt_info)
7444 return;
7446 /* Check if this statement has a related "pattern stmt"
7447 (introduced by the vectorizer during the pattern recognition
7448 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7449 too. */
7450 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7452 stmt_vec_info patt_info
7453 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7454 if (patt_info)
7456 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7457 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7458 gimple_set_bb (patt_stmt, NULL);
7459 tree lhs = gimple_get_lhs (patt_stmt);
7460 if (TREE_CODE (lhs) == SSA_NAME)
7461 release_ssa_name (lhs);
7462 if (seq)
7464 gimple_stmt_iterator si;
7465 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7467 gimple seq_stmt = gsi_stmt (si);
7468 gimple_set_bb (seq_stmt, NULL);
7469 lhs = gimple_get_lhs (seq_stmt);
7470 if (TREE_CODE (lhs) == SSA_NAME)
7471 release_ssa_name (lhs);
7472 free_stmt_vec_info (seq_stmt);
7475 free_stmt_vec_info (patt_stmt);
7479 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7480 set_vinfo_for_stmt (stmt, NULL);
7481 free (stmt_info);
7485 /* Function get_vectype_for_scalar_type_and_size.
7487 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7488 by the target. */
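/* For example, a 4-byte int SCALAR_TYPE with SIZE == 16 yields a
   4-element integer vector type when the target supports such a
   mode; SIZE == 0 means the target's preferred SIMD mode for the
   element mode is used.  */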
7490 static tree
7491 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7493 machine_mode inner_mode = TYPE_MODE (scalar_type);
7494 machine_mode simd_mode;
7495 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7496 int nunits;
7497 tree vectype;
7499 if (nbytes == 0)
7500 return NULL_TREE;
7502 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7503 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7504 return NULL_TREE;
7506 /* For vector types of elements whose mode precision doesn't
7507 match their type's precision we use an element type of mode
7508 precision. The vectorization routines will have to make sure
7509 they support the proper result truncation/extension.
7510 We also make sure to build vector types with INTEGER_TYPE
7511 component type only. */
7512 if (INTEGRAL_TYPE_P (scalar_type)
7513 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7514 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7515 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7516 TYPE_UNSIGNED (scalar_type));
7518 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7519 When the component mode passes the above test simply use a type
7520 corresponding to that mode. The theory is that any use that
7521 would cause problems with this will disable vectorization anyway. */
7522 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7523 && !INTEGRAL_TYPE_P (scalar_type))
7524 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7526 /* We can't build a vector type of elements with alignment bigger than
7527 their size. */
7528 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7529 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7530 TYPE_UNSIGNED (scalar_type));
7532 /* If we fell back to using the mode, fail if there was
7533 no scalar type for it. */
7534 if (scalar_type == NULL_TREE)
7535 return NULL_TREE;
7537 /* If no size was supplied use the mode the target prefers. Otherwise
7538 look up a vector mode of the specified size. */
7539 if (size == 0)
7540 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7541 else
7542 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7543 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7544 if (nunits <= 1)
7545 return NULL_TREE;
7547 vectype = build_vector_type (scalar_type, nunits);
7549 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7550 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7551 return NULL_TREE;
7553 return vectype;
7556 unsigned int current_vector_size;
7558 /* Function get_vectype_for_scalar_type.
7560 Returns the vector type corresponding to SCALAR_TYPE as supported
7561 by the target. */
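/* Informal note: the first successful call latches current_vector_size to
   the size of the vector type it chose, so subsequent calls request
   same-sized vectors. E.g. on a hypothetical 128-bit SIMD target, if the
   first query (for double) picks the 16-byte V2DF type, a later query for
   int yields the 16-byte V4SI type rather than a wider one.  */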
7563 tree
7564 get_vectype_for_scalar_type (tree scalar_type)
7566 tree vectype;
7567 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7568 current_vector_size);
7569 if (vectype
7570 && current_vector_size == 0)
7571 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7572 return vectype;
7575 /* Function get_same_sized_vectype
7577 Returns a vector type corresponding to SCALAR_TYPE with the same
7578 size as VECTOR_TYPE, if supported by the target. */
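/* For example (illustrative, and subject to target support): with
   VECTOR_TYPE a 16-byte vector of four ints, calling
       get_same_sized_vectype (short_integer_type_node, vector_type)
   would return a 16-byte vector of eight shorts, or NULL_TREE if no such
   vector mode exists.  */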
7580 tree
7581 get_same_sized_vectype (tree scalar_type, tree vector_type)
7583 return get_vectype_for_scalar_type_and_size
7584 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7587 /* Function vect_is_simple_use.
7589 Input:
7590 LOOP_VINFO - the vect info of the loop that is being vectorized.
7591 BB_VINFO - the vect info of the basic block that is being vectorized.
7592 OPERAND - operand of STMT in the loop or bb.
7593 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7595 Returns whether a stmt with OPERAND can be vectorized.
7596 For loops, supportable operands are constants, loop invariants, and operands
7597 that are defined by the current iteration of the loop. Unsupportable
7598 operands are those that are defined by a previous iteration of the loop (as
7599 is the case in reduction/induction computations).
7600 For basic blocks, supportable operands are constants and bb invariants.
7601 For now, operands defined outside the basic block are not supported. */
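/* Informal example of the loop classification, assuming x is defined
   before the loop:

       for (i = 0; i < n; i++)
         a[i] = b[i] * 4 + x;

   Here the constant 4 is vect_constant_def, x is vect_external_def, and
   the value loaded from b[i] is vect_internal_def. An operand defined by
   a previous iteration (a reduction or induction) gets one of the cycle
   def-types and is handled by the reduction/induction code instead.  */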
7603 bool
7604 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7605 bb_vec_info bb_vinfo, gimple *def_stmt,
7606 tree *def, enum vect_def_type *dt)
7608 basic_block bb;
7609 stmt_vec_info stmt_vinfo;
7610 struct loop *loop = NULL;
7612 if (loop_vinfo)
7613 loop = LOOP_VINFO_LOOP (loop_vinfo);
7615 *def_stmt = NULL;
7616 *def = NULL_TREE;
7618 if (dump_enabled_p ())
7620 dump_printf_loc (MSG_NOTE, vect_location,
7621 "vect_is_simple_use: operand ");
7622 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7623 dump_printf (MSG_NOTE, "\n");
7626 if (CONSTANT_CLASS_P (operand))
7628 *dt = vect_constant_def;
7629 return true;
7632 if (is_gimple_min_invariant (operand))
7634 *def = operand;
7635 *dt = vect_external_def;
7636 return true;
7639 if (TREE_CODE (operand) == PAREN_EXPR)
7641 if (dump_enabled_p ())
7642 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7643 operand = TREE_OPERAND (operand, 0);
7646 if (TREE_CODE (operand) != SSA_NAME)
7648 if (dump_enabled_p ())
7649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7650 "not ssa-name.\n");
7651 return false;
7654 *def_stmt = SSA_NAME_DEF_STMT (operand);
7655 if (*def_stmt == NULL)
7657 if (dump_enabled_p ())
7658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7659 "no def_stmt.\n");
7660 return false;
7663 if (dump_enabled_p ())
7665 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7666 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7667 dump_printf (MSG_NOTE, "\n");
7670 /* An empty stmt is expected only in the case of a function argument
7671 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
7672 if (gimple_nop_p (*def_stmt))
7674 *def = operand;
7675 *dt = vect_external_def;
7676 return true;
7679 bb = gimple_bb (*def_stmt);
7681 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7682 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7683 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7684 *dt = vect_external_def;
7685 else
7687 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7688 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7691 if (*dt == vect_unknown_def_type
7692 || (stmt
7693 && *dt == vect_double_reduction_def
7694 && gimple_code (stmt) != GIMPLE_PHI))
7696 if (dump_enabled_p ())
7697 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7698 "Unsupported pattern.\n");
7699 return false;
7702 if (dump_enabled_p ())
7703 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
7705 switch (gimple_code (*def_stmt))
7707 case GIMPLE_PHI:
7708 *def = gimple_phi_result (*def_stmt);
7709 break;
7711 case GIMPLE_ASSIGN:
7712 *def = gimple_assign_lhs (*def_stmt);
7713 break;
7715 case GIMPLE_CALL:
7716 *def = gimple_call_lhs (*def_stmt);
7717 if (*def != NULL)
7718 break;
7719 /* FALLTHRU */
7720 default:
7721 if (dump_enabled_p ())
7722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7723 "unsupported defining stmt:\n");
7724 return false;
7727 return true;
7730 /* Function vect_is_simple_use_1.
7732 Same as vect_is_simple_use but also determines the vector operand
7733 type of OPERAND and stores it to *VECTYPE. If the definition of
7734 OPERAND is vect_uninitialized_def, vect_constant_def or
7735 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
7736 is responsible for computing the best suited vector type for the
7737 scalar operand. */
7739 bool
7740 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7741 bb_vec_info bb_vinfo, gimple *def_stmt,
7742 tree *def, enum vect_def_type *dt, tree *vectype)
7744 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7745 def, dt))
7746 return false;
7748 /* Now get a vector type if the def is internal, otherwise supply
7749 NULL_TREE and leave it up to the caller to figure out a proper
7750 type for the use stmt. */
7751 if (*dt == vect_internal_def
7752 || *dt == vect_induction_def
7753 || *dt == vect_reduction_def
7754 || *dt == vect_double_reduction_def
7755 || *dt == vect_nested_cycle)
7757 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
7759 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7760 && !STMT_VINFO_RELEVANT (stmt_info)
7761 && !STMT_VINFO_LIVE_P (stmt_info))
7762 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7764 *vectype = STMT_VINFO_VECTYPE (stmt_info);
7765 gcc_assert (*vectype != NULL_TREE);
7767 else if (*dt == vect_uninitialized_def
7768 || *dt == vect_constant_def
7769 || *dt == vect_external_def)
7770 *vectype = NULL_TREE;
7771 else
7772 gcc_unreachable ();
7774 return true;
7778 /* Function supportable_widening_operation
7780 Check whether an operation represented by the code CODE is a
7781 widening operation that is supported by the target platform in
7782 vector form (i.e., when operating on arguments of type VECTYPE_IN
7783 producing a result of type VECTYPE_OUT).
7785 Widening operations we currently support are NOP (CONVERT), FLOAT,
7786 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations are supported
7787 by the target platform either directly (via vector tree-codes), or via
7788 target builtins.
7790 Output:
7791 - CODE1 and CODE2 are codes of vector operations to be used when
7792 vectorizing the operation, if available.
7793 - MULTI_STEP_CVT determines the number of required intermediate steps in
7794 case of multi-step conversion (like char->short->int - in that case
7795 MULTI_STEP_CVT will be 1).
7796 - INTERM_TYPES contains the intermediate type required to perform the
7797 widening operation (short in the above example). */
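/* For example (illustrative): widening a vector of chars to a vector of
   ints via a NOP conversion would, on a target providing the vec_unpack
   optabs, return CODE1 = VEC_UNPACK_LO_EXPR, CODE2 = VEC_UNPACK_HI_EXPR,
   MULTI_STEP_CVT = 1 and INTERM_TYPES containing the short vector type,
   i.e. one intermediate char->short step before the final short->int.  */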
7799 bool
7800 supportable_widening_operation (enum tree_code code, gimple stmt,
7801 tree vectype_out, tree vectype_in,
7802 enum tree_code *code1, enum tree_code *code2,
7803 int *multi_step_cvt,
7804 vec<tree> *interm_types)
7806 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7807 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7808 struct loop *vect_loop = NULL;
7809 machine_mode vec_mode;
7810 enum insn_code icode1, icode2;
7811 optab optab1, optab2;
7812 tree vectype = vectype_in;
7813 tree wide_vectype = vectype_out;
7814 enum tree_code c1, c2;
7815 int i;
7816 tree prev_type, intermediate_type;
7817 machine_mode intermediate_mode, prev_mode;
7818 optab optab3, optab4;
7820 *multi_step_cvt = 0;
7821 if (loop_info)
7822 vect_loop = LOOP_VINFO_LOOP (loop_info);
7824 switch (code)
7826 case WIDEN_MULT_EXPR:
7827 /* The result of a vectorized widening operation usually requires
7828 two vectors (because the widened results do not fit into one vector).
7829 The generated vector results would normally be expected to be
7830 generated in the same order as in the original scalar computation,
7831 i.e. if 8 results are generated in each vector iteration, they are
7832 to be organized as follows:
7833 vect1: [res1,res2,res3,res4],
7834 vect2: [res5,res6,res7,res8].
7836 However, in the special case that the result of the widening
7837 operation is used in a reduction computation only, the order doesn't
7838 matter (because when vectorizing a reduction we change the order of
7839 the computation). Some targets can take advantage of this and
7840 generate more efficient code. For example, targets like Altivec,
7841 that support widen_mult using a sequence of {mult_even,mult_odd}
7842 generate the following vectors:
7843 vect1: [res1,res3,res5,res7],
7844 vect2: [res2,res4,res6,res8].
7846 When vectorizing outer-loops, we execute the inner-loop sequentially
7847 (each vectorized inner-loop iteration contributes to VF outer-loop
7848 iterations in parallel). We therefore don't allow changing the
7849 order of the computation in the inner-loop during outer-loop
7850 vectorization. */
7851 /* TODO: Another case in which order doesn't *really* matter is when we
7852 widen and then contract again, e.g. (short)((int)x * y >> 8).
7853 Normally, pack_trunc performs an even/odd permute, whereas the
7854 repack from an even/odd expansion would be an interleave, which
7855 would be significantly simpler for e.g. AVX2. */
7856 /* In any case, in order to avoid duplicating the code below, recurse
7857 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7858 are properly set up for the caller. If we fail, we'll continue with
7859 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7860 if (vect_loop
7861 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
7862 && !nested_in_vect_loop_p (vect_loop, stmt)
7863 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
7864 stmt, vectype_out, vectype_in,
7865 code1, code2, multi_step_cvt,
7866 interm_types))
7868 /* Elements in a vector with the vect_used_by_reduction property cannot
7869 be reordered if the use chain with this property does not have the
7870 same operation. One such example is s += a * b, where elements
7871 in a and b cannot be reordered. Here we check if the vector defined
7872 by STMT is only directly used in the reduction statement. */
7873 tree lhs = gimple_assign_lhs (stmt);
7874 use_operand_p dummy;
7875 gimple use_stmt;
7876 stmt_vec_info use_stmt_info = NULL;
7877 if (single_imm_use (lhs, &dummy, &use_stmt)
7878 && (use_stmt_info = vinfo_for_stmt (use_stmt))
7879 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
7880 return true;
7882 c1 = VEC_WIDEN_MULT_LO_EXPR;
7883 c2 = VEC_WIDEN_MULT_HI_EXPR;
7884 break;
7886 case VEC_WIDEN_MULT_EVEN_EXPR:
7887 /* Support the recursion induced just above. */
7888 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
7889 c2 = VEC_WIDEN_MULT_ODD_EXPR;
7890 break;
7892 case WIDEN_LSHIFT_EXPR:
7893 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
7894 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
7895 break;
7897 CASE_CONVERT:
7898 c1 = VEC_UNPACK_LO_EXPR;
7899 c2 = VEC_UNPACK_HI_EXPR;
7900 break;
7902 case FLOAT_EXPR:
7903 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
7904 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
7905 break;
7907 case FIX_TRUNC_EXPR:
7908 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7909 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7910 computing the operation. */
7911 return false;
7913 default:
7914 gcc_unreachable ();
7917 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
7919 enum tree_code ctmp = c1;
7920 c1 = c2;
7921 c2 = ctmp;
7924 if (code == FIX_TRUNC_EXPR)
7926 /* The signedness is determined from the output operand. */
7927 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
7928 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
7930 else
7932 optab1 = optab_for_tree_code (c1, vectype, optab_default);
7933 optab2 = optab_for_tree_code (c2, vectype, optab_default);
7936 if (!optab1 || !optab2)
7937 return false;
7939 vec_mode = TYPE_MODE (vectype);
7940 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
7941 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
7942 return false;
7944 *code1 = c1;
7945 *code2 = c2;
7947 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7948 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7949 return true;
7951 /* Check if it's a multi-step conversion that can be done using intermediate
7952 types. */
7954 prev_type = vectype;
7955 prev_mode = vec_mode;
7957 if (!CONVERT_EXPR_CODE_P (code))
7958 return false;
7960 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
7961 intermediate steps in the promotion sequence. We try
7962 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
7963 not. */
7964 interm_types->create (MAX_INTERM_CVT_STEPS);
7965 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
7967 intermediate_mode = insn_data[icode1].operand[0].mode;
7968 intermediate_type
7969 = lang_hooks.types.type_for_mode (intermediate_mode,
7970 TYPE_UNSIGNED (prev_type));
7971 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
7972 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
7974 if (!optab3 || !optab4
7975 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
7976 || insn_data[icode1].operand[0].mode != intermediate_mode
7977 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
7978 || insn_data[icode2].operand[0].mode != intermediate_mode
7979 || ((icode1 = optab_handler (optab3, intermediate_mode))
7980 == CODE_FOR_nothing)
7981 || ((icode2 = optab_handler (optab4, intermediate_mode))
7982 == CODE_FOR_nothing))
7983 break;
7985 interm_types->quick_push (intermediate_type);
7986 (*multi_step_cvt)++;
7988 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7989 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7990 return true;
7992 prev_type = intermediate_type;
7993 prev_mode = intermediate_mode;
7996 interm_types->release ();
7997 return false;
8001 /* Function supportable_narrowing_operation
8003 Check whether an operation represented by the code CODE is a
8004 narrowing operation that is supported by the target platform in
8005 vector form (i.e., when operating on arguments of type VECTYPE_IN
8006 and producing a result of type VECTYPE_OUT).
8008 Narrowing operations we currently support are NOP (CONVERT) and
8009 FIX_TRUNC. This function checks if these operations are supported by
8010 the target platform directly via vector tree-codes.
8012 Output:
8013 - CODE1 is the code of a vector operation to be used when
8014 vectorizing the operation, if available.
8015 - MULTI_STEP_CVT determines the number of required intermediate steps in
8016 case of multi-step conversion (like int->short->char - in that case
8017 MULTI_STEP_CVT will be 1).
8018 - INTERM_TYPES contains the intermediate type required to perform the
8019 narrowing operation (short in the above example). */
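/* For example (illustrative): narrowing a vector of ints to a vector of
   chars via a NOP conversion would, on a target providing the
   vec_pack_trunc optabs, return CODE1 = VEC_PACK_TRUNC_EXPR,
   MULTI_STEP_CVT = 1 and INTERM_TYPES containing the short vector type,
   i.e. the ints are first packed to shorts and then to chars.  */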
8021 bool
8022 supportable_narrowing_operation (enum tree_code code,
8023 tree vectype_out, tree vectype_in,
8024 enum tree_code *code1, int *multi_step_cvt,
8025 vec<tree> *interm_types)
8027 machine_mode vec_mode;
8028 enum insn_code icode1;
8029 optab optab1, interm_optab;
8030 tree vectype = vectype_in;
8031 tree narrow_vectype = vectype_out;
8032 enum tree_code c1;
8033 tree intermediate_type;
8034 machine_mode intermediate_mode, prev_mode;
8035 int i;
8036 bool uns;
8038 *multi_step_cvt = 0;
8039 switch (code)
8041 CASE_CONVERT:
8042 c1 = VEC_PACK_TRUNC_EXPR;
8043 break;
8045 case FIX_TRUNC_EXPR:
8046 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8047 break;
8049 case FLOAT_EXPR:
8050 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8051 tree code and optabs used for computing the operation. */
8052 return false;
8054 default:
8055 gcc_unreachable ();
8058 if (code == FIX_TRUNC_EXPR)
8059 /* The signedness is determined from the output operand. */
8060 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8061 else
8062 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8064 if (!optab1)
8065 return false;
8067 vec_mode = TYPE_MODE (vectype);
8068 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8069 return false;
8071 *code1 = c1;
8073 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8074 return true;
8076 /* Check if it's a multi-step conversion that can be done using intermediate
8077 types. */
8078 prev_mode = vec_mode;
8079 if (code == FIX_TRUNC_EXPR)
8080 uns = TYPE_UNSIGNED (vectype_out);
8081 else
8082 uns = TYPE_UNSIGNED (vectype);
8084 /* For multi-step FIX_TRUNC_EXPR prefer a signed floating-point to integer
8085 conversion over an unsigned one, as an unsigned FIX_TRUNC_EXPR is often
8086 more costly than a signed one. */
8087 if (code == FIX_TRUNC_EXPR && uns)
8089 enum insn_code icode2;
8091 intermediate_type
8092 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8093 interm_optab
8094 = optab_for_tree_code (c1, intermediate_type, optab_default);
8095 if (interm_optab != unknown_optab
8096 && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
8097 && insn_data[icode1].operand[0].mode
8098 == insn_data[icode2].operand[0].mode)
8100 uns = false;
8101 optab1 = interm_optab;
8102 icode1 = icode2;
8106 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8107 intermediate steps in the narrowing sequence. We try
8108 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8109 interm_types->create (MAX_INTERM_CVT_STEPS);
8110 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8112 intermediate_mode = insn_data[icode1].operand[0].mode;
8113 intermediate_type
8114 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8115 interm_optab
8116 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8117 optab_default);
8118 if (!interm_optab
8119 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8120 || insn_data[icode1].operand[0].mode != intermediate_mode
8121 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8122 == CODE_FOR_nothing))
8123 break;
8125 interm_types->quick_push (intermediate_type);
8126 (*multi_step_cvt)++;
8128 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8129 return true;
8131 prev_mode = intermediate_mode;
8132 optab1 = interm_optab;
8135 interm_types->release ();
8136 return false;