[ARM/AArch64][testsuite] Add vpaddl tests.
[official-gcc.git] / gcc / tree-vect-stmts.c
blob6c3037de850e94826fc4c1355703a91a7beca84c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "hash-set.h"
28 #include "machmode.h"
29 #include "vec.h"
30 #include "double-int.h"
31 #include "input.h"
32 #include "alias.h"
33 #include "symtab.h"
34 #include "wide-int.h"
35 #include "inchash.h"
36 #include "tree.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "target.h"
40 #include "predict.h"
41 #include "hard-reg-set.h"
42 #include "function.h"
43 #include "dominance.h"
44 #include "cfg.h"
45 #include "basic-block.h"
46 #include "gimple-pretty-print.h"
47 #include "tree-ssa-alias.h"
48 #include "internal-fn.h"
49 #include "tree-eh.h"
50 #include "gimple-expr.h"
51 #include "is-a.h"
52 #include "gimple.h"
53 #include "gimplify.h"
54 #include "gimple-iterator.h"
55 #include "gimplify-me.h"
56 #include "gimple-ssa.h"
57 #include "tree-cfg.h"
58 #include "tree-phinodes.h"
59 #include "ssa-iterators.h"
60 #include "stringpool.h"
61 #include "tree-ssanames.h"
62 #include "tree-ssa-loop-manip.h"
63 #include "cfgloop.h"
64 #include "tree-ssa-loop.h"
65 #include "tree-scalar-evolution.h"
66 #include "hashtab.h"
67 #include "rtl.h"
68 #include "flags.h"
69 #include "statistics.h"
70 #include "real.h"
71 #include "fixed-value.h"
72 #include "insn-config.h"
73 #include "expmed.h"
74 #include "dojump.h"
75 #include "explow.h"
76 #include "calls.h"
77 #include "emit-rtl.h"
78 #include "varasm.h"
79 #include "stmt.h"
80 #include "expr.h"
81 #include "recog.h" /* FIXME: for insn_data */
82 #include "insn-codes.h"
83 #include "optabs.h"
84 #include "diagnostic-core.h"
85 #include "tree-vectorizer.h"
86 #include "hash-map.h"
87 #include "plugin-api.h"
88 #include "ipa-ref.h"
89 #include "cgraph.h"
90 #include "builtins.h"
92 /* For lang_hooks.types.type_for_mode. */
93 #include "langhooks.h"
95 /* Return the vectorized type for the given statement. */
97 tree
98 stmt_vectype (struct _stmt_vec_info *stmt_info)
100 return STMT_VINFO_VECTYPE (stmt_info);
103 /* Return TRUE iff the given statement is in an inner loop relative to
104 the loop being vectorized. */
105 bool
106 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
108 gimple stmt = STMT_VINFO_STMT (stmt_info);
109 basic_block bb = gimple_bb (stmt);
110 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
111 struct loop* loop;
113 if (!loop_vinfo)
114 return false;
116 loop = LOOP_VINFO_LOOP (loop_vinfo);
118 return (bb->loop_father == loop->inner);
121 /* Record the cost of a statement, either by directly informing the
122 target model or by saving it in a vector for later processing.
123 Return a preliminary estimate of the statement's cost. */
125 unsigned
126 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
127 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
128 int misalign, enum vect_cost_model_location where)
130 if (body_cost_vec)
132 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
133 add_stmt_info_to_vec (body_cost_vec, count, kind,
134 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
135 misalign);
136 return (unsigned)
137 (builtin_vectorization_cost (kind, vectype, misalign) * count);
140 else
142 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
143 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
144 void *target_cost_data;
146 if (loop_vinfo)
147 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
148 else
149 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
151 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
152 misalign, where);
156 /* Return a variable of type ELEM_TYPE[NELEMS]. */
158 static tree
159 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
161 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
162 "vect_array");
165 /* ARRAY is an array of vectors created by create_vector_array.
166 Return an SSA_NAME for the vector in index N. The reference
167 is part of the vectorization of STMT and the vector is associated
168 with scalar destination SCALAR_DEST. */
170 static tree
171 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
172 tree array, unsigned HOST_WIDE_INT n)
174 tree vect_type, vect, vect_name, array_ref;
175 gimple new_stmt;
177 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
178 vect_type = TREE_TYPE (TREE_TYPE (array));
179 vect = vect_create_destination_var (scalar_dest, vect_type);
180 array_ref = build4 (ARRAY_REF, vect_type, array,
181 build_int_cst (size_type_node, n),
182 NULL_TREE, NULL_TREE);
184 new_stmt = gimple_build_assign (vect, array_ref);
185 vect_name = make_ssa_name (vect, new_stmt);
186 gimple_assign_set_lhs (new_stmt, vect_name);
187 vect_finish_stmt_generation (stmt, new_stmt, gsi);
189 return vect_name;
192 /* ARRAY is an array of vectors created by create_vector_array.
193 Emit code to store SSA_NAME VECT in index N of the array.
194 The store is part of the vectorization of STMT. */
196 static void
197 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
198 tree array, unsigned HOST_WIDE_INT n)
200 tree array_ref;
201 gimple new_stmt;
203 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
204 build_int_cst (size_type_node, n),
205 NULL_TREE, NULL_TREE);
207 new_stmt = gimple_build_assign (array_ref, vect);
208 vect_finish_stmt_generation (stmt, new_stmt, gsi);
211 /* PTR is a pointer to an array of type TYPE. Return a representation
212 of *PTR. The memory reference replaces those in FIRST_DR
213 (and its group). */
215 static tree
216 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
218 tree mem_ref, alias_ptr_type;
220 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
221 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
222 /* Arrays have the same alignment as their type. */
223 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
224 return mem_ref;
227 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
229 /* Function vect_mark_relevant.
231 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
233 static void
234 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
235 enum vect_relevant relevant, bool live_p,
236 bool used_in_pattern)
238 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
239 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
240 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
241 gimple pattern_stmt;
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "mark relevant %d, live %d.\n", relevant, live_p);
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
249 may have their own uses that are not in any pattern, in such cases the
250 stmt itself should be marked. */
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
253 bool found = false;
254 if (!used_in_pattern)
256 imm_use_iterator imm_iter;
257 use_operand_p use_p;
258 gimple use_stmt;
259 tree lhs;
260 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
261 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
263 if (is_gimple_assign (stmt))
264 lhs = gimple_assign_lhs (stmt);
265 else
266 lhs = gimple_call_lhs (stmt);
268 /* This use is out of pattern use, if LHS has other uses that are
269 pattern uses, we should mark the stmt itself, and not the pattern
270 stmt. */
271 if (lhs && TREE_CODE (lhs) == SSA_NAME)
272 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
274 if (is_gimple_debug (USE_STMT (use_p)))
275 continue;
276 use_stmt = USE_STMT (use_p);
278 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
279 continue;
281 if (vinfo_for_stmt (use_stmt)
282 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
284 found = true;
285 break;
290 if (!found)
292 /* This is the last stmt in a sequence that was detected as a
293 pattern that can potentially be vectorized. Don't mark the stmt
294 as relevant/live because it's not going to be vectorized.
295 Instead mark the pattern-stmt that replaces it. */
297 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
299 if (dump_enabled_p ())
300 dump_printf_loc (MSG_NOTE, vect_location,
301 "last stmt in pattern. don't mark"
302 " relevant/live.\n");
303 stmt_info = vinfo_for_stmt (pattern_stmt);
304 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
305 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
306 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
307 stmt = pattern_stmt;
311 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
312 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
313 STMT_VINFO_RELEVANT (stmt_info) = relevant;
315 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
316 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
318 if (dump_enabled_p ())
319 dump_printf_loc (MSG_NOTE, vect_location,
320 "already marked relevant/live.\n");
321 return;
324 worklist->safe_push (stmt);
328 /* Function vect_stmt_relevant_p.
330 Return true if STMT in loop that is represented by LOOP_VINFO is
331 "relevant for vectorization".
333 A stmt is considered "relevant for vectorization" if:
334 - it has uses outside the loop.
335 - it has vdefs (it alters memory).
336 - control stmts in the loop (except for the exit condition).
338 CHECKME: what other side effects would the vectorizer allow? */
340 static bool
341 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
342 enum vect_relevant *relevant, bool *live_p)
344 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
345 ssa_op_iter op_iter;
346 imm_use_iterator imm_iter;
347 use_operand_p use_p;
348 def_operand_p def_p;
350 *relevant = vect_unused_in_scope;
351 *live_p = false;
353 /* cond stmt other than loop exit cond. */
354 if (is_ctrl_stmt (stmt)
355 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
356 != loop_exit_ctrl_vec_info_type)
357 *relevant = vect_used_in_scope;
359 /* changing memory. */
360 if (gimple_code (stmt) != GIMPLE_PHI)
361 if (gimple_vdef (stmt)
362 && !gimple_clobber_p (stmt))
364 if (dump_enabled_p ())
365 dump_printf_loc (MSG_NOTE, vect_location,
366 "vec_stmt_relevant_p: stmt has vdefs.\n");
367 *relevant = vect_used_in_scope;
370 /* uses outside the loop. */
371 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
373 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
375 basic_block bb = gimple_bb (USE_STMT (use_p));
376 if (!flow_bb_inside_loop_p (loop, bb))
378 if (dump_enabled_p ())
379 dump_printf_loc (MSG_NOTE, vect_location,
380 "vec_stmt_relevant_p: used out of loop.\n");
382 if (is_gimple_debug (USE_STMT (use_p)))
383 continue;
385 /* We expect all such uses to be in the loop exit phis
386 (because of loop closed form) */
387 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
388 gcc_assert (bb == single_exit (loop)->dest);
390 *live_p = true;
395 return (*live_p || *relevant);
399 /* Function exist_non_indexing_operands_for_use_p
401 USE is one of the uses attached to STMT. Check if USE is
402 used in STMT for anything other than indexing an array. */
404 static bool
405 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
407 tree operand;
408 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
410 /* USE corresponds to some operand in STMT. If there is no data
411 reference in STMT, then any operand that corresponds to USE
412 is not indexing an array. */
413 if (!STMT_VINFO_DATA_REF (stmt_info))
414 return true;
416 /* STMT has a data_ref. FORNOW this means that its of one of
417 the following forms:
418 -1- ARRAY_REF = var
419 -2- var = ARRAY_REF
420 (This should have been verified in analyze_data_refs).
422 'var' in the second case corresponds to a def, not a use,
423 so USE cannot correspond to any operands that are not used
424 for array indexing.
426 Therefore, all we need to check is if STMT falls into the
427 first case, and whether var corresponds to USE. */
429 if (!gimple_assign_copy_p (stmt))
431 if (is_gimple_call (stmt)
432 && gimple_call_internal_p (stmt))
433 switch (gimple_call_internal_fn (stmt))
435 case IFN_MASK_STORE:
436 operand = gimple_call_arg (stmt, 3);
437 if (operand == use)
438 return true;
439 /* FALLTHRU */
440 case IFN_MASK_LOAD:
441 operand = gimple_call_arg (stmt, 2);
442 if (operand == use)
443 return true;
444 break;
445 default:
446 break;
448 return false;
451 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
452 return false;
453 operand = gimple_assign_rhs1 (stmt);
454 if (TREE_CODE (operand) != SSA_NAME)
455 return false;
457 if (operand == use)
458 return true;
460 return false;
465 Function process_use.
467 Inputs:
468 - a USE in STMT in a loop represented by LOOP_VINFO
469 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
470 that defined USE. This is done by calling mark_relevant and passing it
471 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
472 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
473 be performed.
475 Outputs:
476 Generally, LIVE_P and RELEVANT are used to define the liveness and
477 relevance info of the DEF_STMT of this USE:
478 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
479 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
480 Exceptions:
481 - case 1: If USE is used only for address computations (e.g. array indexing),
482 which does not need to be directly vectorized, then the liveness/relevance
483 of the respective DEF_STMT is left unchanged.
484 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
485 skip DEF_STMT cause it had already been processed.
486 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
487 be modified accordingly.
489 Return true if everything is as expected. Return false otherwise. */
491 static bool
492 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
493 enum vect_relevant relevant, vec<gimple> *worklist,
494 bool force)
496 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
497 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
498 stmt_vec_info dstmt_vinfo;
499 basic_block bb, def_bb;
500 tree def;
501 gimple def_stmt;
502 enum vect_def_type dt;
504 /* case 1: we are only interested in uses that need to be vectorized. Uses
505 that are used for address computation are not considered relevant. */
506 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
507 return true;
509 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
511 if (dump_enabled_p ())
512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
513 "not vectorized: unsupported use in stmt.\n");
514 return false;
517 if (!def_stmt || gimple_nop_p (def_stmt))
518 return true;
520 def_bb = gimple_bb (def_stmt);
521 if (!flow_bb_inside_loop_p (loop, def_bb))
523 if (dump_enabled_p ())
524 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
525 return true;
528 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
529 DEF_STMT must have already been processed, because this should be the
530 only way that STMT, which is a reduction-phi, was put in the worklist,
531 as there should be no other uses for DEF_STMT in the loop. So we just
532 check that everything is as expected, and we are done. */
533 dstmt_vinfo = vinfo_for_stmt (def_stmt);
534 bb = gimple_bb (stmt);
535 if (gimple_code (stmt) == GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
537 && gimple_code (def_stmt) != GIMPLE_PHI
538 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
539 && bb->loop_father == def_bb->loop_father)
541 if (dump_enabled_p ())
542 dump_printf_loc (MSG_NOTE, vect_location,
543 "reduc-stmt defining reduc-phi in the same nest.\n");
544 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
545 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
546 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
547 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
548 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
549 return true;
552 /* case 3a: outer-loop stmt defining an inner-loop stmt:
553 outer-loop-header-bb:
554 d = def_stmt
555 inner-loop:
556 stmt # use (d)
557 outer-loop-tail-bb:
558 ... */
559 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
561 if (dump_enabled_p ())
562 dump_printf_loc (MSG_NOTE, vect_location,
563 "outer-loop def-stmt defining inner-loop stmt.\n");
565 switch (relevant)
567 case vect_unused_in_scope:
568 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
569 vect_used_in_scope : vect_unused_in_scope;
570 break;
572 case vect_used_in_outer_by_reduction:
573 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
574 relevant = vect_used_by_reduction;
575 break;
577 case vect_used_in_outer:
578 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
579 relevant = vect_used_in_scope;
580 break;
582 case vect_used_in_scope:
583 break;
585 default:
586 gcc_unreachable ();
590 /* case 3b: inner-loop stmt defining an outer-loop stmt:
591 outer-loop-header-bb:
593 inner-loop:
594 d = def_stmt
595 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
596 stmt # use (d) */
597 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
599 if (dump_enabled_p ())
600 dump_printf_loc (MSG_NOTE, vect_location,
601 "inner-loop def-stmt defining outer-loop stmt.\n");
603 switch (relevant)
605 case vect_unused_in_scope:
606 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
607 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
608 vect_used_in_outer_by_reduction : vect_unused_in_scope;
609 break;
611 case vect_used_by_reduction:
612 relevant = vect_used_in_outer_by_reduction;
613 break;
615 case vect_used_in_scope:
616 relevant = vect_used_in_outer;
617 break;
619 default:
620 gcc_unreachable ();
624 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
625 is_pattern_stmt_p (stmt_vinfo));
626 return true;
630 /* Function vect_mark_stmts_to_be_vectorized.
632 Not all stmts in the loop need to be vectorized. For example:
634 for i...
635 for j...
636 1. T0 = i + j
637 2. T1 = a[T0]
639 3. j = j + 1
641 Stmt 1 and 3 do not need to be vectorized, because loop control and
642 addressing of vectorized data-refs are handled differently.
644 This pass detects such stmts. */
646 bool
647 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
649 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
650 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
651 unsigned int nbbs = loop->num_nodes;
652 gimple_stmt_iterator si;
653 gimple stmt;
654 unsigned int i;
655 stmt_vec_info stmt_vinfo;
656 basic_block bb;
657 gimple phi;
658 bool live_p;
659 enum vect_relevant relevant, tmp_relevant;
660 enum vect_def_type def_type;
662 if (dump_enabled_p ())
663 dump_printf_loc (MSG_NOTE, vect_location,
664 "=== vect_mark_stmts_to_be_vectorized ===\n");
666 auto_vec<gimple, 64> worklist;
668 /* 1. Init worklist. */
669 for (i = 0; i < nbbs; i++)
671 bb = bbs[i];
672 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
674 phi = gsi_stmt (si);
675 if (dump_enabled_p ())
677 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
678 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
679 dump_printf (MSG_NOTE, "\n");
682 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
683 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
685 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
687 stmt = gsi_stmt (si);
688 if (dump_enabled_p ())
690 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
691 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
692 dump_printf (MSG_NOTE, "\n");
695 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
696 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
700 /* 2. Process_worklist */
701 while (worklist.length () > 0)
703 use_operand_p use_p;
704 ssa_op_iter iter;
706 stmt = worklist.pop ();
707 if (dump_enabled_p ())
709 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
710 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
711 dump_printf (MSG_NOTE, "\n");
714 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
715 (DEF_STMT) as relevant/irrelevant and live/dead according to the
716 liveness and relevance properties of STMT. */
717 stmt_vinfo = vinfo_for_stmt (stmt);
718 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
719 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
721 /* Generally, the liveness and relevance properties of STMT are
722 propagated as is to the DEF_STMTs of its USEs:
723 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
724 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
726 One exception is when STMT has been identified as defining a reduction
727 variable; in this case we set the liveness/relevance as follows:
728 live_p = false
729 relevant = vect_used_by_reduction
730 This is because we distinguish between two kinds of relevant stmts -
731 those that are used by a reduction computation, and those that are
732 (also) used by a regular computation. This allows us later on to
733 identify stmts that are used solely by a reduction, and therefore the
734 order of the results that they produce does not have to be kept. */
736 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
737 tmp_relevant = relevant;
738 switch (def_type)
740 case vect_reduction_def:
741 switch (tmp_relevant)
743 case vect_unused_in_scope:
744 relevant = vect_used_by_reduction;
745 break;
747 case vect_used_by_reduction:
748 if (gimple_code (stmt) == GIMPLE_PHI)
749 break;
750 /* fall through */
752 default:
753 if (dump_enabled_p ())
754 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
755 "unsupported use of reduction.\n");
756 return false;
759 live_p = false;
760 break;
762 case vect_nested_cycle:
763 if (tmp_relevant != vect_unused_in_scope
764 && tmp_relevant != vect_used_in_outer_by_reduction
765 && tmp_relevant != vect_used_in_outer)
767 if (dump_enabled_p ())
768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
769 "unsupported use of nested cycle.\n");
771 return false;
774 live_p = false;
775 break;
777 case vect_double_reduction_def:
778 if (tmp_relevant != vect_unused_in_scope
779 && tmp_relevant != vect_used_by_reduction)
781 if (dump_enabled_p ())
782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
783 "unsupported use of double reduction.\n");
785 return false;
788 live_p = false;
789 break;
791 default:
792 break;
795 if (is_pattern_stmt_p (stmt_vinfo))
797 /* Pattern statements are not inserted into the code, so
798 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
799 have to scan the RHS or function arguments instead. */
800 if (is_gimple_assign (stmt))
802 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
803 tree op = gimple_assign_rhs1 (stmt);
805 i = 1;
806 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
808 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
809 live_p, relevant, &worklist, false)
810 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
811 live_p, relevant, &worklist, false))
812 return false;
813 i = 2;
815 for (; i < gimple_num_ops (stmt); i++)
817 op = gimple_op (stmt, i);
818 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
819 &worklist, false))
820 return false;
823 else if (is_gimple_call (stmt))
825 for (i = 0; i < gimple_call_num_args (stmt); i++)
827 tree arg = gimple_call_arg (stmt, i);
828 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
829 &worklist, false))
830 return false;
834 else
835 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
837 tree op = USE_FROM_PTR (use_p);
838 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
839 &worklist, false))
840 return false;
843 if (STMT_VINFO_GATHER_P (stmt_vinfo))
845 tree off;
846 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
847 gcc_assert (decl);
848 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
849 &worklist, true))
850 return false;
852 } /* while worklist */
854 return true;
858 /* Function vect_model_simple_cost.
860 Models cost for simple operations, i.e. those that only emit ncopies of a
861 single op. Right now, this does not account for multiple insns that could
862 be generated for the single vector op. We will handle that shortly. */
864 void
865 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
866 enum vect_def_type *dt,
867 stmt_vector_for_cost *prologue_cost_vec,
868 stmt_vector_for_cost *body_cost_vec)
870 int i;
871 int inside_cost = 0, prologue_cost = 0;
873 /* The SLP costs were already calculated during SLP tree build. */
874 if (PURE_SLP_STMT (stmt_info))
875 return;
877 /* FORNOW: Assuming maximum 2 args per stmts. */
878 for (i = 0; i < 2; i++)
879 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
880 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
881 stmt_info, 0, vect_prologue);
883 /* Pass the inside-of-loop statements to the target-specific cost model. */
884 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
885 stmt_info, 0, vect_body);
887 if (dump_enabled_p ())
888 dump_printf_loc (MSG_NOTE, vect_location,
889 "vect_model_simple_cost: inside_cost = %d, "
890 "prologue_cost = %d .\n", inside_cost, prologue_cost);
894 /* Model cost for type demotion and promotion operations. PWR is normally
895 zero for single-step promotions and demotions. It will be one if
896 two-step promotion/demotion is required, and so on. Each additional
897 step doubles the number of instructions required. */
899 static void
900 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
901 enum vect_def_type *dt, int pwr)
903 int i, tmp;
904 int inside_cost = 0, prologue_cost = 0;
905 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
906 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
907 void *target_cost_data;
909 /* The SLP costs were already calculated during SLP tree build. */
910 if (PURE_SLP_STMT (stmt_info))
911 return;
913 if (loop_vinfo)
914 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
915 else
916 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
918 for (i = 0; i < pwr + 1; i++)
920 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
921 (i + 1) : i;
922 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
923 vec_promote_demote, stmt_info, 0,
924 vect_body);
927 /* FORNOW: Assuming maximum 2 args per stmts. */
928 for (i = 0; i < 2; i++)
929 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
930 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
931 stmt_info, 0, vect_prologue);
933 if (dump_enabled_p ())
934 dump_printf_loc (MSG_NOTE, vect_location,
935 "vect_model_promotion_demotion_cost: inside_cost = %d, "
936 "prologue_cost = %d .\n", inside_cost, prologue_cost);
939 /* Function vect_cost_group_size
941 For grouped load or store, return the group_size only if it is the first
942 load or store of a group, else return 1. This ensures that group size is
943 only returned once per group. */
945 static int
946 vect_cost_group_size (stmt_vec_info stmt_info)
948 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
950 if (first_stmt == STMT_VINFO_STMT (stmt_info))
951 return GROUP_SIZE (stmt_info);
953 return 1;
957 /* Function vect_model_store_cost
959 Models cost for stores. In the case of grouped accesses, one access
960 has the overhead of the grouped access attributed to it. */
962 void
963 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
964 bool store_lanes_p, enum vect_def_type dt,
965 slp_tree slp_node,
966 stmt_vector_for_cost *prologue_cost_vec,
967 stmt_vector_for_cost *body_cost_vec)
969 int group_size;
970 unsigned int inside_cost = 0, prologue_cost = 0;
971 struct data_reference *first_dr;
972 gimple first_stmt;
974 /* The SLP costs were already calculated during SLP tree build. */
975 if (PURE_SLP_STMT (stmt_info))
976 return;
978 if (dt == vect_constant_def || dt == vect_external_def)
979 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
980 stmt_info, 0, vect_prologue);
982 /* Grouped access? */
983 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
985 if (slp_node)
987 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
988 group_size = 1;
990 else
992 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
993 group_size = vect_cost_group_size (stmt_info);
996 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
998 /* Not a grouped access. */
999 else
1001 group_size = 1;
1002 first_dr = STMT_VINFO_DATA_REF (stmt_info);
1005 /* We assume that the cost of a single store-lanes instruction is
1006 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
1007 access is instead being provided by a permute-and-store operation,
1008 include the cost of the permutes. */
1009 if (!store_lanes_p && group_size > 1)
1011 /* Uses a high and low interleave or shuffle operations for each
1012 needed permute. */
1013 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1014 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1015 stmt_info, 0, vect_body);
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_NOTE, vect_location,
1019 "vect_model_store_cost: strided group_size = %d .\n",
1020 group_size);
1023 /* Costs of the stores. */
1024 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1026 if (dump_enabled_p ())
1027 dump_printf_loc (MSG_NOTE, vect_location,
1028 "vect_model_store_cost: inside_cost = %d, "
1029 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1033 /* Calculate cost of DR's memory access. */
1034 void
1035 vect_get_store_cost (struct data_reference *dr, int ncopies,
1036 unsigned int *inside_cost,
1037 stmt_vector_for_cost *body_cost_vec)
1039 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1040 gimple stmt = DR_STMT (dr);
1041 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1043 switch (alignment_support_scheme)
1045 case dr_aligned:
1047 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1048 vector_store, stmt_info, 0,
1049 vect_body);
1051 if (dump_enabled_p ())
1052 dump_printf_loc (MSG_NOTE, vect_location,
1053 "vect_model_store_cost: aligned.\n");
1054 break;
1057 case dr_unaligned_supported:
1059 /* Here, we assign an additional cost for the unaligned store. */
1060 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1061 unaligned_store, stmt_info,
1062 DR_MISALIGNMENT (dr), vect_body);
1063 if (dump_enabled_p ())
1064 dump_printf_loc (MSG_NOTE, vect_location,
1065 "vect_model_store_cost: unaligned supported by "
1066 "hardware.\n");
1067 break;
1070 case dr_unaligned_unsupported:
1072 *inside_cost = VECT_MAX_COST;
1074 if (dump_enabled_p ())
1075 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1076 "vect_model_store_cost: unsupported access.\n");
1077 break;
1080 default:
1081 gcc_unreachable ();
1086 /* Function vect_model_load_cost
1088 Models cost for loads. In the case of grouped accesses, the last access
1089 has the overhead of the grouped access attributed to it. Since unaligned
1090 accesses are supported for loads, we also account for the costs of the
1091 access scheme chosen. */
1093 void
1094 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1095 bool load_lanes_p, slp_tree slp_node,
1096 stmt_vector_for_cost *prologue_cost_vec,
1097 stmt_vector_for_cost *body_cost_vec)
1099 int group_size;
1100 gimple first_stmt;
1101 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1102 unsigned int inside_cost = 0, prologue_cost = 0;
1104 /* The SLP costs were already calculated during SLP tree build. */
1105 if (PURE_SLP_STMT (stmt_info))
1106 return;
1108 /* Grouped accesses? */
1109 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1110 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1112 group_size = vect_cost_group_size (stmt_info);
1113 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1115 /* Not a grouped access. */
1116 else
1118 group_size = 1;
1119 first_dr = dr;
1122 /* We assume that the cost of a single load-lanes instruction is
1123 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1124 access is instead being provided by a load-and-permute operation,
1125 include the cost of the permutes. */
1126 if (!load_lanes_p && group_size > 1)
1128 /* Uses an even and odd extract operations or shuffle operations
1129 for each needed permute. */
1130 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1131 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1132 stmt_info, 0, vect_body);
1134 if (dump_enabled_p ())
1135 dump_printf_loc (MSG_NOTE, vect_location,
1136 "vect_model_load_cost: strided group_size = %d .\n",
1137 group_size);
1140 /* The loads themselves. */
1141 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1143 /* N scalar loads plus gathering them into a vector. */
1144 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1145 inside_cost += record_stmt_cost (body_cost_vec,
1146 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1147 scalar_load, stmt_info, 0, vect_body);
1148 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1149 stmt_info, 0, vect_body);
1151 else
1152 vect_get_load_cost (first_dr, ncopies,
1153 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1154 || group_size > 1 || slp_node),
1155 &inside_cost, &prologue_cost,
1156 prologue_cost_vec, body_cost_vec, true);
1158 if (dump_enabled_p ())
1159 dump_printf_loc (MSG_NOTE, vect_location,
1160 "vect_model_load_cost: inside_cost = %d, "
1161 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1165 /* Calculate cost of DR's memory access. */
1166 void
1167 vect_get_load_cost (struct data_reference *dr, int ncopies,
1168 bool add_realign_cost, unsigned int *inside_cost,
1169 unsigned int *prologue_cost,
1170 stmt_vector_for_cost *prologue_cost_vec,
1171 stmt_vector_for_cost *body_cost_vec,
1172 bool record_prologue_costs)
1174 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1175 gimple stmt = DR_STMT (dr);
1176 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1178 switch (alignment_support_scheme)
1180 case dr_aligned:
1182 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1183 stmt_info, 0, vect_body);
1185 if (dump_enabled_p ())
1186 dump_printf_loc (MSG_NOTE, vect_location,
1187 "vect_model_load_cost: aligned.\n");
1189 break;
1191 case dr_unaligned_supported:
1193 /* Here, we assign an additional cost for the unaligned load. */
1194 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1195 unaligned_load, stmt_info,
1196 DR_MISALIGNMENT (dr), vect_body);
1198 if (dump_enabled_p ())
1199 dump_printf_loc (MSG_NOTE, vect_location,
1200 "vect_model_load_cost: unaligned supported by "
1201 "hardware.\n");
1203 break;
1205 case dr_explicit_realign:
1207 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1208 vector_load, stmt_info, 0, vect_body);
1209 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1210 vec_perm, stmt_info, 0, vect_body);
1212 /* FIXME: If the misalignment remains fixed across the iterations of
1213 the containing loop, the following cost should be added to the
1214 prologue costs. */
1215 if (targetm.vectorize.builtin_mask_for_load)
1216 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1217 stmt_info, 0, vect_body);
1219 if (dump_enabled_p ())
1220 dump_printf_loc (MSG_NOTE, vect_location,
1221 "vect_model_load_cost: explicit realign\n");
1223 break;
1225 case dr_explicit_realign_optimized:
1227 if (dump_enabled_p ())
1228 dump_printf_loc (MSG_NOTE, vect_location,
1229 "vect_model_load_cost: unaligned software "
1230 "pipelined.\n");
1232 /* Unaligned software pipeline has a load of an address, an initial
1233 load, and possibly a mask operation to "prime" the loop. However,
1234 if this is an access in a group of loads, which provide grouped
1235 access, then the above cost should only be considered for one
1236 access in the group. Inside the loop, there is a load op
1237 and a realignment op. */
1239 if (add_realign_cost && record_prologue_costs)
1241 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1242 vector_stmt, stmt_info,
1243 0, vect_prologue);
1244 if (targetm.vectorize.builtin_mask_for_load)
1245 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1246 vector_stmt, stmt_info,
1247 0, vect_prologue);
1250 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1251 stmt_info, 0, vect_body);
1252 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1253 stmt_info, 0, vect_body);
1255 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_NOTE, vect_location,
1257 "vect_model_load_cost: explicit realign optimized"
1258 "\n");
1260 break;
1263 case dr_unaligned_unsupported:
1265 *inside_cost = VECT_MAX_COST;
1267 if (dump_enabled_p ())
1268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1269 "vect_model_load_cost: unsupported access.\n");
1270 break;
1273 default:
1274 gcc_unreachable ();
1278 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1279 the loop preheader for the vectorized stmt STMT. */
1281 static void
1282 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1284 if (gsi)
1285 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1286 else
1288 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1289 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1291 if (loop_vinfo)
1293 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1294 basic_block new_bb;
1295 edge pe;
1297 if (nested_in_vect_loop_p (loop, stmt))
1298 loop = loop->inner;
1300 pe = loop_preheader_edge (loop);
1301 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1302 gcc_assert (!new_bb);
1304 else
1306 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1307 basic_block bb;
1308 gimple_stmt_iterator gsi_bb_start;
1310 gcc_assert (bb_vinfo);
1311 bb = BB_VINFO_BB (bb_vinfo);
1312 gsi_bb_start = gsi_after_labels (bb);
1313 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1317 if (dump_enabled_p ())
1319 dump_printf_loc (MSG_NOTE, vect_location,
1320 "created new init_stmt: ");
1321 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1322 dump_printf (MSG_NOTE, "\n");
1326 /* Function vect_init_vector.
1328 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1329 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1330 vector type a vector with all elements equal to VAL is created first.
1331 Place the initialization at BSI if it is not NULL. Otherwise, place the
1332 initialization at the loop preheader.
1333 Return the DEF of INIT_STMT.
1334 It will be used in the vectorization of STMT. */
1336 tree
1337 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1339 tree new_var;
1340 gimple init_stmt;
1341 tree vec_oprnd;
1342 tree new_temp;
1344 if (TREE_CODE (type) == VECTOR_TYPE
1345 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1347 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1349 if (CONSTANT_CLASS_P (val))
1350 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1351 else
1353 new_temp = make_ssa_name (TREE_TYPE (type));
1354 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1355 vect_init_vector_1 (stmt, init_stmt, gsi);
1356 val = new_temp;
1359 val = build_vector_from_val (type, val);
1362 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1363 init_stmt = gimple_build_assign (new_var, val);
1364 new_temp = make_ssa_name (new_var, init_stmt);
1365 gimple_assign_set_lhs (init_stmt, new_temp);
1366 vect_init_vector_1 (stmt, init_stmt, gsi);
1367 vec_oprnd = gimple_assign_lhs (init_stmt);
1368 return vec_oprnd;
1372 /* Function vect_get_vec_def_for_operand.
1374 OP is an operand in STMT. This function returns a (vector) def that will be
1375 used in the vectorized stmt for STMT.
1377 In the case that OP is an SSA_NAME which is defined in the loop, then
1378 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1380 In case OP is an invariant or constant, a new stmt that creates a vector def
1381 needs to be introduced. */
1383 tree
1384 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1386 tree vec_oprnd;
1387 gimple vec_stmt;
1388 gimple def_stmt;
1389 stmt_vec_info def_stmt_info = NULL;
1390 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1391 unsigned int nunits;
1392 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1393 tree def;
1394 enum vect_def_type dt;
1395 bool is_simple_use;
1396 tree vector_type;
1398 if (dump_enabled_p ())
1400 dump_printf_loc (MSG_NOTE, vect_location,
1401 "vect_get_vec_def_for_operand: ");
1402 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1403 dump_printf (MSG_NOTE, "\n");
1406 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1407 &def_stmt, &def, &dt);
1408 gcc_assert (is_simple_use);
1409 if (dump_enabled_p ())
1411 int loc_printed = 0;
1412 if (def)
1414 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1415 loc_printed = 1;
1416 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1417 dump_printf (MSG_NOTE, "\n");
1419 if (def_stmt)
1421 if (loc_printed)
1422 dump_printf (MSG_NOTE, " def_stmt = ");
1423 else
1424 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1425 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1426 dump_printf (MSG_NOTE, "\n");
1430 switch (dt)
1432 /* Case 1: operand is a constant. */
1433 case vect_constant_def:
1435 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1436 gcc_assert (vector_type);
1437 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1439 if (scalar_def)
1440 *scalar_def = op;
1442 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1443 if (dump_enabled_p ())
1444 dump_printf_loc (MSG_NOTE, vect_location,
1445 "Create vector_cst. nunits = %d\n", nunits);
1447 return vect_init_vector (stmt, op, vector_type, NULL);
1450 /* Case 2: operand is defined outside the loop - loop invariant. */
1451 case vect_external_def:
1453 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1454 gcc_assert (vector_type);
1456 if (scalar_def)
1457 *scalar_def = def;
1459 /* Create 'vec_inv = {inv,inv,..,inv}' */
1460 if (dump_enabled_p ())
1461 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1463 return vect_init_vector (stmt, def, vector_type, NULL);
1466 /* Case 3: operand is defined inside the loop. */
1467 case vect_internal_def:
1469 if (scalar_def)
1470 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1472 /* Get the def from the vectorized stmt. */
1473 def_stmt_info = vinfo_for_stmt (def_stmt);
1475 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1476 /* Get vectorized pattern statement. */
1477 if (!vec_stmt
1478 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1479 && !STMT_VINFO_RELEVANT (def_stmt_info))
1480 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1481 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1482 gcc_assert (vec_stmt);
1483 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1484 vec_oprnd = PHI_RESULT (vec_stmt);
1485 else if (is_gimple_call (vec_stmt))
1486 vec_oprnd = gimple_call_lhs (vec_stmt);
1487 else
1488 vec_oprnd = gimple_assign_lhs (vec_stmt);
1489 return vec_oprnd;
1492 /* Case 4: operand is defined by a loop header phi - reduction */
1493 case vect_reduction_def:
1494 case vect_double_reduction_def:
1495 case vect_nested_cycle:
1497 struct loop *loop;
1499 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1500 loop = (gimple_bb (def_stmt))->loop_father;
1502 /* Get the def before the loop */
1503 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1504 return get_initial_def_for_reduction (stmt, op, scalar_def);
1507 /* Case 5: operand is defined by loop-header phi - induction. */
1508 case vect_induction_def:
1510 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1512 /* Get the def from the vectorized stmt. */
1513 def_stmt_info = vinfo_for_stmt (def_stmt);
1514 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1515 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1516 vec_oprnd = PHI_RESULT (vec_stmt);
1517 else
1518 vec_oprnd = gimple_get_lhs (vec_stmt);
1519 return vec_oprnd;
1522 default:
1523 gcc_unreachable ();
1528 /* Function vect_get_vec_def_for_stmt_copy
1530 Return a vector-def for an operand. This function is used when the
1531 vectorized stmt to be created (by the caller to this function) is a "copy"
1532 created in case the vectorized result cannot fit in one vector, and several
1533 copies of the vector-stmt are required. In this case the vector-def is
1534 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1535 of the stmt that defines VEC_OPRND.
1536 DT is the type of the vector def VEC_OPRND.
1538 Context:
1539 In case the vectorization factor (VF) is bigger than the number
1540 of elements that can fit in a vectype (nunits), we have to generate
1541 more than one vector stmt to vectorize the scalar stmt. This situation
1542 arises when there are multiple data-types operated upon in the loop; the
1543 smallest data-type determines the VF, and as a result, when vectorizing
1544 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1545 vector stmt (each computing a vector of 'nunits' results, and together
1546 computing 'VF' results in each iteration). This function is called when
1547 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1548 which VF=16 and nunits=4, so the number of copies required is 4):
1550 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1552 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1553 VS1.1: vx.1 = memref1 VS1.2
1554 VS1.2: vx.2 = memref2 VS1.3
1555 VS1.3: vx.3 = memref3
1557 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1558 VSnew.1: vz1 = vx.1 + ... VSnew.2
1559 VSnew.2: vz2 = vx.2 + ... VSnew.3
1560 VSnew.3: vz3 = vx.3 + ...
1562 The vectorization of S1 is explained in vectorizable_load.
1563 The vectorization of S2:
1564 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1565 the function 'vect_get_vec_def_for_operand' is called to
1566 get the relevant vector-def for each operand of S2. For operand x it
1567 returns the vector-def 'vx.0'.
1569 To create the remaining copies of the vector-stmt (VSnew.j), this
1570 function is called to get the relevant vector-def for each operand. It is
1571 obtained from the respective VS1.j stmt, which is recorded in the
1572 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1574 For example, to obtain the vector-def 'vx.1' in order to create the
1575 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1576 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1577 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1578 and return its def ('vx.1').
1579 Overall, to create the above sequence this function will be called 3 times:
1580 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1581 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1582 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1584 tree
1585 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1587 gimple vec_stmt_for_operand;
1588 stmt_vec_info def_stmt_info;
1590 /* Do nothing; can reuse same def. */
1591 if (dt == vect_external_def || dt == vect_constant_def )
1592 return vec_oprnd;
1594 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1595 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1596 gcc_assert (def_stmt_info);
1597 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1598 gcc_assert (vec_stmt_for_operand);
1599 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1600 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1601 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1602 else
1603 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1604 return vec_oprnd;
1608 /* Get vectorized definitions for the operands to create a copy of an original
1609 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1611 static void
1612 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1613 vec<tree> *vec_oprnds0,
1614 vec<tree> *vec_oprnds1)
1616 tree vec_oprnd = vec_oprnds0->pop ();
1618 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1619 vec_oprnds0->quick_push (vec_oprnd);
1621 if (vec_oprnds1 && vec_oprnds1->length ())
1623 vec_oprnd = vec_oprnds1->pop ();
1624 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1625 vec_oprnds1->quick_push (vec_oprnd);
1630 /* Get vectorized definitions for OP0 and OP1.
1631 REDUC_INDEX is the index of reduction operand in case of reduction,
1632 and -1 otherwise. */
1634 void
1635 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1636 vec<tree> *vec_oprnds0,
1637 vec<tree> *vec_oprnds1,
1638 slp_tree slp_node, int reduc_index)
1640 if (slp_node)
1642 int nops = (op1 == NULL_TREE) ? 1 : 2;
1643 auto_vec<tree> ops (nops);
1644 auto_vec<vec<tree> > vec_defs (nops);
1646 ops.quick_push (op0);
1647 if (op1)
1648 ops.quick_push (op1);
1650 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1652 *vec_oprnds0 = vec_defs[0];
1653 if (op1)
1654 *vec_oprnds1 = vec_defs[1];
1656 else
1658 tree vec_oprnd;
1660 vec_oprnds0->create (1);
1661 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1662 vec_oprnds0->quick_push (vec_oprnd);
1664 if (op1)
1666 vec_oprnds1->create (1);
1667 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1668 vec_oprnds1->quick_push (vec_oprnd);
1674 /* Function vect_finish_stmt_generation.
1676 Insert a new stmt. */
1678 void
1679 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1680 gimple_stmt_iterator *gsi)
1682 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1683 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1684 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1686 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1688 if (!gsi_end_p (*gsi)
1689 && gimple_has_mem_ops (vec_stmt))
1691 gimple at_stmt = gsi_stmt (*gsi);
1692 tree vuse = gimple_vuse (at_stmt);
1693 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1695 tree vdef = gimple_vdef (at_stmt);
1696 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1697 /* If we have an SSA vuse and insert a store, update virtual
1698 SSA form to avoid triggering the renamer. Do so only
1699 if we can easily see all uses - which is what almost always
1700 happens with the way vectorized stmts are inserted. */
1701 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1702 && ((is_gimple_assign (vec_stmt)
1703 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1704 || (is_gimple_call (vec_stmt)
1705 && !(gimple_call_flags (vec_stmt)
1706 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1708 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1709 gimple_set_vdef (vec_stmt, new_vdef);
1710 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1714 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1716 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1717 bb_vinfo));
1719 if (dump_enabled_p ())
1721 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1722 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1723 dump_printf (MSG_NOTE, "\n");
1726 gimple_set_location (vec_stmt, gimple_location (stmt));
1728 /* While EH edges will generally prevent vectorization, stmt might
1729 e.g. be in a must-not-throw region. Ensure newly created stmts
1730 that could throw are part of the same region. */
1731 int lp_nr = lookup_stmt_eh_lp (stmt);
1732 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1733 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1736 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1737 a function declaration if the target has a vectorized version
1738 of the function, or NULL_TREE if the function cannot be vectorized. */
1740 tree
1741 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1743 tree fndecl = gimple_call_fndecl (call);
1745 /* We only handle functions that do not read or clobber memory -- i.e.
1746 const or novops ones. */
1747 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1748 return NULL_TREE;
1750 if (!fndecl
1751 || TREE_CODE (fndecl) != FUNCTION_DECL
1752 || !DECL_BUILT_IN (fndecl))
1753 return NULL_TREE;
1755 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1756 vectype_in);
1760 static tree permute_vec_elements (tree, tree, tree, gimple,
1761 gimple_stmt_iterator *);
1764 /* Function vectorizable_mask_load_store.
1766 Check if STMT performs a conditional load or store that can be vectorized.
1767 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1768 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1769 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1771 static bool
1772 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1773 gimple *vec_stmt, slp_tree slp_node)
1775 tree vec_dest = NULL;
1776 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1777 stmt_vec_info prev_stmt_info;
1778 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1779 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1780 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1781 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1782 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1783 tree elem_type;
1784 gimple new_stmt;
1785 tree dummy;
1786 tree dataref_ptr = NULL_TREE;
1787 gimple ptr_incr;
1788 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1789 int ncopies;
1790 int i, j;
1791 bool inv_p;
1792 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1793 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1794 int gather_scale = 1;
1795 enum vect_def_type gather_dt = vect_unknown_def_type;
1796 bool is_store;
1797 tree mask;
1798 gimple def_stmt;
1799 tree def;
1800 enum vect_def_type dt;
1802 if (slp_node != NULL)
1803 return false;
1805 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1806 gcc_assert (ncopies >= 1);
1808 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1809 mask = gimple_call_arg (stmt, 2);
1810 if (TYPE_PRECISION (TREE_TYPE (mask))
1811 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1812 return false;
1814 /* FORNOW. This restriction should be relaxed. */
1815 if (nested_in_vect_loop && ncopies > 1)
1817 if (dump_enabled_p ())
1818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1819 "multiple types in nested loop.");
1820 return false;
1823 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1824 return false;
1826 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1827 return false;
1829 if (!STMT_VINFO_DATA_REF (stmt_info))
1830 return false;
1832 elem_type = TREE_TYPE (vectype);
1834 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1835 return false;
1837 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1838 return false;
1840 if (STMT_VINFO_GATHER_P (stmt_info))
1842 gimple def_stmt;
1843 tree def;
1844 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1845 &gather_off, &gather_scale);
1846 gcc_assert (gather_decl);
1847 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1848 &def_stmt, &def, &gather_dt,
1849 &gather_off_vectype))
1851 if (dump_enabled_p ())
1852 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1853 "gather index use not simple.");
1854 return false;
1857 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1858 tree masktype
1859 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1860 if (TREE_CODE (masktype) == INTEGER_TYPE)
1862 if (dump_enabled_p ())
1863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1864 "masked gather with integer mask not supported.");
1865 return false;
1868 else if (tree_int_cst_compare (nested_in_vect_loop
1869 ? STMT_VINFO_DR_STEP (stmt_info)
1870 : DR_STEP (dr), size_zero_node) <= 0)
1871 return false;
1872 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1873 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1874 return false;
1876 if (TREE_CODE (mask) != SSA_NAME)
1877 return false;
1879 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1880 &def_stmt, &def, &dt))
1881 return false;
1883 if (is_store)
1885 tree rhs = gimple_call_arg (stmt, 3);
1886 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1887 &def_stmt, &def, &dt))
1888 return false;
1891 if (!vec_stmt) /* transformation not required. */
1893 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1894 if (is_store)
1895 vect_model_store_cost (stmt_info, ncopies, false, dt,
1896 NULL, NULL, NULL);
1897 else
1898 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1899 return true;
1902 /** Transform. **/
1904 if (STMT_VINFO_GATHER_P (stmt_info))
1906 tree vec_oprnd0 = NULL_TREE, op;
1907 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1908 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1909 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1910 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1911 tree mask_perm_mask = NULL_TREE;
1912 edge pe = loop_preheader_edge (loop);
1913 gimple_seq seq;
1914 basic_block new_bb;
1915 enum { NARROW, NONE, WIDEN } modifier;
1916 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1918 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1919 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1920 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1921 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1922 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1923 scaletype = TREE_VALUE (arglist);
1924 gcc_checking_assert (types_compatible_p (srctype, rettype)
1925 && types_compatible_p (srctype, masktype));
1927 if (nunits == gather_off_nunits)
1928 modifier = NONE;
1929 else if (nunits == gather_off_nunits / 2)
1931 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1932 modifier = WIDEN;
1934 for (i = 0; i < gather_off_nunits; ++i)
1935 sel[i] = i | nunits;
1937 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1939 else if (nunits == gather_off_nunits * 2)
1941 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1942 modifier = NARROW;
1944 for (i = 0; i < nunits; ++i)
1945 sel[i] = i < gather_off_nunits
1946 ? i : i + nunits - gather_off_nunits;
1948 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1949 ncopies *= 2;
1950 for (i = 0; i < nunits; ++i)
1951 sel[i] = i | gather_off_nunits;
1952 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1954 else
1955 gcc_unreachable ();
1957 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1959 ptr = fold_convert (ptrtype, gather_base);
1960 if (!is_gimple_min_invariant (ptr))
1962 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1963 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1964 gcc_assert (!new_bb);
1967 scale = build_int_cst (scaletype, gather_scale);
1969 prev_stmt_info = NULL;
1970 for (j = 0; j < ncopies; ++j)
1972 if (modifier == WIDEN && (j & 1))
1973 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1974 perm_mask, stmt, gsi);
1975 else if (j == 0)
1976 op = vec_oprnd0
1977 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1978 else
1979 op = vec_oprnd0
1980 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1982 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1984 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1985 == TYPE_VECTOR_SUBPARTS (idxtype));
1986 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1987 var = make_ssa_name (var);
1988 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1989 new_stmt
1990 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1991 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1992 op = var;
1995 if (mask_perm_mask && (j & 1))
1996 mask_op = permute_vec_elements (mask_op, mask_op,
1997 mask_perm_mask, stmt, gsi);
1998 else
2000 if (j == 0)
2001 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2002 else
2004 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
2005 &def_stmt, &def, &dt);
2006 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2009 mask_op = vec_mask;
2010 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2012 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2013 == TYPE_VECTOR_SUBPARTS (masktype));
2014 var = vect_get_new_vect_var (masktype, vect_simple_var,
2015 NULL);
2016 var = make_ssa_name (var);
2017 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2018 new_stmt
2019 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2020 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2021 mask_op = var;
2025 new_stmt
2026 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2027 scale);
2029 if (!useless_type_conversion_p (vectype, rettype))
2031 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2032 == TYPE_VECTOR_SUBPARTS (rettype));
2033 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2034 op = make_ssa_name (var, new_stmt);
2035 gimple_call_set_lhs (new_stmt, op);
2036 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2037 var = make_ssa_name (vec_dest);
2038 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2039 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2041 else
2043 var = make_ssa_name (vec_dest, new_stmt);
2044 gimple_call_set_lhs (new_stmt, var);
2047 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2049 if (modifier == NARROW)
2051 if ((j & 1) == 0)
2053 prev_res = var;
2054 continue;
2056 var = permute_vec_elements (prev_res, var,
2057 perm_mask, stmt, gsi);
2058 new_stmt = SSA_NAME_DEF_STMT (var);
2061 if (prev_stmt_info == NULL)
2062 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2063 else
2064 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2065 prev_stmt_info = vinfo_for_stmt (new_stmt);
2068 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2069 from the IL. */
2070 tree lhs = gimple_call_lhs (stmt);
2071 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2072 set_vinfo_for_stmt (new_stmt, stmt_info);
2073 set_vinfo_for_stmt (stmt, NULL);
2074 STMT_VINFO_STMT (stmt_info) = new_stmt;
2075 gsi_replace (gsi, new_stmt, true);
2076 return true;
2078 else if (is_store)
2080 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2081 prev_stmt_info = NULL;
2082 for (i = 0; i < ncopies; i++)
2084 unsigned align, misalign;
2086 if (i == 0)
2088 tree rhs = gimple_call_arg (stmt, 3);
2089 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2090 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2091 /* We should have catched mismatched types earlier. */
2092 gcc_assert (useless_type_conversion_p (vectype,
2093 TREE_TYPE (vec_rhs)));
2094 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2095 NULL_TREE, &dummy, gsi,
2096 &ptr_incr, false, &inv_p);
2097 gcc_assert (!inv_p);
2099 else
2101 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2102 &def, &dt);
2103 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2104 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2105 &def, &dt);
2106 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2107 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2108 TYPE_SIZE_UNIT (vectype));
2111 align = TYPE_ALIGN_UNIT (vectype);
2112 if (aligned_access_p (dr))
2113 misalign = 0;
2114 else if (DR_MISALIGNMENT (dr) == -1)
2116 align = TYPE_ALIGN_UNIT (elem_type);
2117 misalign = 0;
2119 else
2120 misalign = DR_MISALIGNMENT (dr);
2121 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2122 misalign);
2123 new_stmt
2124 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2125 gimple_call_arg (stmt, 1),
2126 vec_mask, vec_rhs);
2127 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2128 if (i == 0)
2129 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2130 else
2131 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2132 prev_stmt_info = vinfo_for_stmt (new_stmt);
2135 else
2137 tree vec_mask = NULL_TREE;
2138 prev_stmt_info = NULL;
2139 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2140 for (i = 0; i < ncopies; i++)
2142 unsigned align, misalign;
2144 if (i == 0)
2146 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2147 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2148 NULL_TREE, &dummy, gsi,
2149 &ptr_incr, false, &inv_p);
2150 gcc_assert (!inv_p);
2152 else
2154 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2155 &def, &dt);
2156 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2157 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2158 TYPE_SIZE_UNIT (vectype));
2161 align = TYPE_ALIGN_UNIT (vectype);
2162 if (aligned_access_p (dr))
2163 misalign = 0;
2164 else if (DR_MISALIGNMENT (dr) == -1)
2166 align = TYPE_ALIGN_UNIT (elem_type);
2167 misalign = 0;
2169 else
2170 misalign = DR_MISALIGNMENT (dr);
2171 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2172 misalign);
2173 new_stmt
2174 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2175 gimple_call_arg (stmt, 1),
2176 vec_mask);
2177 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2178 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2179 if (i == 0)
2180 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2181 else
2182 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2183 prev_stmt_info = vinfo_for_stmt (new_stmt);
2187 if (!is_store)
2189 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2190 from the IL. */
2191 tree lhs = gimple_call_lhs (stmt);
2192 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2193 set_vinfo_for_stmt (new_stmt, stmt_info);
2194 set_vinfo_for_stmt (stmt, NULL);
2195 STMT_VINFO_STMT (stmt_info) = new_stmt;
2196 gsi_replace (gsi, new_stmt, true);
2199 return true;
2203 /* Function vectorizable_call.
2205 Check if GS performs a function call that can be vectorized.
2206 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2207 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2208 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2210 static bool
2211 vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2212 slp_tree slp_node)
2214 gcall *stmt;
2215 tree vec_dest;
2216 tree scalar_dest;
2217 tree op, type;
2218 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2219 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2220 tree vectype_out, vectype_in;
2221 int nunits_in;
2222 int nunits_out;
2223 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2224 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2225 tree fndecl, new_temp, def, rhs_type;
2226 gimple def_stmt;
2227 enum vect_def_type dt[3]
2228 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2229 gimple new_stmt = NULL;
2230 int ncopies, j;
2231 vec<tree> vargs = vNULL;
2232 enum { NARROW, NONE, WIDEN } modifier;
2233 size_t i, nargs;
2234 tree lhs;
2236 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2237 return false;
2239 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2240 return false;
2242 /* Is GS a vectorizable call? */
2243 stmt = dyn_cast <gcall *> (gs);
2244 if (!stmt)
2245 return false;
2247 if (gimple_call_internal_p (stmt)
2248 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2249 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2250 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2251 slp_node);
2253 if (gimple_call_lhs (stmt) == NULL_TREE
2254 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2255 return false;
2257 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2259 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2261 /* Process function arguments. */
2262 rhs_type = NULL_TREE;
2263 vectype_in = NULL_TREE;
2264 nargs = gimple_call_num_args (stmt);
2266 /* Bail out if the function has more than three arguments, we do not have
2267 interesting builtin functions to vectorize with more than two arguments
2268 except for fma. No arguments is also not good. */
2269 if (nargs == 0 || nargs > 3)
2270 return false;
2272 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2273 if (gimple_call_internal_p (stmt)
2274 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2276 nargs = 0;
2277 rhs_type = unsigned_type_node;
2280 for (i = 0; i < nargs; i++)
2282 tree opvectype;
2284 op = gimple_call_arg (stmt, i);
2286 /* We can only handle calls with arguments of the same type. */
2287 if (rhs_type
2288 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2290 if (dump_enabled_p ())
2291 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2292 "argument types differ.\n");
2293 return false;
2295 if (!rhs_type)
2296 rhs_type = TREE_TYPE (op);
2298 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2299 &def_stmt, &def, &dt[i], &opvectype))
2301 if (dump_enabled_p ())
2302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2303 "use not simple.\n");
2304 return false;
2307 if (!vectype_in)
2308 vectype_in = opvectype;
2309 else if (opvectype
2310 && opvectype != vectype_in)
2312 if (dump_enabled_p ())
2313 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2314 "argument vector types differ.\n");
2315 return false;
2318 /* If all arguments are external or constant defs use a vector type with
2319 the same size as the output vector type. */
2320 if (!vectype_in)
2321 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2322 if (vec_stmt)
2323 gcc_assert (vectype_in);
2324 if (!vectype_in)
2326 if (dump_enabled_p ())
2328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2329 "no vectype for scalar type ");
2330 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2331 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2334 return false;
2337 /* FORNOW */
2338 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2339 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2340 if (nunits_in == nunits_out / 2)
2341 modifier = NARROW;
2342 else if (nunits_out == nunits_in)
2343 modifier = NONE;
2344 else if (nunits_out == nunits_in / 2)
2345 modifier = WIDEN;
2346 else
2347 return false;
2349 /* For now, we only vectorize functions if a target specific builtin
2350 is available. TODO -- in some cases, it might be profitable to
2351 insert the calls for pieces of the vector, in order to be able
2352 to vectorize other operations in the loop. */
2353 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2354 if (fndecl == NULL_TREE)
2356 if (gimple_call_internal_p (stmt)
2357 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2358 && !slp_node
2359 && loop_vinfo
2360 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2361 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2362 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2363 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2365 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2366 { 0, 1, 2, ... vf - 1 } vector. */
2367 gcc_assert (nargs == 0);
2369 else
2371 if (dump_enabled_p ())
2372 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2373 "function is not vectorizable.\n");
2374 return false;
2378 gcc_assert (!gimple_vuse (stmt));
2380 if (slp_node || PURE_SLP_STMT (stmt_info))
2381 ncopies = 1;
2382 else if (modifier == NARROW)
2383 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2384 else
2385 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2387 /* Sanity check: make sure that at least one copy of the vectorized stmt
2388 needs to be generated. */
2389 gcc_assert (ncopies >= 1);
2391 if (!vec_stmt) /* transformation not required. */
2393 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2394 if (dump_enabled_p ())
2395 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2396 "\n");
2397 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2398 return true;
2401 /** Transform. **/
2403 if (dump_enabled_p ())
2404 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2406 /* Handle def. */
2407 scalar_dest = gimple_call_lhs (stmt);
2408 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2410 prev_stmt_info = NULL;
2411 switch (modifier)
2413 case NONE:
2414 for (j = 0; j < ncopies; ++j)
2416 /* Build argument list for the vectorized call. */
2417 if (j == 0)
2418 vargs.create (nargs);
2419 else
2420 vargs.truncate (0);
2422 if (slp_node)
2424 auto_vec<vec<tree> > vec_defs (nargs);
2425 vec<tree> vec_oprnds0;
2427 for (i = 0; i < nargs; i++)
2428 vargs.quick_push (gimple_call_arg (stmt, i));
2429 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2430 vec_oprnds0 = vec_defs[0];
2432 /* Arguments are ready. Create the new vector stmt. */
2433 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2435 size_t k;
2436 for (k = 0; k < nargs; k++)
2438 vec<tree> vec_oprndsk = vec_defs[k];
2439 vargs[k] = vec_oprndsk[i];
2441 new_stmt = gimple_build_call_vec (fndecl, vargs);
2442 new_temp = make_ssa_name (vec_dest, new_stmt);
2443 gimple_call_set_lhs (new_stmt, new_temp);
2444 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2445 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2448 for (i = 0; i < nargs; i++)
2450 vec<tree> vec_oprndsi = vec_defs[i];
2451 vec_oprndsi.release ();
2453 continue;
2456 for (i = 0; i < nargs; i++)
2458 op = gimple_call_arg (stmt, i);
2459 if (j == 0)
2460 vec_oprnd0
2461 = vect_get_vec_def_for_operand (op, stmt, NULL);
2462 else
2464 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2465 vec_oprnd0
2466 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2469 vargs.quick_push (vec_oprnd0);
2472 if (gimple_call_internal_p (stmt)
2473 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2475 tree *v = XALLOCAVEC (tree, nunits_out);
2476 int k;
2477 for (k = 0; k < nunits_out; ++k)
2478 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2479 tree cst = build_vector (vectype_out, v);
2480 tree new_var
2481 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2482 gimple init_stmt = gimple_build_assign (new_var, cst);
2483 new_temp = make_ssa_name (new_var, init_stmt);
2484 gimple_assign_set_lhs (init_stmt, new_temp);
2485 vect_init_vector_1 (stmt, init_stmt, NULL);
2486 new_temp = make_ssa_name (vec_dest);
2487 new_stmt = gimple_build_assign (new_temp,
2488 gimple_assign_lhs (init_stmt));
2490 else
2492 new_stmt = gimple_build_call_vec (fndecl, vargs);
2493 new_temp = make_ssa_name (vec_dest, new_stmt);
2494 gimple_call_set_lhs (new_stmt, new_temp);
2496 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2498 if (j == 0)
2499 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2500 else
2501 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2503 prev_stmt_info = vinfo_for_stmt (new_stmt);
2506 break;
2508 case NARROW:
2509 for (j = 0; j < ncopies; ++j)
2511 /* Build argument list for the vectorized call. */
2512 if (j == 0)
2513 vargs.create (nargs * 2);
2514 else
2515 vargs.truncate (0);
2517 if (slp_node)
2519 auto_vec<vec<tree> > vec_defs (nargs);
2520 vec<tree> vec_oprnds0;
2522 for (i = 0; i < nargs; i++)
2523 vargs.quick_push (gimple_call_arg (stmt, i));
2524 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2525 vec_oprnds0 = vec_defs[0];
2527 /* Arguments are ready. Create the new vector stmt. */
2528 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2530 size_t k;
2531 vargs.truncate (0);
2532 for (k = 0; k < nargs; k++)
2534 vec<tree> vec_oprndsk = vec_defs[k];
2535 vargs.quick_push (vec_oprndsk[i]);
2536 vargs.quick_push (vec_oprndsk[i + 1]);
2538 new_stmt = gimple_build_call_vec (fndecl, vargs);
2539 new_temp = make_ssa_name (vec_dest, new_stmt);
2540 gimple_call_set_lhs (new_stmt, new_temp);
2541 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2542 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2545 for (i = 0; i < nargs; i++)
2547 vec<tree> vec_oprndsi = vec_defs[i];
2548 vec_oprndsi.release ();
2550 continue;
2553 for (i = 0; i < nargs; i++)
2555 op = gimple_call_arg (stmt, i);
2556 if (j == 0)
2558 vec_oprnd0
2559 = vect_get_vec_def_for_operand (op, stmt, NULL);
2560 vec_oprnd1
2561 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2563 else
2565 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2566 vec_oprnd0
2567 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2568 vec_oprnd1
2569 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2572 vargs.quick_push (vec_oprnd0);
2573 vargs.quick_push (vec_oprnd1);
2576 new_stmt = gimple_build_call_vec (fndecl, vargs);
2577 new_temp = make_ssa_name (vec_dest, new_stmt);
2578 gimple_call_set_lhs (new_stmt, new_temp);
2579 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2581 if (j == 0)
2582 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2583 else
2584 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2586 prev_stmt_info = vinfo_for_stmt (new_stmt);
2589 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2591 break;
2593 case WIDEN:
2594 /* No current target implements this case. */
2595 return false;
2598 vargs.release ();
2600 /* The call in STMT might prevent it from being removed in dce.
2601 We however cannot remove it here, due to the way the ssa name
2602 it defines is mapped to the new definition. So just replace
2603 rhs of the statement with something harmless. */
2605 if (slp_node)
2606 return true;
2608 type = TREE_TYPE (scalar_dest);
2609 if (is_pattern_stmt_p (stmt_info))
2610 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2611 else
2612 lhs = gimple_call_lhs (stmt);
2613 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2614 set_vinfo_for_stmt (new_stmt, stmt_info);
2615 set_vinfo_for_stmt (stmt, NULL);
2616 STMT_VINFO_STMT (stmt_info) = new_stmt;
2617 gsi_replace (gsi, new_stmt, false);
2619 return true;
2623 struct simd_call_arg_info
2625 tree vectype;
2626 tree op;
2627 enum vect_def_type dt;
2628 HOST_WIDE_INT linear_step;
2629 unsigned int align;
2632 /* Function vectorizable_simd_clone_call.
2634 Check if STMT performs a function call that can be vectorized
2635 by calling a simd clone of the function.
2636 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2637 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2638 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2640 static bool
2641 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2642 gimple *vec_stmt, slp_tree slp_node)
2644 tree vec_dest;
2645 tree scalar_dest;
2646 tree op, type;
2647 tree vec_oprnd0 = NULL_TREE;
2648 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2649 tree vectype;
2650 unsigned int nunits;
2651 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2652 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2653 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2654 tree fndecl, new_temp, def;
2655 gimple def_stmt;
2656 gimple new_stmt = NULL;
2657 int ncopies, j;
2658 vec<simd_call_arg_info> arginfo = vNULL;
2659 vec<tree> vargs = vNULL;
2660 size_t i, nargs;
2661 tree lhs, rtype, ratype;
2662 vec<constructor_elt, va_gc> *ret_ctor_elts;
2664 /* Is STMT a vectorizable call? */
2665 if (!is_gimple_call (stmt))
2666 return false;
2668 fndecl = gimple_call_fndecl (stmt);
2669 if (fndecl == NULL_TREE)
2670 return false;
2672 struct cgraph_node *node = cgraph_node::get (fndecl);
2673 if (node == NULL || node->simd_clones == NULL)
2674 return false;
2676 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2677 return false;
2679 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2680 return false;
2682 if (gimple_call_lhs (stmt)
2683 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2684 return false;
2686 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2688 vectype = STMT_VINFO_VECTYPE (stmt_info);
2690 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2691 return false;
2693 /* FORNOW */
2694 if (slp_node || PURE_SLP_STMT (stmt_info))
2695 return false;
2697 /* Process function arguments. */
2698 nargs = gimple_call_num_args (stmt);
2700 /* Bail out if the function has zero arguments. */
2701 if (nargs == 0)
2702 return false;
2704 arginfo.create (nargs);
2706 for (i = 0; i < nargs; i++)
2708 simd_call_arg_info thisarginfo;
2709 affine_iv iv;
2711 thisarginfo.linear_step = 0;
2712 thisarginfo.align = 0;
2713 thisarginfo.op = NULL_TREE;
2715 op = gimple_call_arg (stmt, i);
2716 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2717 &def_stmt, &def, &thisarginfo.dt,
2718 &thisarginfo.vectype)
2719 || thisarginfo.dt == vect_uninitialized_def)
2721 if (dump_enabled_p ())
2722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2723 "use not simple.\n");
2724 arginfo.release ();
2725 return false;
2728 if (thisarginfo.dt == vect_constant_def
2729 || thisarginfo.dt == vect_external_def)
2730 gcc_assert (thisarginfo.vectype == NULL_TREE);
2731 else
2732 gcc_assert (thisarginfo.vectype != NULL_TREE);
2734 /* For linear arguments, the analyze phase should have saved
2735 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2736 if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2737 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2739 gcc_assert (vec_stmt);
2740 thisarginfo.linear_step
2741 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2742 thisarginfo.op
2743 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2744 /* If loop has been peeled for alignment, we need to adjust it. */
2745 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2746 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2747 if (n1 != n2)
2749 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2750 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
2751 tree opt = TREE_TYPE (thisarginfo.op);
2752 bias = fold_convert (TREE_TYPE (step), bias);
2753 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2754 thisarginfo.op
2755 = fold_build2 (POINTER_TYPE_P (opt)
2756 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2757 thisarginfo.op, bias);
2760 else if (!vec_stmt
2761 && thisarginfo.dt != vect_constant_def
2762 && thisarginfo.dt != vect_external_def
2763 && loop_vinfo
2764 && TREE_CODE (op) == SSA_NAME
2765 && simple_iv (loop, loop_containing_stmt (stmt), op,
2766 &iv, false)
2767 && tree_fits_shwi_p (iv.step))
2769 thisarginfo.linear_step = tree_to_shwi (iv.step);
2770 thisarginfo.op = iv.base;
2772 else if ((thisarginfo.dt == vect_constant_def
2773 || thisarginfo.dt == vect_external_def)
2774 && POINTER_TYPE_P (TREE_TYPE (op)))
2775 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2777 arginfo.quick_push (thisarginfo);
2780 unsigned int badness = 0;
2781 struct cgraph_node *bestn = NULL;
2782 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2783 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2784 else
2785 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2786 n = n->simdclone->next_clone)
2788 unsigned int this_badness = 0;
2789 if (n->simdclone->simdlen
2790 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2791 || n->simdclone->nargs != nargs)
2792 continue;
2793 if (n->simdclone->simdlen
2794 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2795 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2796 - exact_log2 (n->simdclone->simdlen)) * 1024;
2797 if (n->simdclone->inbranch)
2798 this_badness += 2048;
2799 int target_badness = targetm.simd_clone.usable (n);
2800 if (target_badness < 0)
2801 continue;
2802 this_badness += target_badness * 512;
2803 /* FORNOW: Have to add code to add the mask argument. */
2804 if (n->simdclone->inbranch)
2805 continue;
2806 for (i = 0; i < nargs; i++)
2808 switch (n->simdclone->args[i].arg_type)
2810 case SIMD_CLONE_ARG_TYPE_VECTOR:
2811 if (!useless_type_conversion_p
2812 (n->simdclone->args[i].orig_type,
2813 TREE_TYPE (gimple_call_arg (stmt, i))))
2814 i = -1;
2815 else if (arginfo[i].dt == vect_constant_def
2816 || arginfo[i].dt == vect_external_def
2817 || arginfo[i].linear_step)
2818 this_badness += 64;
2819 break;
2820 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2821 if (arginfo[i].dt != vect_constant_def
2822 && arginfo[i].dt != vect_external_def)
2823 i = -1;
2824 break;
2825 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2826 if (arginfo[i].dt == vect_constant_def
2827 || arginfo[i].dt == vect_external_def
2828 || (arginfo[i].linear_step
2829 != n->simdclone->args[i].linear_step))
2830 i = -1;
2831 break;
2832 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2833 /* FORNOW */
2834 i = -1;
2835 break;
2836 case SIMD_CLONE_ARG_TYPE_MASK:
2837 gcc_unreachable ();
2839 if (i == (size_t) -1)
2840 break;
2841 if (n->simdclone->args[i].alignment > arginfo[i].align)
2843 i = -1;
2844 break;
2846 if (arginfo[i].align)
2847 this_badness += (exact_log2 (arginfo[i].align)
2848 - exact_log2 (n->simdclone->args[i].alignment));
2850 if (i == (size_t) -1)
2851 continue;
2852 if (bestn == NULL || this_badness < badness)
2854 bestn = n;
2855 badness = this_badness;
2859 if (bestn == NULL)
2861 arginfo.release ();
2862 return false;
2865 for (i = 0; i < nargs; i++)
2866 if ((arginfo[i].dt == vect_constant_def
2867 || arginfo[i].dt == vect_external_def)
2868 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2870 arginfo[i].vectype
2871 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2872 i)));
2873 if (arginfo[i].vectype == NULL
2874 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2875 > bestn->simdclone->simdlen))
2877 arginfo.release ();
2878 return false;
2882 fndecl = bestn->decl;
2883 nunits = bestn->simdclone->simdlen;
2884 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2886 /* If the function isn't const, only allow it in simd loops where user
2887 has asserted that at least nunits consecutive iterations can be
2888 performed using SIMD instructions. */
2889 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2890 && gimple_vuse (stmt))
2892 arginfo.release ();
2893 return false;
2896 /* Sanity check: make sure that at least one copy of the vectorized stmt
2897 needs to be generated. */
2898 gcc_assert (ncopies >= 1);
2900 if (!vec_stmt) /* transformation not required. */
2902 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2903 for (i = 0; i < nargs; i++)
2904 if (bestn->simdclone->args[i].arg_type
2905 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2907 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
2908 + 1);
2909 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2910 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2911 ? size_type_node : TREE_TYPE (arginfo[i].op);
2912 tree ls = build_int_cst (lst, arginfo[i].linear_step);
2913 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2915 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2916 if (dump_enabled_p ())
2917 dump_printf_loc (MSG_NOTE, vect_location,
2918 "=== vectorizable_simd_clone_call ===\n");
2919 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2920 arginfo.release ();
2921 return true;
2924 /** Transform. **/
2926 if (dump_enabled_p ())
2927 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2929 /* Handle def. */
2930 scalar_dest = gimple_call_lhs (stmt);
2931 vec_dest = NULL_TREE;
2932 rtype = NULL_TREE;
2933 ratype = NULL_TREE;
2934 if (scalar_dest)
2936 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2937 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2938 if (TREE_CODE (rtype) == ARRAY_TYPE)
2940 ratype = rtype;
2941 rtype = TREE_TYPE (ratype);
2945 prev_stmt_info = NULL;
2946 for (j = 0; j < ncopies; ++j)
2948 /* Build argument list for the vectorized call. */
2949 if (j == 0)
2950 vargs.create (nargs);
2951 else
2952 vargs.truncate (0);
2954 for (i = 0; i < nargs; i++)
2956 unsigned int k, l, m, o;
2957 tree atype;
2958 op = gimple_call_arg (stmt, i);
2959 switch (bestn->simdclone->args[i].arg_type)
2961 case SIMD_CLONE_ARG_TYPE_VECTOR:
2962 atype = bestn->simdclone->args[i].vector_type;
2963 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2964 for (m = j * o; m < (j + 1) * o; m++)
2966 if (TYPE_VECTOR_SUBPARTS (atype)
2967 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2969 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2970 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2971 / TYPE_VECTOR_SUBPARTS (atype));
2972 gcc_assert ((k & (k - 1)) == 0);
2973 if (m == 0)
2974 vec_oprnd0
2975 = vect_get_vec_def_for_operand (op, stmt, NULL);
2976 else
2978 vec_oprnd0 = arginfo[i].op;
2979 if ((m & (k - 1)) == 0)
2980 vec_oprnd0
2981 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2982 vec_oprnd0);
2984 arginfo[i].op = vec_oprnd0;
2985 vec_oprnd0
2986 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2987 size_int (prec),
2988 bitsize_int ((m & (k - 1)) * prec));
2989 new_stmt
2990 = gimple_build_assign (make_ssa_name (atype),
2991 vec_oprnd0);
2992 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2993 vargs.safe_push (gimple_assign_lhs (new_stmt));
2995 else
2997 k = (TYPE_VECTOR_SUBPARTS (atype)
2998 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2999 gcc_assert ((k & (k - 1)) == 0);
3000 vec<constructor_elt, va_gc> *ctor_elts;
3001 if (k != 1)
3002 vec_alloc (ctor_elts, k);
3003 else
3004 ctor_elts = NULL;
3005 for (l = 0; l < k; l++)
3007 if (m == 0 && l == 0)
3008 vec_oprnd0
3009 = vect_get_vec_def_for_operand (op, stmt, NULL);
3010 else
3011 vec_oprnd0
3012 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3013 arginfo[i].op);
3014 arginfo[i].op = vec_oprnd0;
3015 if (k == 1)
3016 break;
3017 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3018 vec_oprnd0);
3020 if (k == 1)
3021 vargs.safe_push (vec_oprnd0);
3022 else
3024 vec_oprnd0 = build_constructor (atype, ctor_elts);
3025 new_stmt
3026 = gimple_build_assign (make_ssa_name (atype),
3027 vec_oprnd0);
3028 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3029 vargs.safe_push (gimple_assign_lhs (new_stmt));
3033 break;
3034 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3035 vargs.safe_push (op);
3036 break;
3037 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3038 if (j == 0)
3040 gimple_seq stmts;
3041 arginfo[i].op
3042 = force_gimple_operand (arginfo[i].op, &stmts, true,
3043 NULL_TREE);
3044 if (stmts != NULL)
3046 basic_block new_bb;
3047 edge pe = loop_preheader_edge (loop);
3048 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3049 gcc_assert (!new_bb);
3051 tree phi_res = copy_ssa_name (op);
3052 gphi *new_phi = create_phi_node (phi_res, loop->header);
3053 set_vinfo_for_stmt (new_phi,
3054 new_stmt_vec_info (new_phi, loop_vinfo,
3055 NULL));
3056 add_phi_arg (new_phi, arginfo[i].op,
3057 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3058 enum tree_code code
3059 = POINTER_TYPE_P (TREE_TYPE (op))
3060 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3061 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3062 ? sizetype : TREE_TYPE (op);
3063 widest_int cst
3064 = wi::mul (bestn->simdclone->args[i].linear_step,
3065 ncopies * nunits);
3066 tree tcst = wide_int_to_tree (type, cst);
3067 tree phi_arg = copy_ssa_name (op);
3068 new_stmt
3069 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3070 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3071 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3072 set_vinfo_for_stmt (new_stmt,
3073 new_stmt_vec_info (new_stmt, loop_vinfo,
3074 NULL));
3075 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3076 UNKNOWN_LOCATION);
3077 arginfo[i].op = phi_res;
3078 vargs.safe_push (phi_res);
3080 else
3082 enum tree_code code
3083 = POINTER_TYPE_P (TREE_TYPE (op))
3084 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3085 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3086 ? sizetype : TREE_TYPE (op);
3087 widest_int cst
3088 = wi::mul (bestn->simdclone->args[i].linear_step,
3089 j * nunits);
3090 tree tcst = wide_int_to_tree (type, cst);
3091 new_temp = make_ssa_name (TREE_TYPE (op));
3092 new_stmt = gimple_build_assign (new_temp, code,
3093 arginfo[i].op, tcst);
3094 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3095 vargs.safe_push (new_temp);
3097 break;
3098 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3099 default:
3100 gcc_unreachable ();
3104 new_stmt = gimple_build_call_vec (fndecl, vargs);
3105 if (vec_dest)
3107 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3108 if (ratype)
3109 new_temp = create_tmp_var (ratype);
3110 else if (TYPE_VECTOR_SUBPARTS (vectype)
3111 == TYPE_VECTOR_SUBPARTS (rtype))
3112 new_temp = make_ssa_name (vec_dest, new_stmt);
3113 else
3114 new_temp = make_ssa_name (rtype, new_stmt);
3115 gimple_call_set_lhs (new_stmt, new_temp);
3117 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3119 if (vec_dest)
3121 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3123 unsigned int k, l;
3124 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3125 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3126 gcc_assert ((k & (k - 1)) == 0);
3127 for (l = 0; l < k; l++)
3129 tree t;
3130 if (ratype)
3132 t = build_fold_addr_expr (new_temp);
3133 t = build2 (MEM_REF, vectype, t,
3134 build_int_cst (TREE_TYPE (t),
3135 l * prec / BITS_PER_UNIT));
3137 else
3138 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3139 size_int (prec), bitsize_int (l * prec));
3140 new_stmt
3141 = gimple_build_assign (make_ssa_name (vectype), t);
3142 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3143 if (j == 0 && l == 0)
3144 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3145 else
3146 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3148 prev_stmt_info = vinfo_for_stmt (new_stmt);
3151 if (ratype)
3153 tree clobber = build_constructor (ratype, NULL);
3154 TREE_THIS_VOLATILE (clobber) = 1;
3155 new_stmt = gimple_build_assign (new_temp, clobber);
3156 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3158 continue;
3160 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3162 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3163 / TYPE_VECTOR_SUBPARTS (rtype));
3164 gcc_assert ((k & (k - 1)) == 0);
3165 if ((j & (k - 1)) == 0)
3166 vec_alloc (ret_ctor_elts, k);
3167 if (ratype)
3169 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3170 for (m = 0; m < o; m++)
3172 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3173 size_int (m), NULL_TREE, NULL_TREE);
3174 new_stmt
3175 = gimple_build_assign (make_ssa_name (rtype), tem);
3176 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3177 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3178 gimple_assign_lhs (new_stmt));
3180 tree clobber = build_constructor (ratype, NULL);
3181 TREE_THIS_VOLATILE (clobber) = 1;
3182 new_stmt = gimple_build_assign (new_temp, clobber);
3183 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3185 else
3186 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3187 if ((j & (k - 1)) != k - 1)
3188 continue;
3189 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3190 new_stmt
3191 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3192 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3194 if ((unsigned) j == k - 1)
3195 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3196 else
3197 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3199 prev_stmt_info = vinfo_for_stmt (new_stmt);
3200 continue;
3202 else if (ratype)
3204 tree t = build_fold_addr_expr (new_temp);
3205 t = build2 (MEM_REF, vectype, t,
3206 build_int_cst (TREE_TYPE (t), 0));
3207 new_stmt
3208 = gimple_build_assign (make_ssa_name (vec_dest), t);
3209 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3210 tree clobber = build_constructor (ratype, NULL);
3211 TREE_THIS_VOLATILE (clobber) = 1;
3212 vect_finish_stmt_generation (stmt,
3213 gimple_build_assign (new_temp,
3214 clobber), gsi);
3218 if (j == 0)
3219 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3220 else
3221 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3223 prev_stmt_info = vinfo_for_stmt (new_stmt);
3226 vargs.release ();
3228 /* The call in STMT might prevent it from being removed in dce.
3229 We however cannot remove it here, due to the way the ssa name
3230 it defines is mapped to the new definition. So just replace
3231 rhs of the statement with something harmless. */
3233 if (slp_node)
3234 return true;
3236 if (scalar_dest)
3238 type = TREE_TYPE (scalar_dest);
3239 if (is_pattern_stmt_p (stmt_info))
3240 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3241 else
3242 lhs = gimple_call_lhs (stmt);
3243 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3245 else
3246 new_stmt = gimple_build_nop ();
3247 set_vinfo_for_stmt (new_stmt, stmt_info);
3248 set_vinfo_for_stmt (stmt, NULL);
3249 STMT_VINFO_STMT (stmt_info) = new_stmt;
3250 gsi_replace (gsi, new_stmt, true);
3251 unlink_stmt_vdef (stmt);
3253 return true;
3257 /* Function vect_gen_widened_results_half
3259 Create a vector stmt whose code, type, number of arguments, and result
3260 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3261 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3262 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3263 needs to be created (DECL is a function-decl of a target-builtin).
3264 STMT is the original scalar stmt that we are vectorizing. */
3266 static gimple
3267 vect_gen_widened_results_half (enum tree_code code,
3268 tree decl,
3269 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3270 tree vec_dest, gimple_stmt_iterator *gsi,
3271 gimple stmt)
3273 gimple new_stmt;
3274 tree new_temp;
3276 /* Generate half of the widened result: */
3277 if (code == CALL_EXPR)
3279 /* Target specific support */
3280 if (op_type == binary_op)
3281 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3282 else
3283 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3284 new_temp = make_ssa_name (vec_dest, new_stmt);
3285 gimple_call_set_lhs (new_stmt, new_temp);
3287 else
3289 /* Generic support */
3290 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3291 if (op_type != binary_op)
3292 vec_oprnd1 = NULL;
3293 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3294 new_temp = make_ssa_name (vec_dest, new_stmt);
3295 gimple_assign_set_lhs (new_stmt, new_temp);
3297 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3299 return new_stmt;
3303 /* Get vectorized definitions for loop-based vectorization. For the first
3304 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3305 scalar operand), and for the rest we get a copy with
3306 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3307 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3308 The vectors are collected into VEC_OPRNDS. */
3310 static void
3311 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3312 vec<tree> *vec_oprnds, int multi_step_cvt)
3314 tree vec_oprnd;
3316 /* Get first vector operand. */
3317 /* All the vector operands except the very first one (that is scalar oprnd)
3318 are stmt copies. */
3319 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3320 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3321 else
3322 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3324 vec_oprnds->quick_push (vec_oprnd);
3326 /* Get second vector operand. */
3327 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3328 vec_oprnds->quick_push (vec_oprnd);
3330 *oprnd = vec_oprnd;
3332 /* For conversion in multiple steps, continue to get operands
3333 recursively. */
3334 if (multi_step_cvt)
3335 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3339 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3340 For multi-step conversions store the resulting vectors and call the function
3341 recursively. */
3343 static void
3344 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3345 int multi_step_cvt, gimple stmt,
3346 vec<tree> vec_dsts,
3347 gimple_stmt_iterator *gsi,
3348 slp_tree slp_node, enum tree_code code,
3349 stmt_vec_info *prev_stmt_info)
3351 unsigned int i;
3352 tree vop0, vop1, new_tmp, vec_dest;
3353 gimple new_stmt;
3354 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3356 vec_dest = vec_dsts.pop ();
3358 for (i = 0; i < vec_oprnds->length (); i += 2)
3360 /* Create demotion operation. */
3361 vop0 = (*vec_oprnds)[i];
3362 vop1 = (*vec_oprnds)[i + 1];
3363 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3364 new_tmp = make_ssa_name (vec_dest, new_stmt);
3365 gimple_assign_set_lhs (new_stmt, new_tmp);
3366 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3368 if (multi_step_cvt)
3369 /* Store the resulting vector for next recursive call. */
3370 (*vec_oprnds)[i/2] = new_tmp;
3371 else
3373 /* This is the last step of the conversion sequence. Store the
3374 vectors in SLP_NODE or in vector info of the scalar statement
3375 (or in STMT_VINFO_RELATED_STMT chain). */
3376 if (slp_node)
3377 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3378 else
3380 if (!*prev_stmt_info)
3381 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3382 else
3383 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3385 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3390 /* For multi-step demotion operations we first generate demotion operations
3391 from the source type to the intermediate types, and then combine the
3392 results (stored in VEC_OPRNDS) in demotion operation to the destination
3393 type. */
3394 if (multi_step_cvt)
3396 /* At each level of recursion we have half of the operands we had at the
3397 previous level. */
3398 vec_oprnds->truncate ((i+1)/2);
3399 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3400 stmt, vec_dsts, gsi, slp_node,
3401 VEC_PACK_TRUNC_EXPR,
3402 prev_stmt_info);
3405 vec_dsts.quick_push (vec_dest);
3409 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3410 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3411 the resulting vectors and call the function recursively. */
3413 static void
3414 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3415 vec<tree> *vec_oprnds1,
3416 gimple stmt, tree vec_dest,
3417 gimple_stmt_iterator *gsi,
3418 enum tree_code code1,
3419 enum tree_code code2, tree decl1,
3420 tree decl2, int op_type)
3422 int i;
3423 tree vop0, vop1, new_tmp1, new_tmp2;
3424 gimple new_stmt1, new_stmt2;
3425 vec<tree> vec_tmp = vNULL;
3427 vec_tmp.create (vec_oprnds0->length () * 2);
3428 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3430 if (op_type == binary_op)
3431 vop1 = (*vec_oprnds1)[i];
3432 else
3433 vop1 = NULL_TREE;
3435 /* Generate the two halves of promotion operation. */
3436 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3437 op_type, vec_dest, gsi, stmt);
3438 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3439 op_type, vec_dest, gsi, stmt);
3440 if (is_gimple_call (new_stmt1))
3442 new_tmp1 = gimple_call_lhs (new_stmt1);
3443 new_tmp2 = gimple_call_lhs (new_stmt2);
3445 else
3447 new_tmp1 = gimple_assign_lhs (new_stmt1);
3448 new_tmp2 = gimple_assign_lhs (new_stmt2);
3451 /* Store the results for the next step. */
3452 vec_tmp.quick_push (new_tmp1);
3453 vec_tmp.quick_push (new_tmp2);
3456 vec_oprnds0->release ();
3457 *vec_oprnds0 = vec_tmp;
3461 /* Check if STMT performs a conversion operation, that can be vectorized.
3462 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3463 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3464 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3466 static bool
3467 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3468 gimple *vec_stmt, slp_tree slp_node)
3470 tree vec_dest;
3471 tree scalar_dest;
3472 tree op0, op1 = NULL_TREE;
3473 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3474 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3475 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3476 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3477 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3478 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3479 tree new_temp;
3480 tree def;
3481 gimple def_stmt;
3482 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3483 gimple new_stmt = NULL;
3484 stmt_vec_info prev_stmt_info;
3485 int nunits_in;
3486 int nunits_out;
3487 tree vectype_out, vectype_in;
3488 int ncopies, i, j;
3489 tree lhs_type, rhs_type;
3490 enum { NARROW, NONE, WIDEN } modifier;
3491 vec<tree> vec_oprnds0 = vNULL;
3492 vec<tree> vec_oprnds1 = vNULL;
3493 tree vop0;
3494 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3495 int multi_step_cvt = 0;
3496 vec<tree> vec_dsts = vNULL;
3497 vec<tree> interm_types = vNULL;
3498 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3499 int op_type;
3500 machine_mode rhs_mode;
3501 unsigned short fltsz;
3503 /* Is STMT a vectorizable conversion? */
3505 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3506 return false;
3508 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3509 return false;
3511 if (!is_gimple_assign (stmt))
3512 return false;
3514 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3515 return false;
3517 code = gimple_assign_rhs_code (stmt);
3518 if (!CONVERT_EXPR_CODE_P (code)
3519 && code != FIX_TRUNC_EXPR
3520 && code != FLOAT_EXPR
3521 && code != WIDEN_MULT_EXPR
3522 && code != WIDEN_LSHIFT_EXPR)
3523 return false;
3525 op_type = TREE_CODE_LENGTH (code);
3527 /* Check types of lhs and rhs. */
3528 scalar_dest = gimple_assign_lhs (stmt);
3529 lhs_type = TREE_TYPE (scalar_dest);
3530 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3532 op0 = gimple_assign_rhs1 (stmt);
3533 rhs_type = TREE_TYPE (op0);
3535 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3536 && !((INTEGRAL_TYPE_P (lhs_type)
3537 && INTEGRAL_TYPE_P (rhs_type))
3538 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3539 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3540 return false;
3542 if ((INTEGRAL_TYPE_P (lhs_type)
3543 && (TYPE_PRECISION (lhs_type)
3544 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3545 || (INTEGRAL_TYPE_P (rhs_type)
3546 && (TYPE_PRECISION (rhs_type)
3547 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3549 if (dump_enabled_p ())
3550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3551 "type conversion to/from bit-precision unsupported."
3552 "\n");
3553 return false;
3556 /* Check the operands of the operation. */
3557 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3558 &def_stmt, &def, &dt[0], &vectype_in))
3560 if (dump_enabled_p ())
3561 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3562 "use not simple.\n");
3563 return false;
3565 if (op_type == binary_op)
3567 bool ok;
3569 op1 = gimple_assign_rhs2 (stmt);
3570 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3571 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3572 OP1. */
3573 if (CONSTANT_CLASS_P (op0))
3574 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3575 &def_stmt, &def, &dt[1], &vectype_in);
3576 else
3577 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3578 &def, &dt[1]);
3580 if (!ok)
3582 if (dump_enabled_p ())
3583 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3584 "use not simple.\n");
3585 return false;
3589 /* If op0 is an external or constant defs use a vector type of
3590 the same size as the output vector type. */
3591 if (!vectype_in)
3592 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3593 if (vec_stmt)
3594 gcc_assert (vectype_in);
3595 if (!vectype_in)
3597 if (dump_enabled_p ())
3599 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3600 "no vectype for scalar type ");
3601 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3602 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3605 return false;
3608 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3609 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3610 if (nunits_in < nunits_out)
3611 modifier = NARROW;
3612 else if (nunits_out == nunits_in)
3613 modifier = NONE;
3614 else
3615 modifier = WIDEN;
3617 /* Multiple types in SLP are handled by creating the appropriate number of
3618 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3619 case of SLP. */
3620 if (slp_node || PURE_SLP_STMT (stmt_info))
3621 ncopies = 1;
3622 else if (modifier == NARROW)
3623 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3624 else
3625 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3627 /* Sanity check: make sure that at least one copy of the vectorized stmt
3628 needs to be generated. */
3629 gcc_assert (ncopies >= 1);
3631 /* Supportable by target? */
3632 switch (modifier)
3634 case NONE:
3635 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3636 return false;
3637 if (supportable_convert_operation (code, vectype_out, vectype_in,
3638 &decl1, &code1))
3639 break;
3640 /* FALLTHRU */
3641 unsupported:
3642 if (dump_enabled_p ())
3643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3644 "conversion not supported by target.\n");
3645 return false;
3647 case WIDEN:
3648 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3649 &code1, &code2, &multi_step_cvt,
3650 &interm_types))
3652 /* Binary widening operation can only be supported directly by the
3653 architecture. */
3654 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3655 break;
3658 if (code != FLOAT_EXPR
3659 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3660 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3661 goto unsupported;
3663 rhs_mode = TYPE_MODE (rhs_type);
3664 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3665 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3666 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3667 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3669 cvt_type
3670 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3671 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3672 if (cvt_type == NULL_TREE)
3673 goto unsupported;
3675 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3677 if (!supportable_convert_operation (code, vectype_out,
3678 cvt_type, &decl1, &codecvt1))
3679 goto unsupported;
3681 else if (!supportable_widening_operation (code, stmt, vectype_out,
3682 cvt_type, &codecvt1,
3683 &codecvt2, &multi_step_cvt,
3684 &interm_types))
3685 continue;
3686 else
3687 gcc_assert (multi_step_cvt == 0);
3689 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3690 vectype_in, &code1, &code2,
3691 &multi_step_cvt, &interm_types))
3692 break;
3695 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3696 goto unsupported;
3698 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3699 codecvt2 = ERROR_MARK;
3700 else
3702 multi_step_cvt++;
3703 interm_types.safe_push (cvt_type);
3704 cvt_type = NULL_TREE;
3706 break;
3708 case NARROW:
3709 gcc_assert (op_type == unary_op);
3710 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3711 &code1, &multi_step_cvt,
3712 &interm_types))
3713 break;
3715 if (code != FIX_TRUNC_EXPR
3716 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3717 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3718 goto unsupported;
3720 rhs_mode = TYPE_MODE (rhs_type);
3721 cvt_type
3722 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3723 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3724 if (cvt_type == NULL_TREE)
3725 goto unsupported;
3726 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3727 &decl1, &codecvt1))
3728 goto unsupported;
3729 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3730 &code1, &multi_step_cvt,
3731 &interm_types))
3732 break;
3733 goto unsupported;
3735 default:
3736 gcc_unreachable ();
3739 if (!vec_stmt) /* transformation not required. */
3741 if (dump_enabled_p ())
3742 dump_printf_loc (MSG_NOTE, vect_location,
3743 "=== vectorizable_conversion ===\n");
3744 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3746 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3747 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3749 else if (modifier == NARROW)
3751 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3752 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3754 else
3756 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3757 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3759 interm_types.release ();
3760 return true;
3763 /** Transform. **/
3764 if (dump_enabled_p ())
3765 dump_printf_loc (MSG_NOTE, vect_location,
3766 "transform conversion. ncopies = %d.\n", ncopies);
3768 if (op_type == binary_op)
3770 if (CONSTANT_CLASS_P (op0))
3771 op0 = fold_convert (TREE_TYPE (op1), op0);
3772 else if (CONSTANT_CLASS_P (op1))
3773 op1 = fold_convert (TREE_TYPE (op0), op1);
3776 /* In case of multi-step conversion, we first generate conversion operations
3777 to the intermediate types, and then from that types to the final one.
3778 We create vector destinations for the intermediate type (TYPES) received
3779 from supportable_*_operation, and store them in the correct order
3780 for future use in vect_create_vectorized_*_stmts (). */
3781 vec_dsts.create (multi_step_cvt + 1);
3782 vec_dest = vect_create_destination_var (scalar_dest,
3783 (cvt_type && modifier == WIDEN)
3784 ? cvt_type : vectype_out);
3785 vec_dsts.quick_push (vec_dest);
3787 if (multi_step_cvt)
3789 for (i = interm_types.length () - 1;
3790 interm_types.iterate (i, &intermediate_type); i--)
3792 vec_dest = vect_create_destination_var (scalar_dest,
3793 intermediate_type);
3794 vec_dsts.quick_push (vec_dest);
3798 if (cvt_type)
3799 vec_dest = vect_create_destination_var (scalar_dest,
3800 modifier == WIDEN
3801 ? vectype_out : cvt_type);
3803 if (!slp_node)
3805 if (modifier == WIDEN)
3807 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3808 if (op_type == binary_op)
3809 vec_oprnds1.create (1);
3811 else if (modifier == NARROW)
3812 vec_oprnds0.create (
3813 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3815 else if (code == WIDEN_LSHIFT_EXPR)
3816 vec_oprnds1.create (slp_node->vec_stmts_size);
3818 last_oprnd = op0;
3819 prev_stmt_info = NULL;
3820 switch (modifier)
3822 case NONE:
3823 for (j = 0; j < ncopies; j++)
3825 if (j == 0)
3826 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3827 -1);
3828 else
3829 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3831 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3833 /* Arguments are ready, create the new vector stmt. */
3834 if (code1 == CALL_EXPR)
3836 new_stmt = gimple_build_call (decl1, 1, vop0);
3837 new_temp = make_ssa_name (vec_dest, new_stmt);
3838 gimple_call_set_lhs (new_stmt, new_temp);
3840 else
3842 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3843 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3844 new_temp = make_ssa_name (vec_dest, new_stmt);
3845 gimple_assign_set_lhs (new_stmt, new_temp);
3848 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3849 if (slp_node)
3850 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3853 if (j == 0)
3854 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3855 else
3856 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3857 prev_stmt_info = vinfo_for_stmt (new_stmt);
3859 break;
3861 case WIDEN:
3862 /* In case the vectorization factor (VF) is bigger than the number
3863 of elements that we can fit in a vectype (nunits), we have to
3864 generate more than one vector stmt - i.e - we need to "unroll"
3865 the vector stmt by a factor VF/nunits. */
3866 for (j = 0; j < ncopies; j++)
3868 /* Handle uses. */
3869 if (j == 0)
3871 if (slp_node)
3873 if (code == WIDEN_LSHIFT_EXPR)
3875 unsigned int k;
3877 vec_oprnd1 = op1;
3878 /* Store vec_oprnd1 for every vector stmt to be created
3879 for SLP_NODE. We check during the analysis that all
3880 the shift arguments are the same. */
3881 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3882 vec_oprnds1.quick_push (vec_oprnd1);
3884 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3885 slp_node, -1);
3887 else
3888 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3889 &vec_oprnds1, slp_node, -1);
3891 else
3893 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3894 vec_oprnds0.quick_push (vec_oprnd0);
3895 if (op_type == binary_op)
3897 if (code == WIDEN_LSHIFT_EXPR)
3898 vec_oprnd1 = op1;
3899 else
3900 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3901 NULL);
3902 vec_oprnds1.quick_push (vec_oprnd1);
3906 else
3908 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3909 vec_oprnds0.truncate (0);
3910 vec_oprnds0.quick_push (vec_oprnd0);
3911 if (op_type == binary_op)
3913 if (code == WIDEN_LSHIFT_EXPR)
3914 vec_oprnd1 = op1;
3915 else
3916 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3917 vec_oprnd1);
3918 vec_oprnds1.truncate (0);
3919 vec_oprnds1.quick_push (vec_oprnd1);
3923 /* Arguments are ready. Create the new vector stmts. */
3924 for (i = multi_step_cvt; i >= 0; i--)
3926 tree this_dest = vec_dsts[i];
3927 enum tree_code c1 = code1, c2 = code2;
3928 if (i == 0 && codecvt2 != ERROR_MARK)
3930 c1 = codecvt1;
3931 c2 = codecvt2;
3933 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3934 &vec_oprnds1,
3935 stmt, this_dest, gsi,
3936 c1, c2, decl1, decl2,
3937 op_type);
3940 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3942 if (cvt_type)
3944 if (codecvt1 == CALL_EXPR)
3946 new_stmt = gimple_build_call (decl1, 1, vop0);
3947 new_temp = make_ssa_name (vec_dest, new_stmt);
3948 gimple_call_set_lhs (new_stmt, new_temp);
3950 else
3952 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3953 new_temp = make_ssa_name (vec_dest);
3954 new_stmt = gimple_build_assign (new_temp, codecvt1,
3955 vop0);
3958 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3960 else
3961 new_stmt = SSA_NAME_DEF_STMT (vop0);
3963 if (slp_node)
3964 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3965 else
3967 if (!prev_stmt_info)
3968 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3969 else
3970 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3971 prev_stmt_info = vinfo_for_stmt (new_stmt);
3976 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3977 break;
3979 case NARROW:
3980 /* In case the vectorization factor (VF) is bigger than the number
3981 of elements that we can fit in a vectype (nunits), we have to
3982 generate more than one vector stmt - i.e - we need to "unroll"
3983 the vector stmt by a factor VF/nunits. */
3984 for (j = 0; j < ncopies; j++)
3986 /* Handle uses. */
3987 if (slp_node)
3988 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3989 slp_node, -1);
3990 else
3992 vec_oprnds0.truncate (0);
3993 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3994 vect_pow2 (multi_step_cvt) - 1);
3997 /* Arguments are ready. Create the new vector stmts. */
3998 if (cvt_type)
3999 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4001 if (codecvt1 == CALL_EXPR)
4003 new_stmt = gimple_build_call (decl1, 1, vop0);
4004 new_temp = make_ssa_name (vec_dest, new_stmt);
4005 gimple_call_set_lhs (new_stmt, new_temp);
4007 else
4009 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4010 new_temp = make_ssa_name (vec_dest);
4011 new_stmt = gimple_build_assign (new_temp, codecvt1,
4012 vop0);
4015 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4016 vec_oprnds0[i] = new_temp;
4019 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4020 stmt, vec_dsts, gsi,
4021 slp_node, code1,
4022 &prev_stmt_info);
4025 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4026 break;
4029 vec_oprnds0.release ();
4030 vec_oprnds1.release ();
4031 vec_dsts.release ();
4032 interm_types.release ();
4034 return true;
4038 /* Function vectorizable_assignment.
4040 Check if STMT performs an assignment (copy) that can be vectorized.
4041 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4042 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4043 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4045 static bool
4046 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4047 gimple *vec_stmt, slp_tree slp_node)
4049 tree vec_dest;
4050 tree scalar_dest;
4051 tree op;
4052 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4053 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4054 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4055 tree new_temp;
4056 tree def;
4057 gimple def_stmt;
4058 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4059 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4060 int ncopies;
4061 int i, j;
4062 vec<tree> vec_oprnds = vNULL;
4063 tree vop;
4064 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4065 gimple new_stmt = NULL;
4066 stmt_vec_info prev_stmt_info = NULL;
4067 enum tree_code code;
4068 tree vectype_in;
4070 /* Multiple types in SLP are handled by creating the appropriate number of
4071 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4072 case of SLP. */
4073 if (slp_node || PURE_SLP_STMT (stmt_info))
4074 ncopies = 1;
4075 else
4076 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4078 gcc_assert (ncopies >= 1);
4080 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4081 return false;
4083 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4084 return false;
4086 /* Is vectorizable assignment? */
4087 if (!is_gimple_assign (stmt))
4088 return false;
4090 scalar_dest = gimple_assign_lhs (stmt);
4091 if (TREE_CODE (scalar_dest) != SSA_NAME)
4092 return false;
4094 code = gimple_assign_rhs_code (stmt);
4095 if (gimple_assign_single_p (stmt)
4096 || code == PAREN_EXPR
4097 || CONVERT_EXPR_CODE_P (code))
4098 op = gimple_assign_rhs1 (stmt);
4099 else
4100 return false;
4102 if (code == VIEW_CONVERT_EXPR)
4103 op = TREE_OPERAND (op, 0);
4105 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4106 &def_stmt, &def, &dt[0], &vectype_in))
4108 if (dump_enabled_p ())
4109 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4110 "use not simple.\n");
4111 return false;
4114 /* We can handle NOP_EXPR conversions that do not change the number
4115 of elements or the vector size. */
4116 if ((CONVERT_EXPR_CODE_P (code)
4117 || code == VIEW_CONVERT_EXPR)
4118 && (!vectype_in
4119 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4120 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4121 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4122 return false;
4124 /* We do not handle bit-precision changes. */
4125 if ((CONVERT_EXPR_CODE_P (code)
4126 || code == VIEW_CONVERT_EXPR)
4127 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4128 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4129 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4130 || ((TYPE_PRECISION (TREE_TYPE (op))
4131 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4132 /* But a conversion that does not change the bit-pattern is ok. */
4133 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4134 > TYPE_PRECISION (TREE_TYPE (op)))
4135 && TYPE_UNSIGNED (TREE_TYPE (op))))
4137 if (dump_enabled_p ())
4138 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4139 "type conversion to/from bit-precision "
4140 "unsupported.\n");
4141 return false;
4144 if (!vec_stmt) /* transformation not required. */
4146 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4147 if (dump_enabled_p ())
4148 dump_printf_loc (MSG_NOTE, vect_location,
4149 "=== vectorizable_assignment ===\n");
4150 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4151 return true;
4154 /** Transform. **/
4155 if (dump_enabled_p ())
4156 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4158 /* Handle def. */
4159 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4161 /* Handle use. */
4162 for (j = 0; j < ncopies; j++)
4164 /* Handle uses. */
4165 if (j == 0)
4166 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4167 else
4168 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4170 /* Arguments are ready. create the new vector stmt. */
4171 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4173 if (CONVERT_EXPR_CODE_P (code)
4174 || code == VIEW_CONVERT_EXPR)
4175 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4176 new_stmt = gimple_build_assign (vec_dest, vop);
4177 new_temp = make_ssa_name (vec_dest, new_stmt);
4178 gimple_assign_set_lhs (new_stmt, new_temp);
4179 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4180 if (slp_node)
4181 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4184 if (slp_node)
4185 continue;
4187 if (j == 0)
4188 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4189 else
4190 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4192 prev_stmt_info = vinfo_for_stmt (new_stmt);
4195 vec_oprnds.release ();
4196 return true;
4200 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4201 either as shift by a scalar or by a vector. */
4203 bool
4204 vect_supportable_shift (enum tree_code code, tree scalar_type)
4207 machine_mode vec_mode;
4208 optab optab;
4209 int icode;
4210 tree vectype;
4212 vectype = get_vectype_for_scalar_type (scalar_type);
4213 if (!vectype)
4214 return false;
4216 optab = optab_for_tree_code (code, vectype, optab_scalar);
4217 if (!optab
4218 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4220 optab = optab_for_tree_code (code, vectype, optab_vector);
4221 if (!optab
4222 || (optab_handler (optab, TYPE_MODE (vectype))
4223 == CODE_FOR_nothing))
4224 return false;
4227 vec_mode = TYPE_MODE (vectype);
4228 icode = (int) optab_handler (optab, vec_mode);
4229 if (icode == CODE_FOR_nothing)
4230 return false;
4232 return true;
4236 /* Function vectorizable_shift.
4238 Check if STMT performs a shift operation that can be vectorized.
4239 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4240 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4241 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4243 static bool
4244 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4245 gimple *vec_stmt, slp_tree slp_node)
4247 tree vec_dest;
4248 tree scalar_dest;
4249 tree op0, op1 = NULL;
4250 tree vec_oprnd1 = NULL_TREE;
4251 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4252 tree vectype;
4253 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4254 enum tree_code code;
4255 machine_mode vec_mode;
4256 tree new_temp;
4257 optab optab;
4258 int icode;
4259 machine_mode optab_op2_mode;
4260 tree def;
4261 gimple def_stmt;
4262 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4263 gimple new_stmt = NULL;
4264 stmt_vec_info prev_stmt_info;
4265 int nunits_in;
4266 int nunits_out;
4267 tree vectype_out;
4268 tree op1_vectype;
4269 int ncopies;
4270 int j, i;
4271 vec<tree> vec_oprnds0 = vNULL;
4272 vec<tree> vec_oprnds1 = vNULL;
4273 tree vop0, vop1;
4274 unsigned int k;
4275 bool scalar_shift_arg = true;
4276 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4277 int vf;
4279 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4280 return false;
4282 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4283 return false;
4285 /* Is STMT a vectorizable binary/unary operation? */
4286 if (!is_gimple_assign (stmt))
4287 return false;
4289 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4290 return false;
4292 code = gimple_assign_rhs_code (stmt);
4294 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4295 || code == RROTATE_EXPR))
4296 return false;
4298 scalar_dest = gimple_assign_lhs (stmt);
4299 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4300 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4301 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4303 if (dump_enabled_p ())
4304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4305 "bit-precision shifts not supported.\n");
4306 return false;
4309 op0 = gimple_assign_rhs1 (stmt);
4310 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4311 &def_stmt, &def, &dt[0], &vectype))
4313 if (dump_enabled_p ())
4314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4315 "use not simple.\n");
4316 return false;
4318 /* If op0 is an external or constant def use a vector type with
4319 the same size as the output vector type. */
4320 if (!vectype)
4321 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4322 if (vec_stmt)
4323 gcc_assert (vectype);
4324 if (!vectype)
4326 if (dump_enabled_p ())
4327 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4328 "no vectype for scalar type\n");
4329 return false;
4332 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4333 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4334 if (nunits_out != nunits_in)
4335 return false;
4337 op1 = gimple_assign_rhs2 (stmt);
4338 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4339 &def, &dt[1], &op1_vectype))
4341 if (dump_enabled_p ())
4342 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4343 "use not simple.\n");
4344 return false;
4347 if (loop_vinfo)
4348 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4349 else
4350 vf = 1;
4352 /* Multiple types in SLP are handled by creating the appropriate number of
4353 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4354 case of SLP. */
4355 if (slp_node || PURE_SLP_STMT (stmt_info))
4356 ncopies = 1;
4357 else
4358 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4360 gcc_assert (ncopies >= 1);
4362 /* Determine whether the shift amount is a vector, or scalar. If the
4363 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4365 if (dt[1] == vect_internal_def && !slp_node)
4366 scalar_shift_arg = false;
4367 else if (dt[1] == vect_constant_def
4368 || dt[1] == vect_external_def
4369 || dt[1] == vect_internal_def)
4371 /* In SLP, need to check whether the shift count is the same,
4372 in loops if it is a constant or invariant, it is always
4373 a scalar shift. */
4374 if (slp_node)
4376 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4377 gimple slpstmt;
4379 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4380 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4381 scalar_shift_arg = false;
4384 else
4386 if (dump_enabled_p ())
4387 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4388 "operand mode requires invariant argument.\n");
4389 return false;
4392 /* Vector shifted by vector. */
4393 if (!scalar_shift_arg)
4395 optab = optab_for_tree_code (code, vectype, optab_vector);
4396 if (dump_enabled_p ())
4397 dump_printf_loc (MSG_NOTE, vect_location,
4398 "vector/vector shift/rotate found.\n");
4400 if (!op1_vectype)
4401 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4402 if (op1_vectype == NULL_TREE
4403 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4405 if (dump_enabled_p ())
4406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4407 "unusable type for last operand in"
4408 " vector/vector shift/rotate.\n");
4409 return false;
4412 /* See if the machine has a vector shifted by scalar insn and if not
4413 then see if it has a vector shifted by vector insn. */
4414 else
4416 optab = optab_for_tree_code (code, vectype, optab_scalar);
4417 if (optab
4418 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4420 if (dump_enabled_p ())
4421 dump_printf_loc (MSG_NOTE, vect_location,
4422 "vector/scalar shift/rotate found.\n");
4424 else
4426 optab = optab_for_tree_code (code, vectype, optab_vector);
4427 if (optab
4428 && (optab_handler (optab, TYPE_MODE (vectype))
4429 != CODE_FOR_nothing))
4431 scalar_shift_arg = false;
4433 if (dump_enabled_p ())
4434 dump_printf_loc (MSG_NOTE, vect_location,
4435 "vector/vector shift/rotate found.\n");
4437 /* Unlike the other binary operators, shifts/rotates have
4438 the rhs being int, instead of the same type as the lhs,
4439 so make sure the scalar is the right type if we are
4440 dealing with vectors of long long/long/short/char. */
4441 if (dt[1] == vect_constant_def)
4442 op1 = fold_convert (TREE_TYPE (vectype), op1);
4443 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4444 TREE_TYPE (op1)))
4446 if (slp_node
4447 && TYPE_MODE (TREE_TYPE (vectype))
4448 != TYPE_MODE (TREE_TYPE (op1)))
4450 if (dump_enabled_p ())
4451 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4452 "unusable type for last operand in"
4453 " vector/vector shift/rotate.\n");
4454 return false;
4456 if (vec_stmt && !slp_node)
4458 op1 = fold_convert (TREE_TYPE (vectype), op1);
4459 op1 = vect_init_vector (stmt, op1,
4460 TREE_TYPE (vectype), NULL);
4467 /* Supportable by target? */
4468 if (!optab)
4470 if (dump_enabled_p ())
4471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4472 "no optab.\n");
4473 return false;
4475 vec_mode = TYPE_MODE (vectype);
4476 icode = (int) optab_handler (optab, vec_mode);
4477 if (icode == CODE_FOR_nothing)
4479 if (dump_enabled_p ())
4480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4481 "op not supported by target.\n");
4482 /* Check only during analysis. */
4483 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4484 || (vf < vect_min_worthwhile_factor (code)
4485 && !vec_stmt))
4486 return false;
4487 if (dump_enabled_p ())
4488 dump_printf_loc (MSG_NOTE, vect_location,
4489 "proceeding using word mode.\n");
4492 /* Worthwhile without SIMD support? Check only during analysis. */
4493 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4494 && vf < vect_min_worthwhile_factor (code)
4495 && !vec_stmt)
4497 if (dump_enabled_p ())
4498 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4499 "not worthwhile without SIMD support.\n");
4500 return false;
4503 if (!vec_stmt) /* transformation not required. */
4505 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4506 if (dump_enabled_p ())
4507 dump_printf_loc (MSG_NOTE, vect_location,
4508 "=== vectorizable_shift ===\n");
4509 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4510 return true;
4513 /** Transform. **/
4515 if (dump_enabled_p ())
4516 dump_printf_loc (MSG_NOTE, vect_location,
4517 "transform binary/unary operation.\n");
4519 /* Handle def. */
4520 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4522 prev_stmt_info = NULL;
4523 for (j = 0; j < ncopies; j++)
4525 /* Handle uses. */
4526 if (j == 0)
4528 if (scalar_shift_arg)
4530 /* Vector shl and shr insn patterns can be defined with scalar
4531 operand 2 (shift operand). In this case, use constant or loop
4532 invariant op1 directly, without extending it to vector mode
4533 first. */
4534 optab_op2_mode = insn_data[icode].operand[2].mode;
4535 if (!VECTOR_MODE_P (optab_op2_mode))
4537 if (dump_enabled_p ())
4538 dump_printf_loc (MSG_NOTE, vect_location,
4539 "operand 1 using scalar mode.\n");
4540 vec_oprnd1 = op1;
4541 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4542 vec_oprnds1.quick_push (vec_oprnd1);
4543 if (slp_node)
4545 /* Store vec_oprnd1 for every vector stmt to be created
4546 for SLP_NODE. We check during the analysis that all
4547 the shift arguments are the same.
4548 TODO: Allow different constants for different vector
4549 stmts generated for an SLP instance. */
4550 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4551 vec_oprnds1.quick_push (vec_oprnd1);
4556 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4557 (a special case for certain kind of vector shifts); otherwise,
4558 operand 1 should be of a vector type (the usual case). */
4559 if (vec_oprnd1)
4560 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4561 slp_node, -1);
4562 else
4563 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4564 slp_node, -1);
4566 else
4567 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4569 /* Arguments are ready. Create the new vector stmt. */
4570 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4572 vop1 = vec_oprnds1[i];
4573 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4574 new_temp = make_ssa_name (vec_dest, new_stmt);
4575 gimple_assign_set_lhs (new_stmt, new_temp);
4576 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4577 if (slp_node)
4578 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4581 if (slp_node)
4582 continue;
4584 if (j == 0)
4585 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4586 else
4587 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4588 prev_stmt_info = vinfo_for_stmt (new_stmt);
4591 vec_oprnds0.release ();
4592 vec_oprnds1.release ();
4594 return true;
4598 /* Function vectorizable_operation.
4600 Check if STMT performs a binary, unary or ternary operation that can
4601 be vectorized.
4602 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4603 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4604 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4606 static bool
4607 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4608 gimple *vec_stmt, slp_tree slp_node)
4610 tree vec_dest;
4611 tree scalar_dest;
4612 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4613 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4614 tree vectype;
4615 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4616 enum tree_code code;
4617 machine_mode vec_mode;
4618 tree new_temp;
4619 int op_type;
4620 optab optab;
4621 int icode;
4622 tree def;
4623 gimple def_stmt;
4624 enum vect_def_type dt[3]
4625 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4626 gimple new_stmt = NULL;
4627 stmt_vec_info prev_stmt_info;
4628 int nunits_in;
4629 int nunits_out;
4630 tree vectype_out;
4631 int ncopies;
4632 int j, i;
4633 vec<tree> vec_oprnds0 = vNULL;
4634 vec<tree> vec_oprnds1 = vNULL;
4635 vec<tree> vec_oprnds2 = vNULL;
4636 tree vop0, vop1, vop2;
4637 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4638 int vf;
4640 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4641 return false;
4643 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4644 return false;
4646 /* Is STMT a vectorizable binary/unary operation? */
4647 if (!is_gimple_assign (stmt))
4648 return false;
4650 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4651 return false;
4653 code = gimple_assign_rhs_code (stmt);
4655 /* For pointer addition, we should use the normal plus for
4656 the vector addition. */
4657 if (code == POINTER_PLUS_EXPR)
4658 code = PLUS_EXPR;
4660 /* Support only unary or binary operations. */
4661 op_type = TREE_CODE_LENGTH (code);
4662 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4664 if (dump_enabled_p ())
4665 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4666 "num. args = %d (not unary/binary/ternary op).\n",
4667 op_type);
4668 return false;
4671 scalar_dest = gimple_assign_lhs (stmt);
4672 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4674 /* Most operations cannot handle bit-precision types without extra
4675 truncations. */
4676 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4677 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4678 /* Exception are bitwise binary operations. */
4679 && code != BIT_IOR_EXPR
4680 && code != BIT_XOR_EXPR
4681 && code != BIT_AND_EXPR)
4683 if (dump_enabled_p ())
4684 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4685 "bit-precision arithmetic not supported.\n");
4686 return false;
4689 op0 = gimple_assign_rhs1 (stmt);
4690 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4691 &def_stmt, &def, &dt[0], &vectype))
4693 if (dump_enabled_p ())
4694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4695 "use not simple.\n");
4696 return false;
4698 /* If op0 is an external or constant def use a vector type with
4699 the same size as the output vector type. */
4700 if (!vectype)
4701 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4702 if (vec_stmt)
4703 gcc_assert (vectype);
4704 if (!vectype)
4706 if (dump_enabled_p ())
4708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4709 "no vectype for scalar type ");
4710 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4711 TREE_TYPE (op0));
4712 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4715 return false;
4718 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4719 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4720 if (nunits_out != nunits_in)
4721 return false;
4723 if (op_type == binary_op || op_type == ternary_op)
4725 op1 = gimple_assign_rhs2 (stmt);
4726 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4727 &def, &dt[1]))
4729 if (dump_enabled_p ())
4730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4731 "use not simple.\n");
4732 return false;
4735 if (op_type == ternary_op)
4737 op2 = gimple_assign_rhs3 (stmt);
4738 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4739 &def, &dt[2]))
4741 if (dump_enabled_p ())
4742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4743 "use not simple.\n");
4744 return false;
4748 if (loop_vinfo)
4749 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4750 else
4751 vf = 1;
4753 /* Multiple types in SLP are handled by creating the appropriate number of
4754 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4755 case of SLP. */
4756 if (slp_node || PURE_SLP_STMT (stmt_info))
4757 ncopies = 1;
4758 else
4759 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4761 gcc_assert (ncopies >= 1);
4763 /* Shifts are handled in vectorizable_shift (). */
4764 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4765 || code == RROTATE_EXPR)
4766 return false;
4768 /* Supportable by target? */
4770 vec_mode = TYPE_MODE (vectype);
4771 if (code == MULT_HIGHPART_EXPR)
4773 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4774 icode = LAST_INSN_CODE;
4775 else
4776 icode = CODE_FOR_nothing;
4778 else
4780 optab = optab_for_tree_code (code, vectype, optab_default);
4781 if (!optab)
4783 if (dump_enabled_p ())
4784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4785 "no optab.\n");
4786 return false;
4788 icode = (int) optab_handler (optab, vec_mode);
4791 if (icode == CODE_FOR_nothing)
4793 if (dump_enabled_p ())
4794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4795 "op not supported by target.\n");
4796 /* Check only during analysis. */
4797 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4798 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4799 return false;
4800 if (dump_enabled_p ())
4801 dump_printf_loc (MSG_NOTE, vect_location,
4802 "proceeding using word mode.\n");
4805 /* Worthwhile without SIMD support? Check only during analysis. */
4806 if (!VECTOR_MODE_P (vec_mode)
4807 && !vec_stmt
4808 && vf < vect_min_worthwhile_factor (code))
4810 if (dump_enabled_p ())
4811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4812 "not worthwhile without SIMD support.\n");
4813 return false;
4816 if (!vec_stmt) /* transformation not required. */
4818 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4819 if (dump_enabled_p ())
4820 dump_printf_loc (MSG_NOTE, vect_location,
4821 "=== vectorizable_operation ===\n");
4822 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4823 return true;
4826 /** Transform. **/
4828 if (dump_enabled_p ())
4829 dump_printf_loc (MSG_NOTE, vect_location,
4830 "transform binary/unary operation.\n");
4832 /* Handle def. */
4833 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4835 /* In case the vectorization factor (VF) is bigger than the number
4836 of elements that we can fit in a vectype (nunits), we have to generate
4837 more than one vector stmt - i.e - we need to "unroll" the
4838 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4839 from one copy of the vector stmt to the next, in the field
4840 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4841 stages to find the correct vector defs to be used when vectorizing
4842 stmts that use the defs of the current stmt. The example below
4843 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4844 we need to create 4 vectorized stmts):
4846 before vectorization:
4847 RELATED_STMT VEC_STMT
4848 S1: x = memref - -
4849 S2: z = x + 1 - -
4851 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4852 there):
4853 RELATED_STMT VEC_STMT
4854 VS1_0: vx0 = memref0 VS1_1 -
4855 VS1_1: vx1 = memref1 VS1_2 -
4856 VS1_2: vx2 = memref2 VS1_3 -
4857 VS1_3: vx3 = memref3 - -
4858 S1: x = load - VS1_0
4859 S2: z = x + 1 - -
4861 step2: vectorize stmt S2 (done here):
4862 To vectorize stmt S2 we first need to find the relevant vector
4863 def for the first operand 'x'. This is, as usual, obtained from
4864 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4865 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4866 relevant vector def 'vx0'. Having found 'vx0' we can generate
4867 the vector stmt VS2_0, and as usual, record it in the
4868 STMT_VINFO_VEC_STMT of stmt S2.
4869 When creating the second copy (VS2_1), we obtain the relevant vector
4870 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4871 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4872 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4873 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4874 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4875 chain of stmts and pointers:
4876 RELATED_STMT VEC_STMT
4877 VS1_0: vx0 = memref0 VS1_1 -
4878 VS1_1: vx1 = memref1 VS1_2 -
4879 VS1_2: vx2 = memref2 VS1_3 -
4880 VS1_3: vx3 = memref3 - -
4881 S1: x = load - VS1_0
4882 VS2_0: vz0 = vx0 + v1 VS2_1 -
4883 VS2_1: vz1 = vx1 + v1 VS2_2 -
4884 VS2_2: vz2 = vx2 + v1 VS2_3 -
4885 VS2_3: vz3 = vx3 + v1 - -
4886 S2: z = x + 1 - VS2_0 */
4888 prev_stmt_info = NULL;
4889 for (j = 0; j < ncopies; j++)
4891 /* Handle uses. */
4892 if (j == 0)
4894 if (op_type == binary_op || op_type == ternary_op)
4895 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4896 slp_node, -1);
4897 else
4898 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4899 slp_node, -1);
4900 if (op_type == ternary_op)
4902 vec_oprnds2.create (1);
4903 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4904 stmt,
4905 NULL));
4908 else
4910 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4911 if (op_type == ternary_op)
4913 tree vec_oprnd = vec_oprnds2.pop ();
4914 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4915 vec_oprnd));
4919 /* Arguments are ready. Create the new vector stmt. */
4920 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4922 vop1 = ((op_type == binary_op || op_type == ternary_op)
4923 ? vec_oprnds1[i] : NULL_TREE);
4924 vop2 = ((op_type == ternary_op)
4925 ? vec_oprnds2[i] : NULL_TREE);
4926 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4927 new_temp = make_ssa_name (vec_dest, new_stmt);
4928 gimple_assign_set_lhs (new_stmt, new_temp);
4929 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4930 if (slp_node)
4931 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4934 if (slp_node)
4935 continue;
4937 if (j == 0)
4938 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4939 else
4940 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4941 prev_stmt_info = vinfo_for_stmt (new_stmt);
4944 vec_oprnds0.release ();
4945 vec_oprnds1.release ();
4946 vec_oprnds2.release ();
4948 return true;
4951 /* A helper function to ensure data reference DR's base alignment
4952 for STMT_INFO. */
4954 static void
4955 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4957 if (!dr->aux)
4958 return;
4960 if (((dataref_aux *)dr->aux)->base_misaligned)
4962 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4963 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4965 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4966 DECL_USER_ALIGN (base_decl) = 1;
4967 ((dataref_aux *)dr->aux)->base_misaligned = false;
4972 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4973 reversal of the vector elements. If that is impossible to do,
4974 returns NULL. */
4976 static tree
4977 perm_mask_for_reverse (tree vectype)
4979 int i, nunits;
4980 unsigned char *sel;
4982 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4983 sel = XALLOCAVEC (unsigned char, nunits);
4985 for (i = 0; i < nunits; ++i)
4986 sel[i] = nunits - 1 - i;
4988 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4989 return NULL_TREE;
4990 return vect_gen_perm_mask_checked (vectype, sel);
4993 /* Function vectorizable_store.
4995 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
4996 can be vectorized.
4997 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4998 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4999 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5001 static bool
5002 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5003 slp_tree slp_node)
5005 tree scalar_dest;
5006 tree data_ref;
5007 tree op;
5008 tree vec_oprnd = NULL_TREE;
5009 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5010 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5011 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5012 tree elem_type;
5013 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5014 struct loop *loop = NULL;
5015 machine_mode vec_mode;
5016 tree dummy;
5017 enum dr_alignment_support alignment_support_scheme;
5018 tree def;
5019 gimple def_stmt;
5020 enum vect_def_type dt;
5021 stmt_vec_info prev_stmt_info = NULL;
5022 tree dataref_ptr = NULL_TREE;
5023 tree dataref_offset = NULL_TREE;
5024 gimple ptr_incr = NULL;
5025 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5026 int ncopies;
5027 int j;
5028 gimple next_stmt, first_stmt = NULL;
5029 bool grouped_store = false;
5030 bool store_lanes_p = false;
5031 unsigned int group_size, i;
5032 vec<tree> dr_chain = vNULL;
5033 vec<tree> oprnds = vNULL;
5034 vec<tree> result_chain = vNULL;
5035 bool inv_p;
5036 bool negative = false;
5037 tree offset = NULL_TREE;
5038 vec<tree> vec_oprnds = vNULL;
5039 bool slp = (slp_node != NULL);
5040 unsigned int vec_num;
5041 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5042 tree aggr_type;
5044 if (loop_vinfo)
5045 loop = LOOP_VINFO_LOOP (loop_vinfo);
5047 /* Multiple types in SLP are handled by creating the appropriate number of
5048 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5049 case of SLP. */
5050 if (slp || PURE_SLP_STMT (stmt_info))
5051 ncopies = 1;
5052 else
5053 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5055 gcc_assert (ncopies >= 1);
5057 /* FORNOW. This restriction should be relaxed. */
5058 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5060 if (dump_enabled_p ())
5061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5062 "multiple types in nested loop.\n");
5063 return false;
5066 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5067 return false;
5069 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5070 return false;
5072 /* Is vectorizable store? */
5074 if (!is_gimple_assign (stmt))
5075 return false;
5077 scalar_dest = gimple_assign_lhs (stmt);
5078 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5079 && is_pattern_stmt_p (stmt_info))
5080 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5081 if (TREE_CODE (scalar_dest) != ARRAY_REF
5082 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5083 && TREE_CODE (scalar_dest) != INDIRECT_REF
5084 && TREE_CODE (scalar_dest) != COMPONENT_REF
5085 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5086 && TREE_CODE (scalar_dest) != REALPART_EXPR
5087 && TREE_CODE (scalar_dest) != MEM_REF)
5088 return false;
5090 gcc_assert (gimple_assign_single_p (stmt));
5091 op = gimple_assign_rhs1 (stmt);
5092 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5093 &def, &dt))
5095 if (dump_enabled_p ())
5096 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5097 "use not simple.\n");
5098 return false;
5101 elem_type = TREE_TYPE (vectype);
5102 vec_mode = TYPE_MODE (vectype);
5104 /* FORNOW. In some cases can vectorize even if data-type not supported
5105 (e.g. - array initialization with 0). */
5106 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5107 return false;
5109 if (!STMT_VINFO_DATA_REF (stmt_info))
5110 return false;
5112 negative =
5113 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5114 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5115 size_zero_node) < 0;
5116 if (negative && ncopies > 1)
5118 if (dump_enabled_p ())
5119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5120 "multiple types with negative step.\n");
5121 return false;
5124 if (negative)
5126 gcc_assert (!grouped_store);
5127 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5128 if (alignment_support_scheme != dr_aligned
5129 && alignment_support_scheme != dr_unaligned_supported)
5131 if (dump_enabled_p ())
5132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5133 "negative step but alignment required.\n");
5134 return false;
5136 if (dt != vect_constant_def
5137 && dt != vect_external_def
5138 && !perm_mask_for_reverse (vectype))
5140 if (dump_enabled_p ())
5141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5142 "negative step and reversing not supported.\n");
5143 return false;
5147 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5149 grouped_store = true;
5150 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5151 if (!slp && !PURE_SLP_STMT (stmt_info))
5153 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5154 if (vect_store_lanes_supported (vectype, group_size))
5155 store_lanes_p = true;
5156 else if (!vect_grouped_store_supported (vectype, group_size))
5157 return false;
5160 if (first_stmt == stmt)
5162 /* STMT is the leader of the group. Check the operands of all the
5163 stmts of the group. */
5164 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5165 while (next_stmt)
5167 gcc_assert (gimple_assign_single_p (next_stmt));
5168 op = gimple_assign_rhs1 (next_stmt);
5169 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5170 &def_stmt, &def, &dt))
5172 if (dump_enabled_p ())
5173 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5174 "use not simple.\n");
5175 return false;
5177 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5182 if (!vec_stmt) /* transformation not required. */
5184 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5185 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5186 NULL, NULL, NULL);
5187 return true;
5190 /** Transform. **/
5192 ensure_base_align (stmt_info, dr);
5194 if (grouped_store)
5196 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5197 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5199 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5201 /* FORNOW */
5202 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5204 /* We vectorize all the stmts of the interleaving group when we
5205 reach the last stmt in the group. */
5206 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5207 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5208 && !slp)
5210 *vec_stmt = NULL;
5211 return true;
5214 if (slp)
5216 grouped_store = false;
5217 /* VEC_NUM is the number of vect stmts to be created for this
5218 group. */
5219 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5220 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5221 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5222 op = gimple_assign_rhs1 (first_stmt);
5224 else
5225 /* VEC_NUM is the number of vect stmts to be created for this
5226 group. */
5227 vec_num = group_size;
5229 else
5231 first_stmt = stmt;
5232 first_dr = dr;
5233 group_size = vec_num = 1;
5236 if (dump_enabled_p ())
5237 dump_printf_loc (MSG_NOTE, vect_location,
5238 "transform store. ncopies = %d\n", ncopies);
5240 dr_chain.create (group_size);
5241 oprnds.create (group_size);
5243 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5244 gcc_assert (alignment_support_scheme);
5245 /* Targets with store-lane instructions must not require explicit
5246 realignment. */
5247 gcc_assert (!store_lanes_p
5248 || alignment_support_scheme == dr_aligned
5249 || alignment_support_scheme == dr_unaligned_supported);
5251 if (negative)
5252 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5254 if (store_lanes_p)
5255 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5256 else
5257 aggr_type = vectype;
5259 /* In case the vectorization factor (VF) is bigger than the number
5260 of elements that we can fit in a vectype (nunits), we have to generate
5261 more than one vector stmt - i.e - we need to "unroll" the
5262 vector stmt by a factor VF/nunits. For more details see documentation in
5263 vect_get_vec_def_for_copy_stmt. */
5265 /* In case of interleaving (non-unit grouped access):
5267 S1: &base + 2 = x2
5268 S2: &base = x0
5269 S3: &base + 1 = x1
5270 S4: &base + 3 = x3
5272 We create vectorized stores starting from base address (the access of the
5273 first stmt in the chain (S2 in the above example), when the last store stmt
5274 of the chain (S4) is reached:
5276 VS1: &base = vx2
5277 VS2: &base + vec_size*1 = vx0
5278 VS3: &base + vec_size*2 = vx1
5279 VS4: &base + vec_size*3 = vx3
5281 Then permutation statements are generated:
5283 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5284 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5287 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5288 (the order of the data-refs in the output of vect_permute_store_chain
5289 corresponds to the order of scalar stmts in the interleaving chain - see
5290 the documentation of vect_permute_store_chain()).
5292 In case of both multiple types and interleaving, above vector stores and
5293 permutation stmts are created for every copy. The result vector stmts are
5294 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5295 STMT_VINFO_RELATED_STMT for the next copies.
5298 prev_stmt_info = NULL;
5299 for (j = 0; j < ncopies; j++)
5301 gimple new_stmt;
5303 if (j == 0)
5305 if (slp)
5307 /* Get vectorized arguments for SLP_NODE. */
5308 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5309 NULL, slp_node, -1);
5311 vec_oprnd = vec_oprnds[0];
5313 else
5315 /* For interleaved stores we collect vectorized defs for all the
5316 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5317 used as an input to vect_permute_store_chain(), and OPRNDS as
5318 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5320 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5321 OPRNDS are of size 1. */
5322 next_stmt = first_stmt;
5323 for (i = 0; i < group_size; i++)
5325 /* Since gaps are not supported for interleaved stores,
5326 GROUP_SIZE is the exact number of stmts in the chain.
5327 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5328 there is no interleaving, GROUP_SIZE is 1, and only one
5329 iteration of the loop will be executed. */
5330 gcc_assert (next_stmt
5331 && gimple_assign_single_p (next_stmt));
5332 op = gimple_assign_rhs1 (next_stmt);
5334 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5335 NULL);
5336 dr_chain.quick_push (vec_oprnd);
5337 oprnds.quick_push (vec_oprnd);
5338 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5342 /* We should have catched mismatched types earlier. */
5343 gcc_assert (useless_type_conversion_p (vectype,
5344 TREE_TYPE (vec_oprnd)));
5345 bool simd_lane_access_p
5346 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5347 if (simd_lane_access_p
5348 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5349 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5350 && integer_zerop (DR_OFFSET (first_dr))
5351 && integer_zerop (DR_INIT (first_dr))
5352 && alias_sets_conflict_p (get_alias_set (aggr_type),
5353 get_alias_set (DR_REF (first_dr))))
5355 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5356 dataref_offset = build_int_cst (reference_alias_ptr_type
5357 (DR_REF (first_dr)), 0);
5358 inv_p = false;
5360 else
5361 dataref_ptr
5362 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5363 simd_lane_access_p ? loop : NULL,
5364 offset, &dummy, gsi, &ptr_incr,
5365 simd_lane_access_p, &inv_p);
5366 gcc_assert (bb_vinfo || !inv_p);
5368 else
5370 /* For interleaved stores we created vectorized defs for all the
5371 defs stored in OPRNDS in the previous iteration (previous copy).
5372 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5373 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5374 next copy.
5375 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5376 OPRNDS are of size 1. */
5377 for (i = 0; i < group_size; i++)
5379 op = oprnds[i];
5380 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5381 &def, &dt);
5382 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5383 dr_chain[i] = vec_oprnd;
5384 oprnds[i] = vec_oprnd;
5386 if (dataref_offset)
5387 dataref_offset
5388 = int_const_binop (PLUS_EXPR, dataref_offset,
5389 TYPE_SIZE_UNIT (aggr_type));
5390 else
5391 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5392 TYPE_SIZE_UNIT (aggr_type));
5395 if (store_lanes_p)
5397 tree vec_array;
5399 /* Combine all the vectors into an array. */
5400 vec_array = create_vector_array (vectype, vec_num);
5401 for (i = 0; i < vec_num; i++)
5403 vec_oprnd = dr_chain[i];
5404 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5407 /* Emit:
5408 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5409 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5410 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5411 gimple_call_set_lhs (new_stmt, data_ref);
5412 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5414 else
5416 new_stmt = NULL;
5417 if (grouped_store)
5419 if (j == 0)
5420 result_chain.create (group_size);
5421 /* Permute. */
5422 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5423 &result_chain);
5426 next_stmt = first_stmt;
5427 for (i = 0; i < vec_num; i++)
5429 unsigned align, misalign;
5431 if (i > 0)
5432 /* Bump the vector pointer. */
5433 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5434 stmt, NULL_TREE);
5436 if (slp)
5437 vec_oprnd = vec_oprnds[i];
5438 else if (grouped_store)
5439 /* For grouped stores vectorized defs are interleaved in
5440 vect_permute_store_chain(). */
5441 vec_oprnd = result_chain[i];
5443 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5444 dataref_offset
5445 ? dataref_offset
5446 : build_int_cst (reference_alias_ptr_type
5447 (DR_REF (first_dr)), 0));
5448 align = TYPE_ALIGN_UNIT (vectype);
5449 if (aligned_access_p (first_dr))
5450 misalign = 0;
5451 else if (DR_MISALIGNMENT (first_dr) == -1)
5453 TREE_TYPE (data_ref)
5454 = build_aligned_type (TREE_TYPE (data_ref),
5455 TYPE_ALIGN (elem_type));
5456 align = TYPE_ALIGN_UNIT (elem_type);
5457 misalign = 0;
5459 else
5461 TREE_TYPE (data_ref)
5462 = build_aligned_type (TREE_TYPE (data_ref),
5463 TYPE_ALIGN (elem_type));
5464 misalign = DR_MISALIGNMENT (first_dr);
5466 if (dataref_offset == NULL_TREE)
5467 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5468 misalign);
5470 if (negative
5471 && dt != vect_constant_def
5472 && dt != vect_external_def)
5474 tree perm_mask = perm_mask_for_reverse (vectype);
5475 tree perm_dest
5476 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5477 vectype);
5478 tree new_temp = make_ssa_name (perm_dest);
5480 /* Generate the permute statement. */
5481 gimple perm_stmt
5482 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5483 vec_oprnd, perm_mask);
5484 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5486 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5487 vec_oprnd = new_temp;
5490 /* Arguments are ready. Create the new vector stmt. */
5491 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5492 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5494 if (slp)
5495 continue;
5497 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5498 if (!next_stmt)
5499 break;
5502 if (!slp)
5504 if (j == 0)
5505 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5506 else
5507 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5508 prev_stmt_info = vinfo_for_stmt (new_stmt);
5512 dr_chain.release ();
5513 oprnds.release ();
5514 result_chain.release ();
5515 vec_oprnds.release ();
5517 return true;
5520 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5521 VECTOR_CST mask. No checks are made that the target platform supports the
5522 mask, so callers may wish to test can_vec_perm_p separately, or use
5523 vect_gen_perm_mask_checked. */
5525 tree
5526 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5528 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5529 int i, nunits;
5531 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5533 mask_elt_type = lang_hooks.types.type_for_mode
5534 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5535 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5537 mask_elts = XALLOCAVEC (tree, nunits);
5538 for (i = nunits - 1; i >= 0; i--)
5539 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5540 mask_vec = build_vector (mask_type, mask_elts);
5542 return mask_vec;
5545 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5546 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5548 tree
5549 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5551 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5552 return vect_gen_perm_mask_any (vectype, sel);
5555 /* Given a vector variable X and Y, that was generated for the scalar
5556 STMT, generate instructions to permute the vector elements of X and Y
5557 using permutation mask MASK_VEC, insert them at *GSI and return the
5558 permuted vector variable. */
5560 static tree
5561 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5562 gimple_stmt_iterator *gsi)
5564 tree vectype = TREE_TYPE (x);
5565 tree perm_dest, data_ref;
5566 gimple perm_stmt;
5568 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5569 data_ref = make_ssa_name (perm_dest);
5571 /* Generate the permute statement. */
5572 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
5573 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5575 return data_ref;
5578 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5579 inserting them on the loops preheader edge. Returns true if we
5580 were successful in doing so (and thus STMT can be moved then),
5581 otherwise returns false. */
5583 static bool
5584 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5586 ssa_op_iter i;
5587 tree op;
5588 bool any = false;
5590 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5592 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5593 if (!gimple_nop_p (def_stmt)
5594 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5596 /* Make sure we don't need to recurse. While we could do
5597 so in simple cases when there are more complex use webs
5598 we don't have an easy way to preserve stmt order to fulfil
5599 dependencies within them. */
5600 tree op2;
5601 ssa_op_iter i2;
5602 if (gimple_code (def_stmt) == GIMPLE_PHI)
5603 return false;
5604 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5606 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5607 if (!gimple_nop_p (def_stmt2)
5608 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5609 return false;
5611 any = true;
5615 if (!any)
5616 return true;
5618 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5620 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5621 if (!gimple_nop_p (def_stmt)
5622 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5624 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5625 gsi_remove (&gsi, false);
5626 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5630 return true;
5633 /* vectorizable_load.
5635 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
5636 can be vectorized.
5637 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5638 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5639 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5641 static bool
5642 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5643 slp_tree slp_node, slp_instance slp_node_instance)
5645 tree scalar_dest;
5646 tree vec_dest = NULL;
5647 tree data_ref = NULL;
5648 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5649 stmt_vec_info prev_stmt_info;
5650 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5651 struct loop *loop = NULL;
5652 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5653 bool nested_in_vect_loop = false;
5654 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5655 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5656 tree elem_type;
5657 tree new_temp;
5658 machine_mode mode;
5659 gimple new_stmt = NULL;
5660 tree dummy;
5661 enum dr_alignment_support alignment_support_scheme;
5662 tree dataref_ptr = NULL_TREE;
5663 tree dataref_offset = NULL_TREE;
5664 gimple ptr_incr = NULL;
5665 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5666 int ncopies;
5667 int i, j, group_size, group_gap;
5668 tree msq = NULL_TREE, lsq;
5669 tree offset = NULL_TREE;
5670 tree byte_offset = NULL_TREE;
5671 tree realignment_token = NULL_TREE;
5672 gphi *phi = NULL;
5673 vec<tree> dr_chain = vNULL;
5674 bool grouped_load = false;
5675 bool load_lanes_p = false;
5676 gimple first_stmt;
5677 bool inv_p;
5678 bool negative = false;
5679 bool compute_in_loop = false;
5680 struct loop *at_loop;
5681 int vec_num;
5682 bool slp = (slp_node != NULL);
5683 bool slp_perm = false;
5684 enum tree_code code;
5685 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5686 int vf;
5687 tree aggr_type;
5688 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5689 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5690 int gather_scale = 1;
5691 enum vect_def_type gather_dt = vect_unknown_def_type;
5693 if (loop_vinfo)
5695 loop = LOOP_VINFO_LOOP (loop_vinfo);
5696 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5697 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5699 else
5700 vf = 1;
5702 /* Multiple types in SLP are handled by creating the appropriate number of
5703 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5704 case of SLP. */
5705 if (slp || PURE_SLP_STMT (stmt_info))
5706 ncopies = 1;
5707 else
5708 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5710 gcc_assert (ncopies >= 1);
5712 /* FORNOW. This restriction should be relaxed. */
5713 if (nested_in_vect_loop && ncopies > 1)
5715 if (dump_enabled_p ())
5716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5717 "multiple types in nested loop.\n");
5718 return false;
5721 /* Invalidate assumptions made by dependence analysis when vectorization
5722 on the unrolled body effectively re-orders stmts. */
5723 if (ncopies > 1
5724 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5725 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5726 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5728 if (dump_enabled_p ())
5729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5730 "cannot perform implicit CSE when unrolling "
5731 "with negative dependence distance\n");
5732 return false;
5735 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5736 return false;
5738 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5739 return false;
5741 /* Is vectorizable load? */
5742 if (!is_gimple_assign (stmt))
5743 return false;
5745 scalar_dest = gimple_assign_lhs (stmt);
5746 if (TREE_CODE (scalar_dest) != SSA_NAME)
5747 return false;
5749 code = gimple_assign_rhs_code (stmt);
5750 if (code != ARRAY_REF
5751 && code != BIT_FIELD_REF
5752 && code != INDIRECT_REF
5753 && code != COMPONENT_REF
5754 && code != IMAGPART_EXPR
5755 && code != REALPART_EXPR
5756 && code != MEM_REF
5757 && TREE_CODE_CLASS (code) != tcc_declaration)
5758 return false;
5760 if (!STMT_VINFO_DATA_REF (stmt_info))
5761 return false;
5763 elem_type = TREE_TYPE (vectype);
5764 mode = TYPE_MODE (vectype);
5766 /* FORNOW. In some cases can vectorize even if data-type not supported
5767 (e.g. - data copies). */
5768 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5770 if (dump_enabled_p ())
5771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5772 "Aligned load, but unsupported type.\n");
5773 return false;
5776 /* Check if the load is a part of an interleaving chain. */
5777 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5779 grouped_load = true;
5780 /* FORNOW */
5781 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5783 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5784 if (!slp && !PURE_SLP_STMT (stmt_info))
5786 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5787 if (vect_load_lanes_supported (vectype, group_size))
5788 load_lanes_p = true;
5789 else if (!vect_grouped_load_supported (vectype, group_size))
5790 return false;
5793 /* Invalidate assumptions made by dependence analysis when vectorization
5794 on the unrolled body effectively re-orders stmts. */
5795 if (!PURE_SLP_STMT (stmt_info)
5796 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5797 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5798 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5800 if (dump_enabled_p ())
5801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5802 "cannot perform implicit CSE when performing "
5803 "group loads with negative dependence distance\n");
5804 return false;
5807 /* Similarly when the stmt is a load that is both part of a SLP
5808 instance and a loop vectorized stmt via the same-dr mechanism
5809 we have to give up. */
5810 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
5811 && (STMT_SLP_TYPE (stmt_info)
5812 != STMT_SLP_TYPE (vinfo_for_stmt
5813 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
5815 if (dump_enabled_p ())
5816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5817 "conflicting SLP types for CSEd load\n");
5818 return false;
5823 if (STMT_VINFO_GATHER_P (stmt_info))
5825 gimple def_stmt;
5826 tree def;
5827 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5828 &gather_off, &gather_scale);
5829 gcc_assert (gather_decl);
5830 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5831 &def_stmt, &def, &gather_dt,
5832 &gather_off_vectype))
5834 if (dump_enabled_p ())
5835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5836 "gather index use not simple.\n");
5837 return false;
5840 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5842 else
5844 negative = tree_int_cst_compare (nested_in_vect_loop
5845 ? STMT_VINFO_DR_STEP (stmt_info)
5846 : DR_STEP (dr),
5847 size_zero_node) < 0;
5848 if (negative && ncopies > 1)
5850 if (dump_enabled_p ())
5851 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5852 "multiple types with negative step.\n");
5853 return false;
5856 if (negative)
5858 if (grouped_load)
5860 if (dump_enabled_p ())
5861 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5862 "negative step for group load not supported"
5863 "\n");
5864 return false;
5866 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5867 if (alignment_support_scheme != dr_aligned
5868 && alignment_support_scheme != dr_unaligned_supported)
5870 if (dump_enabled_p ())
5871 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5872 "negative step but alignment required.\n");
5873 return false;
5875 if (!perm_mask_for_reverse (vectype))
5877 if (dump_enabled_p ())
5878 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5879 "negative step and reversing not supported."
5880 "\n");
5881 return false;
5886 if (!vec_stmt) /* transformation not required. */
5888 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5889 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
5890 return true;
5893 if (dump_enabled_p ())
5894 dump_printf_loc (MSG_NOTE, vect_location,
5895 "transform load. ncopies = %d\n", ncopies);
5897 /** Transform. **/
5899 ensure_base_align (stmt_info, dr);
5901 if (STMT_VINFO_GATHER_P (stmt_info))
5903 tree vec_oprnd0 = NULL_TREE, op;
5904 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5905 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5906 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
5907 edge pe = loop_preheader_edge (loop);
5908 gimple_seq seq;
5909 basic_block new_bb;
5910 enum { NARROW, NONE, WIDEN } modifier;
5911 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
5913 if (nunits == gather_off_nunits)
5914 modifier = NONE;
5915 else if (nunits == gather_off_nunits / 2)
5917 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5918 modifier = WIDEN;
5920 for (i = 0; i < gather_off_nunits; ++i)
5921 sel[i] = i | nunits;
5923 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
5925 else if (nunits == gather_off_nunits * 2)
5927 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5928 modifier = NARROW;
5930 for (i = 0; i < nunits; ++i)
5931 sel[i] = i < gather_off_nunits
5932 ? i : i + nunits - gather_off_nunits;
5934 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5935 ncopies *= 2;
5937 else
5938 gcc_unreachable ();
5940 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5941 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5942 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5943 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5944 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5945 scaletype = TREE_VALUE (arglist);
5946 gcc_checking_assert (types_compatible_p (srctype, rettype));
5948 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5950 ptr = fold_convert (ptrtype, gather_base);
5951 if (!is_gimple_min_invariant (ptr))
5953 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5954 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5955 gcc_assert (!new_bb);
5958 /* Currently we support only unconditional gather loads,
5959 so mask should be all ones. */
5960 if (TREE_CODE (masktype) == INTEGER_TYPE)
5961 mask = build_int_cst (masktype, -1);
5962 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5964 mask = build_int_cst (TREE_TYPE (masktype), -1);
5965 mask = build_vector_from_val (masktype, mask);
5966 mask = vect_init_vector (stmt, mask, masktype, NULL);
5968 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
5970 REAL_VALUE_TYPE r;
5971 long tmp[6];
5972 for (j = 0; j < 6; ++j)
5973 tmp[j] = -1;
5974 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5975 mask = build_real (TREE_TYPE (masktype), r);
5976 mask = build_vector_from_val (masktype, mask);
5977 mask = vect_init_vector (stmt, mask, masktype, NULL);
5979 else
5980 gcc_unreachable ();
5982 scale = build_int_cst (scaletype, gather_scale);
5984 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
5985 merge = build_int_cst (TREE_TYPE (rettype), 0);
5986 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
5988 REAL_VALUE_TYPE r;
5989 long tmp[6];
5990 for (j = 0; j < 6; ++j)
5991 tmp[j] = 0;
5992 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
5993 merge = build_real (TREE_TYPE (rettype), r);
5995 else
5996 gcc_unreachable ();
5997 merge = build_vector_from_val (rettype, merge);
5998 merge = vect_init_vector (stmt, merge, rettype, NULL);
6000 prev_stmt_info = NULL;
6001 for (j = 0; j < ncopies; ++j)
6003 if (modifier == WIDEN && (j & 1))
6004 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6005 perm_mask, stmt, gsi);
6006 else if (j == 0)
6007 op = vec_oprnd0
6008 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
6009 else
6010 op = vec_oprnd0
6011 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6013 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6015 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6016 == TYPE_VECTOR_SUBPARTS (idxtype));
6017 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
6018 var = make_ssa_name (var);
6019 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6020 new_stmt
6021 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6022 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6023 op = var;
6026 new_stmt
6027 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6029 if (!useless_type_conversion_p (vectype, rettype))
6031 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6032 == TYPE_VECTOR_SUBPARTS (rettype));
6033 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
6034 op = make_ssa_name (var, new_stmt);
6035 gimple_call_set_lhs (new_stmt, op);
6036 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6037 var = make_ssa_name (vec_dest);
6038 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6039 new_stmt
6040 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6042 else
6044 var = make_ssa_name (vec_dest, new_stmt);
6045 gimple_call_set_lhs (new_stmt, var);
6048 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6050 if (modifier == NARROW)
6052 if ((j & 1) == 0)
6054 prev_res = var;
6055 continue;
6057 var = permute_vec_elements (prev_res, var,
6058 perm_mask, stmt, gsi);
6059 new_stmt = SSA_NAME_DEF_STMT (var);
6062 if (prev_stmt_info == NULL)
6063 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6064 else
6065 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6066 prev_stmt_info = vinfo_for_stmt (new_stmt);
6068 return true;
6070 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
6072 gimple_stmt_iterator incr_gsi;
6073 bool insert_after;
6074 gimple incr;
6075 tree offvar;
6076 tree ivstep;
6077 tree running_off;
6078 vec<constructor_elt, va_gc> *v = NULL;
6079 gimple_seq stmts = NULL;
6080 tree stride_base, stride_step, alias_off;
6082 gcc_assert (!nested_in_vect_loop);
6084 stride_base
6085 = fold_build_pointer_plus
6086 (unshare_expr (DR_BASE_ADDRESS (dr)),
6087 size_binop (PLUS_EXPR,
6088 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
6089 convert_to_ptrofftype (DR_INIT (dr))));
6090 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
6092 /* For a load with loop-invariant (but other than power-of-2)
6093 stride (i.e. not a grouped access) like so:
6095 for (i = 0; i < n; i += stride)
6096 ... = array[i];
6098 we generate a new induction variable and new accesses to
6099 form a new vector (or vectors, depending on ncopies):
6101 for (j = 0; ; j += VF*stride)
6102 tmp1 = array[j];
6103 tmp2 = array[j + stride];
6105 vectemp = {tmp1, tmp2, ...}
6108 ivstep = stride_step;
6109 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6110 build_int_cst (TREE_TYPE (ivstep), vf));
6112 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6114 create_iv (stride_base, ivstep, NULL,
6115 loop, &incr_gsi, insert_after,
6116 &offvar, NULL);
6117 incr = gsi_stmt (incr_gsi);
6118 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6120 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6121 if (stmts)
6122 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6124 prev_stmt_info = NULL;
6125 running_off = offvar;
6126 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
6127 for (j = 0; j < ncopies; j++)
6129 tree vec_inv;
6131 vec_alloc (v, nunits);
6132 for (i = 0; i < nunits; i++)
6134 tree newref, newoff;
6135 gimple incr;
6136 newref = build2 (MEM_REF, TREE_TYPE (vectype),
6137 running_off, alias_off);
6139 newref = force_gimple_operand_gsi (gsi, newref, true,
6140 NULL_TREE, true,
6141 GSI_SAME_STMT);
6142 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6143 newoff = copy_ssa_name (running_off);
6144 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6145 running_off, stride_step);
6146 vect_finish_stmt_generation (stmt, incr, gsi);
6148 running_off = newoff;
6151 vec_inv = build_constructor (vectype, v);
6152 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6153 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6155 if (j == 0)
6156 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6157 else
6158 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6159 prev_stmt_info = vinfo_for_stmt (new_stmt);
6161 return true;
6164 if (grouped_load)
6166 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6167 if (slp
6168 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6169 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6170 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6172 /* Check if the chain of loads is already vectorized. */
6173 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6174 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6175 ??? But we can only do so if there is exactly one
6176 as we have no way to get at the rest. Leave the CSE
6177 opportunity alone.
6178 ??? With the group load eventually participating
6179 in multiple different permutations (having multiple
6180 slp nodes which refer to the same group) the CSE
6181 is even wrong code. See PR56270. */
6182 && !slp)
6184 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6185 return true;
6187 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6188 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6190 /* VEC_NUM is the number of vect stmts to be created for this group. */
6191 if (slp)
6193 grouped_load = false;
6194 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6195 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6196 slp_perm = true;
6197 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6199 else
6201 vec_num = group_size;
6202 group_gap = 0;
6205 else
6207 first_stmt = stmt;
6208 first_dr = dr;
6209 group_size = vec_num = 1;
6210 group_gap = 0;
6213 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6214 gcc_assert (alignment_support_scheme);
6215 /* Targets with load-lane instructions must not require explicit
6216 realignment. */
6217 gcc_assert (!load_lanes_p
6218 || alignment_support_scheme == dr_aligned
6219 || alignment_support_scheme == dr_unaligned_supported);
6221 /* In case the vectorization factor (VF) is bigger than the number
6222 of elements that we can fit in a vectype (nunits), we have to generate
6223 more than one vector stmt - i.e - we need to "unroll" the
6224 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6225 from one copy of the vector stmt to the next, in the field
6226 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6227 stages to find the correct vector defs to be used when vectorizing
6228 stmts that use the defs of the current stmt. The example below
6229 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6230 need to create 4 vectorized stmts):
6232 before vectorization:
6233 RELATED_STMT VEC_STMT
6234 S1: x = memref - -
6235 S2: z = x + 1 - -
6237 step 1: vectorize stmt S1:
6238 We first create the vector stmt VS1_0, and, as usual, record a
6239 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6240 Next, we create the vector stmt VS1_1, and record a pointer to
6241 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6242 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6243 stmts and pointers:
6244 RELATED_STMT VEC_STMT
6245 VS1_0: vx0 = memref0 VS1_1 -
6246 VS1_1: vx1 = memref1 VS1_2 -
6247 VS1_2: vx2 = memref2 VS1_3 -
6248 VS1_3: vx3 = memref3 - -
6249 S1: x = load - VS1_0
6250 S2: z = x + 1 - -
6252 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6253 information we recorded in RELATED_STMT field is used to vectorize
6254 stmt S2. */
6256 /* In case of interleaving (non-unit grouped access):
6258 S1: x2 = &base + 2
6259 S2: x0 = &base
6260 S3: x1 = &base + 1
6261 S4: x3 = &base + 3
6263 Vectorized loads are created in the order of memory accesses
6264 starting from the access of the first stmt of the chain:
6266 VS1: vx0 = &base
6267 VS2: vx1 = &base + vec_size*1
6268 VS3: vx3 = &base + vec_size*2
6269 VS4: vx4 = &base + vec_size*3
6271 Then permutation statements are generated:
6273 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6274 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6277 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6278 (the order of the data-refs in the output of vect_permute_load_chain
6279 corresponds to the order of scalar stmts in the interleaving chain - see
6280 the documentation of vect_permute_load_chain()).
6281 The generation of permutation stmts and recording them in
6282 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6284 In case of both multiple types and interleaving, the vector loads and
6285 permutation stmts above are created for every copy. The result vector
6286 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6287 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6289 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6290 on a target that supports unaligned accesses (dr_unaligned_supported)
6291 we generate the following code:
6292 p = initial_addr;
6293 indx = 0;
6294 loop {
6295 p = p + indx * vectype_size;
6296 vec_dest = *(p);
6297 indx = indx + 1;
6300 Otherwise, the data reference is potentially unaligned on a target that
6301 does not support unaligned accesses (dr_explicit_realign_optimized) -
6302 then generate the following code, in which the data in each iteration is
6303 obtained by two vector loads, one from the previous iteration, and one
6304 from the current iteration:
6305 p1 = initial_addr;
6306 msq_init = *(floor(p1))
6307 p2 = initial_addr + VS - 1;
6308 realignment_token = call target_builtin;
6309 indx = 0;
6310 loop {
6311 p2 = p2 + indx * vectype_size
6312 lsq = *(floor(p2))
6313 vec_dest = realign_load (msq, lsq, realignment_token)
6314 indx = indx + 1;
6315 msq = lsq;
6316 } */
6318 /* If the misalignment remains the same throughout the execution of the
6319 loop, we can create the init_addr and permutation mask at the loop
6320 preheader. Otherwise, it needs to be created inside the loop.
6321 This can only occur when vectorizing memory accesses in the inner-loop
6322 nested within an outer-loop that is being vectorized. */
6324 if (nested_in_vect_loop
6325 && (TREE_INT_CST_LOW (DR_STEP (dr))
6326 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6328 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6329 compute_in_loop = true;
6332 if ((alignment_support_scheme == dr_explicit_realign_optimized
6333 || alignment_support_scheme == dr_explicit_realign)
6334 && !compute_in_loop)
6336 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6337 alignment_support_scheme, NULL_TREE,
6338 &at_loop);
6339 if (alignment_support_scheme == dr_explicit_realign_optimized)
6341 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
6342 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6343 size_one_node);
6346 else
6347 at_loop = loop;
6349 if (negative)
6350 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6352 if (load_lanes_p)
6353 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6354 else
6355 aggr_type = vectype;
6357 prev_stmt_info = NULL;
6358 for (j = 0; j < ncopies; j++)
6360 /* 1. Create the vector or array pointer update chain. */
6361 if (j == 0)
6363 bool simd_lane_access_p
6364 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6365 if (simd_lane_access_p
6366 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6367 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6368 && integer_zerop (DR_OFFSET (first_dr))
6369 && integer_zerop (DR_INIT (first_dr))
6370 && alias_sets_conflict_p (get_alias_set (aggr_type),
6371 get_alias_set (DR_REF (first_dr)))
6372 && (alignment_support_scheme == dr_aligned
6373 || alignment_support_scheme == dr_unaligned_supported))
6375 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6376 dataref_offset = build_int_cst (reference_alias_ptr_type
6377 (DR_REF (first_dr)), 0);
6378 inv_p = false;
6380 else
6381 dataref_ptr
6382 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6383 offset, &dummy, gsi, &ptr_incr,
6384 simd_lane_access_p, &inv_p,
6385 byte_offset);
6387 else if (dataref_offset)
6388 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6389 TYPE_SIZE_UNIT (aggr_type));
6390 else
6391 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6392 TYPE_SIZE_UNIT (aggr_type));
6394 if (grouped_load || slp_perm)
6395 dr_chain.create (vec_num);
6397 if (load_lanes_p)
6399 tree vec_array;
6401 vec_array = create_vector_array (vectype, vec_num);
6403 /* Emit:
6404 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6405 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6406 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6407 gimple_call_set_lhs (new_stmt, vec_array);
6408 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6410 /* Extract each vector into an SSA_NAME. */
6411 for (i = 0; i < vec_num; i++)
6413 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6414 vec_array, i);
6415 dr_chain.quick_push (new_temp);
6418 /* Record the mapping between SSA_NAMEs and statements. */
6419 vect_record_grouped_load_vectors (stmt, dr_chain);
6421 else
6423 for (i = 0; i < vec_num; i++)
6425 if (i > 0)
6426 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6427 stmt, NULL_TREE);
6429 /* 2. Create the vector-load in the loop. */
6430 switch (alignment_support_scheme)
6432 case dr_aligned:
6433 case dr_unaligned_supported:
6435 unsigned int align, misalign;
6437 data_ref
6438 = build2 (MEM_REF, vectype, dataref_ptr,
6439 dataref_offset
6440 ? dataref_offset
6441 : build_int_cst (reference_alias_ptr_type
6442 (DR_REF (first_dr)), 0));
6443 align = TYPE_ALIGN_UNIT (vectype);
6444 if (alignment_support_scheme == dr_aligned)
6446 gcc_assert (aligned_access_p (first_dr));
6447 misalign = 0;
6449 else if (DR_MISALIGNMENT (first_dr) == -1)
6451 TREE_TYPE (data_ref)
6452 = build_aligned_type (TREE_TYPE (data_ref),
6453 TYPE_ALIGN (elem_type));
6454 align = TYPE_ALIGN_UNIT (elem_type);
6455 misalign = 0;
6457 else
6459 TREE_TYPE (data_ref)
6460 = build_aligned_type (TREE_TYPE (data_ref),
6461 TYPE_ALIGN (elem_type));
6462 misalign = DR_MISALIGNMENT (first_dr);
6464 if (dataref_offset == NULL_TREE)
6465 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6466 align, misalign);
6467 break;
6469 case dr_explicit_realign:
6471 tree ptr, bump;
6472 tree vs_minus_1;
6474 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6476 if (compute_in_loop)
6477 msq = vect_setup_realignment (first_stmt, gsi,
6478 &realignment_token,
6479 dr_explicit_realign,
6480 dataref_ptr, NULL);
6482 ptr = copy_ssa_name (dataref_ptr);
6483 new_stmt = gimple_build_assign
6484 (ptr, BIT_AND_EXPR, dataref_ptr,
6485 build_int_cst
6486 (TREE_TYPE (dataref_ptr),
6487 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6488 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6489 data_ref
6490 = build2 (MEM_REF, vectype, ptr,
6491 build_int_cst (reference_alias_ptr_type
6492 (DR_REF (first_dr)), 0));
6493 vec_dest = vect_create_destination_var (scalar_dest,
6494 vectype);
6495 new_stmt = gimple_build_assign (vec_dest, data_ref);
6496 new_temp = make_ssa_name (vec_dest, new_stmt);
6497 gimple_assign_set_lhs (new_stmt, new_temp);
6498 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6499 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6500 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6501 msq = new_temp;
6503 bump = size_binop (MULT_EXPR, vs_minus_1,
6504 TYPE_SIZE_UNIT (elem_type));
6505 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6506 new_stmt = gimple_build_assign
6507 (NULL_TREE, BIT_AND_EXPR, ptr,
6508 build_int_cst
6509 (TREE_TYPE (ptr),
6510 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6511 ptr = copy_ssa_name (dataref_ptr, new_stmt);
6512 gimple_assign_set_lhs (new_stmt, ptr);
6513 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6514 data_ref
6515 = build2 (MEM_REF, vectype, ptr,
6516 build_int_cst (reference_alias_ptr_type
6517 (DR_REF (first_dr)), 0));
6518 break;
6520 case dr_explicit_realign_optimized:
6521 new_temp = copy_ssa_name (dataref_ptr);
6522 new_stmt = gimple_build_assign
6523 (new_temp, BIT_AND_EXPR, dataref_ptr,
6524 build_int_cst
6525 (TREE_TYPE (dataref_ptr),
6526 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6527 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6528 data_ref
6529 = build2 (MEM_REF, vectype, new_temp,
6530 build_int_cst (reference_alias_ptr_type
6531 (DR_REF (first_dr)), 0));
6532 break;
6533 default:
6534 gcc_unreachable ();
6536 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6537 new_stmt = gimple_build_assign (vec_dest, data_ref);
6538 new_temp = make_ssa_name (vec_dest, new_stmt);
6539 gimple_assign_set_lhs (new_stmt, new_temp);
6540 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6542 /* 3. Handle explicit realignment if necessary/supported.
6543 Create in loop:
6544 vec_dest = realign_load (msq, lsq, realignment_token) */
6545 if (alignment_support_scheme == dr_explicit_realign_optimized
6546 || alignment_support_scheme == dr_explicit_realign)
6548 lsq = gimple_assign_lhs (new_stmt);
6549 if (!realignment_token)
6550 realignment_token = dataref_ptr;
6551 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6552 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
6553 msq, lsq, realignment_token);
6554 new_temp = make_ssa_name (vec_dest, new_stmt);
6555 gimple_assign_set_lhs (new_stmt, new_temp);
6556 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6558 if (alignment_support_scheme == dr_explicit_realign_optimized)
6560 gcc_assert (phi);
6561 if (i == vec_num - 1 && j == ncopies - 1)
6562 add_phi_arg (phi, lsq,
6563 loop_latch_edge (containing_loop),
6564 UNKNOWN_LOCATION);
6565 msq = lsq;
6569 /* 4. Handle invariant-load. */
6570 if (inv_p && !bb_vinfo)
6572 gcc_assert (!grouped_load);
6573 /* If we have versioned for aliasing or the loop doesn't
6574 have any data dependencies that would preclude this,
6575 then we are sure this is a loop invariant load and
6576 thus we can insert it on the preheader edge. */
6577 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6578 && !nested_in_vect_loop
6579 && hoist_defs_of_uses (stmt, loop))
6581 if (dump_enabled_p ())
6583 dump_printf_loc (MSG_NOTE, vect_location,
6584 "hoisting out of the vectorized "
6585 "loop: ");
6586 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6587 dump_printf (MSG_NOTE, "\n");
6589 tree tem = copy_ssa_name (scalar_dest);
6590 gsi_insert_on_edge_immediate
6591 (loop_preheader_edge (loop),
6592 gimple_build_assign (tem,
6593 unshare_expr
6594 (gimple_assign_rhs1 (stmt))));
6595 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6597 else
6599 gimple_stmt_iterator gsi2 = *gsi;
6600 gsi_next (&gsi2);
6601 new_temp = vect_init_vector (stmt, scalar_dest,
6602 vectype, &gsi2);
6604 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6605 set_vinfo_for_stmt (new_stmt,
6606 new_stmt_vec_info (new_stmt, loop_vinfo,
6607 bb_vinfo));
6610 if (negative)
6612 tree perm_mask = perm_mask_for_reverse (vectype);
6613 new_temp = permute_vec_elements (new_temp, new_temp,
6614 perm_mask, stmt, gsi);
6615 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6618 /* Collect vector loads and later create their permutation in
6619 vect_transform_grouped_load (). */
6620 if (grouped_load || slp_perm)
6621 dr_chain.quick_push (new_temp);
6623 /* Store vector loads in the corresponding SLP_NODE. */
6624 if (slp && !slp_perm)
6625 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6627 /* Bump the vector pointer to account for a gap. */
6628 if (slp && group_gap != 0)
6630 tree bump = size_binop (MULT_EXPR,
6631 TYPE_SIZE_UNIT (elem_type),
6632 size_int (group_gap));
6633 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6634 stmt, bump);
6638 if (slp && !slp_perm)
6639 continue;
6641 if (slp_perm)
6643 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6644 slp_node_instance, false))
6646 dr_chain.release ();
6647 return false;
6650 else
6652 if (grouped_load)
6654 if (!load_lanes_p)
6655 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6656 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6658 else
6660 if (j == 0)
6661 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6662 else
6663 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6664 prev_stmt_info = vinfo_for_stmt (new_stmt);
6667 dr_chain.release ();
6670 return true;
6673 /* Function vect_is_simple_cond.
6675 Input:
6676 LOOP - the loop that is being vectorized.
6677 COND - Condition that is checked for simple use.
6679 Output:
6680 *COMP_VECTYPE - the vector type for the comparison.
6682 Returns whether a COND can be vectorized. Checks whether
6683 condition operands are supportable using vec_is_simple_use. */
6685 static bool
6686 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6687 bb_vec_info bb_vinfo, tree *comp_vectype)
6689 tree lhs, rhs;
6690 tree def;
6691 enum vect_def_type dt;
6692 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6694 if (!COMPARISON_CLASS_P (cond))
6695 return false;
6697 lhs = TREE_OPERAND (cond, 0);
6698 rhs = TREE_OPERAND (cond, 1);
6700 if (TREE_CODE (lhs) == SSA_NAME)
6702 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6703 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6704 &lhs_def_stmt, &def, &dt, &vectype1))
6705 return false;
6707 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6708 && TREE_CODE (lhs) != FIXED_CST)
6709 return false;
6711 if (TREE_CODE (rhs) == SSA_NAME)
6713 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6714 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6715 &rhs_def_stmt, &def, &dt, &vectype2))
6716 return false;
6718 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6719 && TREE_CODE (rhs) != FIXED_CST)
6720 return false;
6722 *comp_vectype = vectype1 ? vectype1 : vectype2;
6723 return true;
6726 /* vectorizable_condition.
6728 Check if STMT is conditional modify expression that can be vectorized.
6729 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6730 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6731 at GSI.
6733 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6734 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
6735 else caluse if it is 2).
6737 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6739 bool
6740 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6741 gimple *vec_stmt, tree reduc_def, int reduc_index,
6742 slp_tree slp_node)
6744 tree scalar_dest = NULL_TREE;
6745 tree vec_dest = NULL_TREE;
6746 tree cond_expr, then_clause, else_clause;
6747 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6748 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6749 tree comp_vectype = NULL_TREE;
6750 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6751 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6752 tree vec_compare, vec_cond_expr;
6753 tree new_temp;
6754 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6755 tree def;
6756 enum vect_def_type dt, dts[4];
6757 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6758 int ncopies;
6759 enum tree_code code;
6760 stmt_vec_info prev_stmt_info = NULL;
6761 int i, j;
6762 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6763 vec<tree> vec_oprnds0 = vNULL;
6764 vec<tree> vec_oprnds1 = vNULL;
6765 vec<tree> vec_oprnds2 = vNULL;
6766 vec<tree> vec_oprnds3 = vNULL;
6767 tree vec_cmp_type;
6769 if (slp_node || PURE_SLP_STMT (stmt_info))
6770 ncopies = 1;
6771 else
6772 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6774 gcc_assert (ncopies >= 1);
6775 if (reduc_index && ncopies > 1)
6776 return false; /* FORNOW */
6778 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6779 return false;
6781 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6782 return false;
6784 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6785 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6786 && reduc_def))
6787 return false;
6789 /* FORNOW: not yet supported. */
6790 if (STMT_VINFO_LIVE_P (stmt_info))
6792 if (dump_enabled_p ())
6793 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6794 "value used after loop.\n");
6795 return false;
6798 /* Is vectorizable conditional operation? */
6799 if (!is_gimple_assign (stmt))
6800 return false;
6802 code = gimple_assign_rhs_code (stmt);
6804 if (code != COND_EXPR)
6805 return false;
6807 cond_expr = gimple_assign_rhs1 (stmt);
6808 then_clause = gimple_assign_rhs2 (stmt);
6809 else_clause = gimple_assign_rhs3 (stmt);
6811 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6812 &comp_vectype)
6813 || !comp_vectype)
6814 return false;
6816 if (TREE_CODE (then_clause) == SSA_NAME)
6818 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6819 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
6820 &then_def_stmt, &def, &dt))
6821 return false;
6823 else if (TREE_CODE (then_clause) != INTEGER_CST
6824 && TREE_CODE (then_clause) != REAL_CST
6825 && TREE_CODE (then_clause) != FIXED_CST)
6826 return false;
6828 if (TREE_CODE (else_clause) == SSA_NAME)
6830 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6831 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
6832 &else_def_stmt, &def, &dt))
6833 return false;
6835 else if (TREE_CODE (else_clause) != INTEGER_CST
6836 && TREE_CODE (else_clause) != REAL_CST
6837 && TREE_CODE (else_clause) != FIXED_CST)
6838 return false;
6840 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
6841 /* The result of a vector comparison should be signed type. */
6842 tree cmp_type = build_nonstandard_integer_type (prec, 0);
6843 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
6844 if (vec_cmp_type == NULL_TREE)
6845 return false;
6847 if (!vec_stmt)
6849 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6850 return expand_vec_cond_expr_p (vectype, comp_vectype);
6853 /* Transform. */
6855 if (!slp_node)
6857 vec_oprnds0.create (1);
6858 vec_oprnds1.create (1);
6859 vec_oprnds2.create (1);
6860 vec_oprnds3.create (1);
6863 /* Handle def. */
6864 scalar_dest = gimple_assign_lhs (stmt);
6865 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6867 /* Handle cond expr. */
6868 for (j = 0; j < ncopies; j++)
6870 gassign *new_stmt = NULL;
6871 if (j == 0)
6873 if (slp_node)
6875 auto_vec<tree, 4> ops;
6876 auto_vec<vec<tree>, 4> vec_defs;
6878 ops.safe_push (TREE_OPERAND (cond_expr, 0));
6879 ops.safe_push (TREE_OPERAND (cond_expr, 1));
6880 ops.safe_push (then_clause);
6881 ops.safe_push (else_clause);
6882 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
6883 vec_oprnds3 = vec_defs.pop ();
6884 vec_oprnds2 = vec_defs.pop ();
6885 vec_oprnds1 = vec_defs.pop ();
6886 vec_oprnds0 = vec_defs.pop ();
6888 ops.release ();
6889 vec_defs.release ();
6891 else
6893 gimple gtemp;
6894 vec_cond_lhs =
6895 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
6896 stmt, NULL);
6897 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6898 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
6900 vec_cond_rhs =
6901 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
6902 stmt, NULL);
6903 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6904 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
6905 if (reduc_index == 1)
6906 vec_then_clause = reduc_def;
6907 else
6909 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
6910 stmt, NULL);
6911 vect_is_simple_use (then_clause, stmt, loop_vinfo,
6912 NULL, &gtemp, &def, &dts[2]);
6914 if (reduc_index == 2)
6915 vec_else_clause = reduc_def;
6916 else
6918 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
6919 stmt, NULL);
6920 vect_is_simple_use (else_clause, stmt, loop_vinfo,
6921 NULL, &gtemp, &def, &dts[3]);
6925 else
6927 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
6928 vec_oprnds0.pop ());
6929 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
6930 vec_oprnds1.pop ());
6931 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
6932 vec_oprnds2.pop ());
6933 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
6934 vec_oprnds3.pop ());
6937 if (!slp_node)
6939 vec_oprnds0.quick_push (vec_cond_lhs);
6940 vec_oprnds1.quick_push (vec_cond_rhs);
6941 vec_oprnds2.quick_push (vec_then_clause);
6942 vec_oprnds3.quick_push (vec_else_clause);
6945 /* Arguments are ready. Create the new vector stmt. */
6946 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
6948 vec_cond_rhs = vec_oprnds1[i];
6949 vec_then_clause = vec_oprnds2[i];
6950 vec_else_clause = vec_oprnds3[i];
6952 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6953 vec_cond_lhs, vec_cond_rhs);
6954 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6955 vec_compare, vec_then_clause, vec_else_clause);
6957 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6958 new_temp = make_ssa_name (vec_dest, new_stmt);
6959 gimple_assign_set_lhs (new_stmt, new_temp);
6960 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6961 if (slp_node)
6962 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6965 if (slp_node)
6966 continue;
6968 if (j == 0)
6969 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6970 else
6971 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6973 prev_stmt_info = vinfo_for_stmt (new_stmt);
6976 vec_oprnds0.release ();
6977 vec_oprnds1.release ();
6978 vec_oprnds2.release ();
6979 vec_oprnds3.release ();
6981 return true;
6985 /* Make sure the statement is vectorizable. */
6987 bool
6988 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
6990 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6991 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6992 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
6993 bool ok;
6994 tree scalar_type, vectype;
6995 gimple pattern_stmt;
6996 gimple_seq pattern_def_seq;
6998 if (dump_enabled_p ())
7000 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7001 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7002 dump_printf (MSG_NOTE, "\n");
7005 if (gimple_has_volatile_ops (stmt))
7007 if (dump_enabled_p ())
7008 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7009 "not vectorized: stmt has volatile operands\n");
7011 return false;
7014 /* Skip stmts that do not need to be vectorized. In loops this is expected
7015 to include:
7016 - the COND_EXPR which is the loop exit condition
7017 - any LABEL_EXPRs in the loop
7018 - computations that are used only for array indexing or loop control.
7019 In basic blocks we only analyze statements that are a part of some SLP
7020 instance, therefore, all the statements are relevant.
7022 Pattern statement needs to be analyzed instead of the original statement
7023 if the original statement is not relevant. Otherwise, we analyze both
7024 statements. In basic blocks we are called from some SLP instance
7025 traversal, don't analyze pattern stmts instead, the pattern stmts
7026 already will be part of SLP instance. */
7028 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7029 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7030 && !STMT_VINFO_LIVE_P (stmt_info))
7032 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7033 && pattern_stmt
7034 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7035 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7037 /* Analyze PATTERN_STMT instead of the original stmt. */
7038 stmt = pattern_stmt;
7039 stmt_info = vinfo_for_stmt (pattern_stmt);
7040 if (dump_enabled_p ())
7042 dump_printf_loc (MSG_NOTE, vect_location,
7043 "==> examining pattern statement: ");
7044 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7045 dump_printf (MSG_NOTE, "\n");
7048 else
7050 if (dump_enabled_p ())
7051 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7053 return true;
7056 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7057 && node == NULL
7058 && pattern_stmt
7059 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7060 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7062 /* Analyze PATTERN_STMT too. */
7063 if (dump_enabled_p ())
7065 dump_printf_loc (MSG_NOTE, vect_location,
7066 "==> examining pattern statement: ");
7067 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7068 dump_printf (MSG_NOTE, "\n");
7071 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7072 return false;
7075 if (is_pattern_stmt_p (stmt_info)
7076 && node == NULL
7077 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7079 gimple_stmt_iterator si;
7081 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7083 gimple pattern_def_stmt = gsi_stmt (si);
7084 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7085 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7087 /* Analyze def stmt of STMT if it's a pattern stmt. */
7088 if (dump_enabled_p ())
7090 dump_printf_loc (MSG_NOTE, vect_location,
7091 "==> examining pattern def statement: ");
7092 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7093 dump_printf (MSG_NOTE, "\n");
7096 if (!vect_analyze_stmt (pattern_def_stmt,
7097 need_to_vectorize, node))
7098 return false;
7103 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7105 case vect_internal_def:
7106 break;
7108 case vect_reduction_def:
7109 case vect_nested_cycle:
7110 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
7111 || relevance == vect_used_in_outer_by_reduction
7112 || relevance == vect_unused_in_scope));
7113 break;
7115 case vect_induction_def:
7116 case vect_constant_def:
7117 case vect_external_def:
7118 case vect_unknown_def_type:
7119 default:
7120 gcc_unreachable ();
7123 if (bb_vinfo)
7125 gcc_assert (PURE_SLP_STMT (stmt_info));
7127 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7128 if (dump_enabled_p ())
7130 dump_printf_loc (MSG_NOTE, vect_location,
7131 "get vectype for scalar type: ");
7132 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7133 dump_printf (MSG_NOTE, "\n");
7136 vectype = get_vectype_for_scalar_type (scalar_type);
7137 if (!vectype)
7139 if (dump_enabled_p ())
7141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7142 "not SLPed: unsupported data-type ");
7143 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7144 scalar_type);
7145 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7147 return false;
7150 if (dump_enabled_p ())
7152 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7153 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7154 dump_printf (MSG_NOTE, "\n");
7157 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7160 if (STMT_VINFO_RELEVANT_P (stmt_info))
7162 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7163 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7164 || (is_gimple_call (stmt)
7165 && gimple_call_lhs (stmt) == NULL_TREE));
7166 *need_to_vectorize = true;
7169 ok = true;
7170 if (!bb_vinfo
7171 && (STMT_VINFO_RELEVANT_P (stmt_info)
7172 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7173 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7174 || vectorizable_conversion (stmt, NULL, NULL, NULL)
7175 || vectorizable_shift (stmt, NULL, NULL, NULL)
7176 || vectorizable_operation (stmt, NULL, NULL, NULL)
7177 || vectorizable_assignment (stmt, NULL, NULL, NULL)
7178 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
7179 || vectorizable_call (stmt, NULL, NULL, NULL)
7180 || vectorizable_store (stmt, NULL, NULL, NULL)
7181 || vectorizable_reduction (stmt, NULL, NULL, NULL)
7182 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
7183 else
7185 if (bb_vinfo)
7186 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7187 || vectorizable_conversion (stmt, NULL, NULL, node)
7188 || vectorizable_shift (stmt, NULL, NULL, node)
7189 || vectorizable_operation (stmt, NULL, NULL, node)
7190 || vectorizable_assignment (stmt, NULL, NULL, node)
7191 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7192 || vectorizable_call (stmt, NULL, NULL, node)
7193 || vectorizable_store (stmt, NULL, NULL, node)
7194 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7197 if (!ok)
7199 if (dump_enabled_p ())
7201 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7202 "not vectorized: relevant stmt not ");
7203 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7204 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7205 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7208 return false;
7211 if (bb_vinfo)
7212 return true;
7214 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7215 need extra handling, except for vectorizable reductions. */
7216 if (STMT_VINFO_LIVE_P (stmt_info)
7217 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7218 ok = vectorizable_live_operation (stmt, NULL, NULL);
7220 if (!ok)
7222 if (dump_enabled_p ())
7224 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7225 "not vectorized: live stmt not ");
7226 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7227 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7228 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7231 return false;
7234 return true;
7238 /* Function vect_transform_stmt.
7240 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7242 bool
7243 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7244 bool *grouped_store, slp_tree slp_node,
7245 slp_instance slp_node_instance)
7247 bool is_store = false;
7248 gimple vec_stmt = NULL;
7249 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7250 bool done;
7252 switch (STMT_VINFO_TYPE (stmt_info))
7254 case type_demotion_vec_info_type:
7255 case type_promotion_vec_info_type:
7256 case type_conversion_vec_info_type:
7257 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7258 gcc_assert (done);
7259 break;
7261 case induc_vec_info_type:
7262 gcc_assert (!slp_node);
7263 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7264 gcc_assert (done);
7265 break;
7267 case shift_vec_info_type:
7268 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7269 gcc_assert (done);
7270 break;
7272 case op_vec_info_type:
7273 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7274 gcc_assert (done);
7275 break;
7277 case assignment_vec_info_type:
7278 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7279 gcc_assert (done);
7280 break;
7282 case load_vec_info_type:
7283 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7284 slp_node_instance);
7285 gcc_assert (done);
7286 break;
7288 case store_vec_info_type:
7289 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7290 gcc_assert (done);
7291 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7293 /* In case of interleaving, the whole chain is vectorized when the
7294 last store in the chain is reached. Store stmts before the last
7295 one are skipped, and there vec_stmt_info shouldn't be freed
7296 meanwhile. */
7297 *grouped_store = true;
7298 if (STMT_VINFO_VEC_STMT (stmt_info))
7299 is_store = true;
7301 else
7302 is_store = true;
7303 break;
7305 case condition_vec_info_type:
7306 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7307 gcc_assert (done);
7308 break;
7310 case call_vec_info_type:
7311 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7312 stmt = gsi_stmt (*gsi);
7313 if (is_gimple_call (stmt)
7314 && gimple_call_internal_p (stmt)
7315 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7316 is_store = true;
7317 break;
7319 case call_simd_clone_vec_info_type:
7320 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7321 stmt = gsi_stmt (*gsi);
7322 break;
7324 case reduc_vec_info_type:
7325 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7326 gcc_assert (done);
7327 break;
7329 default:
7330 if (!STMT_VINFO_LIVE_P (stmt_info))
7332 if (dump_enabled_p ())
7333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7334 "stmt not supported.\n");
7335 gcc_unreachable ();
7339 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7340 is being vectorized, but outside the immediately enclosing loop. */
7341 if (vec_stmt
7342 && STMT_VINFO_LOOP_VINFO (stmt_info)
7343 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7344 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7345 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7346 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7347 || STMT_VINFO_RELEVANT (stmt_info) ==
7348 vect_used_in_outer_by_reduction))
7350 struct loop *innerloop = LOOP_VINFO_LOOP (
7351 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7352 imm_use_iterator imm_iter;
7353 use_operand_p use_p;
7354 tree scalar_dest;
7355 gimple exit_phi;
7357 if (dump_enabled_p ())
7358 dump_printf_loc (MSG_NOTE, vect_location,
7359 "Record the vdef for outer-loop vectorization.\n");
7361 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
7362 (to be used when vectorizing outer-loop stmts that use the DEF of
7363 STMT). */
7364 if (gimple_code (stmt) == GIMPLE_PHI)
7365 scalar_dest = PHI_RESULT (stmt);
7366 else
7367 scalar_dest = gimple_assign_lhs (stmt);
7369 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7371 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7373 exit_phi = USE_STMT (use_p);
7374 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7379 /* Handle stmts whose DEF is used outside the loop-nest that is
7380 being vectorized. */
7381 if (STMT_VINFO_LIVE_P (stmt_info)
7382 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7384 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7385 gcc_assert (done);
7388 if (vec_stmt)
7389 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7391 return is_store;
7395 /* Remove a group of stores (for SLP or interleaving), free their
7396 stmt_vec_info. */
7398 void
7399 vect_remove_stores (gimple first_stmt)
7401 gimple next = first_stmt;
7402 gimple tmp;
7403 gimple_stmt_iterator next_si;
7405 while (next)
7407 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7409 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7410 if (is_pattern_stmt_p (stmt_info))
7411 next = STMT_VINFO_RELATED_STMT (stmt_info);
7412 /* Free the attached stmt_vec_info and remove the stmt. */
7413 next_si = gsi_for_stmt (next);
7414 unlink_stmt_vdef (next);
7415 gsi_remove (&next_si, true);
7416 release_defs (next);
7417 free_stmt_vec_info (next);
7418 next = tmp;
7423 /* Function new_stmt_vec_info.
7425 Create and initialize a new stmt_vec_info struct for STMT. */
7427 stmt_vec_info
7428 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7429 bb_vec_info bb_vinfo)
7431 stmt_vec_info res;
7432 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7434 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7435 STMT_VINFO_STMT (res) = stmt;
7436 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7437 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7438 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7439 STMT_VINFO_LIVE_P (res) = false;
7440 STMT_VINFO_VECTYPE (res) = NULL;
7441 STMT_VINFO_VEC_STMT (res) = NULL;
7442 STMT_VINFO_VECTORIZABLE (res) = true;
7443 STMT_VINFO_IN_PATTERN_P (res) = false;
7444 STMT_VINFO_RELATED_STMT (res) = NULL;
7445 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7446 STMT_VINFO_DATA_REF (res) = NULL;
7448 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7449 STMT_VINFO_DR_OFFSET (res) = NULL;
7450 STMT_VINFO_DR_INIT (res) = NULL;
7451 STMT_VINFO_DR_STEP (res) = NULL;
7452 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7454 if (gimple_code (stmt) == GIMPLE_PHI
7455 && is_loop_header_bb_p (gimple_bb (stmt)))
7456 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7457 else
7458 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7460 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7461 STMT_SLP_TYPE (res) = loop_vect;
7462 GROUP_FIRST_ELEMENT (res) = NULL;
7463 GROUP_NEXT_ELEMENT (res) = NULL;
7464 GROUP_SIZE (res) = 0;
7465 GROUP_STORE_COUNT (res) = 0;
7466 GROUP_GAP (res) = 0;
7467 GROUP_SAME_DR_STMT (res) = NULL;
7469 return res;
7473 /* Create a hash table for stmt_vec_info. */
7475 void
7476 init_stmt_vec_info_vec (void)
7478 gcc_assert (!stmt_vec_info_vec.exists ());
7479 stmt_vec_info_vec.create (50);
7483 /* Free hash table for stmt_vec_info. */
7485 void
7486 free_stmt_vec_info_vec (void)
7488 unsigned int i;
7489 vec_void_p info;
7490 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7491 if (info != NULL)
7492 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7493 gcc_assert (stmt_vec_info_vec.exists ());
7494 stmt_vec_info_vec.release ();
7498 /* Free stmt vectorization related info. */
7500 void
7501 free_stmt_vec_info (gimple stmt)
7503 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7505 if (!stmt_info)
7506 return;
7508 /* Check if this statement has a related "pattern stmt"
7509 (introduced by the vectorizer during the pattern recognition
7510 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7511 too. */
7512 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7514 stmt_vec_info patt_info
7515 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7516 if (patt_info)
7518 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7519 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7520 gimple_set_bb (patt_stmt, NULL);
7521 tree lhs = gimple_get_lhs (patt_stmt);
7522 if (TREE_CODE (lhs) == SSA_NAME)
7523 release_ssa_name (lhs);
7524 if (seq)
7526 gimple_stmt_iterator si;
7527 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7529 gimple seq_stmt = gsi_stmt (si);
7530 gimple_set_bb (seq_stmt, NULL);
7531 lhs = gimple_get_lhs (patt_stmt);
7532 if (TREE_CODE (lhs) == SSA_NAME)
7533 release_ssa_name (lhs);
7534 free_stmt_vec_info (seq_stmt);
7537 free_stmt_vec_info (patt_stmt);
7541 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7542 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
7543 set_vinfo_for_stmt (stmt, NULL);
7544 free (stmt_info);
7548 /* Function get_vectype_for_scalar_type_and_size.
7550 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7551 by the target. */
7553 static tree
7554 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7556 machine_mode inner_mode = TYPE_MODE (scalar_type);
7557 machine_mode simd_mode;
7558 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7559 int nunits;
7560 tree vectype;
7562 if (nbytes == 0)
7563 return NULL_TREE;
7565 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7566 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7567 return NULL_TREE;
7569 /* For vector types of elements whose mode precision doesn't
7570 match their types precision we use a element type of mode
7571 precision. The vectorization routines will have to make sure
7572 they support the proper result truncation/extension.
7573 We also make sure to build vector types with INTEGER_TYPE
7574 component type only. */
7575 if (INTEGRAL_TYPE_P (scalar_type)
7576 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7577 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7578 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7579 TYPE_UNSIGNED (scalar_type));
7581 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7582 When the component mode passes the above test simply use a type
7583 corresponding to that mode. The theory is that any use that
7584 would cause problems with this will disable vectorization anyway. */
7585 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7586 && !INTEGRAL_TYPE_P (scalar_type))
7587 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7589 /* We can't build a vector type of elements with alignment bigger than
7590 their size. */
7591 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7592 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7593 TYPE_UNSIGNED (scalar_type));
7595 /* If we felt back to using the mode fail if there was
7596 no scalar type for it. */
7597 if (scalar_type == NULL_TREE)
7598 return NULL_TREE;
7600 /* If no size was supplied use the mode the target prefers. Otherwise
7601 lookup a vector mode of the specified size. */
7602 if (size == 0)
7603 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7604 else
7605 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7606 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7607 if (nunits <= 1)
7608 return NULL_TREE;
7610 vectype = build_vector_type (scalar_type, nunits);
7612 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7613 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7614 return NULL_TREE;
7616 return vectype;
7619 unsigned int current_vector_size;
7621 /* Function get_vectype_for_scalar_type.
7623 Returns the vector type corresponding to SCALAR_TYPE as supported
7624 by the target. */
7626 tree
7627 get_vectype_for_scalar_type (tree scalar_type)
7629 tree vectype;
7630 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7631 current_vector_size);
7632 if (vectype
7633 && current_vector_size == 0)
7634 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7635 return vectype;
7638 /* Function get_same_sized_vectype
7640 Returns a vector type corresponding to SCALAR_TYPE of size
7641 VECTOR_TYPE if supported by the target. */
7643 tree
7644 get_same_sized_vectype (tree scalar_type, tree vector_type)
7646 return get_vectype_for_scalar_type_and_size
7647 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7650 /* Function vect_is_simple_use.
7652 Input:
7653 LOOP_VINFO - the vect info of the loop that is being vectorized.
7654 BB_VINFO - the vect info of the basic block that is being vectorized.
7655 OPERAND - operand of STMT in the loop or bb.
7656 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7658 Returns whether a stmt with OPERAND can be vectorized.
7659 For loops, supportable operands are constants, loop invariants, and operands
7660 that are defined by the current iteration of the loop. Unsupportable
7661 operands are those that are defined by a previous iteration of the loop (as
7662 is the case in reduction/induction computations).
7663 For basic blocks, supportable operands are constants and bb invariants.
7664 For now, operands defined outside the basic block are not supported. */
7666 bool
7667 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7668 bb_vec_info bb_vinfo, gimple *def_stmt,
7669 tree *def, enum vect_def_type *dt)
7671 basic_block bb;
7672 stmt_vec_info stmt_vinfo;
7673 struct loop *loop = NULL;
7675 if (loop_vinfo)
7676 loop = LOOP_VINFO_LOOP (loop_vinfo);
7678 *def_stmt = NULL;
7679 *def = NULL_TREE;
7681 if (dump_enabled_p ())
7683 dump_printf_loc (MSG_NOTE, vect_location,
7684 "vect_is_simple_use: operand ");
7685 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7686 dump_printf (MSG_NOTE, "\n");
7689 if (CONSTANT_CLASS_P (operand))
7691 *dt = vect_constant_def;
7692 return true;
7695 if (is_gimple_min_invariant (operand))
7697 *def = operand;
7698 *dt = vect_external_def;
7699 return true;
7702 if (TREE_CODE (operand) == PAREN_EXPR)
7704 if (dump_enabled_p ())
7705 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7706 operand = TREE_OPERAND (operand, 0);
7709 if (TREE_CODE (operand) != SSA_NAME)
7711 if (dump_enabled_p ())
7712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7713 "not ssa-name.\n");
7714 return false;
7717 *def_stmt = SSA_NAME_DEF_STMT (operand);
7718 if (*def_stmt == NULL)
7720 if (dump_enabled_p ())
7721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7722 "no def_stmt.\n");
7723 return false;
7726 if (dump_enabled_p ())
7728 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7729 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7730 dump_printf (MSG_NOTE, "\n");
7733 /* Empty stmt is expected only in case of a function argument.
7734 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
7735 if (gimple_nop_p (*def_stmt))
7737 *def = operand;
7738 *dt = vect_external_def;
7739 return true;
7742 bb = gimple_bb (*def_stmt);
7744 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7745 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7746 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7747 *dt = vect_external_def;
7748 else
7750 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7751 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7754 if (*dt == vect_unknown_def_type
7755 || (stmt
7756 && *dt == vect_double_reduction_def
7757 && gimple_code (stmt) != GIMPLE_PHI))
7759 if (dump_enabled_p ())
7760 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7761 "Unsupported pattern.\n");
7762 return false;
7765 if (dump_enabled_p ())
7766 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
7768 switch (gimple_code (*def_stmt))
7770 case GIMPLE_PHI:
7771 *def = gimple_phi_result (*def_stmt);
7772 break;
7774 case GIMPLE_ASSIGN:
7775 *def = gimple_assign_lhs (*def_stmt);
7776 break;
7778 case GIMPLE_CALL:
7779 *def = gimple_call_lhs (*def_stmt);
7780 if (*def != NULL)
7781 break;
7782 /* FALLTHRU */
7783 default:
7784 if (dump_enabled_p ())
7785 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7786 "unsupported defining stmt:\n");
7787 return false;
7790 return true;
7793 /* Function vect_is_simple_use_1.
7795 Same as vect_is_simple_use_1 but also determines the vector operand
7796 type of OPERAND and stores it to *VECTYPE. If the definition of
7797 OPERAND is vect_uninitialized_def, vect_constant_def or
7798 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
7799 is responsible to compute the best suited vector type for the
7800 scalar operand. */
7802 bool
7803 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7804 bb_vec_info bb_vinfo, gimple *def_stmt,
7805 tree *def, enum vect_def_type *dt, tree *vectype)
7807 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7808 def, dt))
7809 return false;
7811 /* Now get a vector type if the def is internal, otherwise supply
7812 NULL_TREE and leave it up to the caller to figure out a proper
7813 type for the use stmt. */
7814 if (*dt == vect_internal_def
7815 || *dt == vect_induction_def
7816 || *dt == vect_reduction_def
7817 || *dt == vect_double_reduction_def
7818 || *dt == vect_nested_cycle)
7820 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
7822 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7823 && !STMT_VINFO_RELEVANT (stmt_info)
7824 && !STMT_VINFO_LIVE_P (stmt_info))
7825 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7827 *vectype = STMT_VINFO_VECTYPE (stmt_info);
7828 gcc_assert (*vectype != NULL_TREE);
7830 else if (*dt == vect_uninitialized_def
7831 || *dt == vect_constant_def
7832 || *dt == vect_external_def)
7833 *vectype = NULL_TREE;
7834 else
7835 gcc_unreachable ();
7837 return true;
7841 /* Function supportable_widening_operation
7843 Check whether an operation represented by the code CODE is a
7844 widening operation that is supported by the target platform in
7845 vector form (i.e., when operating on arguments of type VECTYPE_IN
7846 producing a result of type VECTYPE_OUT).
7848 Widening operations we currently support are NOP (CONVERT), FLOAT
7849 and WIDEN_MULT. This function checks if these operations are supported
7850 by the target platform either directly (via vector tree-codes), or via
7851 target builtins.
7853 Output:
7854 - CODE1 and CODE2 are codes of vector operations to be used when
7855 vectorizing the operation, if available.
7856 - MULTI_STEP_CVT determines the number of required intermediate steps in
7857 case of multi-step conversion (like char->short->int - in that case
7858 MULTI_STEP_CVT will be 1).
7859 - INTERM_TYPES contains the intermediate type required to perform the
7860 widening operation (short in the above example). */
7862 bool
7863 supportable_widening_operation (enum tree_code code, gimple stmt,
7864 tree vectype_out, tree vectype_in,
7865 enum tree_code *code1, enum tree_code *code2,
7866 int *multi_step_cvt,
7867 vec<tree> *interm_types)
7869 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7870 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7871 struct loop *vect_loop = NULL;
7872 machine_mode vec_mode;
7873 enum insn_code icode1, icode2;
7874 optab optab1, optab2;
7875 tree vectype = vectype_in;
7876 tree wide_vectype = vectype_out;
7877 enum tree_code c1, c2;
7878 int i;
7879 tree prev_type, intermediate_type;
7880 machine_mode intermediate_mode, prev_mode;
7881 optab optab3, optab4;
7883 *multi_step_cvt = 0;
7884 if (loop_info)
7885 vect_loop = LOOP_VINFO_LOOP (loop_info);
7887 switch (code)
7889 case WIDEN_MULT_EXPR:
7890 /* The result of a vectorized widening operation usually requires
7891 two vectors (because the widened results do not fit into one vector).
7892 The generated vector results would normally be expected to be
7893 generated in the same order as in the original scalar computation,
7894 i.e. if 8 results are generated in each vector iteration, they are
7895 to be organized as follows:
7896 vect1: [res1,res2,res3,res4],
7897 vect2: [res5,res6,res7,res8].
7899 However, in the special case that the result of the widening
7900 operation is used in a reduction computation only, the order doesn't
7901 matter (because when vectorizing a reduction we change the order of
7902 the computation). Some targets can take advantage of this and
7903 generate more efficient code. For example, targets like Altivec,
7904 that support widen_mult using a sequence of {mult_even,mult_odd}
7905 generate the following vectors:
7906 vect1: [res1,res3,res5,res7],
7907 vect2: [res2,res4,res6,res8].
7909 When vectorizing outer-loops, we execute the inner-loop sequentially
7910 (each vectorized inner-loop iteration contributes to VF outer-loop
7911 iterations in parallel). We therefore don't allow to change the
7912 order of the computation in the inner-loop during outer-loop
7913 vectorization. */
7914 /* TODO: Another case in which order doesn't *really* matter is when we
7915 widen and then contract again, e.g. (short)((int)x * y >> 8).
7916 Normally, pack_trunc performs an even/odd permute, whereas the
7917 repack from an even/odd expansion would be an interleave, which
7918 would be significantly simpler for e.g. AVX2. */
7919 /* In any case, in order to avoid duplicating the code below, recurse
7920 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7921 are properly set up for the caller. If we fail, we'll continue with
7922 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7923 if (vect_loop
7924 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
7925 && !nested_in_vect_loop_p (vect_loop, stmt)
7926 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
7927 stmt, vectype_out, vectype_in,
7928 code1, code2, multi_step_cvt,
7929 interm_types))
7931 /* Elements in a vector with vect_used_by_reduction property cannot
7932 be reordered if the use chain with this property does not have the
7933 same operation. One such an example is s += a * b, where elements
7934 in a and b cannot be reordered. Here we check if the vector defined
7935 by STMT is only directly used in the reduction statement. */
7936 tree lhs = gimple_assign_lhs (stmt);
7937 use_operand_p dummy;
7938 gimple use_stmt;
7939 stmt_vec_info use_stmt_info = NULL;
7940 if (single_imm_use (lhs, &dummy, &use_stmt)
7941 && (use_stmt_info = vinfo_for_stmt (use_stmt))
7942 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
7943 return true;
7945 c1 = VEC_WIDEN_MULT_LO_EXPR;
7946 c2 = VEC_WIDEN_MULT_HI_EXPR;
7947 break;
7949 case VEC_WIDEN_MULT_EVEN_EXPR:
7950 /* Support the recursion induced just above. */
7951 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
7952 c2 = VEC_WIDEN_MULT_ODD_EXPR;
7953 break;
7955 case WIDEN_LSHIFT_EXPR:
7956 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
7957 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
7958 break;
7960 CASE_CONVERT:
7961 c1 = VEC_UNPACK_LO_EXPR;
7962 c2 = VEC_UNPACK_HI_EXPR;
7963 break;
7965 case FLOAT_EXPR:
7966 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
7967 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
7968 break;
7970 case FIX_TRUNC_EXPR:
7971 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7972 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7973 computing the operation. */
7974 return false;
7976 default:
7977 gcc_unreachable ();
7980 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
7982 enum tree_code ctmp = c1;
7983 c1 = c2;
7984 c2 = ctmp;
7987 if (code == FIX_TRUNC_EXPR)
7989 /* The signedness is determined from output operand. */
7990 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
7991 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
7993 else
7995 optab1 = optab_for_tree_code (c1, vectype, optab_default);
7996 optab2 = optab_for_tree_code (c2, vectype, optab_default);
7999 if (!optab1 || !optab2)
8000 return false;
8002 vec_mode = TYPE_MODE (vectype);
8003 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8004 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8005 return false;
8007 *code1 = c1;
8008 *code2 = c2;
8010 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8011 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8012 return true;
8014 /* Check if it's a multi-step conversion that can be done using intermediate
8015 types. */
8017 prev_type = vectype;
8018 prev_mode = vec_mode;
8020 if (!CONVERT_EXPR_CODE_P (code))
8021 return false;
8023 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8024 intermediate steps in promotion sequence. We try
8025 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
8026 not. */
8027 interm_types->create (MAX_INTERM_CVT_STEPS);
8028 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8030 intermediate_mode = insn_data[icode1].operand[0].mode;
8031 intermediate_type
8032 = lang_hooks.types.type_for_mode (intermediate_mode,
8033 TYPE_UNSIGNED (prev_type));
8034 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8035 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8037 if (!optab3 || !optab4
8038 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8039 || insn_data[icode1].operand[0].mode != intermediate_mode
8040 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8041 || insn_data[icode2].operand[0].mode != intermediate_mode
8042 || ((icode1 = optab_handler (optab3, intermediate_mode))
8043 == CODE_FOR_nothing)
8044 || ((icode2 = optab_handler (optab4, intermediate_mode))
8045 == CODE_FOR_nothing))
8046 break;
8048 interm_types->quick_push (intermediate_type);
8049 (*multi_step_cvt)++;
8051 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8052 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8053 return true;
8055 prev_type = intermediate_type;
8056 prev_mode = intermediate_mode;
8059 interm_types->release ();
8060 return false;
8064 /* Function supportable_narrowing_operation
8066 Check whether an operation represented by the code CODE is a
8067 narrowing operation that is supported by the target platform in
8068 vector form (i.e., when operating on arguments of type VECTYPE_IN
8069 and producing a result of type VECTYPE_OUT).
8071 Narrowing operations we currently support are NOP (CONVERT) and
8072 FIX_TRUNC. This function checks if these operations are supported by
8073 the target platform directly via vector tree-codes.
8075 Output:
8076 - CODE1 is the code of a vector operation to be used when
8077 vectorizing the operation, if available.
8078 - MULTI_STEP_CVT determines the number of required intermediate steps in
8079 case of multi-step conversion (like int->short->char - in that case
8080 MULTI_STEP_CVT will be 1).
8081 - INTERM_TYPES contains the intermediate type required to perform the
8082 narrowing operation (short in the above example). */
8084 bool
8085 supportable_narrowing_operation (enum tree_code code,
8086 tree vectype_out, tree vectype_in,
8087 enum tree_code *code1, int *multi_step_cvt,
8088 vec<tree> *interm_types)
8090 machine_mode vec_mode;
8091 enum insn_code icode1;
8092 optab optab1, interm_optab;
8093 tree vectype = vectype_in;
8094 tree narrow_vectype = vectype_out;
8095 enum tree_code c1;
8096 tree intermediate_type;
8097 machine_mode intermediate_mode, prev_mode;
8098 int i;
8099 bool uns;
8101 *multi_step_cvt = 0;
8102 switch (code)
8104 CASE_CONVERT:
8105 c1 = VEC_PACK_TRUNC_EXPR;
8106 break;
8108 case FIX_TRUNC_EXPR:
8109 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8110 break;
8112 case FLOAT_EXPR:
8113 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8114 tree code and optabs used for computing the operation. */
8115 return false;
8117 default:
8118 gcc_unreachable ();
8121 if (code == FIX_TRUNC_EXPR)
8122 /* The signedness is determined from output operand. */
8123 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8124 else
8125 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8127 if (!optab1)
8128 return false;
8130 vec_mode = TYPE_MODE (vectype);
8131 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8132 return false;
8134 *code1 = c1;
8136 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8137 return true;
8139 /* Check if it's a multi-step conversion that can be done using intermediate
8140 types. */
8141 prev_mode = vec_mode;
8142 if (code == FIX_TRUNC_EXPR)
8143 uns = TYPE_UNSIGNED (vectype_out);
8144 else
8145 uns = TYPE_UNSIGNED (vectype);
8147 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8148 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8149 costly than signed. */
8150 if (code == FIX_TRUNC_EXPR && uns)
8152 enum insn_code icode2;
8154 intermediate_type
8155 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8156 interm_optab
8157 = optab_for_tree_code (c1, intermediate_type, optab_default);
8158 if (interm_optab != unknown_optab
8159 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8160 && insn_data[icode1].operand[0].mode
8161 == insn_data[icode2].operand[0].mode)
8163 uns = false;
8164 optab1 = interm_optab;
8165 icode1 = icode2;
8169 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8170 intermediate steps in promotion sequence. We try
8171 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8172 interm_types->create (MAX_INTERM_CVT_STEPS);
8173 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8175 intermediate_mode = insn_data[icode1].operand[0].mode;
8176 intermediate_type
8177 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8178 interm_optab
8179 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8180 optab_default);
8181 if (!interm_optab
8182 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8183 || insn_data[icode1].operand[0].mode != intermediate_mode
8184 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8185 == CODE_FOR_nothing))
8186 break;
8188 interm_types->quick_push (intermediate_type);
8189 (*multi_step_cvt)++;
8191 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8192 return true;
8194 prev_mode = intermediate_mode;
8195 optab1 = interm_optab;
8198 interm_types->release ();
8199 return false;