[official-gcc.git] / gcc / tree-vect-stmts.c
blob 1e7b13562df1e4eb5f67d2fc8a567e33a22533f5
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "hash-set.h"
28 #include "machmode.h"
29 #include "vec.h"
30 #include "double-int.h"
31 #include "input.h"
32 #include "alias.h"
33 #include "symtab.h"
34 #include "wide-int.h"
35 #include "inchash.h"
36 #include "tree.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "target.h"
40 #include "predict.h"
41 #include "hard-reg-set.h"
42 #include "function.h"
43 #include "dominance.h"
44 #include "cfg.h"
45 #include "basic-block.h"
46 #include "gimple-pretty-print.h"
47 #include "tree-ssa-alias.h"
48 #include "internal-fn.h"
49 #include "tree-eh.h"
50 #include "gimple-expr.h"
51 #include "is-a.h"
52 #include "gimple.h"
53 #include "gimplify.h"
54 #include "gimple-iterator.h"
55 #include "gimplify-me.h"
56 #include "gimple-ssa.h"
57 #include "tree-cfg.h"
58 #include "tree-phinodes.h"
59 #include "ssa-iterators.h"
60 #include "stringpool.h"
61 #include "tree-ssanames.h"
62 #include "tree-ssa-loop-manip.h"
63 #include "cfgloop.h"
64 #include "tree-ssa-loop.h"
65 #include "tree-scalar-evolution.h"
66 #include "hashtab.h"
67 #include "rtl.h"
68 #include "flags.h"
69 #include "statistics.h"
70 #include "real.h"
71 #include "fixed-value.h"
72 #include "insn-config.h"
73 #include "expmed.h"
74 #include "dojump.h"
75 #include "explow.h"
76 #include "calls.h"
77 #include "emit-rtl.h"
78 #include "varasm.h"
79 #include "stmt.h"
80 #include "expr.h"
81 #include "recog.h" /* FIXME: for insn_data */
82 #include "insn-codes.h"
83 #include "optabs.h"
84 #include "diagnostic-core.h"
85 #include "tree-vectorizer.h"
86 #include "hash-map.h"
87 #include "plugin-api.h"
88 #include "ipa-ref.h"
89 #include "cgraph.h"
90 #include "builtins.h"
92 /* For lang_hooks.types.type_for_mode. */
93 #include "langhooks.h"
95 /* Return the vectorized type for the given statement. */
97 tree
98 stmt_vectype (struct _stmt_vec_info *stmt_info)
100 return STMT_VINFO_VECTYPE (stmt_info);
103 /* Return TRUE iff the given statement is in an inner loop relative to
104 the loop being vectorized. */
105 bool
106 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
108 gimple stmt = STMT_VINFO_STMT (stmt_info);
109 basic_block bb = gimple_bb (stmt);
110 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
111 struct loop* loop;
113 if (!loop_vinfo)
114 return false;
116 loop = LOOP_VINFO_LOOP (loop_vinfo);
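/* When vectorizing an outer loop, LOOP->inner is its nested loop; for a
   single-level loop it is NULL, so this can only hold in the outer-loop case.  */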
118 return (bb->loop_father == loop->inner);
121 /* Record the cost of a statement, either by directly informing the
122 target model or by saving it in a vector for later processing.
123 Return a preliminary estimate of the statement's cost. */
125 unsigned
126 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
127 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
128 int misalign, enum vect_cost_model_location where)
130 if (body_cost_vec)
132 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
133 add_stmt_info_to_vec (body_cost_vec, count, kind,
134 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
135 misalign);
136 return (unsigned)
137 (builtin_vectorization_cost (kind, vectype, misalign) * count);
140 else
142 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
143 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
144 void *target_cost_data;
146 if (loop_vinfo)
147 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
148 else
149 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
151 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
152 misalign, where);
156 /* Return a variable of type ELEM_TYPE[NELEMS]. */
158 static tree
159 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
161 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
162 "vect_array");
165 /* ARRAY is an array of vectors created by create_vector_array.
166 Return an SSA_NAME for the vector in index N. The reference
167 is part of the vectorization of STMT and the vector is associated
168 with scalar destination SCALAR_DEST. */
170 static tree
171 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
172 tree array, unsigned HOST_WIDE_INT n)
174 tree vect_type, vect, vect_name, array_ref;
175 gimple new_stmt;
177 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
178 vect_type = TREE_TYPE (TREE_TYPE (array));
179 vect = vect_create_destination_var (scalar_dest, vect_type);
180 array_ref = build4 (ARRAY_REF, vect_type, array,
181 build_int_cst (size_type_node, n),
182 NULL_TREE, NULL_TREE);
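/* The two NULL_TREE operands above are the ARRAY_REF's optional lower bound
   and element size; NULL means use the defaults from the array type.  */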
184 new_stmt = gimple_build_assign (vect, array_ref);
185 vect_name = make_ssa_name (vect, new_stmt);
186 gimple_assign_set_lhs (new_stmt, vect_name);
187 vect_finish_stmt_generation (stmt, new_stmt, gsi);
189 return vect_name;
192 /* ARRAY is an array of vectors created by create_vector_array.
193 Emit code to store SSA_NAME VECT in index N of the array.
194 The store is part of the vectorization of STMT. */
196 static void
197 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
198 tree array, unsigned HOST_WIDE_INT n)
200 tree array_ref;
201 gimple new_stmt;
203 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
204 build_int_cst (size_type_node, n),
205 NULL_TREE, NULL_TREE);
207 new_stmt = gimple_build_assign (array_ref, vect);
208 vect_finish_stmt_generation (stmt, new_stmt, gsi);
211 /* PTR is a pointer to an array of type TYPE. Return a representation
212 of *PTR. The memory reference replaces those in FIRST_DR
213 (and its group). */
215 static tree
216 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
218 tree mem_ref, alias_ptr_type;
220 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
221 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
222 /* Arrays have the same alignment as their type. */
223 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
224 return mem_ref;
227 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
229 /* Function vect_mark_relevant.
231 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
233 static void
234 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
235 enum vect_relevant relevant, bool live_p,
236 bool used_in_pattern)
238 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
239 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
240 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
241 gimple pattern_stmt;
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "mark relevant %d, live %d.\n", relevant, live_p);
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
249 may have their own uses that are not in any pattern; in such cases the
250 stmt itself should be marked. */
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
253 bool found = false;
254 if (!used_in_pattern)
256 imm_use_iterator imm_iter;
257 use_operand_p use_p;
258 gimple use_stmt;
259 tree lhs;
260 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
261 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
263 if (is_gimple_assign (stmt))
264 lhs = gimple_assign_lhs (stmt);
265 else
266 lhs = gimple_call_lhs (stmt);
268 /* This use is outside the pattern; if LHS has other uses that are
269 pattern uses, we should mark the stmt itself, and not the pattern
270 stmt. */
271 if (lhs && TREE_CODE (lhs) == SSA_NAME)
272 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
274 if (is_gimple_debug (USE_STMT (use_p)))
275 continue;
276 use_stmt = USE_STMT (use_p);
278 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
279 continue;
281 if (vinfo_for_stmt (use_stmt)
282 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
284 found = true;
285 break;
290 if (!found)
292 /* This is the last stmt in a sequence that was detected as a
293 pattern that can potentially be vectorized. Don't mark the stmt
294 as relevant/live because it's not going to be vectorized.
295 Instead mark the pattern-stmt that replaces it. */
297 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
299 if (dump_enabled_p ())
300 dump_printf_loc (MSG_NOTE, vect_location,
301 "last stmt in pattern. don't mark"
302 " relevant/live.\n");
303 stmt_info = vinfo_for_stmt (pattern_stmt);
304 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
305 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
306 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
307 stmt = pattern_stmt;
311 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
312 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
313 STMT_VINFO_RELEVANT (stmt_info) = relevant;
315 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
316 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
318 if (dump_enabled_p ())
319 dump_printf_loc (MSG_NOTE, vect_location,
320 "already marked relevant/live.\n");
321 return;
324 worklist->safe_push (stmt);
328 /* Function vect_stmt_relevant_p.
330 Return true if STMT in loop that is represented by LOOP_VINFO is
331 "relevant for vectorization".
333 A stmt is considered "relevant for vectorization" if:
334 - it has uses outside the loop.
335 - it has vdefs (it alters memory).
336 - it is a control stmt in the loop (other than the exit condition).
338 CHECKME: what other side effects would the vectorizer allow? */
340 static bool
341 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
342 enum vect_relevant *relevant, bool *live_p)
344 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
345 ssa_op_iter op_iter;
346 imm_use_iterator imm_iter;
347 use_operand_p use_p;
348 def_operand_p def_p;
350 *relevant = vect_unused_in_scope;
351 *live_p = false;
353 /* cond stmt other than loop exit cond. */
354 if (is_ctrl_stmt (stmt)
355 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
356 != loop_exit_ctrl_vec_info_type)
357 *relevant = vect_used_in_scope;
359 /* changing memory. */
360 if (gimple_code (stmt) != GIMPLE_PHI)
361 if (gimple_vdef (stmt)
362 && !gimple_clobber_p (stmt))
364 if (dump_enabled_p ())
365 dump_printf_loc (MSG_NOTE, vect_location,
366 "vec_stmt_relevant_p: stmt has vdefs.\n");
367 *relevant = vect_used_in_scope;
370 /* uses outside the loop. */
371 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
373 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
375 basic_block bb = gimple_bb (USE_STMT (use_p));
376 if (!flow_bb_inside_loop_p (loop, bb))
378 if (dump_enabled_p ())
379 dump_printf_loc (MSG_NOTE, vect_location,
380 "vec_stmt_relevant_p: used out of loop.\n");
382 if (is_gimple_debug (USE_STMT (use_p)))
383 continue;
385 /* We expect all such uses to be in the loop exit phis
386 (because of loop closed form) */
387 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
388 gcc_assert (bb == single_exit (loop)->dest);
390 *live_p = true;
395 return (*live_p || *relevant);
399 /* Function exist_non_indexing_operands_for_use_p
401 USE is one of the uses attached to STMT. Check if USE is
402 used in STMT for anything other than indexing an array. */
404 static bool
405 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
407 tree operand;
408 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
410 /* USE corresponds to some operand in STMT. If there is no data
411 reference in STMT, then any operand that corresponds to USE
412 is not indexing an array. */
413 if (!STMT_VINFO_DATA_REF (stmt_info))
414 return true;
416 /* STMT has a data_ref. FORNOW this means that it's of one of
417 the following forms:
418 -1- ARRAY_REF = var
419 -2- var = ARRAY_REF
420 (This should have been verified in analyze_data_refs).
422 'var' in the second case corresponds to a def, not a use,
423 so USE cannot correspond to any operands that are not used
424 for array indexing.
426 Therefore, all we need to check is if STMT falls into the
427 first case, and whether var corresponds to USE. */
429 if (!gimple_assign_copy_p (stmt))
431 if (is_gimple_call (stmt)
432 && gimple_call_internal_p (stmt))
433 switch (gimple_call_internal_fn (stmt))
435 case IFN_MASK_STORE:
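/* For a masked store, argument 3 is the value being stored.  */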
436 operand = gimple_call_arg (stmt, 3);
437 if (operand == use)
438 return true;
439 /* FALLTHRU */
440 case IFN_MASK_LOAD:
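/* Argument 2 is the mask, for masked loads and stores alike.  */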
441 operand = gimple_call_arg (stmt, 2);
442 if (operand == use)
443 return true;
444 break;
445 default:
446 break;
448 return false;
451 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
452 return false;
453 operand = gimple_assign_rhs1 (stmt);
454 if (TREE_CODE (operand) != SSA_NAME)
455 return false;
457 if (operand == use)
458 return true;
460 return false;
465 Function process_use.
467 Inputs:
468 - a USE in STMT in a loop represented by LOOP_VINFO
469 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
470 that defined USE. This is done by calling mark_relevant and passing it
471 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
472 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
473 be performed.
475 Outputs:
476 Generally, LIVE_P and RELEVANT are used to define the liveness and
477 relevance info of the DEF_STMT of this USE:
478 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
479 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
480 Exceptions:
481 - case 1: If USE is used only for address computations (e.g. array indexing),
482 which does not need to be directly vectorized, then the liveness/relevance
483 of the respective DEF_STMT is left unchanged.
484 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
485 skip DEF_STMT because it has already been processed.
486 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
487 be modified accordingly.
489 Return true if everything is as expected. Return false otherwise. */
491 static bool
492 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
493 enum vect_relevant relevant, vec<gimple> *worklist,
494 bool force)
496 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
497 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
498 stmt_vec_info dstmt_vinfo;
499 basic_block bb, def_bb;
500 tree def;
501 gimple def_stmt;
502 enum vect_def_type dt;
504 /* case 1: we are only interested in uses that need to be vectorized. Uses
505 that are used for address computation are not considered relevant. */
506 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
507 return true;
509 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
511 if (dump_enabled_p ())
512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
513 "not vectorized: unsupported use in stmt.\n");
514 return false;
517 if (!def_stmt || gimple_nop_p (def_stmt))
518 return true;
520 def_bb = gimple_bb (def_stmt);
521 if (!flow_bb_inside_loop_p (loop, def_bb))
523 if (dump_enabled_p ())
524 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
525 return true;
528 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
529 DEF_STMT must have already been processed, because this should be the
530 only way that STMT, which is a reduction-phi, was put in the worklist,
531 as there should be no other uses for DEF_STMT in the loop. So we just
532 check that everything is as expected, and we are done. */
533 dstmt_vinfo = vinfo_for_stmt (def_stmt);
534 bb = gimple_bb (stmt);
535 if (gimple_code (stmt) == GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
537 && gimple_code (def_stmt) != GIMPLE_PHI
538 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
539 && bb->loop_father == def_bb->loop_father)
541 if (dump_enabled_p ())
542 dump_printf_loc (MSG_NOTE, vect_location,
543 "reduc-stmt defining reduc-phi in the same nest.\n");
544 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
545 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
546 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
547 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
548 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
549 return true;
552 /* case 3a: outer-loop stmt defining an inner-loop stmt:
553 outer-loop-header-bb:
554 d = def_stmt
555 inner-loop:
556 stmt # use (d)
557 outer-loop-tail-bb:
558 ... */
559 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
561 if (dump_enabled_p ())
562 dump_printf_loc (MSG_NOTE, vect_location,
563 "outer-loop def-stmt defining inner-loop stmt.\n");
565 switch (relevant)
567 case vect_unused_in_scope:
568 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
569 vect_used_in_scope : vect_unused_in_scope;
570 break;
572 case vect_used_in_outer_by_reduction:
573 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
574 relevant = vect_used_by_reduction;
575 break;
577 case vect_used_in_outer:
578 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
579 relevant = vect_used_in_scope;
580 break;
582 case vect_used_in_scope:
583 break;
585 default:
586 gcc_unreachable ();
590 /* case 3b: inner-loop stmt defining an outer-loop stmt:
591 outer-loop-header-bb:
593 inner-loop:
594 d = def_stmt
595 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
596 stmt # use (d) */
597 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
599 if (dump_enabled_p ())
600 dump_printf_loc (MSG_NOTE, vect_location,
601 "inner-loop def-stmt defining outer-loop stmt.\n");
603 switch (relevant)
605 case vect_unused_in_scope:
606 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
607 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
608 vect_used_in_outer_by_reduction : vect_unused_in_scope;
609 break;
611 case vect_used_by_reduction:
612 relevant = vect_used_in_outer_by_reduction;
613 break;
615 case vect_used_in_scope:
616 relevant = vect_used_in_outer;
617 break;
619 default:
620 gcc_unreachable ();
624 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
625 is_pattern_stmt_p (stmt_vinfo));
626 return true;
630 /* Function vect_mark_stmts_to_be_vectorized.
632 Not all stmts in the loop need to be vectorized. For example:
634 for i...
635 for j...
636 1. T0 = i + j
637 2. T1 = a[T0]
639 3. j = j + 1
641 Stmts 1 and 3 do not need to be vectorized, because loop control and
642 addressing of vectorized data-refs are handled differently.
644 This pass detects such stmts. */
646 bool
647 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
649 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
650 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
651 unsigned int nbbs = loop->num_nodes;
652 gimple_stmt_iterator si;
653 gimple stmt;
654 unsigned int i;
655 stmt_vec_info stmt_vinfo;
656 basic_block bb;
657 gimple phi;
658 bool live_p;
659 enum vect_relevant relevant, tmp_relevant;
660 enum vect_def_type def_type;
662 if (dump_enabled_p ())
663 dump_printf_loc (MSG_NOTE, vect_location,
664 "=== vect_mark_stmts_to_be_vectorized ===\n");
666 auto_vec<gimple, 64> worklist;
668 /* 1. Init worklist. */
669 for (i = 0; i < nbbs; i++)
671 bb = bbs[i];
672 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
674 phi = gsi_stmt (si);
675 if (dump_enabled_p ())
677 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
678 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
681 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
682 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
684 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
686 stmt = gsi_stmt (si);
687 if (dump_enabled_p ())
689 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
690 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
693 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
694 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
698 /* 2. Process_worklist */
699 while (worklist.length () > 0)
701 use_operand_p use_p;
702 ssa_op_iter iter;
704 stmt = worklist.pop ();
705 if (dump_enabled_p ())
707 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
708 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
711 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
712 (DEF_STMT) as relevant/irrelevant and live/dead according to the
713 liveness and relevance properties of STMT. */
714 stmt_vinfo = vinfo_for_stmt (stmt);
715 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
716 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
718 /* Generally, the liveness and relevance properties of STMT are
719 propagated as is to the DEF_STMTs of its USEs:
720 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
721 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
723 One exception is when STMT has been identified as defining a reduction
724 variable; in this case we set the liveness/relevance as follows:
725 live_p = false
726 relevant = vect_used_by_reduction
727 This is because we distinguish between two kinds of relevant stmts -
728 those that are used by a reduction computation, and those that are
729 (also) used by a regular computation. This allows us later on to
730 identify stmts that are used solely by a reduction, and therefore the
731 order of the results that they produce does not have to be kept. */
733 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
734 tmp_relevant = relevant;
735 switch (def_type)
737 case vect_reduction_def:
738 switch (tmp_relevant)
740 case vect_unused_in_scope:
741 relevant = vect_used_by_reduction;
742 break;
744 case vect_used_by_reduction:
745 if (gimple_code (stmt) == GIMPLE_PHI)
746 break;
747 /* fall through */
749 default:
750 if (dump_enabled_p ())
751 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
752 "unsupported use of reduction.\n");
753 return false;
756 live_p = false;
757 break;
759 case vect_nested_cycle:
760 if (tmp_relevant != vect_unused_in_scope
761 && tmp_relevant != vect_used_in_outer_by_reduction
762 && tmp_relevant != vect_used_in_outer)
764 if (dump_enabled_p ())
765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
766 "unsupported use of nested cycle.\n");
768 return false;
771 live_p = false;
772 break;
774 case vect_double_reduction_def:
775 if (tmp_relevant != vect_unused_in_scope
776 && tmp_relevant != vect_used_by_reduction)
778 if (dump_enabled_p ())
779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
780 "unsupported use of double reduction.\n");
782 return false;
785 live_p = false;
786 break;
788 default:
789 break;
792 if (is_pattern_stmt_p (stmt_vinfo))
794 /* Pattern statements are not inserted into the code, so
795 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
796 have to scan the RHS or function arguments instead. */
797 if (is_gimple_assign (stmt))
799 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
800 tree op = gimple_assign_rhs1 (stmt);
802 i = 1;
803 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
805 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
806 live_p, relevant, &worklist, false)
807 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
808 live_p, relevant, &worklist, false))
809 return false;
810 i = 2;
812 for (; i < gimple_num_ops (stmt); i++)
814 op = gimple_op (stmt, i);
815 if (TREE_CODE (op) == SSA_NAME
816 && !process_use (stmt, op, loop_vinfo, live_p, relevant,
817 &worklist, false))
818 return false;
821 else if (is_gimple_call (stmt))
823 for (i = 0; i < gimple_call_num_args (stmt); i++)
825 tree arg = gimple_call_arg (stmt, i);
826 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
827 &worklist, false))
828 return false;
832 else
833 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
835 tree op = USE_FROM_PTR (use_p);
836 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
837 &worklist, false))
838 return false;
841 if (STMT_VINFO_GATHER_P (stmt_vinfo))
843 tree off;
844 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
845 gcc_assert (decl);
846 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
847 &worklist, true))
848 return false;
850 } /* while worklist */
852 return true;
856 /* Function vect_model_simple_cost.
858 Models cost for simple operations, i.e. those that only emit ncopies of a
859 single op. Right now, this does not account for multiple insns that could
860 be generated for the single vector op. We will handle that shortly. */
862 void
863 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
864 enum vect_def_type *dt,
865 stmt_vector_for_cost *prologue_cost_vec,
866 stmt_vector_for_cost *body_cost_vec)
868 int i;
869 int inside_cost = 0, prologue_cost = 0;
871 /* The SLP costs were already calculated during SLP tree build. */
872 if (PURE_SLP_STMT (stmt_info))
873 return;
875 /* FORNOW: Assuming maximum 2 args per stmt. */
876 for (i = 0; i < 2; i++)
877 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
878 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
879 stmt_info, 0, vect_prologue);
881 /* Pass the inside-of-loop statements to the target-specific cost model. */
882 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
883 stmt_info, 0, vect_body);
885 if (dump_enabled_p ())
886 dump_printf_loc (MSG_NOTE, vect_location,
887 "vect_model_simple_cost: inside_cost = %d, "
888 "prologue_cost = %d .\n", inside_cost, prologue_cost);
892 /* Model cost for type demotion and promotion operations. PWR is normally
893 zero for single-step promotions and demotions. It will be one if
894 two-step promotion/demotion is required, and so on. Each additional
895 step doubles the number of instructions required. */
897 static void
898 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
899 enum vect_def_type *dt, int pwr)
901 int i, tmp;
902 int inside_cost = 0, prologue_cost = 0;
903 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
904 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
905 void *target_cost_data;
907 /* The SLP costs were already calculated during SLP tree build. */
908 if (PURE_SLP_STMT (stmt_info))
909 return;
911 if (loop_vinfo)
912 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
913 else
914 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
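/* Each promotion step emits twice as many stmts as the corresponding
   demotion step, since every input vector is unpacked into a high and a
   low half.  */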
916 for (i = 0; i < pwr + 1; i++)
918 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
919 (i + 1) : i;
920 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
921 vec_promote_demote, stmt_info, 0,
922 vect_body);
925 /* FORNOW: Assuming maximum 2 args per stmt. */
926 for (i = 0; i < 2; i++)
927 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
928 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
929 stmt_info, 0, vect_prologue);
931 if (dump_enabled_p ())
932 dump_printf_loc (MSG_NOTE, vect_location,
933 "vect_model_promotion_demotion_cost: inside_cost = %d, "
934 "prologue_cost = %d .\n", inside_cost, prologue_cost);
937 /* Function vect_cost_group_size
939 For grouped load or store, return the group_size only if it is the first
940 load or store of a group, else return 1. This ensures that group size is
941 only returned once per group. */
943 static int
944 vect_cost_group_size (stmt_vec_info stmt_info)
946 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
948 if (first_stmt == STMT_VINFO_STMT (stmt_info))
949 return GROUP_SIZE (stmt_info);
951 return 1;
955 /* Function vect_model_store_cost
957 Models cost for stores. In the case of grouped accesses, one access
958 has the overhead of the grouped access attributed to it. */
960 void
961 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
962 bool store_lanes_p, enum vect_def_type dt,
963 slp_tree slp_node,
964 stmt_vector_for_cost *prologue_cost_vec,
965 stmt_vector_for_cost *body_cost_vec)
967 int group_size;
968 unsigned int inside_cost = 0, prologue_cost = 0;
969 struct data_reference *first_dr;
970 gimple first_stmt;
972 if (dt == vect_constant_def || dt == vect_external_def)
973 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
974 stmt_info, 0, vect_prologue);
976 /* Grouped access? */
977 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
979 if (slp_node)
981 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
982 group_size = 1;
984 else
986 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
987 group_size = vect_cost_group_size (stmt_info);
990 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
992 /* Not a grouped access. */
993 else
995 group_size = 1;
996 first_dr = STMT_VINFO_DATA_REF (stmt_info);
999 /* We assume that the cost of a single store-lanes instruction is
1000 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
1001 access is instead being provided by a permute-and-store operation,
1002 include the cost of the permutes. */
1003 if (!store_lanes_p && group_size > 1)
1005 /* Uses high and low interleave or shuffle operations for each
1006 needed permute. */
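/* E.g. for GROUP_SIZE == 4 this is ceil_log2 (4) == 2 rounds of 4
   interleaves, i.e. 8 permute stmts per copy.  */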
1007 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1008 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1009 stmt_info, 0, vect_body);
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE, vect_location,
1013 "vect_model_store_cost: strided group_size = %d .\n",
1014 group_size);
1017 /* Costs of the stores. */
1018 if (STMT_VINFO_STRIDED_P (stmt_info))
1020 /* N scalar stores plus extracting the elements. */
1021 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1022 inside_cost += record_stmt_cost (body_cost_vec,
1023 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1024 scalar_store, stmt_info, 0, vect_body);
1025 inside_cost += record_stmt_cost (body_cost_vec,
1026 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1027 vec_to_scalar, stmt_info, 0, vect_body);
1029 else
1030 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1032 if (dump_enabled_p ())
1033 dump_printf_loc (MSG_NOTE, vect_location,
1034 "vect_model_store_cost: inside_cost = %d, "
1035 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1039 /* Calculate cost of DR's memory access. */
1040 void
1041 vect_get_store_cost (struct data_reference *dr, int ncopies,
1042 unsigned int *inside_cost,
1043 stmt_vector_for_cost *body_cost_vec)
1045 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1046 gimple stmt = DR_STMT (dr);
1047 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1049 switch (alignment_support_scheme)
1051 case dr_aligned:
1053 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1054 vector_store, stmt_info, 0,
1055 vect_body);
1057 if (dump_enabled_p ())
1058 dump_printf_loc (MSG_NOTE, vect_location,
1059 "vect_model_store_cost: aligned.\n");
1060 break;
1063 case dr_unaligned_supported:
1065 /* Here, we assign an additional cost for the unaligned store. */
1066 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1067 unaligned_store, stmt_info,
1068 DR_MISALIGNMENT (dr), vect_body);
1069 if (dump_enabled_p ())
1070 dump_printf_loc (MSG_NOTE, vect_location,
1071 "vect_model_store_cost: unaligned supported by "
1072 "hardware.\n");
1073 break;
1076 case dr_unaligned_unsupported:
1078 *inside_cost = VECT_MAX_COST;
1080 if (dump_enabled_p ())
1081 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1082 "vect_model_store_cost: unsupported access.\n");
1083 break;
1086 default:
1087 gcc_unreachable ();
1092 /* Function vect_model_load_cost
1094 Models cost for loads. In the case of grouped accesses, the last access
1095 has the overhead of the grouped access attributed to it. Since unaligned
1096 accesses are supported for loads, we also account for the costs of the
1097 access scheme chosen. */
1099 void
1100 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1101 bool load_lanes_p, slp_tree slp_node,
1102 stmt_vector_for_cost *prologue_cost_vec,
1103 stmt_vector_for_cost *body_cost_vec)
1105 int group_size;
1106 gimple first_stmt;
1107 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1108 unsigned int inside_cost = 0, prologue_cost = 0;
1110 /* Grouped accesses? */
1111 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1112 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1114 group_size = vect_cost_group_size (stmt_info);
1115 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1117 /* Not a grouped access. */
1118 else
1120 group_size = 1;
1121 first_dr = dr;
1124 /* We assume that the cost of a single load-lanes instruction is
1125 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1126 access is instead being provided by a load-and-permute operation,
1127 include the cost of the permutes. */
1128 if (!load_lanes_p && group_size > 1
1129 && !STMT_VINFO_STRIDED_P (stmt_info))
1131 /* Uses even and odd extract operations or shuffle operations
1132 for each needed permute. */
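/* The stmt count matches the store case: ceil_log2 (GROUP_SIZE) rounds of
   GROUP_SIZE extracts per copy.  */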
1133 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1134 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1135 stmt_info, 0, vect_body);
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE, vect_location,
1139 "vect_model_load_cost: strided group_size = %d .\n",
1140 group_size);
1143 /* The loads themselves. */
1144 if (STMT_VINFO_STRIDED_P (stmt_info)
1145 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1147 /* N scalar loads plus gathering them into a vector. */
1148 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1149 inside_cost += record_stmt_cost (body_cost_vec,
1150 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1151 scalar_load, stmt_info, 0, vect_body);
1153 else
1154 vect_get_load_cost (first_dr, ncopies,
1155 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1156 || group_size > 1 || slp_node),
1157 &inside_cost, &prologue_cost,
1158 prologue_cost_vec, body_cost_vec, true);
1159 if (STMT_VINFO_STRIDED_P (stmt_info))
1160 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1161 stmt_info, 0, vect_body);
1163 if (dump_enabled_p ())
1164 dump_printf_loc (MSG_NOTE, vect_location,
1165 "vect_model_load_cost: inside_cost = %d, "
1166 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1170 /* Calculate cost of DR's memory access. */
1171 void
1172 vect_get_load_cost (struct data_reference *dr, int ncopies,
1173 bool add_realign_cost, unsigned int *inside_cost,
1174 unsigned int *prologue_cost,
1175 stmt_vector_for_cost *prologue_cost_vec,
1176 stmt_vector_for_cost *body_cost_vec,
1177 bool record_prologue_costs)
1179 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1180 gimple stmt = DR_STMT (dr);
1181 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1183 switch (alignment_support_scheme)
1185 case dr_aligned:
1187 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1188 stmt_info, 0, vect_body);
1190 if (dump_enabled_p ())
1191 dump_printf_loc (MSG_NOTE, vect_location,
1192 "vect_model_load_cost: aligned.\n");
1194 break;
1196 case dr_unaligned_supported:
1198 /* Here, we assign an additional cost for the unaligned load. */
1199 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1200 unaligned_load, stmt_info,
1201 DR_MISALIGNMENT (dr), vect_body);
1203 if (dump_enabled_p ())
1204 dump_printf_loc (MSG_NOTE, vect_location,
1205 "vect_model_load_cost: unaligned supported by "
1206 "hardware.\n");
1208 break;
1210 case dr_explicit_realign:
1212 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1213 vector_load, stmt_info, 0, vect_body);
1214 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1215 vec_perm, stmt_info, 0, vect_body);
1217 /* FIXME: If the misalignment remains fixed across the iterations of
1218 the containing loop, the following cost should be added to the
1219 prologue costs. */
1220 if (targetm.vectorize.builtin_mask_for_load)
1221 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1222 stmt_info, 0, vect_body);
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE, vect_location,
1226 "vect_model_load_cost: explicit realign\n");
1228 break;
1230 case dr_explicit_realign_optimized:
1232 if (dump_enabled_p ())
1233 dump_printf_loc (MSG_NOTE, vect_location,
1234 "vect_model_load_cost: unaligned software "
1235 "pipelined.\n");
1237 /* Unaligned software pipeline has a load of an address, an initial
1238 load, and possibly a mask operation to "prime" the loop. However,
1239 if this is an access in a group of loads, which provide grouped
1240 access, then the above cost should only be considered for one
1241 access in the group. Inside the loop, there is a load op
1242 and a realignment op. */
1244 if (add_realign_cost && record_prologue_costs)
1246 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1247 vector_stmt, stmt_info,
1248 0, vect_prologue);
1249 if (targetm.vectorize.builtin_mask_for_load)
1250 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1251 vector_stmt, stmt_info,
1252 0, vect_prologue);
1255 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1256 stmt_info, 0, vect_body);
1257 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1258 stmt_info, 0, vect_body);
1260 if (dump_enabled_p ())
1261 dump_printf_loc (MSG_NOTE, vect_location,
1262 "vect_model_load_cost: explicit realign optimized"
1263 "\n");
1265 break;
1268 case dr_unaligned_unsupported:
1270 *inside_cost = VECT_MAX_COST;
1272 if (dump_enabled_p ())
1273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1274 "vect_model_load_cost: unsupported access.\n");
1275 break;
1278 default:
1279 gcc_unreachable ();
1283 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1284 the loop preheader for the vectorized stmt STMT. */
1286 static void
1287 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1289 if (gsi)
1290 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1291 else
1293 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1294 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1296 if (loop_vinfo)
1298 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1299 basic_block new_bb;
1300 edge pe;
1302 if (nested_in_vect_loop_p (loop, stmt))
1303 loop = loop->inner;
1305 pe = loop_preheader_edge (loop);
1306 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1307 gcc_assert (!new_bb);
1309 else
1311 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1312 basic_block bb;
1313 gimple_stmt_iterator gsi_bb_start;
1315 gcc_assert (bb_vinfo);
1316 bb = BB_VINFO_BB (bb_vinfo);
1317 gsi_bb_start = gsi_after_labels (bb);
1318 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1322 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_NOTE, vect_location,
1325 "created new init_stmt: ");
1326 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1330 /* Function vect_init_vector.
1332 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1333 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1334 vector type, a vector with all elements equal to VAL is created first.
1335 Place the initialization at GSI if it is not NULL. Otherwise, place the
1336 initialization at the loop preheader.
1337 Return the DEF of INIT_STMT.
1338 It will be used in the vectorization of STMT. */
1340 tree
1341 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1343 tree new_var;
1344 gimple init_stmt;
1345 tree vec_oprnd;
1346 tree new_temp;
1348 if (TREE_CODE (type) == VECTOR_TYPE
1349 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1351 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1353 if (CONSTANT_CLASS_P (val))
1354 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1355 else
1357 new_temp = make_ssa_name (TREE_TYPE (type));
1358 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1359 vect_init_vector_1 (stmt, init_stmt, gsi);
1360 val = new_temp;
1363 val = build_vector_from_val (type, val);
1366 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1367 init_stmt = gimple_build_assign (new_var, val);
1368 new_temp = make_ssa_name (new_var, init_stmt);
1369 gimple_assign_set_lhs (init_stmt, new_temp);
1370 vect_init_vector_1 (stmt, init_stmt, gsi);
1371 vec_oprnd = gimple_assign_lhs (init_stmt);
1372 return vec_oprnd;
1376 /* Function vect_get_vec_def_for_operand.
1378 OP is an operand in STMT. This function returns a (vector) def that will be
1379 used in the vectorized stmt for STMT.
1381 In the case that OP is an SSA_NAME which is defined in the loop, then
1382 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1384 In case OP is an invariant or constant, a new stmt that creates a vector def
1385 needs to be introduced. */
1387 tree
1388 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1390 tree vec_oprnd;
1391 gimple vec_stmt;
1392 gimple def_stmt;
1393 stmt_vec_info def_stmt_info = NULL;
1394 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1395 unsigned int nunits;
1396 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1397 tree def;
1398 enum vect_def_type dt;
1399 bool is_simple_use;
1400 tree vector_type;
1402 if (dump_enabled_p ())
1404 dump_printf_loc (MSG_NOTE, vect_location,
1405 "vect_get_vec_def_for_operand: ");
1406 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1407 dump_printf (MSG_NOTE, "\n");
1410 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1411 &def_stmt, &def, &dt);
1412 gcc_assert (is_simple_use);
1413 if (dump_enabled_p ())
1415 int loc_printed = 0;
1416 if (def)
1418 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1419 loc_printed = 1;
1420 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1421 dump_printf (MSG_NOTE, "\n");
1423 if (def_stmt)
1425 if (loc_printed)
1426 dump_printf (MSG_NOTE, " def_stmt = ");
1427 else
1428 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1429 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1433 switch (dt)
1435 /* Case 1: operand is a constant. */
1436 case vect_constant_def:
1438 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1439 gcc_assert (vector_type);
1440 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1442 if (scalar_def)
1443 *scalar_def = op;
1445 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1446 if (dump_enabled_p ())
1447 dump_printf_loc (MSG_NOTE, vect_location,
1448 "Create vector_cst. nunits = %d\n", nunits);
1450 return vect_init_vector (stmt, op, vector_type, NULL);
1453 /* Case 2: operand is defined outside the loop - loop invariant. */
1454 case vect_external_def:
1456 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1457 gcc_assert (vector_type);
1459 if (scalar_def)
1460 *scalar_def = def;
1462 /* Create 'vec_inv = {inv,inv,..,inv}' */
1463 if (dump_enabled_p ())
1464 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1466 return vect_init_vector (stmt, def, vector_type, NULL);
1469 /* Case 3: operand is defined inside the loop. */
1470 case vect_internal_def:
1472 if (scalar_def)
1473 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1475 /* Get the def from the vectorized stmt. */
1476 def_stmt_info = vinfo_for_stmt (def_stmt);
1478 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1479 /* Get vectorized pattern statement. */
1480 if (!vec_stmt
1481 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1482 && !STMT_VINFO_RELEVANT (def_stmt_info))
1483 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1484 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1485 gcc_assert (vec_stmt);
1486 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1487 vec_oprnd = PHI_RESULT (vec_stmt);
1488 else if (is_gimple_call (vec_stmt))
1489 vec_oprnd = gimple_call_lhs (vec_stmt);
1490 else
1491 vec_oprnd = gimple_assign_lhs (vec_stmt);
1492 return vec_oprnd;
1495 /* Case 4: operand is defined by a loop header phi - reduction */
1496 case vect_reduction_def:
1497 case vect_double_reduction_def:
1498 case vect_nested_cycle:
1500 struct loop *loop;
1502 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1503 loop = (gimple_bb (def_stmt))->loop_father;
1505 /* Get the def before the loop */
1506 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1507 return get_initial_def_for_reduction (stmt, op, scalar_def);
1510 /* Case 5: operand is defined by loop-header phi - induction. */
1511 case vect_induction_def:
1513 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1515 /* Get the def from the vectorized stmt. */
1516 def_stmt_info = vinfo_for_stmt (def_stmt);
1517 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1518 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1519 vec_oprnd = PHI_RESULT (vec_stmt);
1520 else
1521 vec_oprnd = gimple_get_lhs (vec_stmt);
1522 return vec_oprnd;
1525 default:
1526 gcc_unreachable ();
1531 /* Function vect_get_vec_def_for_stmt_copy
1533 Return a vector-def for an operand. This function is used when the
1534 vectorized stmt to be created (by the caller to this function) is a "copy"
1535 created in case the vectorized result cannot fit in one vector, and several
1536 copies of the vector-stmt are required. In this case the vector-def is
1537 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1538 of the stmt that defines VEC_OPRND.
1539 DT is the type of the vector def VEC_OPRND.
1541 Context:
1542 In case the vectorization factor (VF) is bigger than the number
1543 of elements that can fit in a vectype (nunits), we have to generate
1544 more than one vector stmt to vectorize the scalar stmt. This situation
1545 arises when there are multiple data-types operated upon in the loop; the
1546 smallest data-type determines the VF, and as a result, when vectorizing
1547 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1548 vector stmt (each computing a vector of 'nunits' results, and together
1549 computing 'VF' results in each iteration). This function is called when
1550 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1551 which VF=16 and nunits=4, so the number of copies required is 4):
1553 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1555 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1556 VS1.1: vx.1 = memref1 VS1.2
1557 VS1.2: vx.2 = memref2 VS1.3
1558 VS1.3: vx.3 = memref3
1560 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1561 VSnew.1: vz1 = vx.1 + ... VSnew.2
1562 VSnew.2: vz2 = vx.2 + ... VSnew.3
1563 VSnew.3: vz3 = vx.3 + ...
1565 The vectorization of S1 is explained in vectorizable_load.
1566 The vectorization of S2:
1567 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1568 the function 'vect_get_vec_def_for_operand' is called to
1569 get the relevant vector-def for each operand of S2. For operand x it
1570 returns the vector-def 'vx.0'.
1572 To create the remaining copies of the vector-stmt (VSnew.j), this
1573 function is called to get the relevant vector-def for each operand. It is
1574 obtained from the respective VS1.j stmt, which is recorded in the
1575 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1577 For example, to obtain the vector-def 'vx.1' in order to create the
1578 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1579 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1580 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1581 and return its def ('vx.1').
1582 Overall, to create the above sequence this function will be called 3 times:
1583 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1584 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1585 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1587 tree
1588 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1590 gimple vec_stmt_for_operand;
1591 stmt_vec_info def_stmt_info;
1593 /* Do nothing; can reuse same def. */
1594 if (dt == vect_external_def || dt == vect_constant_def )
1595 return vec_oprnd;
1597 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1598 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1599 gcc_assert (def_stmt_info);
1600 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1601 gcc_assert (vec_stmt_for_operand);
1602 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1603 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1604 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1605 else
1606 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1607 return vec_oprnd;
1611 /* Get vectorized definitions for the operands to create a copy of an original
1612 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1614 static void
1615 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1616 vec<tree> *vec_oprnds0,
1617 vec<tree> *vec_oprnds1)
1619 tree vec_oprnd = vec_oprnds0->pop ();
1621 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1622 vec_oprnds0->quick_push (vec_oprnd);
1624 if (vec_oprnds1 && vec_oprnds1->length ())
1626 vec_oprnd = vec_oprnds1->pop ();
1627 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1628 vec_oprnds1->quick_push (vec_oprnd);
1633 /* Get vectorized definitions for OP0 and OP1.
1634 REDUC_INDEX is the index of reduction operand in case of reduction,
1635 and -1 otherwise. */
1637 void
1638 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1639 vec<tree> *vec_oprnds0,
1640 vec<tree> *vec_oprnds1,
1641 slp_tree slp_node, int reduc_index)
1643 if (slp_node)
1645 int nops = (op1 == NULL_TREE) ? 1 : 2;
1646 auto_vec<tree> ops (nops);
1647 auto_vec<vec<tree> > vec_defs (nops);
1649 ops.quick_push (op0);
1650 if (op1)
1651 ops.quick_push (op1);
1653 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1655 *vec_oprnds0 = vec_defs[0];
1656 if (op1)
1657 *vec_oprnds1 = vec_defs[1];
1659 else
1661 tree vec_oprnd;
1663 vec_oprnds0->create (1);
1664 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1665 vec_oprnds0->quick_push (vec_oprnd);
1667 if (op1)
1669 vec_oprnds1->create (1);
1670 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1671 vec_oprnds1->quick_push (vec_oprnd);
1677 /* Function vect_finish_stmt_generation.
1679 Insert a new stmt. */
1681 void
1682 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1683 gimple_stmt_iterator *gsi)
1685 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1686 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1687 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1689 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1691 if (!gsi_end_p (*gsi)
1692 && gimple_has_mem_ops (vec_stmt))
1694 gimple at_stmt = gsi_stmt (*gsi);
1695 tree vuse = gimple_vuse (at_stmt);
1696 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1698 tree vdef = gimple_vdef (at_stmt);
1699 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1700 /* If we have an SSA vuse and insert a store, update virtual
1701 SSA form to avoid triggering the renamer. Do so only
1702 if we can easily see all uses - which is what almost always
1703 happens with the way vectorized stmts are inserted. */
1704 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1705 && ((is_gimple_assign (vec_stmt)
1706 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1707 || (is_gimple_call (vec_stmt)
1708 && !(gimple_call_flags (vec_stmt)
1709 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1711 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1712 gimple_set_vdef (vec_stmt, new_vdef);
1713 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1717 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1719 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1720 bb_vinfo));
1722 if (dump_enabled_p ())
1724 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1725 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1728 gimple_set_location (vec_stmt, gimple_location (stmt));
1730 /* While EH edges will generally prevent vectorization, stmt might
1731 e.g. be in a must-not-throw region. Ensure newly created stmts
1732 that could throw are part of the same region. */
1733 int lp_nr = lookup_stmt_eh_lp (stmt);
1734 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1735 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1738 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1739 a function declaration if the target has a vectorized version
1740 of the function, or NULL_TREE if the function cannot be vectorized. */
1742 tree
1743 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1745 tree fndecl = gimple_call_fndecl (call);
1747 /* We only handle functions that do not read or clobber memory -- i.e.
1748 const or novops ones. */
1749 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1750 return NULL_TREE;
1752 if (!fndecl
1753 || TREE_CODE (fndecl) != FUNCTION_DECL
1754 || !DECL_BUILT_IN (fndecl))
1755 return NULL_TREE;
1757 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1758 vectype_in);
1762 static tree permute_vec_elements (tree, tree, tree, gimple,
1763 gimple_stmt_iterator *);
1766 /* Function vectorizable_mask_load_store.
1768 Check if STMT performs a conditional load or store that can be vectorized.
1769 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1770 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1771 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1773 static bool
1774 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1775 gimple *vec_stmt, slp_tree slp_node)
1777 tree vec_dest = NULL;
1778 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1779 stmt_vec_info prev_stmt_info;
1780 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1781 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1782 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1783 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1784 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1785 tree elem_type;
1786 gimple new_stmt;
1787 tree dummy;
1788 tree dataref_ptr = NULL_TREE;
1789 gimple ptr_incr;
1790 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1791 int ncopies;
1792 int i, j;
1793 bool inv_p;
1794 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1795 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1796 int gather_scale = 1;
1797 enum vect_def_type gather_dt = vect_unknown_def_type;
1798 bool is_store;
1799 tree mask;
1800 gimple def_stmt;
1801 tree def;
1802 enum vect_def_type dt;
1804 if (slp_node != NULL)
1805 return false;
1807 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1808 gcc_assert (ncopies >= 1);
1810 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1811 mask = gimple_call_arg (stmt, 2);
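/* The scalar mask must be exactly as wide as the vector element type.  */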
1812 if (TYPE_PRECISION (TREE_TYPE (mask))
1813 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1814 return false;
1816 /* FORNOW. This restriction should be relaxed. */
1817 if (nested_in_vect_loop && ncopies > 1)
1819 if (dump_enabled_p ())
1820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1821 "multiple types in nested loop.");
1822 return false;
1825 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1826 return false;
1828 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1829 return false;
1831 if (!STMT_VINFO_DATA_REF (stmt_info))
1832 return false;
1834 elem_type = TREE_TYPE (vectype);
1836 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1837 return false;
1839 if (STMT_VINFO_STRIDED_P (stmt_info))
1840 return false;
1842 if (STMT_VINFO_GATHER_P (stmt_info))
1844 gimple def_stmt;
1845 tree def;
1846 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1847 &gather_off, &gather_scale);
1848 gcc_assert (gather_decl);
1849 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1850 &def_stmt, &def, &gather_dt,
1851 &gather_off_vectype))
1853 if (dump_enabled_p ())
1854 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1855 "gather index use not simple.");
1856 return false;
1859 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1860 tree masktype
1861 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1862 if (TREE_CODE (masktype) == INTEGER_TYPE)
1864 if (dump_enabled_p ())
1865 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1866 "masked gather with integer mask not supported.");
1867 return false;
1870 else if (tree_int_cst_compare (nested_in_vect_loop
1871 ? STMT_VINFO_DR_STEP (stmt_info)
1872 : DR_STEP (dr), size_zero_node) <= 0)
1873 return false;
1874 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1875 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1876 return false;
1878 if (TREE_CODE (mask) != SSA_NAME)
1879 return false;
1881 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1882 &def_stmt, &def, &dt))
1883 return false;
1885 if (is_store)
1887 tree rhs = gimple_call_arg (stmt, 3);
1888 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1889 &def_stmt, &def, &dt))
1890 return false;
1893 if (!vec_stmt) /* transformation not required. */
1895 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1896 if (is_store)
1897 vect_model_store_cost (stmt_info, ncopies, false, dt,
1898 NULL, NULL, NULL);
1899 else
1900 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1901 return true;
1904 /** Transform. **/
1906 if (STMT_VINFO_GATHER_P (stmt_info))
1908 tree vec_oprnd0 = NULL_TREE, op;
1909 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1910 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1911 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1912 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1913 tree mask_perm_mask = NULL_TREE;
1914 edge pe = loop_preheader_edge (loop);
1915 gimple_seq seq;
1916 basic_block new_bb;
1917 enum { NARROW, NONE, WIDEN } modifier;
1918 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1920 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1921 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1922 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1923 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1924 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1925 scaletype = TREE_VALUE (arglist);
1926 gcc_checking_assert (types_compatible_p (srctype, rettype)
1927 && types_compatible_p (srctype, masktype));
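/* Decide how the gather offset vector relates to the data vector type.
   NONE: both have the same number of elements.  WIDEN: the offset vector
   has twice as many elements, so one offset def serves two copies; odd
   copies select the high half of the offsets (e.g. with nunits == 4 and
   gather_off_nunits == 8 the selector is { 4, 5, 6, 7, 4, 5, 6, 7 }).
   NARROW: the offset vector has half as many elements, so two gathers are
   needed per data vector (hence ncopies is doubled) and the low halves of
   consecutive gather results are concatenated with PERM_MASK; the mask
   operand is advanced similarly via MASK_PERM_MASK.  */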
1929 if (nunits == gather_off_nunits)
1930 modifier = NONE;
1931 else if (nunits == gather_off_nunits / 2)
1933 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1934 modifier = WIDEN;
1936 for (i = 0; i < gather_off_nunits; ++i)
1937 sel[i] = i | nunits;
1939 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1941 else if (nunits == gather_off_nunits * 2)
1943 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1944 modifier = NARROW;
1946 for (i = 0; i < nunits; ++i)
1947 sel[i] = i < gather_off_nunits
1948 ? i : i + nunits - gather_off_nunits;
1950 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1951 ncopies *= 2;
1952 for (i = 0; i < nunits; ++i)
1953 sel[i] = i | gather_off_nunits;
1954 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1956 else
1957 gcc_unreachable ();
1959 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
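/* The gather base address is loop-invariant.  If it is not already a
   gimple invariant, compute it once on the loop preheader edge; the edge
   insertion must not require splitting the edge.  */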
1961 ptr = fold_convert (ptrtype, gather_base);
1962 if (!is_gimple_min_invariant (ptr))
1964 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1965 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1966 gcc_assert (!new_bb);
1969 scale = build_int_cst (scaletype, gather_scale);
1971 prev_stmt_info = NULL;
1972 for (j = 0; j < ncopies; ++j)
1974 if (modifier == WIDEN && (j & 1))
1975 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1976 perm_mask, stmt, gsi);
1977 else if (j == 0)
1978 op = vec_oprnd0
1979 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1980 else
1981 op = vec_oprnd0
1982 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1984 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1986 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1987 == TYPE_VECTOR_SUBPARTS (idxtype));
1988 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1989 var = make_ssa_name (var);
1990 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1991 new_stmt
1992 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1993 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1994 op = var;
1997 if (mask_perm_mask && (j & 1))
1998 mask_op = permute_vec_elements (mask_op, mask_op,
1999 mask_perm_mask, stmt, gsi);
2000 else
2002 if (j == 0)
2003 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2004 else
2006 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
2007 &def_stmt, &def, &dt);
2008 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2011 mask_op = vec_mask;
2012 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2014 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2015 == TYPE_VECTOR_SUBPARTS (masktype));
2016 var = vect_get_new_vect_var (masktype, vect_simple_var,
2017 NULL);
2018 var = make_ssa_name (var);
2019 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2020 new_stmt
2021 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2022 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2023 mask_op = var;
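/* Build the gather call.  Its arguments follow the (src, base, index,
   mask, scale) layout decomposed from the builtin's TYPE_ARG_TYPES above;
   the mask vector doubles as the source operand, which is why srctype and
   masktype were required to be compatible.  */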
2027 new_stmt
2028 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2029 scale);
2031 if (!useless_type_conversion_p (vectype, rettype))
2033 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2034 == TYPE_VECTOR_SUBPARTS (rettype));
2035 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2036 op = make_ssa_name (var, new_stmt);
2037 gimple_call_set_lhs (new_stmt, op);
2038 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2039 var = make_ssa_name (vec_dest);
2040 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2041 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2043 else
2045 var = make_ssa_name (vec_dest, new_stmt);
2046 gimple_call_set_lhs (new_stmt, var);
2049 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2051 if (modifier == NARROW)
2053 if ((j & 1) == 0)
2055 prev_res = var;
2056 continue;
2058 var = permute_vec_elements (prev_res, var,
2059 perm_mask, stmt, gsi);
2060 new_stmt = SSA_NAME_DEF_STMT (var);
2063 if (prev_stmt_info == NULL)
2064 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2065 else
2066 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2067 prev_stmt_info = vinfo_for_stmt (new_stmt);
2070 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2071 from the IL. */
2072 tree lhs = gimple_call_lhs (stmt);
2073 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2074 set_vinfo_for_stmt (new_stmt, stmt_info);
2075 set_vinfo_for_stmt (stmt, NULL);
2076 STMT_VINFO_STMT (stmt_info) = new_stmt;
2077 gsi_replace (gsi, new_stmt, true);
2078 return true;
2080 else if (is_store)
2082 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2083 prev_stmt_info = NULL;
2084 for (i = 0; i < ncopies; i++)
2086 unsigned align, misalign;
2088 if (i == 0)
2090 tree rhs = gimple_call_arg (stmt, 3);
2091 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2092 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2094 /* We should have caught mismatched types earlier. */
2094 gcc_assert (useless_type_conversion_p (vectype,
2095 TREE_TYPE (vec_rhs)));
2096 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2097 NULL_TREE, &dummy, gsi,
2098 &ptr_incr, false, &inv_p);
2099 gcc_assert (!inv_p);
2101 else
2103 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2104 &def, &dt);
2105 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2106 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2107 &def, &dt);
2108 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2109 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2110 TYPE_SIZE_UNIT (vectype));
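/* Record what alignment may be assumed for the access: full vector
   alignment when the access is known to be aligned, only element
   alignment when the misalignment is unknown (DR_MISALIGNMENT is -1),
   and the known misalignment otherwise.  */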
2113 align = TYPE_ALIGN_UNIT (vectype);
2114 if (aligned_access_p (dr))
2115 misalign = 0;
2116 else if (DR_MISALIGNMENT (dr) == -1)
2118 align = TYPE_ALIGN_UNIT (elem_type);
2119 misalign = 0;
2121 else
2122 misalign = DR_MISALIGNMENT (dr);
2123 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2124 misalign);
2125 new_stmt
2126 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2127 gimple_call_arg (stmt, 1),
2128 vec_mask, vec_rhs);
2129 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2130 if (i == 0)
2131 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2132 else
2133 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2134 prev_stmt_info = vinfo_for_stmt (new_stmt);
2137 else
2139 tree vec_mask = NULL_TREE;
2140 prev_stmt_info = NULL;
2141 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2142 for (i = 0; i < ncopies; i++)
2144 unsigned align, misalign;
2146 if (i == 0)
2148 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2149 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2150 NULL_TREE, &dummy, gsi,
2151 &ptr_incr, false, &inv_p);
2152 gcc_assert (!inv_p);
2154 else
2156 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2157 &def, &dt);
2158 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2159 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2160 TYPE_SIZE_UNIT (vectype));
2163 align = TYPE_ALIGN_UNIT (vectype);
2164 if (aligned_access_p (dr))
2165 misalign = 0;
2166 else if (DR_MISALIGNMENT (dr) == -1)
2168 align = TYPE_ALIGN_UNIT (elem_type);
2169 misalign = 0;
2171 else
2172 misalign = DR_MISALIGNMENT (dr);
2173 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2174 misalign);
2175 new_stmt
2176 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2177 gimple_call_arg (stmt, 1),
2178 vec_mask);
2179 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2180 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2181 if (i == 0)
2182 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2183 else
2184 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2185 prev_stmt_info = vinfo_for_stmt (new_stmt);
2189 if (!is_store)
2191 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2192 from the IL. */
2193 tree lhs = gimple_call_lhs (stmt);
2194 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2195 set_vinfo_for_stmt (new_stmt, stmt_info);
2196 set_vinfo_for_stmt (stmt, NULL);
2197 STMT_VINFO_STMT (stmt_info) = new_stmt;
2198 gsi_replace (gsi, new_stmt, true);
2201 return true;
2205 /* Function vectorizable_call.
2207 Check if GS performs a function call that can be vectorized.
2208 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2209 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2210 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2212 static bool
2213 vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2214 slp_tree slp_node)
2216 gcall *stmt;
2217 tree vec_dest;
2218 tree scalar_dest;
2219 tree op, type;
2220 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2221 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2222 tree vectype_out, vectype_in;
2223 int nunits_in;
2224 int nunits_out;
2225 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2226 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2227 tree fndecl, new_temp, def, rhs_type;
2228 gimple def_stmt;
2229 enum vect_def_type dt[3]
2230 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2231 gimple new_stmt = NULL;
2232 int ncopies, j;
2233 vec<tree> vargs = vNULL;
2234 enum { NARROW, NONE, WIDEN } modifier;
2235 size_t i, nargs;
2236 tree lhs;
2238 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2239 return false;
2241 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2242 return false;
2244 /* Is GS a vectorizable call? */
2245 stmt = dyn_cast <gcall *> (gs);
2246 if (!stmt)
2247 return false;
2249 if (gimple_call_internal_p (stmt)
2250 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2251 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2252 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2253 slp_node);
2255 if (gimple_call_lhs (stmt) == NULL_TREE
2256 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2257 return false;
2259 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2261 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2263 /* Process function arguments. */
2264 rhs_type = NULL_TREE;
2265 vectype_in = NULL_TREE;
2266 nargs = gimple_call_num_args (stmt);
2268 /* Bail out if the function has more than three arguments; we do not have
2269 interesting builtin functions to vectorize with more than two arguments
2270 except for fma. Zero arguments is not supported either. */
2271 if (nargs == 0 || nargs > 3)
2272 return false;
2274 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2275 if (gimple_call_internal_p (stmt)
2276 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2278 nargs = 0;
2279 rhs_type = unsigned_type_node;
2282 for (i = 0; i < nargs; i++)
2284 tree opvectype;
2286 op = gimple_call_arg (stmt, i);
2288 /* We can only handle calls with arguments of the same type. */
2289 if (rhs_type
2290 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2294 "argument types differ.\n");
2295 return false;
2297 if (!rhs_type)
2298 rhs_type = TREE_TYPE (op);
2300 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2301 &def_stmt, &def, &dt[i], &opvectype))
2303 if (dump_enabled_p ())
2304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2305 "use not simple.\n");
2306 return false;
2309 if (!vectype_in)
2310 vectype_in = opvectype;
2311 else if (opvectype
2312 && opvectype != vectype_in)
2314 if (dump_enabled_p ())
2315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2316 "argument vector types differ.\n");
2317 return false;
2320 /* If all arguments are external or constant defs, use a vector type with
2321 the same size as the output vector type. */
2322 if (!vectype_in)
2323 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2324 if (vec_stmt)
2325 gcc_assert (vectype_in);
2326 if (!vectype_in)
2328 if (dump_enabled_p ())
2330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2331 "no vectype for scalar type ");
2332 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2333 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2336 return false;
2339 /* FORNOW */
2340 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2341 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2342 if (nunits_in == nunits_out / 2)
2343 modifier = NARROW;
2344 else if (nunits_out == nunits_in)
2345 modifier = NONE;
2346 else if (nunits_out == nunits_in / 2)
2347 modifier = WIDEN;
2348 else
2349 return false;
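/* NARROW means the result vector holds twice as many elements as each
   input vector, so every vectorized call below consumes two vector
   definitions per scalar argument.  WIDEN would mean the opposite
   relation; no current target implements that case, so it is rejected
   during transformation.  */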
2351 /* For now, we only vectorize functions if a target specific builtin
2352 is available. TODO -- in some cases, it might be profitable to
2353 insert the calls for pieces of the vector, in order to be able
2354 to vectorize other operations in the loop. */
2355 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2356 if (fndecl == NULL_TREE)
2358 if (gimple_call_internal_p (stmt)
2359 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2360 && !slp_node
2361 && loop_vinfo
2362 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2363 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2364 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2365 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2367 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2368 { 0, 1, 2, ... vf - 1 } vector. */
2369 gcc_assert (nargs == 0);
2371 else
2373 if (dump_enabled_p ())
2374 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2375 "function is not vectorizable.\n");
2376 return false;
2380 gcc_assert (!gimple_vuse (stmt));
2382 if (slp_node || PURE_SLP_STMT (stmt_info))
2383 ncopies = 1;
2384 else if (modifier == NARROW)
2385 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2386 else
2387 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2389 /* Sanity check: make sure that at least one copy of the vectorized stmt
2390 needs to be generated. */
2391 gcc_assert (ncopies >= 1);
2393 if (!vec_stmt) /* transformation not required. */
2395 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2396 if (dump_enabled_p ())
2397 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2398 "\n");
2399 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2400 return true;
2403 /** Transform. **/
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2408 /* Handle def. */
2409 scalar_dest = gimple_call_lhs (stmt);
2410 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2412 prev_stmt_info = NULL;
2413 switch (modifier)
2415 case NONE:
2416 for (j = 0; j < ncopies; ++j)
2418 /* Build argument list for the vectorized call. */
2419 if (j == 0)
2420 vargs.create (nargs);
2421 else
2422 vargs.truncate (0);
2424 if (slp_node)
2426 auto_vec<vec<tree> > vec_defs (nargs);
2427 vec<tree> vec_oprnds0;
2429 for (i = 0; i < nargs; i++)
2430 vargs.quick_push (gimple_call_arg (stmt, i));
2431 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2432 vec_oprnds0 = vec_defs[0];
2434 /* Arguments are ready. Create the new vector stmt. */
2435 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2437 size_t k;
2438 for (k = 0; k < nargs; k++)
2440 vec<tree> vec_oprndsk = vec_defs[k];
2441 vargs[k] = vec_oprndsk[i];
2443 new_stmt = gimple_build_call_vec (fndecl, vargs);
2444 new_temp = make_ssa_name (vec_dest, new_stmt);
2445 gimple_call_set_lhs (new_stmt, new_temp);
2446 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2447 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2450 for (i = 0; i < nargs; i++)
2452 vec<tree> vec_oprndsi = vec_defs[i];
2453 vec_oprndsi.release ();
2455 continue;
2458 for (i = 0; i < nargs; i++)
2460 op = gimple_call_arg (stmt, i);
2461 if (j == 0)
2462 vec_oprnd0
2463 = vect_get_vec_def_for_operand (op, stmt, NULL);
2464 else
2466 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2467 vec_oprnd0
2468 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2471 vargs.quick_push (vec_oprnd0);
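/* IFN_GOMP_SIMD_LANE returns the current SIMD lane number, so copy J
   simply materializes the constant vector
   { j*nunits_out, ..., j*nunits_out + nunits_out - 1 }; across all
   copies this yields the lane numbers 0 .. vf - 1.  */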
2474 if (gimple_call_internal_p (stmt)
2475 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2477 tree *v = XALLOCAVEC (tree, nunits_out);
2478 int k;
2479 for (k = 0; k < nunits_out; ++k)
2480 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2481 tree cst = build_vector (vectype_out, v);
2482 tree new_var
2483 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2484 gimple init_stmt = gimple_build_assign (new_var, cst);
2485 new_temp = make_ssa_name (new_var, init_stmt);
2486 gimple_assign_set_lhs (init_stmt, new_temp);
2487 vect_init_vector_1 (stmt, init_stmt, NULL);
2488 new_temp = make_ssa_name (vec_dest);
2489 new_stmt = gimple_build_assign (new_temp,
2490 gimple_assign_lhs (init_stmt));
2492 else
2494 new_stmt = gimple_build_call_vec (fndecl, vargs);
2495 new_temp = make_ssa_name (vec_dest, new_stmt);
2496 gimple_call_set_lhs (new_stmt, new_temp);
2498 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2500 if (j == 0)
2501 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2502 else
2503 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2505 prev_stmt_info = vinfo_for_stmt (new_stmt);
2508 break;
2510 case NARROW:
2511 for (j = 0; j < ncopies; ++j)
2513 /* Build argument list for the vectorized call. */
2514 if (j == 0)
2515 vargs.create (nargs * 2);
2516 else
2517 vargs.truncate (0);
2519 if (slp_node)
2521 auto_vec<vec<tree> > vec_defs (nargs);
2522 vec<tree> vec_oprnds0;
2524 for (i = 0; i < nargs; i++)
2525 vargs.quick_push (gimple_call_arg (stmt, i));
2526 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2527 vec_oprnds0 = vec_defs[0];
2529 /* Arguments are ready. Create the new vector stmt. */
2530 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2532 size_t k;
2533 vargs.truncate (0);
2534 for (k = 0; k < nargs; k++)
2536 vec<tree> vec_oprndsk = vec_defs[k];
2537 vargs.quick_push (vec_oprndsk[i]);
2538 vargs.quick_push (vec_oprndsk[i + 1]);
2540 new_stmt = gimple_build_call_vec (fndecl, vargs);
2541 new_temp = make_ssa_name (vec_dest, new_stmt);
2542 gimple_call_set_lhs (new_stmt, new_temp);
2543 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2544 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2547 for (i = 0; i < nargs; i++)
2549 vec<tree> vec_oprndsi = vec_defs[i];
2550 vec_oprndsi.release ();
2552 continue;
2555 for (i = 0; i < nargs; i++)
2557 op = gimple_call_arg (stmt, i);
2558 if (j == 0)
2560 vec_oprnd0
2561 = vect_get_vec_def_for_operand (op, stmt, NULL);
2562 vec_oprnd1
2563 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2565 else
2567 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2568 vec_oprnd0
2569 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2570 vec_oprnd1
2571 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2574 vargs.quick_push (vec_oprnd0);
2575 vargs.quick_push (vec_oprnd1);
2578 new_stmt = gimple_build_call_vec (fndecl, vargs);
2579 new_temp = make_ssa_name (vec_dest, new_stmt);
2580 gimple_call_set_lhs (new_stmt, new_temp);
2581 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2583 if (j == 0)
2584 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2585 else
2586 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2588 prev_stmt_info = vinfo_for_stmt (new_stmt);
2591 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2593 break;
2595 case WIDEN:
2596 /* No current target implements this case. */
2597 return false;
2600 vargs.release ();
2602 /* The call in STMT might prevent it from being removed in dce.
2603 However, we cannot remove it here because of the way the ssa name
2604 it defines is mapped to the new definition. So just replace the
2605 rhs of the statement with something harmless. */
2607 if (slp_node)
2608 return true;
2610 type = TREE_TYPE (scalar_dest);
2611 if (is_pattern_stmt_p (stmt_info))
2612 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2613 else
2614 lhs = gimple_call_lhs (stmt);
2615 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2616 set_vinfo_for_stmt (new_stmt, stmt_info);
2617 set_vinfo_for_stmt (stmt, NULL);
2618 STMT_VINFO_STMT (stmt_info) = new_stmt;
2619 gsi_replace (gsi, new_stmt, false);
2621 return true;
2625 struct simd_call_arg_info
2627 tree vectype;
2628 tree op;
2629 enum vect_def_type dt;
2630 HOST_WIDE_INT linear_step;
2631 unsigned int align;
2634 /* Function vectorizable_simd_clone_call.
2636 Check if STMT performs a function call that can be vectorized
2637 by calling a simd clone of the function.
2638 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2639 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2640 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2642 static bool
2643 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2644 gimple *vec_stmt, slp_tree slp_node)
2646 tree vec_dest;
2647 tree scalar_dest;
2648 tree op, type;
2649 tree vec_oprnd0 = NULL_TREE;
2650 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2651 tree vectype;
2652 unsigned int nunits;
2653 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2654 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2655 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2656 tree fndecl, new_temp, def;
2657 gimple def_stmt;
2658 gimple new_stmt = NULL;
2659 int ncopies, j;
2660 vec<simd_call_arg_info> arginfo = vNULL;
2661 vec<tree> vargs = vNULL;
2662 size_t i, nargs;
2663 tree lhs, rtype, ratype;
2664 vec<constructor_elt, va_gc> *ret_ctor_elts;
2666 /* Is STMT a vectorizable call? */
2667 if (!is_gimple_call (stmt))
2668 return false;
2670 fndecl = gimple_call_fndecl (stmt);
2671 if (fndecl == NULL_TREE)
2672 return false;
2674 struct cgraph_node *node = cgraph_node::get (fndecl);
2675 if (node == NULL || node->simd_clones == NULL)
2676 return false;
2678 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2679 return false;
2681 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2682 return false;
2684 if (gimple_call_lhs (stmt)
2685 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2686 return false;
2688 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2690 vectype = STMT_VINFO_VECTYPE (stmt_info);
2692 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2693 return false;
2695 /* FORNOW */
2696 if (slp_node || PURE_SLP_STMT (stmt_info))
2697 return false;
2699 /* Process function arguments. */
2700 nargs = gimple_call_num_args (stmt);
2702 /* Bail out if the function has zero arguments. */
2703 if (nargs == 0)
2704 return false;
2706 arginfo.create (nargs);
2708 for (i = 0; i < nargs; i++)
2710 simd_call_arg_info thisarginfo;
2711 affine_iv iv;
2713 thisarginfo.linear_step = 0;
2714 thisarginfo.align = 0;
2715 thisarginfo.op = NULL_TREE;
2717 op = gimple_call_arg (stmt, i);
2718 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2719 &def_stmt, &def, &thisarginfo.dt,
2720 &thisarginfo.vectype)
2721 || thisarginfo.dt == vect_uninitialized_def)
2723 if (dump_enabled_p ())
2724 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2725 "use not simple.\n");
2726 arginfo.release ();
2727 return false;
2730 if (thisarginfo.dt == vect_constant_def
2731 || thisarginfo.dt == vect_external_def)
2732 gcc_assert (thisarginfo.vectype == NULL_TREE);
2733 else
2734 gcc_assert (thisarginfo.vectype != NULL_TREE);
2736 /* For linear arguments, the analyze phase should have saved
2737 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2738 if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2739 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2741 gcc_assert (vec_stmt);
2742 thisarginfo.linear_step
2743 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2744 thisarginfo.op
2745 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2746 /* If the loop has been peeled for alignment, we need to adjust the saved base accordingly. */
2747 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2748 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2749 if (n1 != n2)
2751 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2752 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
2753 tree opt = TREE_TYPE (thisarginfo.op);
2754 bias = fold_convert (TREE_TYPE (step), bias);
2755 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2756 thisarginfo.op
2757 = fold_build2 (POINTER_TYPE_P (opt)
2758 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2759 thisarginfo.op, bias);
2762 else if (!vec_stmt
2763 && thisarginfo.dt != vect_constant_def
2764 && thisarginfo.dt != vect_external_def
2765 && loop_vinfo
2766 && TREE_CODE (op) == SSA_NAME
2767 && simple_iv (loop, loop_containing_stmt (stmt), op,
2768 &iv, false)
2769 && tree_fits_shwi_p (iv.step))
2771 thisarginfo.linear_step = tree_to_shwi (iv.step);
2772 thisarginfo.op = iv.base;
2774 else if ((thisarginfo.dt == vect_constant_def
2775 || thisarginfo.dt == vect_external_def)
2776 && POINTER_TYPE_P (TREE_TYPE (op)))
2777 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2779 arginfo.quick_push (thisarginfo);
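/* Choose the best simd clone.  Clones whose simdlen exceeds the
   vectorization factor or whose argument count differs are skipped;
   a simdlen shorter than the vectorization factor, inbranch clones,
   the target's usable() verdict and argument kinds that need extra
   work all add to a badness score, while mismatched argument kinds
   or insufficiently aligned arguments disqualify a clone (signalled
   by setting I to -1).  The clone with the lowest badness wins.  */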
2782 unsigned int badness = 0;
2783 struct cgraph_node *bestn = NULL;
2784 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2785 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2786 else
2787 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2788 n = n->simdclone->next_clone)
2790 unsigned int this_badness = 0;
2791 if (n->simdclone->simdlen
2792 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2793 || n->simdclone->nargs != nargs)
2794 continue;
2795 if (n->simdclone->simdlen
2796 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2797 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2798 - exact_log2 (n->simdclone->simdlen)) * 1024;
2799 if (n->simdclone->inbranch)
2800 this_badness += 2048;
2801 int target_badness = targetm.simd_clone.usable (n);
2802 if (target_badness < 0)
2803 continue;
2804 this_badness += target_badness * 512;
2805 /* FORNOW: Code to pass the mask argument still needs to be added. */
2806 if (n->simdclone->inbranch)
2807 continue;
2808 for (i = 0; i < nargs; i++)
2810 switch (n->simdclone->args[i].arg_type)
2812 case SIMD_CLONE_ARG_TYPE_VECTOR:
2813 if (!useless_type_conversion_p
2814 (n->simdclone->args[i].orig_type,
2815 TREE_TYPE (gimple_call_arg (stmt, i))))
2816 i = -1;
2817 else if (arginfo[i].dt == vect_constant_def
2818 || arginfo[i].dt == vect_external_def
2819 || arginfo[i].linear_step)
2820 this_badness += 64;
2821 break;
2822 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2823 if (arginfo[i].dt != vect_constant_def
2824 && arginfo[i].dt != vect_external_def)
2825 i = -1;
2826 break;
2827 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2828 if (arginfo[i].dt == vect_constant_def
2829 || arginfo[i].dt == vect_external_def
2830 || (arginfo[i].linear_step
2831 != n->simdclone->args[i].linear_step))
2832 i = -1;
2833 break;
2834 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2835 /* FORNOW */
2836 i = -1;
2837 break;
2838 case SIMD_CLONE_ARG_TYPE_MASK:
2839 gcc_unreachable ();
2841 if (i == (size_t) -1)
2842 break;
2843 if (n->simdclone->args[i].alignment > arginfo[i].align)
2845 i = -1;
2846 break;
2848 if (arginfo[i].align)
2849 this_badness += (exact_log2 (arginfo[i].align)
2850 - exact_log2 (n->simdclone->args[i].alignment));
2852 if (i == (size_t) -1)
2853 continue;
2854 if (bestn == NULL || this_badness < badness)
2856 bestn = n;
2857 badness = this_badness;
2861 if (bestn == NULL)
2863 arginfo.release ();
2864 return false;
2867 for (i = 0; i < nargs; i++)
2868 if ((arginfo[i].dt == vect_constant_def
2869 || arginfo[i].dt == vect_external_def)
2870 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2872 arginfo[i].vectype
2873 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2874 i)));
2875 if (arginfo[i].vectype == NULL
2876 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2877 > bestn->simdclone->simdlen))
2879 arginfo.release ();
2880 return false;
2884 fndecl = bestn->decl;
2885 nunits = bestn->simdclone->simdlen;
2886 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2888 /* If the function isn't const, only allow it in simd loops where the user
2889 has asserted that at least nunits consecutive iterations can be
2890 performed using SIMD instructions. */
2891 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2892 && gimple_vuse (stmt))
2894 arginfo.release ();
2895 return false;
2898 /* Sanity check: make sure that at least one copy of the vectorized stmt
2899 needs to be generated. */
2900 gcc_assert (ncopies >= 1);
2902 if (!vec_stmt) /* transformation not required. */
2904 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2905 for (i = 0; i < nargs; i++)
2906 if (bestn->simdclone->args[i].arg_type
2907 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2909 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
2910 + 1);
2911 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2912 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2913 ? size_type_node : TREE_TYPE (arginfo[i].op);
2914 tree ls = build_int_cst (lst, arginfo[i].linear_step);
2915 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2917 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2918 if (dump_enabled_p ())
2919 dump_printf_loc (MSG_NOTE, vect_location,
2920 "=== vectorizable_simd_clone_call ===\n");
2921 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2922 arginfo.release ();
2923 return true;
2926 /** Transform. **/
2928 if (dump_enabled_p ())
2929 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2931 /* Handle def. */
2932 scalar_dest = gimple_call_lhs (stmt);
2933 vec_dest = NULL_TREE;
2934 rtype = NULL_TREE;
2935 ratype = NULL_TREE;
2936 if (scalar_dest)
2938 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2939 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2940 if (TREE_CODE (rtype) == ARRAY_TYPE)
2942 ratype = rtype;
2943 rtype = TREE_TYPE (ratype);
2947 prev_stmt_info = NULL;
2948 for (j = 0; j < ncopies; ++j)
2950 /* Build argument list for the vectorized call. */
2951 if (j == 0)
2952 vargs.create (nargs);
2953 else
2954 vargs.truncate (0);
2956 for (i = 0; i < nargs; i++)
2958 unsigned int k, l, m, o;
2959 tree atype;
2960 op = gimple_call_arg (stmt, i);
2961 switch (bestn->simdclone->args[i].arg_type)
2963 case SIMD_CLONE_ARG_TYPE_VECTOR:
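/* The clone may expect vector arguments of a different width than the
   loop's vector defs.  If the clone's vector type has fewer elements,
   extract the pieces with BIT_FIELD_REFs; if it has more, combine
   several defs into a CONSTRUCTOR.  O such argument vectors are pushed
   per call.  */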
2964 atype = bestn->simdclone->args[i].vector_type;
2965 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2966 for (m = j * o; m < (j + 1) * o; m++)
2968 if (TYPE_VECTOR_SUBPARTS (atype)
2969 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2971 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2972 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2973 / TYPE_VECTOR_SUBPARTS (atype));
2974 gcc_assert ((k & (k - 1)) == 0);
2975 if (m == 0)
2976 vec_oprnd0
2977 = vect_get_vec_def_for_operand (op, stmt, NULL);
2978 else
2980 vec_oprnd0 = arginfo[i].op;
2981 if ((m & (k - 1)) == 0)
2982 vec_oprnd0
2983 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2984 vec_oprnd0);
2986 arginfo[i].op = vec_oprnd0;
2987 vec_oprnd0
2988 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2989 size_int (prec),
2990 bitsize_int ((m & (k - 1)) * prec));
2991 new_stmt
2992 = gimple_build_assign (make_ssa_name (atype),
2993 vec_oprnd0);
2994 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2995 vargs.safe_push (gimple_assign_lhs (new_stmt));
2997 else
2999 k = (TYPE_VECTOR_SUBPARTS (atype)
3000 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3001 gcc_assert ((k & (k - 1)) == 0);
3002 vec<constructor_elt, va_gc> *ctor_elts;
3003 if (k != 1)
3004 vec_alloc (ctor_elts, k);
3005 else
3006 ctor_elts = NULL;
3007 for (l = 0; l < k; l++)
3009 if (m == 0 && l == 0)
3010 vec_oprnd0
3011 = vect_get_vec_def_for_operand (op, stmt, NULL);
3012 else
3013 vec_oprnd0
3014 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3015 arginfo[i].op);
3016 arginfo[i].op = vec_oprnd0;
3017 if (k == 1)
3018 break;
3019 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3020 vec_oprnd0);
3022 if (k == 1)
3023 vargs.safe_push (vec_oprnd0);
3024 else
3026 vec_oprnd0 = build_constructor (atype, ctor_elts);
3027 new_stmt
3028 = gimple_build_assign (make_ssa_name (atype),
3029 vec_oprnd0);
3030 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3031 vargs.safe_push (gimple_assign_lhs (new_stmt));
3035 break;
3036 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3037 vargs.safe_push (op);
3038 break;
3039 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
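/* A linear argument advances by a constant step per scalar iteration.
   For the first copy build a PHI in the loop header that starts from
   the recorded base and is bumped by linear_step * nunits * ncopies on
   the latch edge; later copies just add j * nunits * linear_step to the
   PHI result.  */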
3040 if (j == 0)
3042 gimple_seq stmts;
3043 arginfo[i].op
3044 = force_gimple_operand (arginfo[i].op, &stmts, true,
3045 NULL_TREE);
3046 if (stmts != NULL)
3048 basic_block new_bb;
3049 edge pe = loop_preheader_edge (loop);
3050 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3051 gcc_assert (!new_bb);
3053 tree phi_res = copy_ssa_name (op);
3054 gphi *new_phi = create_phi_node (phi_res, loop->header);
3055 set_vinfo_for_stmt (new_phi,
3056 new_stmt_vec_info (new_phi, loop_vinfo,
3057 NULL));
3058 add_phi_arg (new_phi, arginfo[i].op,
3059 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3060 enum tree_code code
3061 = POINTER_TYPE_P (TREE_TYPE (op))
3062 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3063 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3064 ? sizetype : TREE_TYPE (op);
3065 widest_int cst
3066 = wi::mul (bestn->simdclone->args[i].linear_step,
3067 ncopies * nunits);
3068 tree tcst = wide_int_to_tree (type, cst);
3069 tree phi_arg = copy_ssa_name (op);
3070 new_stmt
3071 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3072 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3073 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3074 set_vinfo_for_stmt (new_stmt,
3075 new_stmt_vec_info (new_stmt, loop_vinfo,
3076 NULL));
3077 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3078 UNKNOWN_LOCATION);
3079 arginfo[i].op = phi_res;
3080 vargs.safe_push (phi_res);
3082 else
3084 enum tree_code code
3085 = POINTER_TYPE_P (TREE_TYPE (op))
3086 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3087 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3088 ? sizetype : TREE_TYPE (op);
3089 widest_int cst
3090 = wi::mul (bestn->simdclone->args[i].linear_step,
3091 j * nunits);
3092 tree tcst = wide_int_to_tree (type, cst);
3093 new_temp = make_ssa_name (TREE_TYPE (op));
3094 new_stmt = gimple_build_assign (new_temp, code,
3095 arginfo[i].op, tcst);
3096 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3097 vargs.safe_push (new_temp);
3099 break;
3100 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3101 default:
3102 gcc_unreachable ();
3106 new_stmt = gimple_build_call_vec (fndecl, vargs);
3107 if (vec_dest)
3109 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3110 if (ratype)
3111 new_temp = create_tmp_var (ratype);
3112 else if (TYPE_VECTOR_SUBPARTS (vectype)
3113 == TYPE_VECTOR_SUBPARTS (rtype))
3114 new_temp = make_ssa_name (vec_dest, new_stmt);
3115 else
3116 new_temp = make_ssa_name (rtype, new_stmt);
3117 gimple_call_set_lhs (new_stmt, new_temp);
3119 vect_finish_stmt_generation (stmt, new_stmt, gsi);
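/* Distribute the clone's return value to vector defs of VECTYPE.  If the
   clone returns an array of vectors (RATYPE), load each piece out of the
   array and clobber the temporary afterwards.  If the returned vector is
   wider than VECTYPE, split it with BIT_FIELD_REFs; if it is narrower,
   collect the results of several calls into a CONSTRUCTOR and emit the
   VECTYPE def only for the last call of each group.  */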
3121 if (vec_dest)
3123 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3125 unsigned int k, l;
3126 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3127 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3128 gcc_assert ((k & (k - 1)) == 0);
3129 for (l = 0; l < k; l++)
3131 tree t;
3132 if (ratype)
3134 t = build_fold_addr_expr (new_temp);
3135 t = build2 (MEM_REF, vectype, t,
3136 build_int_cst (TREE_TYPE (t),
3137 l * prec / BITS_PER_UNIT));
3139 else
3140 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3141 size_int (prec), bitsize_int (l * prec));
3142 new_stmt
3143 = gimple_build_assign (make_ssa_name (vectype), t);
3144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3145 if (j == 0 && l == 0)
3146 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3147 else
3148 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3150 prev_stmt_info = vinfo_for_stmt (new_stmt);
3153 if (ratype)
3155 tree clobber = build_constructor (ratype, NULL);
3156 TREE_THIS_VOLATILE (clobber) = 1;
3157 new_stmt = gimple_build_assign (new_temp, clobber);
3158 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3160 continue;
3162 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3164 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3165 / TYPE_VECTOR_SUBPARTS (rtype));
3166 gcc_assert ((k & (k - 1)) == 0);
3167 if ((j & (k - 1)) == 0)
3168 vec_alloc (ret_ctor_elts, k);
3169 if (ratype)
3171 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3172 for (m = 0; m < o; m++)
3174 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3175 size_int (m), NULL_TREE, NULL_TREE);
3176 new_stmt
3177 = gimple_build_assign (make_ssa_name (rtype), tem);
3178 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3179 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3180 gimple_assign_lhs (new_stmt));
3182 tree clobber = build_constructor (ratype, NULL);
3183 TREE_THIS_VOLATILE (clobber) = 1;
3184 new_stmt = gimple_build_assign (new_temp, clobber);
3185 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3187 else
3188 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3189 if ((j & (k - 1)) != k - 1)
3190 continue;
3191 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3192 new_stmt
3193 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3194 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3196 if ((unsigned) j == k - 1)
3197 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3198 else
3199 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3201 prev_stmt_info = vinfo_for_stmt (new_stmt);
3202 continue;
3204 else if (ratype)
3206 tree t = build_fold_addr_expr (new_temp);
3207 t = build2 (MEM_REF, vectype, t,
3208 build_int_cst (TREE_TYPE (t), 0));
3209 new_stmt
3210 = gimple_build_assign (make_ssa_name (vec_dest), t);
3211 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3212 tree clobber = build_constructor (ratype, NULL);
3213 TREE_THIS_VOLATILE (clobber) = 1;
3214 vect_finish_stmt_generation (stmt,
3215 gimple_build_assign (new_temp,
3216 clobber), gsi);
3220 if (j == 0)
3221 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3222 else
3223 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3225 prev_stmt_info = vinfo_for_stmt (new_stmt);
3228 vargs.release ();
3230 /* The call in STMT might prevent it from being removed in dce.
3231 However, we cannot remove it here because of the way the ssa name
3232 it defines is mapped to the new definition. So just replace the
3233 rhs of the statement with something harmless. */
3235 if (slp_node)
3236 return true;
3238 if (scalar_dest)
3240 type = TREE_TYPE (scalar_dest);
3241 if (is_pattern_stmt_p (stmt_info))
3242 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3243 else
3244 lhs = gimple_call_lhs (stmt);
3245 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3247 else
3248 new_stmt = gimple_build_nop ();
3249 set_vinfo_for_stmt (new_stmt, stmt_info);
3250 set_vinfo_for_stmt (stmt, NULL);
3251 STMT_VINFO_STMT (stmt_info) = new_stmt;
3252 gsi_replace (gsi, new_stmt, true);
3253 unlink_stmt_vdef (stmt);
3255 return true;
3259 /* Function vect_gen_widened_results_half
3261 Create a vector stmt whose code, number of operands, and result
3262 variable are CODE, OP_TYPE and VEC_DEST, and whose arguments are
3263 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3264 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3265 needs to be created (DECL is a function-decl of a target-builtin).
3266 STMT is the original scalar stmt that we are vectorizing. */
3268 static gimple
3269 vect_gen_widened_results_half (enum tree_code code,
3270 tree decl,
3271 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3272 tree vec_dest, gimple_stmt_iterator *gsi,
3273 gimple stmt)
3275 gimple new_stmt;
3276 tree new_temp;
3278 /* Generate half of the widened result: */
3279 if (code == CALL_EXPR)
3281 /* Target specific support */
3282 if (op_type == binary_op)
3283 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3284 else
3285 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3286 new_temp = make_ssa_name (vec_dest, new_stmt);
3287 gimple_call_set_lhs (new_stmt, new_temp);
3289 else
3291 /* Generic support */
3292 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3293 if (op_type != binary_op)
3294 vec_oprnd1 = NULL;
3295 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3296 new_temp = make_ssa_name (vec_dest, new_stmt);
3297 gimple_assign_set_lhs (new_stmt, new_temp);
3299 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3301 return new_stmt;
3305 /* Get vectorized definitions for loop-based vectorization. For the first
3306 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3307 scalar operand), and for the rest we get a copy with
3308 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3309 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3310 The vectors are collected into VEC_OPRNDS. */
3312 static void
3313 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3314 vec<tree> *vec_oprnds, int multi_step_cvt)
3316 tree vec_oprnd;
3318 /* Get first vector operand. */
3319 /* All the vector operands except the very first one (which is the scalar operand)
3320 are stmt copies. */
3321 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3322 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3323 else
3324 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3326 vec_oprnds->quick_push (vec_oprnd);
3328 /* Get second vector operand. */
3329 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3330 vec_oprnds->quick_push (vec_oprnd);
3332 *oprnd = vec_oprnd;
3334 /* For conversion in multiple steps, continue to get operands
3335 recursively. */
3336 if (multi_step_cvt)
3337 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3341 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3342 For multi-step conversions store the resulting vectors and call the function
3343 recursively. */
3345 static void
3346 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3347 int multi_step_cvt, gimple stmt,
3348 vec<tree> vec_dsts,
3349 gimple_stmt_iterator *gsi,
3350 slp_tree slp_node, enum tree_code code,
3351 stmt_vec_info *prev_stmt_info)
3353 unsigned int i;
3354 tree vop0, vop1, new_tmp, vec_dest;
3355 gimple new_stmt;
3356 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3358 vec_dest = vec_dsts.pop ();
3360 for (i = 0; i < vec_oprnds->length (); i += 2)
3362 /* Create demotion operation. */
3363 vop0 = (*vec_oprnds)[i];
3364 vop1 = (*vec_oprnds)[i + 1];
3365 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3366 new_tmp = make_ssa_name (vec_dest, new_stmt);
3367 gimple_assign_set_lhs (new_stmt, new_tmp);
3368 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3370 if (multi_step_cvt)
3371 /* Store the resulting vector for next recursive call. */
3372 (*vec_oprnds)[i/2] = new_tmp;
3373 else
3375 /* This is the last step of the conversion sequence. Store the
3376 vectors in SLP_NODE or in vector info of the scalar statement
3377 (or in STMT_VINFO_RELATED_STMT chain). */
3378 if (slp_node)
3379 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3380 else
3382 if (!*prev_stmt_info)
3383 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3384 else
3385 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3387 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3392 /* For multi-step demotion operations we first generate demotion operations
3393 from the source type to the intermediate types, and then combine the
3394 results (stored in VEC_OPRNDS) with a demotion operation to the
3395 destination type. */
3396 if (multi_step_cvt)
3398 /* At each level of recursion we have half of the operands we had at the
3399 previous level. */
3400 vec_oprnds->truncate ((i+1)/2);
3401 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3402 stmt, vec_dsts, gsi, slp_node,
3403 VEC_PACK_TRUNC_EXPR,
3404 prev_stmt_info);
3407 vec_dsts.quick_push (vec_dest);
3411 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3412 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3413 the resulting vectors and call the function recursively. */
3415 static void
3416 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3417 vec<tree> *vec_oprnds1,
3418 gimple stmt, tree vec_dest,
3419 gimple_stmt_iterator *gsi,
3420 enum tree_code code1,
3421 enum tree_code code2, tree decl1,
3422 tree decl2, int op_type)
3424 int i;
3425 tree vop0, vop1, new_tmp1, new_tmp2;
3426 gimple new_stmt1, new_stmt2;
3427 vec<tree> vec_tmp = vNULL;
3429 vec_tmp.create (vec_oprnds0->length () * 2);
3430 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3432 if (op_type == binary_op)
3433 vop1 = (*vec_oprnds1)[i];
3434 else
3435 vop1 = NULL_TREE;
3437 /* Generate the two halves of promotion operation. */
3438 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3439 op_type, vec_dest, gsi, stmt);
3440 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3441 op_type, vec_dest, gsi, stmt);
3442 if (is_gimple_call (new_stmt1))
3444 new_tmp1 = gimple_call_lhs (new_stmt1);
3445 new_tmp2 = gimple_call_lhs (new_stmt2);
3447 else
3449 new_tmp1 = gimple_assign_lhs (new_stmt1);
3450 new_tmp2 = gimple_assign_lhs (new_stmt2);
3453 /* Store the results for the next step. */
3454 vec_tmp.quick_push (new_tmp1);
3455 vec_tmp.quick_push (new_tmp2);
3458 vec_oprnds0->release ();
3459 *vec_oprnds0 = vec_tmp;
3463 /* Check if STMT performs a conversion operation that can be vectorized.
3464 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3465 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3466 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3468 static bool
3469 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3470 gimple *vec_stmt, slp_tree slp_node)
3472 tree vec_dest;
3473 tree scalar_dest;
3474 tree op0, op1 = NULL_TREE;
3475 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3476 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3477 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3478 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3479 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3480 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3481 tree new_temp;
3482 tree def;
3483 gimple def_stmt;
3484 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3485 gimple new_stmt = NULL;
3486 stmt_vec_info prev_stmt_info;
3487 int nunits_in;
3488 int nunits_out;
3489 tree vectype_out, vectype_in;
3490 int ncopies, i, j;
3491 tree lhs_type, rhs_type;
3492 enum { NARROW, NONE, WIDEN } modifier;
3493 vec<tree> vec_oprnds0 = vNULL;
3494 vec<tree> vec_oprnds1 = vNULL;
3495 tree vop0;
3496 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3497 int multi_step_cvt = 0;
3498 vec<tree> vec_dsts = vNULL;
3499 vec<tree> interm_types = vNULL;
3500 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3501 int op_type;
3502 machine_mode rhs_mode;
3503 unsigned short fltsz;
3505 /* Is STMT a vectorizable conversion? */
3507 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3508 return false;
3510 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3511 return false;
3513 if (!is_gimple_assign (stmt))
3514 return false;
3516 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3517 return false;
3519 code = gimple_assign_rhs_code (stmt);
3520 if (!CONVERT_EXPR_CODE_P (code)
3521 && code != FIX_TRUNC_EXPR
3522 && code != FLOAT_EXPR
3523 && code != WIDEN_MULT_EXPR
3524 && code != WIDEN_LSHIFT_EXPR)
3525 return false;
3527 op_type = TREE_CODE_LENGTH (code);
3529 /* Check types of lhs and rhs. */
3530 scalar_dest = gimple_assign_lhs (stmt);
3531 lhs_type = TREE_TYPE (scalar_dest);
3532 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3534 op0 = gimple_assign_rhs1 (stmt);
3535 rhs_type = TREE_TYPE (op0);
3537 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3538 && !((INTEGRAL_TYPE_P (lhs_type)
3539 && INTEGRAL_TYPE_P (rhs_type))
3540 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3541 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3542 return false;
3544 if ((INTEGRAL_TYPE_P (lhs_type)
3545 && (TYPE_PRECISION (lhs_type)
3546 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3547 || (INTEGRAL_TYPE_P (rhs_type)
3548 && (TYPE_PRECISION (rhs_type)
3549 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3551 if (dump_enabled_p ())
3552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3553 "type conversion to/from bit-precision unsupported."
3554 "\n");
3555 return false;
3558 /* Check the operands of the operation. */
3559 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3560 &def_stmt, &def, &dt[0], &vectype_in))
3562 if (dump_enabled_p ())
3563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3564 "use not simple.\n");
3565 return false;
3567 if (op_type == binary_op)
3569 bool ok;
3571 op1 = gimple_assign_rhs2 (stmt);
3572 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3573 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3574 OP1. */
3575 if (CONSTANT_CLASS_P (op0))
3576 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3577 &def_stmt, &def, &dt[1], &vectype_in);
3578 else
3579 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3580 &def, &dt[1]);
3582 if (!ok)
3584 if (dump_enabled_p ())
3585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3586 "use not simple.\n");
3587 return false;
3591 /* If op0 is an external or constant def, use a vector type of
3592 the same size as the output vector type. */
3593 if (!vectype_in)
3594 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3595 if (vec_stmt)
3596 gcc_assert (vectype_in);
3597 if (!vectype_in)
3599 if (dump_enabled_p ())
3601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3602 "no vectype for scalar type ");
3603 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3604 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3607 return false;
3610 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3611 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3612 if (nunits_in < nunits_out)
3613 modifier = NARROW;
3614 else if (nunits_out == nunits_in)
3615 modifier = NONE;
3616 else
3617 modifier = WIDEN;
3619 /* Multiple types in SLP are handled by creating the appropriate number of
3620 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3621 case of SLP. */
3622 if (slp_node || PURE_SLP_STMT (stmt_info))
3623 ncopies = 1;
3624 else if (modifier == NARROW)
3625 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3626 else
3627 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3629 /* Sanity check: make sure that at least one copy of the vectorized stmt
3630 needs to be generated. */
3631 gcc_assert (ncopies >= 1);
3633 /* Supportable by target? */
3634 switch (modifier)
3636 case NONE:
3637 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3638 return false;
3639 if (supportable_convert_operation (code, vectype_out, vectype_in,
3640 &decl1, &code1))
3641 break;
3642 /* FALLTHRU */
3643 unsupported:
3644 if (dump_enabled_p ())
3645 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3646 "conversion not supported by target.\n");
3647 return false;
3649 case WIDEN:
3650 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3651 &code1, &code2, &multi_step_cvt,
3652 &interm_types))
3654 /* Binary widening operations can only be supported directly by the
3655 architecture. */
3656 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3657 break;
3660 if (code != FLOAT_EXPR
3661 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3662 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3663 goto unsupported;
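/* A FLOAT_EXPR that widens by more than one step is attempted as an
   integer widening to an intermediate integer type followed by an
   int-to-float conversion; e.g. short -> double may become short -> int
   (NOP_EXPR widening) and then int -> double, provided the target
   supports both pieces.  The loop below searches for a suitable
   intermediate mode no wider than the float type.  */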
3665 rhs_mode = TYPE_MODE (rhs_type);
3666 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3667 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3668 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3669 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3671 cvt_type
3672 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3673 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3674 if (cvt_type == NULL_TREE)
3675 goto unsupported;
3677 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3679 if (!supportable_convert_operation (code, vectype_out,
3680 cvt_type, &decl1, &codecvt1))
3681 goto unsupported;
3683 else if (!supportable_widening_operation (code, stmt, vectype_out,
3684 cvt_type, &codecvt1,
3685 &codecvt2, &multi_step_cvt,
3686 &interm_types))
3687 continue;
3688 else
3689 gcc_assert (multi_step_cvt == 0);
3691 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3692 vectype_in, &code1, &code2,
3693 &multi_step_cvt, &interm_types))
3694 break;
3697 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3698 goto unsupported;
3700 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3701 codecvt2 = ERROR_MARK;
3702 else
3704 multi_step_cvt++;
3705 interm_types.safe_push (cvt_type);
3706 cvt_type = NULL_TREE;
3708 break;
3710 case NARROW:
3711 gcc_assert (op_type == unary_op);
3712 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3713 &code1, &multi_step_cvt,
3714 &interm_types))
3715 break;
3717 if (code != FIX_TRUNC_EXPR
3718 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3719 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3720 goto unsupported;
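/* A FIX_TRUNC_EXPR that narrows by more than one step is attempted as a
   conversion to an integer type of the source's width followed by
   integer narrowing; e.g. double -> short may become double -> a 64-bit
   integer (FIX_TRUNC_EXPR) and then a (possibly multi-step) integer
   narrowing to short, provided the target supports both pieces.  */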
3722 rhs_mode = TYPE_MODE (rhs_type);
3723 cvt_type
3724 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3725 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3726 if (cvt_type == NULL_TREE)
3727 goto unsupported;
3728 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3729 &decl1, &codecvt1))
3730 goto unsupported;
3731 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3732 &code1, &multi_step_cvt,
3733 &interm_types))
3734 break;
3735 goto unsupported;
3737 default:
3738 gcc_unreachable ();
3741 if (!vec_stmt) /* transformation not required. */
3743 if (dump_enabled_p ())
3744 dump_printf_loc (MSG_NOTE, vect_location,
3745 "=== vectorizable_conversion ===\n");
3746 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3748 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3749 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3751 else if (modifier == NARROW)
3753 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3754 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3756 else
3758 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3759 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3761 interm_types.release ();
3762 return true;
3765 /** Transform. **/
3766 if (dump_enabled_p ())
3767 dump_printf_loc (MSG_NOTE, vect_location,
3768 "transform conversion. ncopies = %d.\n", ncopies);
3770 if (op_type == binary_op)
3772 if (CONSTANT_CLASS_P (op0))
3773 op0 = fold_convert (TREE_TYPE (op1), op0);
3774 else if (CONSTANT_CLASS_P (op1))
3775 op1 = fold_convert (TREE_TYPE (op0), op1);
3778 /* In case of multi-step conversion, we first generate conversion operations
3779    to the intermediate types, and then from those types to the final one.
3780 We create vector destinations for the intermediate type (TYPES) received
3781 from supportable_*_operation, and store them in the correct order
3782 for future use in vect_create_vectorized_*_stmts (). */
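  /* For example (an illustrative sketch only): a short -> double conversion
     may first widen the short elements to a wider integer vector and only
     then perform the integer -> double conversion, so one destination
     variable is created per step.  */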
3783 vec_dsts.create (multi_step_cvt + 1);
3784 vec_dest = vect_create_destination_var (scalar_dest,
3785 (cvt_type && modifier == WIDEN)
3786 ? cvt_type : vectype_out);
3787 vec_dsts.quick_push (vec_dest);
3789 if (multi_step_cvt)
3791 for (i = interm_types.length () - 1;
3792 interm_types.iterate (i, &intermediate_type); i--)
3794 vec_dest = vect_create_destination_var (scalar_dest,
3795 intermediate_type);
3796 vec_dsts.quick_push (vec_dest);
3800 if (cvt_type)
3801 vec_dest = vect_create_destination_var (scalar_dest,
3802 modifier == WIDEN
3803 ? vectype_out : cvt_type);
3805 if (!slp_node)
3807 if (modifier == WIDEN)
3809 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3810 if (op_type == binary_op)
3811 vec_oprnds1.create (1);
3813 else if (modifier == NARROW)
3814 vec_oprnds0.create (
3815 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3817 else if (code == WIDEN_LSHIFT_EXPR)
3818 vec_oprnds1.create (slp_node->vec_stmts_size);
3820 last_oprnd = op0;
3821 prev_stmt_info = NULL;
3822 switch (modifier)
3824 case NONE:
3825 for (j = 0; j < ncopies; j++)
3827 if (j == 0)
3828 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3829 -1);
3830 else
3831 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3833 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3835 /* Arguments are ready, create the new vector stmt. */
3836 if (code1 == CALL_EXPR)
3838 new_stmt = gimple_build_call (decl1, 1, vop0);
3839 new_temp = make_ssa_name (vec_dest, new_stmt);
3840 gimple_call_set_lhs (new_stmt, new_temp);
3842 else
3844 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3845 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3846 new_temp = make_ssa_name (vec_dest, new_stmt);
3847 gimple_assign_set_lhs (new_stmt, new_temp);
3850 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3851 if (slp_node)
3852 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3855 if (j == 0)
3856 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3857 else
3858 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3859 prev_stmt_info = vinfo_for_stmt (new_stmt);
3861 break;
3863 case WIDEN:
3864 /* In case the vectorization factor (VF) is bigger than the number
3865 of elements that we can fit in a vectype (nunits), we have to
3866         generate more than one vector stmt - i.e. - we need to "unroll"
3867 the vector stmt by a factor VF/nunits. */
3868 for (j = 0; j < ncopies; j++)
3870 /* Handle uses. */
3871 if (j == 0)
3873 if (slp_node)
3875 if (code == WIDEN_LSHIFT_EXPR)
3877 unsigned int k;
3879 vec_oprnd1 = op1;
3880 /* Store vec_oprnd1 for every vector stmt to be created
3881 for SLP_NODE. We check during the analysis that all
3882 the shift arguments are the same. */
3883 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3884 vec_oprnds1.quick_push (vec_oprnd1);
3886 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3887 slp_node, -1);
3889 else
3890 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3891 &vec_oprnds1, slp_node, -1);
3893 else
3895 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3896 vec_oprnds0.quick_push (vec_oprnd0);
3897 if (op_type == binary_op)
3899 if (code == WIDEN_LSHIFT_EXPR)
3900 vec_oprnd1 = op1;
3901 else
3902 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3903 NULL);
3904 vec_oprnds1.quick_push (vec_oprnd1);
3908 else
3910 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3911 vec_oprnds0.truncate (0);
3912 vec_oprnds0.quick_push (vec_oprnd0);
3913 if (op_type == binary_op)
3915 if (code == WIDEN_LSHIFT_EXPR)
3916 vec_oprnd1 = op1;
3917 else
3918 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3919 vec_oprnd1);
3920 vec_oprnds1.truncate (0);
3921 vec_oprnds1.quick_push (vec_oprnd1);
3925 /* Arguments are ready. Create the new vector stmts. */
3926 for (i = multi_step_cvt; i >= 0; i--)
3928 tree this_dest = vec_dsts[i];
3929 enum tree_code c1 = code1, c2 = code2;
3930 if (i == 0 && codecvt2 != ERROR_MARK)
3932 c1 = codecvt1;
3933 c2 = codecvt2;
3935 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3936 &vec_oprnds1,
3937 stmt, this_dest, gsi,
3938 c1, c2, decl1, decl2,
3939 op_type);
3942 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3944 if (cvt_type)
3946 if (codecvt1 == CALL_EXPR)
3948 new_stmt = gimple_build_call (decl1, 1, vop0);
3949 new_temp = make_ssa_name (vec_dest, new_stmt);
3950 gimple_call_set_lhs (new_stmt, new_temp);
3952 else
3954 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3955 new_temp = make_ssa_name (vec_dest);
3956 new_stmt = gimple_build_assign (new_temp, codecvt1,
3957 vop0);
3960 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3962 else
3963 new_stmt = SSA_NAME_DEF_STMT (vop0);
3965 if (slp_node)
3966 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3968 if (!prev_stmt_info)
3969 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3970 else
3971 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3972 prev_stmt_info = vinfo_for_stmt (new_stmt);
3976 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3977 break;
3979 case NARROW:
3980 /* In case the vectorization factor (VF) is bigger than the number
3981 of elements that we can fit in a vectype (nunits), we have to
3982         generate more than one vector stmt - i.e. - we need to "unroll"
3983 the vector stmt by a factor VF/nunits. */
3984 for (j = 0; j < ncopies; j++)
3986 /* Handle uses. */
3987 if (slp_node)
3988 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3989 slp_node, -1);
3990 else
3992 vec_oprnds0.truncate (0);
3993 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3994 vect_pow2 (multi_step_cvt) - 1);
3997 /* Arguments are ready. Create the new vector stmts. */
3998 if (cvt_type)
3999 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4001 if (codecvt1 == CALL_EXPR)
4003 new_stmt = gimple_build_call (decl1, 1, vop0);
4004 new_temp = make_ssa_name (vec_dest, new_stmt);
4005 gimple_call_set_lhs (new_stmt, new_temp);
4007 else
4009 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4010 new_temp = make_ssa_name (vec_dest);
4011 new_stmt = gimple_build_assign (new_temp, codecvt1,
4012 vop0);
4015 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4016 vec_oprnds0[i] = new_temp;
4019 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4020 stmt, vec_dsts, gsi,
4021 slp_node, code1,
4022 &prev_stmt_info);
4025 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4026 break;
4029 vec_oprnds0.release ();
4030 vec_oprnds1.release ();
4031 vec_dsts.release ();
4032 interm_types.release ();
4034 return true;
4038 /* Function vectorizable_assignment.
4040 Check if STMT performs an assignment (copy) that can be vectorized.
4041 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4042 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4043 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4045 static bool
4046 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4047 gimple *vec_stmt, slp_tree slp_node)
4049 tree vec_dest;
4050 tree scalar_dest;
4051 tree op;
4052 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4053 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4054 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4055 tree new_temp;
4056 tree def;
4057 gimple def_stmt;
4058 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4059 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4060 int ncopies;
4061 int i, j;
4062 vec<tree> vec_oprnds = vNULL;
4063 tree vop;
4064 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4065 gimple new_stmt = NULL;
4066 stmt_vec_info prev_stmt_info = NULL;
4067 enum tree_code code;
4068 tree vectype_in;
4070 /* Multiple types in SLP are handled by creating the appropriate number of
4071 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4072 case of SLP. */
4073 if (slp_node || PURE_SLP_STMT (stmt_info))
4074 ncopies = 1;
4075 else
4076 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4078 gcc_assert (ncopies >= 1);
4080 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4081 return false;
4083 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4084 return false;
4086 /* Is vectorizable assignment? */
4087 if (!is_gimple_assign (stmt))
4088 return false;
4090 scalar_dest = gimple_assign_lhs (stmt);
4091 if (TREE_CODE (scalar_dest) != SSA_NAME)
4092 return false;
4094 code = gimple_assign_rhs_code (stmt);
4095 if (gimple_assign_single_p (stmt)
4096 || code == PAREN_EXPR
4097 || CONVERT_EXPR_CODE_P (code))
4098 op = gimple_assign_rhs1 (stmt);
4099 else
4100 return false;
4102 if (code == VIEW_CONVERT_EXPR)
4103 op = TREE_OPERAND (op, 0);
4105 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4106 &def_stmt, &def, &dt[0], &vectype_in))
4108 if (dump_enabled_p ())
4109 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4110 "use not simple.\n");
4111 return false;
4114 /* We can handle NOP_EXPR conversions that do not change the number
4115 of elements or the vector size. */
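  /* For instance (illustrative only): an unsigned int -> int conversion
     keeps both the element count and the vector size and can be handled
     here, whereas an int -> short conversion changes both and cannot.  */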
4116 if ((CONVERT_EXPR_CODE_P (code)
4117 || code == VIEW_CONVERT_EXPR)
4118 && (!vectype_in
4119 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4120 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4121 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4122 return false;
4124 /* We do not handle bit-precision changes. */
4125 if ((CONVERT_EXPR_CODE_P (code)
4126 || code == VIEW_CONVERT_EXPR)
4127 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4128 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4129 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4130 || ((TYPE_PRECISION (TREE_TYPE (op))
4131 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4132 /* But a conversion that does not change the bit-pattern is ok. */
4133 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4134 > TYPE_PRECISION (TREE_TYPE (op)))
4135 && TYPE_UNSIGNED (TREE_TYPE (op))))
4137 if (dump_enabled_p ())
4138 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4139 "type conversion to/from bit-precision "
4140 "unsupported.\n");
4141 return false;
4144 if (!vec_stmt) /* transformation not required. */
4146 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4147 if (dump_enabled_p ())
4148 dump_printf_loc (MSG_NOTE, vect_location,
4149 "=== vectorizable_assignment ===\n");
4150 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4151 return true;
4154 /** Transform. **/
4155 if (dump_enabled_p ())
4156 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4158 /* Handle def. */
4159 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4161 /* Handle use. */
4162 for (j = 0; j < ncopies; j++)
4164 /* Handle uses. */
4165 if (j == 0)
4166 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4167 else
4168 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4170       /* Arguments are ready. Create the new vector stmt. */
4171 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4173 if (CONVERT_EXPR_CODE_P (code)
4174 || code == VIEW_CONVERT_EXPR)
4175 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4176 new_stmt = gimple_build_assign (vec_dest, vop);
4177 new_temp = make_ssa_name (vec_dest, new_stmt);
4178 gimple_assign_set_lhs (new_stmt, new_temp);
4179 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4180 if (slp_node)
4181 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4184 if (slp_node)
4185 continue;
4187 if (j == 0)
4188 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4189 else
4190 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4192 prev_stmt_info = vinfo_for_stmt (new_stmt);
4195 vec_oprnds.release ();
4196 return true;
4200 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4201 either as shift by a scalar or by a vector. */
4203 bool
4204 vect_supportable_shift (enum tree_code code, tree scalar_type)
4207 machine_mode vec_mode;
4208 optab optab;
4209 int icode;
4210 tree vectype;
4212 vectype = get_vectype_for_scalar_type (scalar_type);
4213 if (!vectype)
4214 return false;
4216 optab = optab_for_tree_code (code, vectype, optab_scalar);
4217 if (!optab
4218 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4220 optab = optab_for_tree_code (code, vectype, optab_vector);
4221 if (!optab
4222 || (optab_handler (optab, TYPE_MODE (vectype))
4223 == CODE_FOR_nothing))
4224 return false;
4227 vec_mode = TYPE_MODE (vectype);
4228 icode = (int) optab_handler (optab, vec_mode);
4229 if (icode == CODE_FOR_nothing)
4230 return false;
4232 return true;
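
/* Example (an illustrative usage sketch; INT_TYPE stands for any scalar
   integer type): vect_supportable_shift (RSHIFT_EXPR, INT_TYPE) returns
   true iff the target can shift a vector of such elements either by a
   scalar amount or by a vector of amounts.  */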
4236 /* Function vectorizable_shift.
4238 Check if STMT performs a shift operation that can be vectorized.
4239 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4240 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4241 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4243 static bool
4244 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4245 gimple *vec_stmt, slp_tree slp_node)
4247 tree vec_dest;
4248 tree scalar_dest;
4249 tree op0, op1 = NULL;
4250 tree vec_oprnd1 = NULL_TREE;
4251 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4252 tree vectype;
4253 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4254 enum tree_code code;
4255 machine_mode vec_mode;
4256 tree new_temp;
4257 optab optab;
4258 int icode;
4259 machine_mode optab_op2_mode;
4260 tree def;
4261 gimple def_stmt;
4262 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4263 gimple new_stmt = NULL;
4264 stmt_vec_info prev_stmt_info;
4265 int nunits_in;
4266 int nunits_out;
4267 tree vectype_out;
4268 tree op1_vectype;
4269 int ncopies;
4270 int j, i;
4271 vec<tree> vec_oprnds0 = vNULL;
4272 vec<tree> vec_oprnds1 = vNULL;
4273 tree vop0, vop1;
4274 unsigned int k;
4275 bool scalar_shift_arg = true;
4276 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4277 int vf;
4279 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4280 return false;
4282 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4283 return false;
4285 /* Is STMT a vectorizable binary/unary operation? */
4286 if (!is_gimple_assign (stmt))
4287 return false;
4289 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4290 return false;
4292 code = gimple_assign_rhs_code (stmt);
4294 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4295 || code == RROTATE_EXPR))
4296 return false;
4298 scalar_dest = gimple_assign_lhs (stmt);
4299 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4300 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4301 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4303 if (dump_enabled_p ())
4304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4305 "bit-precision shifts not supported.\n");
4306 return false;
4309 op0 = gimple_assign_rhs1 (stmt);
4310 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4311 &def_stmt, &def, &dt[0], &vectype))
4313 if (dump_enabled_p ())
4314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4315 "use not simple.\n");
4316 return false;
4318   /* If op0 is an external or constant def, use a vector type with
4319 the same size as the output vector type. */
4320 if (!vectype)
4321 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4322 if (vec_stmt)
4323 gcc_assert (vectype);
4324 if (!vectype)
4326 if (dump_enabled_p ())
4327 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4328 "no vectype for scalar type\n");
4329 return false;
4332 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4333 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4334 if (nunits_out != nunits_in)
4335 return false;
4337 op1 = gimple_assign_rhs2 (stmt);
4338 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4339 &def, &dt[1], &op1_vectype))
4341 if (dump_enabled_p ())
4342 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4343 "use not simple.\n");
4344 return false;
4347 if (loop_vinfo)
4348 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4349 else
4350 vf = 1;
4352 /* Multiple types in SLP are handled by creating the appropriate number of
4353 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4354 case of SLP. */
4355 if (slp_node || PURE_SLP_STMT (stmt_info))
4356 ncopies = 1;
4357 else
4358 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4360 gcc_assert (ncopies >= 1);
4362 /* Determine whether the shift amount is a vector, or scalar. If the
4363 shift/rotate amount is a vector, use the vector/vector shift optabs. */
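  /* For example (illustrative): 'a[i] << b[i]' needs a vector/vector
     shift, while 'a[i] << n' with a loop-invariant N can be done as a
     vector/scalar shift.  */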
4365 if (dt[1] == vect_internal_def && !slp_node)
4366 scalar_shift_arg = false;
4367 else if (dt[1] == vect_constant_def
4368 || dt[1] == vect_external_def
4369 || dt[1] == vect_internal_def)
4371 /* In SLP, need to check whether the shift count is the same,
4372 in loops if it is a constant or invariant, it is always
4373 a scalar shift. */
4374 if (slp_node)
4376 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4377 gimple slpstmt;
4379 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4380 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4381 scalar_shift_arg = false;
4384 else
4386 if (dump_enabled_p ())
4387 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4388 "operand mode requires invariant argument.\n");
4389 return false;
4392 /* Vector shifted by vector. */
4393 if (!scalar_shift_arg)
4395 optab = optab_for_tree_code (code, vectype, optab_vector);
4396 if (dump_enabled_p ())
4397 dump_printf_loc (MSG_NOTE, vect_location,
4398 "vector/vector shift/rotate found.\n");
4400 if (!op1_vectype)
4401 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4402 if (op1_vectype == NULL_TREE
4403 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4405 if (dump_enabled_p ())
4406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4407 "unusable type for last operand in"
4408 " vector/vector shift/rotate.\n");
4409 return false;
4412 /* See if the machine has a vector shifted by scalar insn and if not
4413 then see if it has a vector shifted by vector insn. */
4414 else
4416 optab = optab_for_tree_code (code, vectype, optab_scalar);
4417 if (optab
4418 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4420 if (dump_enabled_p ())
4421 dump_printf_loc (MSG_NOTE, vect_location,
4422 "vector/scalar shift/rotate found.\n");
4424 else
4426 optab = optab_for_tree_code (code, vectype, optab_vector);
4427 if (optab
4428 && (optab_handler (optab, TYPE_MODE (vectype))
4429 != CODE_FOR_nothing))
4431 scalar_shift_arg = false;
4433 if (dump_enabled_p ())
4434 dump_printf_loc (MSG_NOTE, vect_location,
4435 "vector/vector shift/rotate found.\n");
4437 /* Unlike the other binary operators, shifts/rotates have
4438 the rhs being int, instead of the same type as the lhs,
4439 so make sure the scalar is the right type if we are
4440 dealing with vectors of long long/long/short/char. */
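          /* E.g. (illustrative): when a vector of long long is shifted by
             the int constant 3, the constant is converted to (long long) 3
             so that the vector/vector shift sees matching element types.  */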
4441 if (dt[1] == vect_constant_def)
4442 op1 = fold_convert (TREE_TYPE (vectype), op1);
4443 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4444 TREE_TYPE (op1)))
4446 if (slp_node
4447 && TYPE_MODE (TREE_TYPE (vectype))
4448 != TYPE_MODE (TREE_TYPE (op1)))
4450 if (dump_enabled_p ())
4451 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4452 "unusable type for last operand in"
4453 " vector/vector shift/rotate.\n");
4454 return false;
4456 if (vec_stmt && !slp_node)
4458 op1 = fold_convert (TREE_TYPE (vectype), op1);
4459 op1 = vect_init_vector (stmt, op1,
4460 TREE_TYPE (vectype), NULL);
4467 /* Supportable by target? */
4468 if (!optab)
4470 if (dump_enabled_p ())
4471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4472 "no optab.\n");
4473 return false;
4475 vec_mode = TYPE_MODE (vectype);
4476 icode = (int) optab_handler (optab, vec_mode);
4477 if (icode == CODE_FOR_nothing)
4479 if (dump_enabled_p ())
4480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4481 "op not supported by target.\n");
4482 /* Check only during analysis. */
4483 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4484 || (vf < vect_min_worthwhile_factor (code)
4485 && !vec_stmt))
4486 return false;
4487 if (dump_enabled_p ())
4488 dump_printf_loc (MSG_NOTE, vect_location,
4489 "proceeding using word mode.\n");
4492 /* Worthwhile without SIMD support? Check only during analysis. */
4493 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4494 && vf < vect_min_worthwhile_factor (code)
4495 && !vec_stmt)
4497 if (dump_enabled_p ())
4498 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4499 "not worthwhile without SIMD support.\n");
4500 return false;
4503 if (!vec_stmt) /* transformation not required. */
4505 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4506 if (dump_enabled_p ())
4507 dump_printf_loc (MSG_NOTE, vect_location,
4508 "=== vectorizable_shift ===\n");
4509 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4510 return true;
4513 /** Transform. **/
4515 if (dump_enabled_p ())
4516 dump_printf_loc (MSG_NOTE, vect_location,
4517 "transform binary/unary operation.\n");
4519 /* Handle def. */
4520 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4522 prev_stmt_info = NULL;
4523 for (j = 0; j < ncopies; j++)
4525 /* Handle uses. */
4526 if (j == 0)
4528 if (scalar_shift_arg)
4530 /* Vector shl and shr insn patterns can be defined with scalar
4531 operand 2 (shift operand). In this case, use constant or loop
4532 invariant op1 directly, without extending it to vector mode
4533 first. */
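              /* E.g. (illustrative): on a target whose vector shift insn
                 takes the count in a scalar operand, a loop-invariant OP1
                 is used directly as the shift amount and reused unchanged
                 for every copy.  */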
4534 optab_op2_mode = insn_data[icode].operand[2].mode;
4535 if (!VECTOR_MODE_P (optab_op2_mode))
4537 if (dump_enabled_p ())
4538 dump_printf_loc (MSG_NOTE, vect_location,
4539 "operand 1 using scalar mode.\n");
4540 vec_oprnd1 = op1;
4541 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4542 vec_oprnds1.quick_push (vec_oprnd1);
4543 if (slp_node)
4545 /* Store vec_oprnd1 for every vector stmt to be created
4546 for SLP_NODE. We check during the analysis that all
4547 the shift arguments are the same.
4548 TODO: Allow different constants for different vector
4549 stmts generated for an SLP instance. */
4550 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4551 vec_oprnds1.quick_push (vec_oprnd1);
4556       /* vec_oprnd1 is available if operand 1 should be of a scalar type
4557          (a special case for certain kinds of vector shifts); otherwise,
4558 operand 1 should be of a vector type (the usual case). */
4559 if (vec_oprnd1)
4560 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4561 slp_node, -1);
4562 else
4563 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4564 slp_node, -1);
4566 else
4567 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4569 /* Arguments are ready. Create the new vector stmt. */
4570 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4572 vop1 = vec_oprnds1[i];
4573 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4574 new_temp = make_ssa_name (vec_dest, new_stmt);
4575 gimple_assign_set_lhs (new_stmt, new_temp);
4576 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4577 if (slp_node)
4578 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4581 if (slp_node)
4582 continue;
4584 if (j == 0)
4585 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4586 else
4587 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4588 prev_stmt_info = vinfo_for_stmt (new_stmt);
4591 vec_oprnds0.release ();
4592 vec_oprnds1.release ();
4594 return true;
4598 /* Function vectorizable_operation.
4600 Check if STMT performs a binary, unary or ternary operation that can
4601 be vectorized.
4602 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4603 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4604 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4606 static bool
4607 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4608 gimple *vec_stmt, slp_tree slp_node)
4610 tree vec_dest;
4611 tree scalar_dest;
4612 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4613 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4614 tree vectype;
4615 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4616 enum tree_code code;
4617 machine_mode vec_mode;
4618 tree new_temp;
4619 int op_type;
4620 optab optab;
4621 int icode;
4622 tree def;
4623 gimple def_stmt;
4624 enum vect_def_type dt[3]
4625 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4626 gimple new_stmt = NULL;
4627 stmt_vec_info prev_stmt_info;
4628 int nunits_in;
4629 int nunits_out;
4630 tree vectype_out;
4631 int ncopies;
4632 int j, i;
4633 vec<tree> vec_oprnds0 = vNULL;
4634 vec<tree> vec_oprnds1 = vNULL;
4635 vec<tree> vec_oprnds2 = vNULL;
4636 tree vop0, vop1, vop2;
4637 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4638 int vf;
4640 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4641 return false;
4643 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4644 return false;
4646 /* Is STMT a vectorizable binary/unary operation? */
4647 if (!is_gimple_assign (stmt))
4648 return false;
4650 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4651 return false;
4653 code = gimple_assign_rhs_code (stmt);
4655 /* For pointer addition, we should use the normal plus for
4656 the vector addition. */
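  /* For example (illustrative, hypothetical SSA names): the scalar
     statement q_2 = p_1 + 4 (a POINTER_PLUS_EXPR) is vectorized with an
     ordinary element-wise PLUS_EXPR on the vectorized operands.  */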
4657 if (code == POINTER_PLUS_EXPR)
4658 code = PLUS_EXPR;
4660   /* Support only unary, binary or ternary operations. */
4661 op_type = TREE_CODE_LENGTH (code);
4662 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4664 if (dump_enabled_p ())
4665 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4666 "num. args = %d (not unary/binary/ternary op).\n",
4667 op_type);
4668 return false;
4671 scalar_dest = gimple_assign_lhs (stmt);
4672 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4674 /* Most operations cannot handle bit-precision types without extra
4675 truncations. */
4676 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4677 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4678 /* Exception are bitwise binary operations. */
4679 && code != BIT_IOR_EXPR
4680 && code != BIT_XOR_EXPR
4681 && code != BIT_AND_EXPR)
4683 if (dump_enabled_p ())
4684 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4685 "bit-precision arithmetic not supported.\n");
4686 return false;
4689 op0 = gimple_assign_rhs1 (stmt);
4690 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4691 &def_stmt, &def, &dt[0], &vectype))
4693 if (dump_enabled_p ())
4694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4695 "use not simple.\n");
4696 return false;
4698   /* If op0 is an external or constant def, use a vector type with
4699 the same size as the output vector type. */
4700 if (!vectype)
4701 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4702 if (vec_stmt)
4703 gcc_assert (vectype);
4704 if (!vectype)
4706 if (dump_enabled_p ())
4708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4709 "no vectype for scalar type ");
4710 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4711 TREE_TYPE (op0));
4712 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4715 return false;
4718 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4719 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4720 if (nunits_out != nunits_in)
4721 return false;
4723 if (op_type == binary_op || op_type == ternary_op)
4725 op1 = gimple_assign_rhs2 (stmt);
4726 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4727 &def, &dt[1]))
4729 if (dump_enabled_p ())
4730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4731 "use not simple.\n");
4732 return false;
4735 if (op_type == ternary_op)
4737 op2 = gimple_assign_rhs3 (stmt);
4738 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4739 &def, &dt[2]))
4741 if (dump_enabled_p ())
4742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4743 "use not simple.\n");
4744 return false;
4748 if (loop_vinfo)
4749 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4750 else
4751 vf = 1;
4753 /* Multiple types in SLP are handled by creating the appropriate number of
4754 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4755 case of SLP. */
4756 if (slp_node || PURE_SLP_STMT (stmt_info))
4757 ncopies = 1;
4758 else
4759 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4761 gcc_assert (ncopies >= 1);
4763 /* Shifts are handled in vectorizable_shift (). */
4764 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4765 || code == RROTATE_EXPR)
4766 return false;
4768 /* Supportable by target? */
4770 vec_mode = TYPE_MODE (vectype);
4771 if (code == MULT_HIGHPART_EXPR)
4773 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4774 icode = LAST_INSN_CODE;
4775 else
4776 icode = CODE_FOR_nothing;
4778 else
4780 optab = optab_for_tree_code (code, vectype, optab_default);
4781 if (!optab)
4783 if (dump_enabled_p ())
4784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4785 "no optab.\n");
4786 return false;
4788 icode = (int) optab_handler (optab, vec_mode);
4791 if (icode == CODE_FOR_nothing)
4793 if (dump_enabled_p ())
4794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4795 "op not supported by target.\n");
4796 /* Check only during analysis. */
4797 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4798 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4799 return false;
4800 if (dump_enabled_p ())
4801 dump_printf_loc (MSG_NOTE, vect_location,
4802 "proceeding using word mode.\n");
4805 /* Worthwhile without SIMD support? Check only during analysis. */
4806 if (!VECTOR_MODE_P (vec_mode)
4807 && !vec_stmt
4808 && vf < vect_min_worthwhile_factor (code))
4810 if (dump_enabled_p ())
4811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4812 "not worthwhile without SIMD support.\n");
4813 return false;
4816 if (!vec_stmt) /* transformation not required. */
4818 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4819 if (dump_enabled_p ())
4820 dump_printf_loc (MSG_NOTE, vect_location,
4821 "=== vectorizable_operation ===\n");
4822 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4823 return true;
4826 /** Transform. **/
4828 if (dump_enabled_p ())
4829 dump_printf_loc (MSG_NOTE, vect_location,
4830 "transform binary/unary operation.\n");
4832 /* Handle def. */
4833 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4835 /* In case the vectorization factor (VF) is bigger than the number
4836 of elements that we can fit in a vectype (nunits), we have to generate
4837      more than one vector stmt - i.e. - we need to "unroll" the
4838 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4839 from one copy of the vector stmt to the next, in the field
4840 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4841 stages to find the correct vector defs to be used when vectorizing
4842 stmts that use the defs of the current stmt. The example below
4843 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4844 we need to create 4 vectorized stmts):
4846 before vectorization:
4847 RELATED_STMT VEC_STMT
4848 S1: x = memref - -
4849 S2: z = x + 1 - -
4851 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4852 there):
4853 RELATED_STMT VEC_STMT
4854 VS1_0: vx0 = memref0 VS1_1 -
4855 VS1_1: vx1 = memref1 VS1_2 -
4856 VS1_2: vx2 = memref2 VS1_3 -
4857 VS1_3: vx3 = memref3 - -
4858 S1: x = load - VS1_0
4859 S2: z = x + 1 - -
4861 step2: vectorize stmt S2 (done here):
4862 To vectorize stmt S2 we first need to find the relevant vector
4863 def for the first operand 'x'. This is, as usual, obtained from
4864 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4865 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4866 relevant vector def 'vx0'. Having found 'vx0' we can generate
4867 the vector stmt VS2_0, and as usual, record it in the
4868 STMT_VINFO_VEC_STMT of stmt S2.
4869 When creating the second copy (VS2_1), we obtain the relevant vector
4870 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4871 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4872 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4873 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4874 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4875 chain of stmts and pointers:
4876 RELATED_STMT VEC_STMT
4877 VS1_0: vx0 = memref0 VS1_1 -
4878 VS1_1: vx1 = memref1 VS1_2 -
4879 VS1_2: vx2 = memref2 VS1_3 -
4880 VS1_3: vx3 = memref3 - -
4881 S1: x = load - VS1_0
4882 VS2_0: vz0 = vx0 + v1 VS2_1 -
4883 VS2_1: vz1 = vx1 + v1 VS2_2 -
4884 VS2_2: vz2 = vx2 + v1 VS2_3 -
4885 VS2_3: vz3 = vx3 + v1 - -
4886 S2: z = x + 1 - VS2_0 */
4888 prev_stmt_info = NULL;
4889 for (j = 0; j < ncopies; j++)
4891 /* Handle uses. */
4892 if (j == 0)
4894 if (op_type == binary_op || op_type == ternary_op)
4895 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4896 slp_node, -1);
4897 else
4898 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4899 slp_node, -1);
4900 if (op_type == ternary_op)
4902 vec_oprnds2.create (1);
4903 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4904 stmt,
4905 NULL));
4908 else
4910 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4911 if (op_type == ternary_op)
4913 tree vec_oprnd = vec_oprnds2.pop ();
4914 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4915 vec_oprnd));
4919 /* Arguments are ready. Create the new vector stmt. */
4920 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4922 vop1 = ((op_type == binary_op || op_type == ternary_op)
4923 ? vec_oprnds1[i] : NULL_TREE);
4924 vop2 = ((op_type == ternary_op)
4925 ? vec_oprnds2[i] : NULL_TREE);
4926 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4927 new_temp = make_ssa_name (vec_dest, new_stmt);
4928 gimple_assign_set_lhs (new_stmt, new_temp);
4929 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4930 if (slp_node)
4931 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4934 if (slp_node)
4935 continue;
4937 if (j == 0)
4938 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4939 else
4940 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4941 prev_stmt_info = vinfo_for_stmt (new_stmt);
4944 vec_oprnds0.release ();
4945 vec_oprnds1.release ();
4946 vec_oprnds2.release ();
4948 return true;
4951 /* A helper function to ensure data reference DR's base alignment
4952 for STMT_INFO. */
4954 static void
4955 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4957 if (!dr->aux)
4958 return;
4960 if (((dataref_aux *)dr->aux)->base_misaligned)
4962 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4963 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4965 if (decl_in_symtab_p (base_decl))
4966 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
4967 else
4969 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4970 DECL_USER_ALIGN (base_decl) = 1;
4972 ((dataref_aux *)dr->aux)->base_misaligned = false;
4977 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4978 reversal of the vector elements. If that is impossible to do,
4979 returns NULL. */
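/* For example (illustrative): for a vector type with four elements the
   mask built below is {3, 2, 1, 0}, i.e. the element order is reversed.  */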
4981 static tree
4982 perm_mask_for_reverse (tree vectype)
4984 int i, nunits;
4985 unsigned char *sel;
4987 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4988 sel = XALLOCAVEC (unsigned char, nunits);
4990 for (i = 0; i < nunits; ++i)
4991 sel[i] = nunits - 1 - i;
4993 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4994 return NULL_TREE;
4995 return vect_gen_perm_mask_checked (vectype, sel);
4998 /* Function vectorizable_store.
5000    Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5001 can be vectorized.
5002 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5003 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5004 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5006 static bool
5007 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5008 slp_tree slp_node)
5010 tree scalar_dest;
5011 tree data_ref;
5012 tree op;
5013 tree vec_oprnd = NULL_TREE;
5014 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5015 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5016 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5017 tree elem_type;
5018 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5019 struct loop *loop = NULL;
5020 machine_mode vec_mode;
5021 tree dummy;
5022 enum dr_alignment_support alignment_support_scheme;
5023 tree def;
5024 gimple def_stmt;
5025 enum vect_def_type dt;
5026 stmt_vec_info prev_stmt_info = NULL;
5027 tree dataref_ptr = NULL_TREE;
5028 tree dataref_offset = NULL_TREE;
5029 gimple ptr_incr = NULL;
5030 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5031 int ncopies;
5032 int j;
5033 gimple next_stmt, first_stmt = NULL;
5034 bool grouped_store = false;
5035 bool store_lanes_p = false;
5036 unsigned int group_size, i;
5037 vec<tree> dr_chain = vNULL;
5038 vec<tree> oprnds = vNULL;
5039 vec<tree> result_chain = vNULL;
5040 bool inv_p;
5041 bool negative = false;
5042 tree offset = NULL_TREE;
5043 vec<tree> vec_oprnds = vNULL;
5044 bool slp = (slp_node != NULL);
5045 unsigned int vec_num;
5046 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5047 tree aggr_type;
5049 if (loop_vinfo)
5050 loop = LOOP_VINFO_LOOP (loop_vinfo);
5052 /* Multiple types in SLP are handled by creating the appropriate number of
5053 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5054 case of SLP. */
5055 if (slp || PURE_SLP_STMT (stmt_info))
5056 ncopies = 1;
5057 else
5058 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5060 gcc_assert (ncopies >= 1);
5062 /* FORNOW. This restriction should be relaxed. */
5063 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5065 if (dump_enabled_p ())
5066 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5067 "multiple types in nested loop.\n");
5068 return false;
5071 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5072 return false;
5074 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5075 return false;
5077 /* Is vectorizable store? */
5079 if (!is_gimple_assign (stmt))
5080 return false;
5082 scalar_dest = gimple_assign_lhs (stmt);
5083 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5084 && is_pattern_stmt_p (stmt_info))
5085 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5086 if (TREE_CODE (scalar_dest) != ARRAY_REF
5087 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5088 && TREE_CODE (scalar_dest) != INDIRECT_REF
5089 && TREE_CODE (scalar_dest) != COMPONENT_REF
5090 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5091 && TREE_CODE (scalar_dest) != REALPART_EXPR
5092 && TREE_CODE (scalar_dest) != MEM_REF)
5093 return false;
5095 gcc_assert (gimple_assign_single_p (stmt));
5096 op = gimple_assign_rhs1 (stmt);
5097 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5098 &def, &dt))
5100 if (dump_enabled_p ())
5101 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5102 "use not simple.\n");
5103 return false;
5106 elem_type = TREE_TYPE (vectype);
5107 vec_mode = TYPE_MODE (vectype);
5109 /* FORNOW. In some cases can vectorize even if data-type not supported
5110 (e.g. - array initialization with 0). */
5111 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5112 return false;
5114 if (!STMT_VINFO_DATA_REF (stmt_info))
5115 return false;
5117 if (!STMT_VINFO_STRIDED_P (stmt_info))
5119 negative =
5120 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5121 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5122 size_zero_node) < 0;
5123 if (negative && ncopies > 1)
5125 if (dump_enabled_p ())
5126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5127 "multiple types with negative step.\n");
5128 return false;
5130 if (negative)
5132 gcc_assert (!grouped_store);
5133 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5134 if (alignment_support_scheme != dr_aligned
5135 && alignment_support_scheme != dr_unaligned_supported)
5137 if (dump_enabled_p ())
5138 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5139 "negative step but alignment required.\n");
5140 return false;
5142 if (dt != vect_constant_def
5143 && dt != vect_external_def
5144 && !perm_mask_for_reverse (vectype))
5146 if (dump_enabled_p ())
5147 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5148 "negative step and reversing not supported.\n");
5149 return false;
5154 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5156 grouped_store = true;
5157 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5158 if (!slp && !PURE_SLP_STMT (stmt_info))
5160 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5161 if (vect_store_lanes_supported (vectype, group_size))
5162 store_lanes_p = true;
5163 else if (!vect_grouped_store_supported (vectype, group_size))
5164 return false;
5167 if (first_stmt == stmt)
5169 /* STMT is the leader of the group. Check the operands of all the
5170 stmts of the group. */
5171 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5172 while (next_stmt)
5174 gcc_assert (gimple_assign_single_p (next_stmt));
5175 op = gimple_assign_rhs1 (next_stmt);
5176 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5177 &def_stmt, &def, &dt))
5179 if (dump_enabled_p ())
5180 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5181 "use not simple.\n");
5182 return false;
5184 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5189 if (!vec_stmt) /* transformation not required. */
5191 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5192 /* The SLP costs are calculated during SLP analysis. */
5193 if (!PURE_SLP_STMT (stmt_info))
5194 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5195 NULL, NULL, NULL);
5196 return true;
5199 /** Transform. **/
5201 ensure_base_align (stmt_info, dr);
5203 if (grouped_store)
5205 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5206 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5208 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5210 /* FORNOW */
5211 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5213 /* We vectorize all the stmts of the interleaving group when we
5214 reach the last stmt in the group. */
5215 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5216 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5217 && !slp)
5219 *vec_stmt = NULL;
5220 return true;
5223 if (slp)
5225 grouped_store = false;
5226 /* VEC_NUM is the number of vect stmts to be created for this
5227 group. */
5228 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5229 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5230 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5231 op = gimple_assign_rhs1 (first_stmt);
5233 else
5234 /* VEC_NUM is the number of vect stmts to be created for this
5235 group. */
5236 vec_num = group_size;
5238 else
5240 first_stmt = stmt;
5241 first_dr = dr;
5242 group_size = vec_num = 1;
5245 if (dump_enabled_p ())
5246 dump_printf_loc (MSG_NOTE, vect_location,
5247 "transform store. ncopies = %d\n", ncopies);
5249 if (STMT_VINFO_STRIDED_P (stmt_info))
5251 gimple_stmt_iterator incr_gsi;
5252 bool insert_after;
5253 gimple incr;
5254 tree offvar;
5255 tree ivstep;
5256 tree running_off;
5257 gimple_seq stmts = NULL;
5258 tree stride_base, stride_step, alias_off;
5259 tree vec_oprnd;
5261 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5263 stride_base
5264 = fold_build_pointer_plus
5265 (unshare_expr (DR_BASE_ADDRESS (dr)),
5266 size_binop (PLUS_EXPR,
5267 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
5268                    convert_to_ptrofftype (DR_INIT (dr))));
5269 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
5271 /* For a store with loop-invariant (but other than power-of-2)
5272 stride (i.e. not a grouped access) like so:
5274 for (i = 0; i < n; i += stride)
5275 array[i] = ...;
5277 we generate a new induction variable and new stores from
5278 the components of the (vectorized) rhs:
5280 for (j = 0; ; j += VF*stride)
5281 vectemp = ...;
5282 tmp1 = vectemp[0];
5283 array[j] = tmp1;
5284 tmp2 = vectemp[1];
5285 array[j + stride] = tmp2;
5289 ivstep = stride_step;
5290 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5291 build_int_cst (TREE_TYPE (ivstep),
5292 ncopies * nunits));
5294 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5296 create_iv (stride_base, ivstep, NULL,
5297 loop, &incr_gsi, insert_after,
5298 &offvar, NULL);
5299 incr = gsi_stmt (incr_gsi);
5300 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
5302 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5303 if (stmts)
5304 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5306 prev_stmt_info = NULL;
5307 running_off = offvar;
5308 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
5309 for (j = 0; j < ncopies; j++)
5311          /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
5312 and first_stmt == stmt. */
5313 if (j == 0)
5314 vec_oprnd = vect_get_vec_def_for_operand (op, first_stmt, NULL);
5315 else
5316 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5318 for (i = 0; i < nunits; i++)
5320 tree newref, newoff;
5321 gimple incr, assign;
5322 tree size = TYPE_SIZE (elem_type);
5323 /* Extract the i'th component. */
5324 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
5325 size);
5326 tree elem = fold_build3 (BIT_FIELD_REF, elem_type, vec_oprnd,
5327 size, pos);
5329 elem = force_gimple_operand_gsi (gsi, elem, true,
5330 NULL_TREE, true,
5331 GSI_SAME_STMT);
5333 newref = build2 (MEM_REF, TREE_TYPE (vectype),
5334 running_off, alias_off);
5336 /* And store it to *running_off. */
5337 assign = gimple_build_assign (newref, elem);
5338 vect_finish_stmt_generation (stmt, assign, gsi);
5340 newoff = copy_ssa_name (running_off, NULL);
5341 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5342 running_off, stride_step);
5343 vect_finish_stmt_generation (stmt, incr, gsi);
5345 running_off = newoff;
5346              if (j == 0 && i == 0)
5347 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
5348 else
5349 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5350 prev_stmt_info = vinfo_for_stmt (assign);
5353 return true;
5356 dr_chain.create (group_size);
5357 oprnds.create (group_size);
5359 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5360 gcc_assert (alignment_support_scheme);
5361 /* Targets with store-lane instructions must not require explicit
5362 realignment. */
5363 gcc_assert (!store_lanes_p
5364 || alignment_support_scheme == dr_aligned
5365 || alignment_support_scheme == dr_unaligned_supported);
5367 if (negative)
5368 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5370 if (store_lanes_p)
5371 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5372 else
5373 aggr_type = vectype;
5375 /* In case the vectorization factor (VF) is bigger than the number
5376 of elements that we can fit in a vectype (nunits), we have to generate
5377      more than one vector stmt - i.e. - we need to "unroll" the
5378 vector stmt by a factor VF/nunits. For more details see documentation in
5379 vect_get_vec_def_for_copy_stmt. */
5381 /* In case of interleaving (non-unit grouped access):
5383 S1: &base + 2 = x2
5384 S2: &base = x0
5385 S3: &base + 1 = x1
5386 S4: &base + 3 = x3
5388 We create vectorized stores starting from base address (the access of the
5389 first stmt in the chain (S2 in the above example), when the last store stmt
5390 of the chain (S4) is reached:
5392 VS1: &base = vx2
5393 VS2: &base + vec_size*1 = vx0
5394 VS3: &base + vec_size*2 = vx1
5395 VS4: &base + vec_size*3 = vx3
5397 Then permutation statements are generated:
5399 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5400 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5403 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5404 (the order of the data-refs in the output of vect_permute_store_chain
5405 corresponds to the order of scalar stmts in the interleaving chain - see
5406 the documentation of vect_permute_store_chain()).
5408 In case of both multiple types and interleaving, above vector stores and
5409 permutation stmts are created for every copy. The result vector stmts are
5410 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5411 STMT_VINFO_RELATED_STMT for the next copies.
5414 prev_stmt_info = NULL;
5415 for (j = 0; j < ncopies; j++)
5417 gimple new_stmt;
5419 if (j == 0)
5421 if (slp)
5423 /* Get vectorized arguments for SLP_NODE. */
5424 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5425 NULL, slp_node, -1);
5427 vec_oprnd = vec_oprnds[0];
5429 else
5431 /* For interleaved stores we collect vectorized defs for all the
5432 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5433 used as an input to vect_permute_store_chain(), and OPRNDS as
5434 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5436 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5437 OPRNDS are of size 1. */
5438 next_stmt = first_stmt;
5439 for (i = 0; i < group_size; i++)
5441 /* Since gaps are not supported for interleaved stores,
5442 GROUP_SIZE is the exact number of stmts in the chain.
5443 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5444 there is no interleaving, GROUP_SIZE is 1, and only one
5445 iteration of the loop will be executed. */
5446 gcc_assert (next_stmt
5447 && gimple_assign_single_p (next_stmt));
5448 op = gimple_assign_rhs1 (next_stmt);
5450 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5451 NULL);
5452 dr_chain.quick_push (vec_oprnd);
5453 oprnds.quick_push (vec_oprnd);
5454 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5458          /* We should have caught mismatched types earlier. */
5459 gcc_assert (useless_type_conversion_p (vectype,
5460 TREE_TYPE (vec_oprnd)));
5461 bool simd_lane_access_p
5462 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5463 if (simd_lane_access_p
5464 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5465 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5466 && integer_zerop (DR_OFFSET (first_dr))
5467 && integer_zerop (DR_INIT (first_dr))
5468 && alias_sets_conflict_p (get_alias_set (aggr_type),
5469 get_alias_set (DR_REF (first_dr))))
5471 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5472 dataref_offset = build_int_cst (reference_alias_ptr_type
5473 (DR_REF (first_dr)), 0);
5474 inv_p = false;
5476 else
5477 dataref_ptr
5478 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5479 simd_lane_access_p ? loop : NULL,
5480 offset, &dummy, gsi, &ptr_incr,
5481 simd_lane_access_p, &inv_p);
5482 gcc_assert (bb_vinfo || !inv_p);
5484 else
5486 /* For interleaved stores we created vectorized defs for all the
5487 defs stored in OPRNDS in the previous iteration (previous copy).
5488 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5489 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5490 next copy.
5491 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5492 OPRNDS are of size 1. */
5493 for (i = 0; i < group_size; i++)
5495 op = oprnds[i];
5496 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5497 &def, &dt);
5498 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5499 dr_chain[i] = vec_oprnd;
5500 oprnds[i] = vec_oprnd;
5502 if (dataref_offset)
5503 dataref_offset
5504 = int_const_binop (PLUS_EXPR, dataref_offset,
5505 TYPE_SIZE_UNIT (aggr_type));
5506 else
5507 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5508 TYPE_SIZE_UNIT (aggr_type));
5511 if (store_lanes_p)
5513 tree vec_array;
5515 /* Combine all the vectors into an array. */
5516 vec_array = create_vector_array (vectype, vec_num);
5517 for (i = 0; i < vec_num; i++)
5519 vec_oprnd = dr_chain[i];
5520 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5523 /* Emit:
5524 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5525 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5526 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5527 gimple_call_set_lhs (new_stmt, data_ref);
5528 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5530 else
5532 new_stmt = NULL;
5533 if (grouped_store)
5535 if (j == 0)
5536 result_chain.create (group_size);
5537 /* Permute. */
5538 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5539 &result_chain);
5542 next_stmt = first_stmt;
5543 for (i = 0; i < vec_num; i++)
5545 unsigned align, misalign;
5547 if (i > 0)
5548 /* Bump the vector pointer. */
5549 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5550 stmt, NULL_TREE);
5552 if (slp)
5553 vec_oprnd = vec_oprnds[i];
5554 else if (grouped_store)
5555 /* For grouped stores vectorized defs are interleaved in
5556 vect_permute_store_chain(). */
5557 vec_oprnd = result_chain[i];
5559 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5560 dataref_offset
5561 ? dataref_offset
5562 : build_int_cst (reference_alias_ptr_type
5563 (DR_REF (first_dr)), 0));
5564 align = TYPE_ALIGN_UNIT (vectype);
5565 if (aligned_access_p (first_dr))
5566 misalign = 0;
5567 else if (DR_MISALIGNMENT (first_dr) == -1)
5569 TREE_TYPE (data_ref)
5570 = build_aligned_type (TREE_TYPE (data_ref),
5571 TYPE_ALIGN (elem_type));
5572 align = TYPE_ALIGN_UNIT (elem_type);
5573 misalign = 0;
5575 else
5577 TREE_TYPE (data_ref)
5578 = build_aligned_type (TREE_TYPE (data_ref),
5579 TYPE_ALIGN (elem_type));
5580 misalign = DR_MISALIGNMENT (first_dr);
5582 if (dataref_offset == NULL_TREE)
5583 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5584 misalign);
5586 if (negative
5587 && dt != vect_constant_def
5588 && dt != vect_external_def)
5590 tree perm_mask = perm_mask_for_reverse (vectype);
5591 tree perm_dest
5592 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5593 vectype);
5594 tree new_temp = make_ssa_name (perm_dest);
5596 /* Generate the permute statement. */
5597 gimple perm_stmt
5598 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5599 vec_oprnd, perm_mask);
5600 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5602 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5603 vec_oprnd = new_temp;
5606 /* Arguments are ready. Create the new vector stmt. */
5607 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5608 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5610 if (slp)
5611 continue;
5613 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5614 if (!next_stmt)
5615 break;
5618 if (!slp)
5620 if (j == 0)
5621 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5622 else
5623 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5624 prev_stmt_info = vinfo_for_stmt (new_stmt);
5628 dr_chain.release ();
5629 oprnds.release ();
5630 result_chain.release ();
5631 vec_oprnds.release ();
5633 return true;
5636 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5637 VECTOR_CST mask. No checks are made that the target platform supports the
5638 mask, so callers may wish to test can_vec_perm_p separately, or use
5639 vect_gen_perm_mask_checked. */
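/* For example (illustrative): called with SEL = {3, 2, 1, 0} for a
   four-element vector type, this returns the VECTOR_CST {3, 2, 1, 0}
   in the corresponding integer mask vector type.  */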
5641 tree
5642 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5644 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5645 int i, nunits;
5647 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5649 mask_elt_type = lang_hooks.types.type_for_mode
5650 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5651 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5653 mask_elts = XALLOCAVEC (tree, nunits);
5654 for (i = nunits - 1; i >= 0; i--)
5655 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5656 mask_vec = build_vector (mask_type, mask_elts);
5658 return mask_vec;
5661 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5662 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5664 tree
5665 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5667 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5668 return vect_gen_perm_mask_any (vectype, sel);
5671 /* Given vector variables X and Y that were generated for the scalar
5672 STMT, generate instructions to permute the vector elements of X and Y
5673 using permutation mask MASK_VEC, insert them at *GSI and return the
5674 permuted vector variable. */
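/* For illustration (an assumed example, not taken from the sources):
   with X = { x0, x1, x2, x3 }, Y = { y0, y1, y2, y3 } and
   MASK_VEC = { 0, 4, 1, 5 }, the generated VEC_PERM_EXPR produces
   { x0, y0, x1, y1 }; mask indices below the vector length select
   from X, the remaining ones from Y.  */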
5676 static tree
5677 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5678 gimple_stmt_iterator *gsi)
5680 tree vectype = TREE_TYPE (x);
5681 tree perm_dest, data_ref;
5682 gimple perm_stmt;
5684 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5685 data_ref = make_ssa_name (perm_dest);
5687 /* Generate the permute statement. */
5688 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
5689 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5691 return data_ref;
5694 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5695 inserting them on the loop's preheader edge. Returns true if we
5696 were successful in doing so (and thus STMT can then be moved),
5697 otherwise returns false. */
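/* A hypothetical example of what this enables: for an invariant load
   such as "a_1 = *p_2", where "p_2 = &x + off_3" is defined inside
   LOOP but only uses defs from outside the loop, p_2's definition can
   be moved to the preheader edge and the load can then be hoisted as
   well.  A PHI definition, or a deeper chain of in-loop definitions,
   makes the function give up.  */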
5699 static bool
5700 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5702 ssa_op_iter i;
5703 tree op;
5704 bool any = false;
5706 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5708 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5709 if (!gimple_nop_p (def_stmt)
5710 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5712 /* Make sure we don't need to recurse. While we could do
5713 so in simple cases, for more complex use webs
5714 we don't have an easy way to preserve stmt order to fulfil
5715 dependencies within them. */
5716 tree op2;
5717 ssa_op_iter i2;
5718 if (gimple_code (def_stmt) == GIMPLE_PHI)
5719 return false;
5720 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5722 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5723 if (!gimple_nop_p (def_stmt2)
5724 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5725 return false;
5727 any = true;
5731 if (!any)
5732 return true;
5734 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5736 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5737 if (!gimple_nop_p (def_stmt)
5738 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5740 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5741 gsi_remove (&gsi, false);
5742 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5746 return true;
5749 /* vectorizable_load.
5751 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5752 can be vectorized.
5753 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5754 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5755 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5757 static bool
5758 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5759 slp_tree slp_node, slp_instance slp_node_instance)
5761 tree scalar_dest;
5762 tree vec_dest = NULL;
5763 tree data_ref = NULL;
5764 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5765 stmt_vec_info prev_stmt_info;
5766 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5767 struct loop *loop = NULL;
5768 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5769 bool nested_in_vect_loop = false;
5770 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5771 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5772 tree elem_type;
5773 tree new_temp;
5774 machine_mode mode;
5775 gimple new_stmt = NULL;
5776 tree dummy;
5777 enum dr_alignment_support alignment_support_scheme;
5778 tree dataref_ptr = NULL_TREE;
5779 tree dataref_offset = NULL_TREE;
5780 gimple ptr_incr = NULL;
5781 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5782 int ncopies;
5783 int i, j, group_size = -1, group_gap;
5784 tree msq = NULL_TREE, lsq;
5785 tree offset = NULL_TREE;
5786 tree byte_offset = NULL_TREE;
5787 tree realignment_token = NULL_TREE;
5788 gphi *phi = NULL;
5789 vec<tree> dr_chain = vNULL;
5790 bool grouped_load = false;
5791 bool load_lanes_p = false;
5792 gimple first_stmt;
5793 bool inv_p;
5794 bool negative = false;
5795 bool compute_in_loop = false;
5796 struct loop *at_loop;
5797 int vec_num;
5798 bool slp = (slp_node != NULL);
5799 bool slp_perm = false;
5800 enum tree_code code;
5801 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5802 int vf;
5803 tree aggr_type;
5804 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5805 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5806 int gather_scale = 1;
5807 enum vect_def_type gather_dt = vect_unknown_def_type;
5809 if (loop_vinfo)
5811 loop = LOOP_VINFO_LOOP (loop_vinfo);
5812 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5813 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5815 else
5816 vf = 1;
5818 /* Multiple types in SLP are handled by creating the appropriate number of
5819 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5820 case of SLP. */
5821 if (slp || PURE_SLP_STMT (stmt_info))
5822 ncopies = 1;
5823 else
5824 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5826 gcc_assert (ncopies >= 1);
5828 /* FORNOW. This restriction should be relaxed. */
5829 if (nested_in_vect_loop && ncopies > 1)
5831 if (dump_enabled_p ())
5832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5833 "multiple types in nested loop.\n");
5834 return false;
5837 /* Invalidate assumptions made by dependence analysis when vectorization
5838 on the unrolled body effectively re-orders stmts. */
5839 if (ncopies > 1
5840 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5841 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5842 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5844 if (dump_enabled_p ())
5845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5846 "cannot perform implicit CSE when unrolling "
5847 "with negative dependence distance\n");
5848 return false;
5851 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5852 return false;
5854 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5855 return false;
5857 /* Is vectorizable load? */
5858 if (!is_gimple_assign (stmt))
5859 return false;
5861 scalar_dest = gimple_assign_lhs (stmt);
5862 if (TREE_CODE (scalar_dest) != SSA_NAME)
5863 return false;
5865 code = gimple_assign_rhs_code (stmt);
5866 if (code != ARRAY_REF
5867 && code != BIT_FIELD_REF
5868 && code != INDIRECT_REF
5869 && code != COMPONENT_REF
5870 && code != IMAGPART_EXPR
5871 && code != REALPART_EXPR
5872 && code != MEM_REF
5873 && TREE_CODE_CLASS (code) != tcc_declaration)
5874 return false;
5876 if (!STMT_VINFO_DATA_REF (stmt_info))
5877 return false;
5879 elem_type = TREE_TYPE (vectype);
5880 mode = TYPE_MODE (vectype);
5882 /* FORNOW. In some cases can vectorize even if data-type not supported
5883 (e.g. - data copies). */
5884 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5886 if (dump_enabled_p ())
5887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5888 "Aligned load, but unsupported type.\n");
5889 return false;
5892 /* Check if the load is a part of an interleaving chain. */
5893 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5895 grouped_load = true;
5896 /* FORNOW */
5897 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5899 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5901 /* If this is single-element interleaving with an element distance
5902 that leaves unused vector loads around, punt - we at least create
5903 very sub-optimal code in that case (and blow up memory,
5904 see PR65518). */
5905 if (first_stmt == stmt
5906 && !GROUP_NEXT_ELEMENT (stmt_info)
5907 && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
5909 if (dump_enabled_p ())
5910 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5911 "single-element interleaving not supported "
5912 "for not adjacent vector loads\n");
5913 return false;
5916 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5917 if (!slp
5918 && !PURE_SLP_STMT (stmt_info)
5919 && !STMT_VINFO_STRIDED_P (stmt_info))
5921 if (vect_load_lanes_supported (vectype, group_size))
5922 load_lanes_p = true;
5923 else if (!vect_grouped_load_supported (vectype, group_size))
5924 return false;
5927 /* Invalidate assumptions made by dependence analysis when vectorization
5928 on the unrolled body effectively re-orders stmts. */
5929 if (!PURE_SLP_STMT (stmt_info)
5930 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5931 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5932 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5934 if (dump_enabled_p ())
5935 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5936 "cannot perform implicit CSE when performing "
5937 "group loads with negative dependence distance\n");
5938 return false;
5941 /* Similarly when the stmt is a load that is both part of a SLP
5942 instance and a loop vectorized stmt via the same-dr mechanism
5943 we have to give up. */
5944 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
5945 && (STMT_SLP_TYPE (stmt_info)
5946 != STMT_SLP_TYPE (vinfo_for_stmt
5947 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
5949 if (dump_enabled_p ())
5950 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5951 "conflicting SLP types for CSEd load\n");
5952 return false;
5957 if (STMT_VINFO_GATHER_P (stmt_info))
5959 gimple def_stmt;
5960 tree def;
5961 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5962 &gather_off, &gather_scale);
5963 gcc_assert (gather_decl);
5964 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5965 &def_stmt, &def, &gather_dt,
5966 &gather_off_vectype))
5968 if (dump_enabled_p ())
5969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5970 "gather index use not simple.\n");
5971 return false;
5974 else if (STMT_VINFO_STRIDED_P (stmt_info))
5976 if ((grouped_load
5977 && (slp || PURE_SLP_STMT (stmt_info)))
5978 && (group_size > nunits
5979 || nunits % group_size != 0
5980 /* ??? During the analysis phase we are not called with the
5981 slp node/instance we are in, so we don't know whether we'll
5982 end up with a permutation. Still we don't
5983 support load permutations. */
5984 || slp_perm))
5986 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5987 "unhandled strided group load\n");
5988 return false;
5991 else
5993 negative = tree_int_cst_compare (nested_in_vect_loop
5994 ? STMT_VINFO_DR_STEP (stmt_info)
5995 : DR_STEP (dr),
5996 size_zero_node) < 0;
5997 if (negative && ncopies > 1)
5999 if (dump_enabled_p ())
6000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6001 "multiple types with negative step.\n");
6002 return false;
6005 if (negative)
6007 if (grouped_load)
6009 if (dump_enabled_p ())
6010 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6011 "negative step for group load not supported"
6012 "\n");
6013 return false;
6015 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6016 if (alignment_support_scheme != dr_aligned
6017 && alignment_support_scheme != dr_unaligned_supported)
6019 if (dump_enabled_p ())
6020 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6021 "negative step but alignment required.\n");
6022 return false;
6024 if (!perm_mask_for_reverse (vectype))
6026 if (dump_enabled_p ())
6027 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6028 "negative step and reversing not supported."
6029 "\n");
6030 return false;
6035 if (!vec_stmt) /* transformation not required. */
6037 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6038 /* The SLP costs are calculated during SLP analysis. */
6039 if (!PURE_SLP_STMT (stmt_info))
6040 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6041 NULL, NULL, NULL);
6042 return true;
6045 if (dump_enabled_p ())
6046 dump_printf_loc (MSG_NOTE, vect_location,
6047 "transform load. ncopies = %d\n", ncopies);
6049 /** Transform. **/
6051 ensure_base_align (stmt_info, dr);
6053 if (STMT_VINFO_GATHER_P (stmt_info))
6055 tree vec_oprnd0 = NULL_TREE, op;
6056 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6057 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6058 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6059 edge pe = loop_preheader_edge (loop);
6060 gimple_seq seq;
6061 basic_block new_bb;
6062 enum { NARROW, NONE, WIDEN } modifier;
6063 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6065 if (nunits == gather_off_nunits)
6066 modifier = NONE;
6067 else if (nunits == gather_off_nunits / 2)
6069 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6070 modifier = WIDEN;
6072 for (i = 0; i < gather_off_nunits; ++i)
6073 sel[i] = i | nunits;
6075 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6077 else if (nunits == gather_off_nunits * 2)
6079 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6080 modifier = NARROW;
6082 for (i = 0; i < nunits; ++i)
6083 sel[i] = i < gather_off_nunits
6084 ? i : i + nunits - gather_off_nunits;
6086 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6087 ncopies *= 2;
6089 else
6090 gcc_unreachable ();
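/* Worked example (assumed element counts, for illustration only):
   gathering V4DF data with a V8SI offset vectype gives nunits == 4 and
   gather_off_nunits == 8, so modifier == WIDEN and each odd copy takes
   the high half of the offset vector via PERM_MASK.  Conversely, V8SF
   data with a V4DI offset vectype gives modifier == NARROW; two gather
   calls are emitted per result vector and their results are combined
   with PERM_MASK, which is why NCOPIES is doubled.  */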
6092 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6093 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6094 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6095 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6096 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6097 scaletype = TREE_VALUE (arglist);
6098 gcc_checking_assert (types_compatible_p (srctype, rettype));
6100 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6102 ptr = fold_convert (ptrtype, gather_base);
6103 if (!is_gimple_min_invariant (ptr))
6105 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6106 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6107 gcc_assert (!new_bb);
6110 /* Currently we support only unconditional gather loads,
6111 so mask should be all ones. */
6112 if (TREE_CODE (masktype) == INTEGER_TYPE)
6113 mask = build_int_cst (masktype, -1);
6114 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6116 mask = build_int_cst (TREE_TYPE (masktype), -1);
6117 mask = build_vector_from_val (masktype, mask);
6118 mask = vect_init_vector (stmt, mask, masktype, NULL);
6120 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6122 REAL_VALUE_TYPE r;
6123 long tmp[6];
6124 for (j = 0; j < 6; ++j)
6125 tmp[j] = -1;
6126 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6127 mask = build_real (TREE_TYPE (masktype), r);
6128 mask = build_vector_from_val (masktype, mask);
6129 mask = vect_init_vector (stmt, mask, masktype, NULL);
6131 else
6132 gcc_unreachable ();
6134 scale = build_int_cst (scaletype, gather_scale);
6136 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6137 merge = build_int_cst (TREE_TYPE (rettype), 0);
6138 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6140 REAL_VALUE_TYPE r;
6141 long tmp[6];
6142 for (j = 0; j < 6; ++j)
6143 tmp[j] = 0;
6144 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6145 merge = build_real (TREE_TYPE (rettype), r);
6147 else
6148 gcc_unreachable ();
6149 merge = build_vector_from_val (rettype, merge);
6150 merge = vect_init_vector (stmt, merge, rettype, NULL);
6152 prev_stmt_info = NULL;
6153 for (j = 0; j < ncopies; ++j)
6155 if (modifier == WIDEN && (j & 1))
6156 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6157 perm_mask, stmt, gsi);
6158 else if (j == 0)
6159 op = vec_oprnd0
6160 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
6161 else
6162 op = vec_oprnd0
6163 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6165 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6167 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6168 == TYPE_VECTOR_SUBPARTS (idxtype));
6169 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
6170 var = make_ssa_name (var);
6171 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6172 new_stmt
6173 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6174 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6175 op = var;
6178 new_stmt
6179 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6181 if (!useless_type_conversion_p (vectype, rettype))
6183 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6184 == TYPE_VECTOR_SUBPARTS (rettype));
6185 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
6186 op = make_ssa_name (var, new_stmt);
6187 gimple_call_set_lhs (new_stmt, op);
6188 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6189 var = make_ssa_name (vec_dest);
6190 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6191 new_stmt
6192 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6194 else
6196 var = make_ssa_name (vec_dest, new_stmt);
6197 gimple_call_set_lhs (new_stmt, var);
6200 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6202 if (modifier == NARROW)
6204 if ((j & 1) == 0)
6206 prev_res = var;
6207 continue;
6209 var = permute_vec_elements (prev_res, var,
6210 perm_mask, stmt, gsi);
6211 new_stmt = SSA_NAME_DEF_STMT (var);
6214 if (prev_stmt_info == NULL)
6215 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6216 else
6217 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6218 prev_stmt_info = vinfo_for_stmt (new_stmt);
6220 return true;
6222 else if (STMT_VINFO_STRIDED_P (stmt_info))
6224 gimple_stmt_iterator incr_gsi;
6225 bool insert_after;
6226 gimple incr;
6227 tree offvar;
6228 tree ivstep;
6229 tree running_off;
6230 vec<constructor_elt, va_gc> *v = NULL;
6231 gimple_seq stmts = NULL;
6232 tree stride_base, stride_step, alias_off;
6234 gcc_assert (!nested_in_vect_loop);
6236 stride_base
6237 = fold_build_pointer_plus
6238 (unshare_expr (DR_BASE_ADDRESS (dr)),
6239 size_binop (PLUS_EXPR,
6240 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
6241 convert_to_ptrofftype (DR_INIT (dr))));
6242 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
6244 /* For a load with loop-invariant (but other than power-of-2)
6245 stride (i.e. not a grouped access) like so:
6247 for (i = 0; i < n; i += stride)
6248 ... = array[i];
6250 we generate a new induction variable and new accesses to
6251 form a new vector (or vectors, depending on ncopies):
6253 for (j = 0; ; j += VF*stride)
6254 tmp1 = array[j];
6255 tmp2 = array[j + stride];
6257 vectemp = {tmp1, tmp2, ...}
6260 ivstep = stride_step;
6261 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6262 build_int_cst (TREE_TYPE (ivstep), vf));
6264 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6266 create_iv (stride_base, ivstep, NULL,
6267 loop, &incr_gsi, insert_after,
6268 &offvar, NULL);
6269 incr = gsi_stmt (incr_gsi);
6270 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6272 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6273 if (stmts)
6274 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6276 prev_stmt_info = NULL;
6277 running_off = offvar;
6278 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
6279 int nloads = nunits;
6280 tree ltype = TREE_TYPE (vectype);
6281 if (slp)
6283 nloads = nunits / group_size;
6284 if (group_size < nunits)
6285 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6286 else
6287 ltype = vectype;
6288 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6289 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6290 gcc_assert (!slp_perm);
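/* Hypothetical illustration: with a four-element vector type and an
   SLP group of size two, NLOADS becomes 2 and LTYPE a two-element
   vector, so each vector stmt below is assembled from two narrower
   loads whose addresses advance by STRIDE_STEP.  */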
6292 for (j = 0; j < ncopies; j++)
6294 tree vec_inv;
6296 if (nloads > 1)
6298 vec_alloc (v, nloads);
6299 for (i = 0; i < nloads; i++)
6301 tree newref, newoff;
6302 gimple incr;
6303 newref = build2 (MEM_REF, ltype, running_off, alias_off);
6305 newref = force_gimple_operand_gsi (gsi, newref, true,
6306 NULL_TREE, true,
6307 GSI_SAME_STMT);
6308 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6309 newoff = copy_ssa_name (running_off);
6310 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6311 running_off, stride_step);
6312 vect_finish_stmt_generation (stmt, incr, gsi);
6314 running_off = newoff;
6317 vec_inv = build_constructor (vectype, v);
6318 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6319 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6321 else
6323 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6324 build2 (MEM_REF, ltype,
6325 running_off, alias_off));
6326 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6328 tree newoff = copy_ssa_name (running_off);
6329 gimple incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6330 running_off, stride_step);
6331 vect_finish_stmt_generation (stmt, incr, gsi);
6333 running_off = newoff;
6336 if (slp)
6337 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6338 if (j == 0)
6339 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6340 else
6341 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6342 prev_stmt_info = vinfo_for_stmt (new_stmt);
6344 return true;
6347 if (grouped_load)
6349 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6350 if (slp
6351 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6352 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6353 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6355 /* Check if the chain of loads is already vectorized. */
6356 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6357 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6358 ??? But we can only do so if there is exactly one
6359 as we have no way to get at the rest. Leave the CSE
6360 opportunity alone.
6361 ??? With the group load eventually participating
6362 in multiple different permutations (having multiple
6363 slp nodes which refer to the same group) the CSE
6364 is even wrong code. See PR56270. */
6365 && !slp)
6367 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6368 return true;
6370 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6371 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6373 /* VEC_NUM is the number of vect stmts to be created for this group. */
6374 if (slp)
6376 grouped_load = false;
6377 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6378 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6379 slp_perm = true;
6380 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6382 else
6384 vec_num = group_size;
6385 group_gap = 0;
6388 else
6390 first_stmt = stmt;
6391 first_dr = dr;
6392 group_size = vec_num = 1;
6393 group_gap = 0;
6396 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6397 gcc_assert (alignment_support_scheme);
6398 /* Targets with load-lane instructions must not require explicit
6399 realignment. */
6400 gcc_assert (!load_lanes_p
6401 || alignment_support_scheme == dr_aligned
6402 || alignment_support_scheme == dr_unaligned_supported);
6404 /* In case the vectorization factor (VF) is bigger than the number
6405 of elements that we can fit in a vectype (nunits), we have to generate
6406 more than one vector stmt - i.e - we need to "unroll" the
6407 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6408 from one copy of the vector stmt to the next, in the field
6409 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6410 stages to find the correct vector defs to be used when vectorizing
6411 stmts that use the defs of the current stmt. The example below
6412 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6413 need to create 4 vectorized stmts):
6415 before vectorization:
6416 RELATED_STMT VEC_STMT
6417 S1: x = memref - -
6418 S2: z = x + 1 - -
6420 step 1: vectorize stmt S1:
6421 We first create the vector stmt VS1_0, and, as usual, record a
6422 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6423 Next, we create the vector stmt VS1_1, and record a pointer to
6424 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6425 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6426 stmts and pointers:
6427 RELATED_STMT VEC_STMT
6428 VS1_0: vx0 = memref0 VS1_1 -
6429 VS1_1: vx1 = memref1 VS1_2 -
6430 VS1_2: vx2 = memref2 VS1_3 -
6431 VS1_3: vx3 = memref3 - -
6432 S1: x = load - VS1_0
6433 S2: z = x + 1 - -
6435 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6436 information we recorded in RELATED_STMT field is used to vectorize
6437 stmt S2. */
6439 /* In case of interleaving (non-unit grouped access):
6441 S1: x2 = &base + 2
6442 S2: x0 = &base
6443 S3: x1 = &base + 1
6444 S4: x3 = &base + 3
6446 Vectorized loads are created in the order of memory accesses
6447 starting from the access of the first stmt of the chain:
6449 VS1: vx0 = &base
6450 VS2: vx1 = &base + vec_size*1
6451 VS3: vx2 = &base + vec_size*2
6452 VS4: vx3 = &base + vec_size*3
6454 Then permutation statements are generated:
6456 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6457 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6460 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6461 (the order of the data-refs in the output of vect_permute_load_chain
6462 corresponds to the order of scalar stmts in the interleaving chain - see
6463 the documentation of vect_permute_load_chain()).
6464 The generation of permutation stmts and recording them in
6465 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6467 In case of both multiple types and interleaving, the vector loads and
6468 permutation stmts above are created for every copy. The result vector
6469 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6470 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6472 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6473 on a target that supports unaligned accesses (dr_unaligned_supported)
6474 we generate the following code:
6475 p = initial_addr;
6476 indx = 0;
6477 loop {
6478 p = p + indx * vectype_size;
6479 vec_dest = *(p);
6480 indx = indx + 1;
6483 Otherwise, the data reference is potentially unaligned on a target that
6484 does not support unaligned accesses (dr_explicit_realign_optimized) -
6485 then generate the following code, in which the data in each iteration is
6486 obtained by two vector loads, one from the previous iteration, and one
6487 from the current iteration:
6488 p1 = initial_addr;
6489 msq_init = *(floor(p1))
6490 p2 = initial_addr + VS - 1;
6491 realignment_token = call target_builtin;
6492 indx = 0;
6493 loop {
6494 p2 = p2 + indx * vectype_size
6495 lsq = *(floor(p2))
6496 vec_dest = realign_load (msq, lsq, realignment_token)
6497 indx = indx + 1;
6498 msq = lsq;
6499 } */
6501 /* If the misalignment remains the same throughout the execution of the
6502 loop, we can create the init_addr and permutation mask at the loop
6503 preheader. Otherwise, it needs to be created inside the loop.
6504 This can only occur when vectorizing memory accesses in the inner-loop
6505 nested within an outer-loop that is being vectorized. */
6507 if (nested_in_vect_loop
6508 && (TREE_INT_CST_LOW (DR_STEP (dr))
6509 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6511 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6512 compute_in_loop = true;
6515 if ((alignment_support_scheme == dr_explicit_realign_optimized
6516 || alignment_support_scheme == dr_explicit_realign)
6517 && !compute_in_loop)
6519 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6520 alignment_support_scheme, NULL_TREE,
6521 &at_loop);
6522 if (alignment_support_scheme == dr_explicit_realign_optimized)
6524 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
6525 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6526 size_one_node);
6529 else
6530 at_loop = loop;
6532 if (negative)
6533 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
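/* For a negative step the access runs backwards through memory; the
   intended picture (illustration, not from the sources) is that the
   data-ref pointer is offset by -(nunits - 1) elements so the vector
   load covers the right range, and the loaded vector is then reversed
   with perm_mask_for_reverse further below.  */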
6535 if (load_lanes_p)
6536 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6537 else
6538 aggr_type = vectype;
6540 prev_stmt_info = NULL;
6541 for (j = 0; j < ncopies; j++)
6543 /* 1. Create the vector or array pointer update chain. */
6544 if (j == 0)
6546 bool simd_lane_access_p
6547 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6548 if (simd_lane_access_p
6549 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6550 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6551 && integer_zerop (DR_OFFSET (first_dr))
6552 && integer_zerop (DR_INIT (first_dr))
6553 && alias_sets_conflict_p (get_alias_set (aggr_type),
6554 get_alias_set (DR_REF (first_dr)))
6555 && (alignment_support_scheme == dr_aligned
6556 || alignment_support_scheme == dr_unaligned_supported))
6558 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6559 dataref_offset = build_int_cst (reference_alias_ptr_type
6560 (DR_REF (first_dr)), 0);
6561 inv_p = false;
6563 else
6564 dataref_ptr
6565 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6566 offset, &dummy, gsi, &ptr_incr,
6567 simd_lane_access_p, &inv_p,
6568 byte_offset);
6570 else if (dataref_offset)
6571 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6572 TYPE_SIZE_UNIT (aggr_type));
6573 else
6574 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6575 TYPE_SIZE_UNIT (aggr_type));
6577 if (grouped_load || slp_perm)
6578 dr_chain.create (vec_num);
6580 if (load_lanes_p)
6582 tree vec_array;
6584 vec_array = create_vector_array (vectype, vec_num);
6586 /* Emit:
6587 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6588 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6589 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6590 gimple_call_set_lhs (new_stmt, vec_array);
6591 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6593 /* Extract each vector into an SSA_NAME. */
6594 for (i = 0; i < vec_num; i++)
6596 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6597 vec_array, i);
6598 dr_chain.quick_push (new_temp);
6601 /* Record the mapping between SSA_NAMEs and statements. */
6602 vect_record_grouped_load_vectors (stmt, dr_chain);
6604 else
6606 for (i = 0; i < vec_num; i++)
6608 if (i > 0)
6609 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6610 stmt, NULL_TREE);
6612 /* 2. Create the vector-load in the loop. */
6613 switch (alignment_support_scheme)
6615 case dr_aligned:
6616 case dr_unaligned_supported:
6618 unsigned int align, misalign;
6620 data_ref
6621 = build2 (MEM_REF, vectype, dataref_ptr,
6622 dataref_offset
6623 ? dataref_offset
6624 : build_int_cst (reference_alias_ptr_type
6625 (DR_REF (first_dr)), 0));
6626 align = TYPE_ALIGN_UNIT (vectype);
6627 if (alignment_support_scheme == dr_aligned)
6629 gcc_assert (aligned_access_p (first_dr));
6630 misalign = 0;
6632 else if (DR_MISALIGNMENT (first_dr) == -1)
6634 TREE_TYPE (data_ref)
6635 = build_aligned_type (TREE_TYPE (data_ref),
6636 TYPE_ALIGN (elem_type));
6637 align = TYPE_ALIGN_UNIT (elem_type);
6638 misalign = 0;
6640 else
6642 TREE_TYPE (data_ref)
6643 = build_aligned_type (TREE_TYPE (data_ref),
6644 TYPE_ALIGN (elem_type));
6645 misalign = DR_MISALIGNMENT (first_dr);
6647 if (dataref_offset == NULL_TREE)
6648 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6649 align, misalign);
6650 break;
6652 case dr_explicit_realign:
6654 tree ptr, bump;
6656 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
6658 if (compute_in_loop)
6659 msq = vect_setup_realignment (first_stmt, gsi,
6660 &realignment_token,
6661 dr_explicit_realign,
6662 dataref_ptr, NULL);
6664 ptr = copy_ssa_name (dataref_ptr);
6665 new_stmt = gimple_build_assign
6666 (ptr, BIT_AND_EXPR, dataref_ptr,
6667 build_int_cst
6668 (TREE_TYPE (dataref_ptr),
6669 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6670 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6671 data_ref
6672 = build2 (MEM_REF, vectype, ptr,
6673 build_int_cst (reference_alias_ptr_type
6674 (DR_REF (first_dr)), 0));
6675 vec_dest = vect_create_destination_var (scalar_dest,
6676 vectype);
6677 new_stmt = gimple_build_assign (vec_dest, data_ref);
6678 new_temp = make_ssa_name (vec_dest, new_stmt);
6679 gimple_assign_set_lhs (new_stmt, new_temp);
6680 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6681 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6682 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6683 msq = new_temp;
6685 bump = size_binop (MULT_EXPR, vs,
6686 TYPE_SIZE_UNIT (elem_type));
6687 bump = size_binop (MINUS_EXPR, bump, size_one_node);
6688 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6689 new_stmt = gimple_build_assign
6690 (NULL_TREE, BIT_AND_EXPR, ptr,
6691 build_int_cst
6692 (TREE_TYPE (ptr),
6693 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6694 ptr = copy_ssa_name (dataref_ptr, new_stmt);
6695 gimple_assign_set_lhs (new_stmt, ptr);
6696 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6697 data_ref
6698 = build2 (MEM_REF, vectype, ptr,
6699 build_int_cst (reference_alias_ptr_type
6700 (DR_REF (first_dr)), 0));
6701 break;
6703 case dr_explicit_realign_optimized:
6704 new_temp = copy_ssa_name (dataref_ptr);
6705 new_stmt = gimple_build_assign
6706 (new_temp, BIT_AND_EXPR, dataref_ptr,
6707 build_int_cst
6708 (TREE_TYPE (dataref_ptr),
6709 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6710 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6711 data_ref
6712 = build2 (MEM_REF, vectype, new_temp,
6713 build_int_cst (reference_alias_ptr_type
6714 (DR_REF (first_dr)), 0));
6715 break;
6716 default:
6717 gcc_unreachable ();
6719 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6720 new_stmt = gimple_build_assign (vec_dest, data_ref);
6721 new_temp = make_ssa_name (vec_dest, new_stmt);
6722 gimple_assign_set_lhs (new_stmt, new_temp);
6723 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6725 /* 3. Handle explicit realignment if necessary/supported.
6726 Create in loop:
6727 vec_dest = realign_load (msq, lsq, realignment_token) */
6728 if (alignment_support_scheme == dr_explicit_realign_optimized
6729 || alignment_support_scheme == dr_explicit_realign)
6731 lsq = gimple_assign_lhs (new_stmt);
6732 if (!realignment_token)
6733 realignment_token = dataref_ptr;
6734 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6735 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
6736 msq, lsq, realignment_token);
6737 new_temp = make_ssa_name (vec_dest, new_stmt);
6738 gimple_assign_set_lhs (new_stmt, new_temp);
6739 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6741 if (alignment_support_scheme == dr_explicit_realign_optimized)
6743 gcc_assert (phi);
6744 if (i == vec_num - 1 && j == ncopies - 1)
6745 add_phi_arg (phi, lsq,
6746 loop_latch_edge (containing_loop),
6747 UNKNOWN_LOCATION);
6748 msq = lsq;
6752 /* 4. Handle invariant-load. */
6753 if (inv_p && !bb_vinfo)
6755 gcc_assert (!grouped_load);
6756 /* If we have versioned for aliasing or the loop doesn't
6757 have any data dependencies that would preclude this,
6758 then we are sure this is a loop invariant load and
6759 thus we can insert it on the preheader edge. */
6760 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6761 && !nested_in_vect_loop
6762 && hoist_defs_of_uses (stmt, loop))
6764 if (dump_enabled_p ())
6766 dump_printf_loc (MSG_NOTE, vect_location,
6767 "hoisting out of the vectorized "
6768 "loop: ");
6769 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6771 tree tem = copy_ssa_name (scalar_dest);
6772 gsi_insert_on_edge_immediate
6773 (loop_preheader_edge (loop),
6774 gimple_build_assign (tem,
6775 unshare_expr
6776 (gimple_assign_rhs1 (stmt))));
6777 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6779 else
6781 gimple_stmt_iterator gsi2 = *gsi;
6782 gsi_next (&gsi2);
6783 new_temp = vect_init_vector (stmt, scalar_dest,
6784 vectype, &gsi2);
6786 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6787 set_vinfo_for_stmt (new_stmt,
6788 new_stmt_vec_info (new_stmt, loop_vinfo,
6789 bb_vinfo));
6792 if (negative)
6794 tree perm_mask = perm_mask_for_reverse (vectype);
6795 new_temp = permute_vec_elements (new_temp, new_temp,
6796 perm_mask, stmt, gsi);
6797 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6800 /* Collect vector loads and later create their permutation in
6801 vect_transform_grouped_load (). */
6802 if (grouped_load || slp_perm)
6803 dr_chain.quick_push (new_temp);
6805 /* Store vector loads in the corresponding SLP_NODE. */
6806 if (slp && !slp_perm)
6807 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6809 /* Bump the vector pointer to account for a gap. */
6810 if (slp && group_gap != 0)
6812 tree bump = size_binop (MULT_EXPR,
6813 TYPE_SIZE_UNIT (elem_type),
6814 size_int (group_gap));
6815 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6816 stmt, bump);
6820 if (slp && !slp_perm)
6821 continue;
6823 if (slp_perm)
6825 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6826 slp_node_instance, false))
6828 dr_chain.release ();
6829 return false;
6832 else
6834 if (grouped_load)
6836 if (!load_lanes_p)
6837 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6838 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6840 else
6842 if (j == 0)
6843 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6844 else
6845 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6846 prev_stmt_info = vinfo_for_stmt (new_stmt);
6849 dr_chain.release ();
6852 return true;
6855 /* Function vect_is_simple_cond.
6857 Input:
6858 LOOP - the loop that is being vectorized.
6859 COND - Condition that is checked for simple use.
6861 Output:
6862 *COMP_VECTYPE - the vector type for the comparison.
6864 Returns whether a COND can be vectorized. Checks whether
6865 condition operands are supportable using vect_is_simple_use. */
6867 static bool
6868 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6869 bb_vec_info bb_vinfo, tree *comp_vectype)
6871 tree lhs, rhs;
6872 tree def;
6873 enum vect_def_type dt;
6874 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6876 if (!COMPARISON_CLASS_P (cond))
6877 return false;
6879 lhs = TREE_OPERAND (cond, 0);
6880 rhs = TREE_OPERAND (cond, 1);
6882 if (TREE_CODE (lhs) == SSA_NAME)
6884 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6885 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6886 &lhs_def_stmt, &def, &dt, &vectype1))
6887 return false;
6889 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6890 && TREE_CODE (lhs) != FIXED_CST)
6891 return false;
6893 if (TREE_CODE (rhs) == SSA_NAME)
6895 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6896 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6897 &rhs_def_stmt, &def, &dt, &vectype2))
6898 return false;
6900 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6901 && TREE_CODE (rhs) != FIXED_CST)
6902 return false;
6904 *comp_vectype = vectype1 ? vectype1 : vectype2;
6905 return true;
6908 /* vectorizable_condition.
6910 Check if STMT is a conditional modify expression that can be vectorized.
6911 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6912 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6913 at GSI.
6915 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
6916 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
6917 else clause if it is 2).
6919 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
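/* As a rough illustrative example (not taken from the sources): the
   scalar statement "x = a < b ? c : d" is vectorized as
   "vx = VEC_COND_EXPR <va < vb, vc, vd>", where the comparison is
   built in VEC_CMP_TYPE and the selection in VECTYPE.  */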
6921 bool
6922 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6923 gimple *vec_stmt, tree reduc_def, int reduc_index,
6924 slp_tree slp_node)
6926 tree scalar_dest = NULL_TREE;
6927 tree vec_dest = NULL_TREE;
6928 tree cond_expr, then_clause, else_clause;
6929 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6930 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6931 tree comp_vectype = NULL_TREE;
6932 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6933 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6934 tree vec_compare, vec_cond_expr;
6935 tree new_temp;
6936 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6937 tree def;
6938 enum vect_def_type dt, dts[4];
6939 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6940 int ncopies;
6941 enum tree_code code;
6942 stmt_vec_info prev_stmt_info = NULL;
6943 int i, j;
6944 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6945 vec<tree> vec_oprnds0 = vNULL;
6946 vec<tree> vec_oprnds1 = vNULL;
6947 vec<tree> vec_oprnds2 = vNULL;
6948 vec<tree> vec_oprnds3 = vNULL;
6949 tree vec_cmp_type;
6951 if (slp_node || PURE_SLP_STMT (stmt_info))
6952 ncopies = 1;
6953 else
6954 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6956 gcc_assert (ncopies >= 1);
6957 if (reduc_index && ncopies > 1)
6958 return false; /* FORNOW */
6960 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6961 return false;
6963 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6964 return false;
6966 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6967 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6968 && reduc_def))
6969 return false;
6971 /* FORNOW: not yet supported. */
6972 if (STMT_VINFO_LIVE_P (stmt_info))
6974 if (dump_enabled_p ())
6975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6976 "value used after loop.\n");
6977 return false;
6980 /* Is vectorizable conditional operation? */
6981 if (!is_gimple_assign (stmt))
6982 return false;
6984 code = gimple_assign_rhs_code (stmt);
6986 if (code != COND_EXPR)
6987 return false;
6989 cond_expr = gimple_assign_rhs1 (stmt);
6990 then_clause = gimple_assign_rhs2 (stmt);
6991 else_clause = gimple_assign_rhs3 (stmt);
6993 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6994 &comp_vectype)
6995 || !comp_vectype)
6996 return false;
6998 if (TREE_CODE (then_clause) == SSA_NAME)
7000 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
7001 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
7002 &then_def_stmt, &def, &dt))
7003 return false;
7005 else if (TREE_CODE (then_clause) != INTEGER_CST
7006 && TREE_CODE (then_clause) != REAL_CST
7007 && TREE_CODE (then_clause) != FIXED_CST)
7008 return false;
7010 if (TREE_CODE (else_clause) == SSA_NAME)
7012 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
7013 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
7014 &else_def_stmt, &def, &dt))
7015 return false;
7017 else if (TREE_CODE (else_clause) != INTEGER_CST
7018 && TREE_CODE (else_clause) != REAL_CST
7019 && TREE_CODE (else_clause) != FIXED_CST)
7020 return false;
7022 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
7023 /* The result of a vector comparison should be of signed integer type. */
7024 tree cmp_type = build_nonstandard_integer_type (prec, 0);
7025 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
7026 if (vec_cmp_type == NULL_TREE)
7027 return false;
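/* E.g. (illustrative only): for a vector of 32-bit floats, PREC is 32,
   CMP_TYPE a signed 32-bit integer type and VEC_CMP_TYPE the
   same-sized signed integer vector type, matching the all-ones /
   all-zeros result of a vector comparison.  */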
7029 if (!vec_stmt)
7031 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7032 return expand_vec_cond_expr_p (vectype, comp_vectype);
7035 /* Transform. */
7037 if (!slp_node)
7039 vec_oprnds0.create (1);
7040 vec_oprnds1.create (1);
7041 vec_oprnds2.create (1);
7042 vec_oprnds3.create (1);
7045 /* Handle def. */
7046 scalar_dest = gimple_assign_lhs (stmt);
7047 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7049 /* Handle cond expr. */
7050 for (j = 0; j < ncopies; j++)
7052 gassign *new_stmt = NULL;
7053 if (j == 0)
7055 if (slp_node)
7057 auto_vec<tree, 4> ops;
7058 auto_vec<vec<tree>, 4> vec_defs;
7060 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7061 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7062 ops.safe_push (then_clause);
7063 ops.safe_push (else_clause);
7064 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7065 vec_oprnds3 = vec_defs.pop ();
7066 vec_oprnds2 = vec_defs.pop ();
7067 vec_oprnds1 = vec_defs.pop ();
7068 vec_oprnds0 = vec_defs.pop ();
7070 ops.release ();
7071 vec_defs.release ();
7073 else
7075 gimple gtemp;
7076 vec_cond_lhs =
7077 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7078 stmt, NULL);
7079 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
7080 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
7082 vec_cond_rhs =
7083 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7084 stmt, NULL);
7085 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
7086 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
7087 if (reduc_index == 1)
7088 vec_then_clause = reduc_def;
7089 else
7091 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7092 stmt, NULL);
7093 vect_is_simple_use (then_clause, stmt, loop_vinfo,
7094 NULL, &gtemp, &def, &dts[2]);
7096 if (reduc_index == 2)
7097 vec_else_clause = reduc_def;
7098 else
7100 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7101 stmt, NULL);
7102 vect_is_simple_use (else_clause, stmt, loop_vinfo,
7103 NULL, &gtemp, &def, &dts[3]);
7107 else
7109 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
7110 vec_oprnds0.pop ());
7111 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
7112 vec_oprnds1.pop ());
7113 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7114 vec_oprnds2.pop ());
7115 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7116 vec_oprnds3.pop ());
7119 if (!slp_node)
7121 vec_oprnds0.quick_push (vec_cond_lhs);
7122 vec_oprnds1.quick_push (vec_cond_rhs);
7123 vec_oprnds2.quick_push (vec_then_clause);
7124 vec_oprnds3.quick_push (vec_else_clause);
7127 /* Arguments are ready. Create the new vector stmt. */
7128 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7130 vec_cond_rhs = vec_oprnds1[i];
7131 vec_then_clause = vec_oprnds2[i];
7132 vec_else_clause = vec_oprnds3[i];
7134 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7135 vec_cond_lhs, vec_cond_rhs);
7136 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
7137 vec_compare, vec_then_clause, vec_else_clause);
7139 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
7140 new_temp = make_ssa_name (vec_dest, new_stmt);
7141 gimple_assign_set_lhs (new_stmt, new_temp);
7142 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7143 if (slp_node)
7144 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7147 if (slp_node)
7148 continue;
7150 if (j == 0)
7151 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7152 else
7153 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7155 prev_stmt_info = vinfo_for_stmt (new_stmt);
7158 vec_oprnds0.release ();
7159 vec_oprnds1.release ();
7160 vec_oprnds2.release ();
7161 vec_oprnds3.release ();
7163 return true;
7167 /* Make sure the statement is vectorizable. */
7169 bool
7170 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
7172 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7173 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7174 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7175 bool ok;
7176 tree scalar_type, vectype;
7177 gimple pattern_stmt;
7178 gimple_seq pattern_def_seq;
7180 if (dump_enabled_p ())
7182 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7183 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7186 if (gimple_has_volatile_ops (stmt))
7188 if (dump_enabled_p ())
7189 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7190 "not vectorized: stmt has volatile operands\n");
7192 return false;
7195 /* Skip stmts that do not need to be vectorized. In loops this is expected
7196 to include:
7197 - the COND_EXPR which is the loop exit condition
7198 - any LABEL_EXPRs in the loop
7199 - computations that are used only for array indexing or loop control.
7200 In basic blocks we only analyze statements that are a part of some SLP
7201 instance, therefore, all the statements are relevant.
7203 Pattern statement needs to be analyzed instead of the original statement
7204 if the original statement is not relevant. Otherwise, we analyze both
7205 statements. In basic blocks we are called from some SLP instance
7206 traversal; don't analyze pattern stmts there, since the pattern stmts
7207 will already be part of the SLP instance. */
7209 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7210 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7211 && !STMT_VINFO_LIVE_P (stmt_info))
7213 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7214 && pattern_stmt
7215 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7216 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7218 /* Analyze PATTERN_STMT instead of the original stmt. */
7219 stmt = pattern_stmt;
7220 stmt_info = vinfo_for_stmt (pattern_stmt);
7221 if (dump_enabled_p ())
7223 dump_printf_loc (MSG_NOTE, vect_location,
7224 "==> examining pattern statement: ");
7225 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7228 else
7230 if (dump_enabled_p ())
7231 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7233 return true;
7236 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7237 && node == NULL
7238 && pattern_stmt
7239 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7240 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7242 /* Analyze PATTERN_STMT too. */
7243 if (dump_enabled_p ())
7245 dump_printf_loc (MSG_NOTE, vect_location,
7246 "==> examining pattern statement: ");
7247 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7250 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7251 return false;
7254 if (is_pattern_stmt_p (stmt_info)
7255 && node == NULL
7256 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7258 gimple_stmt_iterator si;
7260 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7262 gimple pattern_def_stmt = gsi_stmt (si);
7263 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7264 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7266 /* Analyze def stmt of STMT if it's a pattern stmt. */
7267 if (dump_enabled_p ())
7269 dump_printf_loc (MSG_NOTE, vect_location,
7270 "==> examining pattern def statement: ");
7271 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7274 if (!vect_analyze_stmt (pattern_def_stmt,
7275 need_to_vectorize, node))
7276 return false;
7281 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7283 case vect_internal_def:
7284 break;
7286 case vect_reduction_def:
7287 case vect_nested_cycle:
7288 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
7289 || relevance == vect_used_in_outer_by_reduction
7290 || relevance == vect_unused_in_scope));
7291 break;
7293 case vect_induction_def:
7294 case vect_constant_def:
7295 case vect_external_def:
7296 case vect_unknown_def_type:
7297 default:
7298 gcc_unreachable ();
7301 if (bb_vinfo)
7303 gcc_assert (PURE_SLP_STMT (stmt_info));
7305 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7306 if (dump_enabled_p ())
7308 dump_printf_loc (MSG_NOTE, vect_location,
7309 "get vectype for scalar type: ");
7310 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7311 dump_printf (MSG_NOTE, "\n");
7314 vectype = get_vectype_for_scalar_type (scalar_type);
7315 if (!vectype)
7317 if (dump_enabled_p ())
7319 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7320 "not SLPed: unsupported data-type ");
7321 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7322 scalar_type);
7323 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7325 return false;
7328 if (dump_enabled_p ())
7330 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7331 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7332 dump_printf (MSG_NOTE, "\n");
7335 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7338 if (STMT_VINFO_RELEVANT_P (stmt_info))
7340 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7341 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7342 || (is_gimple_call (stmt)
7343 && gimple_call_lhs (stmt) == NULL_TREE));
7344 *need_to_vectorize = true;
7347 ok = true;
7348 if (!bb_vinfo
7349 && (STMT_VINFO_RELEVANT_P (stmt_info)
7350 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7351 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7352 || vectorizable_conversion (stmt, NULL, NULL, NULL)
7353 || vectorizable_shift (stmt, NULL, NULL, NULL)
7354 || vectorizable_operation (stmt, NULL, NULL, NULL)
7355 || vectorizable_assignment (stmt, NULL, NULL, NULL)
7356 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
7357 || vectorizable_call (stmt, NULL, NULL, NULL)
7358 || vectorizable_store (stmt, NULL, NULL, NULL)
7359 || vectorizable_reduction (stmt, NULL, NULL, NULL)
7360 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
7361 else
7363 if (bb_vinfo)
7364 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7365 || vectorizable_conversion (stmt, NULL, NULL, node)
7366 || vectorizable_shift (stmt, NULL, NULL, node)
7367 || vectorizable_operation (stmt, NULL, NULL, node)
7368 || vectorizable_assignment (stmt, NULL, NULL, node)
7369 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7370 || vectorizable_call (stmt, NULL, NULL, node)
7371 || vectorizable_store (stmt, NULL, NULL, node)
7372 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7375 if (!ok)
7377 if (dump_enabled_p ())
7379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7380 "not vectorized: relevant stmt not ");
7381 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7382 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7385 return false;
7388 if (bb_vinfo)
7389 return true;
7391 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7392 need extra handling, except for vectorizable reductions. */
7393 if (STMT_VINFO_LIVE_P (stmt_info)
7394 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7395 ok = vectorizable_live_operation (stmt, NULL, NULL);
7397 if (!ok)
7399 if (dump_enabled_p ())
7401 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7402 "not vectorized: live stmt not ");
7403 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7404 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7407 return false;
7410 return true;
7414 /* Function vect_transform_stmt.
7416 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7418 bool
7419 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7420 bool *grouped_store, slp_tree slp_node,
7421 slp_instance slp_node_instance)
7423 bool is_store = false;
7424 gimple vec_stmt = NULL;
7425 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7426 bool done;
7428 switch (STMT_VINFO_TYPE (stmt_info))
7430 case type_demotion_vec_info_type:
7431 case type_promotion_vec_info_type:
7432 case type_conversion_vec_info_type:
7433 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7434 gcc_assert (done);
7435 break;
7437 case induc_vec_info_type:
7438 gcc_assert (!slp_node);
7439 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7440 gcc_assert (done);
7441 break;
7443 case shift_vec_info_type:
7444 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7445 gcc_assert (done);
7446 break;
7448 case op_vec_info_type:
7449 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7450 gcc_assert (done);
7451 break;
7453 case assignment_vec_info_type:
7454 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7455 gcc_assert (done);
7456 break;
7458 case load_vec_info_type:
7459 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7460 slp_node_instance);
7461 gcc_assert (done);
7462 break;
7464 case store_vec_info_type:
7465 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7466 gcc_assert (done);
7467 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7469 /* In case of interleaving, the whole chain is vectorized when the
7470 last store in the chain is reached. Store stmts before the last
7471 one are skipped, and their stmt_vec_info shouldn't be freed
7472 meanwhile. */
7473 *grouped_store = true;
7474 if (STMT_VINFO_VEC_STMT (stmt_info))
7475 is_store = true;
7477 else
7478 is_store = true;
7479 break;
7481 case condition_vec_info_type:
7482 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7483 gcc_assert (done);
7484 break;
7486 case call_vec_info_type:
7487 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7488 stmt = gsi_stmt (*gsi);
7489 if (is_gimple_call (stmt)
7490 && gimple_call_internal_p (stmt)
7491 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7492 is_store = true;
7493 break;
7495 case call_simd_clone_vec_info_type:
7496 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7497 stmt = gsi_stmt (*gsi);
7498 break;
7500 case reduc_vec_info_type:
7501 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7502 gcc_assert (done);
7503 break;
7505 default:
7506 if (!STMT_VINFO_LIVE_P (stmt_info))
7508 if (dump_enabled_p ())
7509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7510 "stmt not supported.\n");
7511 gcc_unreachable ();
7515 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7516 is being vectorized, but outside the immediately enclosing loop. */
7517 if (vec_stmt
7518 && STMT_VINFO_LOOP_VINFO (stmt_info)
7519 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7520 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7521 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7522 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7523 || STMT_VINFO_RELEVANT (stmt_info) ==
7524 vect_used_in_outer_by_reduction))
7526 struct loop *innerloop = LOOP_VINFO_LOOP (
7527 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7528 imm_use_iterator imm_iter;
7529 use_operand_p use_p;
7530 tree scalar_dest;
7531 gimple exit_phi;
7533 if (dump_enabled_p ())
7534 dump_printf_loc (MSG_NOTE, vect_location,
7535 "Record the vdef for outer-loop vectorization.\n");
7537 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7538 (to be used when vectorizing outer-loop stmts that use the DEF of
7539 STMT). */
7540 if (gimple_code (stmt) == GIMPLE_PHI)
7541 scalar_dest = PHI_RESULT (stmt);
7542 else
7543 scalar_dest = gimple_assign_lhs (stmt);
7545 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7547 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7549 exit_phi = USE_STMT (use_p);
7550 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7555 /* Handle stmts whose DEF is used outside the loop-nest that is
7556 being vectorized. */
7557 if (STMT_VINFO_LIVE_P (stmt_info)
7558 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7560 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7561 gcc_assert (done);
7564 if (vec_stmt)
7565 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7567 return is_store;
7571 /* Remove a group of stores (for SLP or interleaving), free their
7572 stmt_vec_info. */
7574 void
7575 vect_remove_stores (gimple first_stmt)
7577 gimple next = first_stmt;
7578 gimple tmp;
7579 gimple_stmt_iterator next_si;
7581 while (next)
7583 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7585 tmp = GROUP_NEXT_ELEMENT (stmt_info);
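/* If NEXT is a pattern statement, the statement actually present in the
 IL is the original scalar statement it replaced; remove that one instead. */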
7586 if (is_pattern_stmt_p (stmt_info))
7587 next = STMT_VINFO_RELATED_STMT (stmt_info);
7588 /* Free the attached stmt_vec_info and remove the stmt. */
7589 next_si = gsi_for_stmt (next);
7590 unlink_stmt_vdef (next);
7591 gsi_remove (&next_si, true);
7592 release_defs (next);
7593 free_stmt_vec_info (next);
7594 next = tmp;
7599 /* Function new_stmt_vec_info.
7601 Create and initialize a new stmt_vec_info struct for STMT. */
7603 stmt_vec_info
7604 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7605 bb_vec_info bb_vinfo)
7607 stmt_vec_info res;
7608 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7610 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7611 STMT_VINFO_STMT (res) = stmt;
7612 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7613 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7614 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7615 STMT_VINFO_LIVE_P (res) = false;
7616 STMT_VINFO_VECTYPE (res) = NULL;
7617 STMT_VINFO_VEC_STMT (res) = NULL;
7618 STMT_VINFO_VECTORIZABLE (res) = true;
7619 STMT_VINFO_IN_PATTERN_P (res) = false;
7620 STMT_VINFO_RELATED_STMT (res) = NULL;
7621 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7622 STMT_VINFO_DATA_REF (res) = NULL;
7624 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7625 STMT_VINFO_DR_OFFSET (res) = NULL;
7626 STMT_VINFO_DR_INIT (res) = NULL;
7627 STMT_VINFO_DR_STEP (res) = NULL;
7628 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7630 if (gimple_code (stmt) == GIMPLE_PHI
7631 && is_loop_header_bb_p (gimple_bb (stmt)))
7632 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7633 else
7634 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7636 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7637 STMT_SLP_TYPE (res) = loop_vect;
7638 GROUP_FIRST_ELEMENT (res) = NULL;
7639 GROUP_NEXT_ELEMENT (res) = NULL;
7640 GROUP_SIZE (res) = 0;
7641 GROUP_STORE_COUNT (res) = 0;
7642 GROUP_GAP (res) = 0;
7643 GROUP_SAME_DR_STMT (res) = NULL;
7645 return res;
7649 /* Create a vector for stmt_vec_info. */
7651 void
7652 init_stmt_vec_info_vec (void)
7654 gcc_assert (!stmt_vec_info_vec.exists ());
7655 stmt_vec_info_vec.create (50);
7659 /* Free each recorded stmt_vec_info and release the vector. */
7661 void
7662 free_stmt_vec_info_vec (void)
7664 unsigned int i;
7665 vec_void_p info;
7666 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7667 if (info != NULL)
7668 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7669 gcc_assert (stmt_vec_info_vec.exists ());
7670 stmt_vec_info_vec.release ();
7674 /* Free stmt vectorization related info. */
7676 void
7677 free_stmt_vec_info (gimple stmt)
7679 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7681 if (!stmt_info)
7682 return;
7684 /* Check if this statement has a related "pattern stmt"
7685 (introduced by the vectorizer during the pattern recognition
7686 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7687 too. */
7688 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7690 stmt_vec_info patt_info
7691 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7692 if (patt_info)
7694 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7695 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7696 gimple_set_bb (patt_stmt, NULL);
7697 tree lhs = gimple_get_lhs (patt_stmt);
7698 if (lhs && TREE_CODE (lhs) == SSA_NAME)
7699 release_ssa_name (lhs);
7700 if (seq)
7702 gimple_stmt_iterator si;
7703 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7705 gimple seq_stmt = gsi_stmt (si);
7706 gimple_set_bb (seq_stmt, NULL);
7707 lhs = gimple_get_lhs (seq_stmt);
7708 if (lhs && TREE_CODE (lhs) == SSA_NAME)
7709 release_ssa_name (lhs);
7710 free_stmt_vec_info (seq_stmt);
7713 free_stmt_vec_info (patt_stmt);
7717 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7718 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
7719 set_vinfo_for_stmt (stmt, NULL);
7720 free (stmt_info);
7724 /* Function get_vectype_for_scalar_type_and_size.
7726 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7727 by the target. */
7729 static tree
7730 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7732 machine_mode inner_mode = TYPE_MODE (scalar_type);
7733 machine_mode simd_mode;
7734 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7735 int nunits;
7736 tree vectype;
7738 if (nbytes == 0)
7739 return NULL_TREE;
7741 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7742 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7743 return NULL_TREE;
7745 /* For vector types of elements whose mode precision doesn't
7746 match their type's precision we use an element type of mode
7747 precision. The vectorization routines will have to make sure
7748 they support the proper result truncation/extension.
7749 We also make sure to build vector types with INTEGER_TYPE
7750 component type only. */
7751 if (INTEGRAL_TYPE_P (scalar_type)
7752 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7753 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7754 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7755 TYPE_UNSIGNED (scalar_type));
7757 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7758 When the component mode passes the above test simply use a type
7759 corresponding to that mode. The theory is that any use that
7760 would cause problems with this will disable vectorization anyway. */
7761 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7762 && !INTEGRAL_TYPE_P (scalar_type))
7763 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7765 /* We can't build a vector type of elements with alignment bigger than
7766 their size. */
7767 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7768 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7769 TYPE_UNSIGNED (scalar_type));
7771 /* If we fell back to using the mode, fail if there was
7772 no scalar type for it. */
7773 if (scalar_type == NULL_TREE)
7774 return NULL_TREE;
7776 /* If no size was supplied use the mode the target prefers. Otherwise
7777 lookup a vector mode of the specified size. */
7778 if (size == 0)
7779 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7780 else
7781 simd_mode = mode_for_vector (inner_mode, size / nbytes);
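/* NUNITS is the number of scalar elements that fit in the chosen SIMD
 mode; a mode that cannot hold at least two of them is useless for
 vectorization. */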
7782 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7783 if (nunits <= 1)
7784 return NULL_TREE;
7786 vectype = build_vector_type (scalar_type, nunits);
7788 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7789 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7790 return NULL_TREE;
7792 return vectype;
7795 unsigned int current_vector_size;
7797 /* Function get_vectype_for_scalar_type.
7799 Returns the vector type corresponding to SCALAR_TYPE as supported
7800 by the target. */
7802 tree
7803 get_vectype_for_scalar_type (tree scalar_type)
7805 tree vectype;
7806 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7807 current_vector_size);
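/* Remember the size of the first vector type we pick so that subsequent
 queries use the same vector size. */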
7808 if (vectype
7809 && current_vector_size == 0)
7810 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7811 return vectype;
7814 /* Function get_same_sized_vectype
7816 Returns a vector type corresponding to SCALAR_TYPE with the same size as
7817 VECTOR_TYPE, if supported by the target. */
7819 tree
7820 get_same_sized_vectype (tree scalar_type, tree vector_type)
7822 return get_vectype_for_scalar_type_and_size
7823 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7826 /* Function vect_is_simple_use.
7828 Input:
7829 LOOP_VINFO - the vect info of the loop that is being vectorized.
7830 BB_VINFO - the vect info of the basic block that is being vectorized.
7831 OPERAND - operand of STMT in the loop or bb.
7832 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7834 Returns whether a stmt with OPERAND can be vectorized.
7835 For loops, supportable operands are constants, loop invariants, and operands
7836 that are defined by the current iteration of the loop. Unsupportable
7837 operands are those that are defined by a previous iteration of the loop (as
7838 is the case in reduction/induction computations).
7839 For basic blocks, supportable operands are constants and bb invariants.
7840 For now, operands defined outside the basic block are not supported. */
7842 bool
7843 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7844 bb_vec_info bb_vinfo, gimple *def_stmt,
7845 tree *def, enum vect_def_type *dt)
7847 basic_block bb;
7848 stmt_vec_info stmt_vinfo;
7849 struct loop *loop = NULL;
7851 if (loop_vinfo)
7852 loop = LOOP_VINFO_LOOP (loop_vinfo);
7854 *def_stmt = NULL;
7855 *def = NULL_TREE;
7857 if (dump_enabled_p ())
7859 dump_printf_loc (MSG_NOTE, vect_location,
7860 "vect_is_simple_use: operand ");
7861 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7862 dump_printf (MSG_NOTE, "\n");
7865 if (CONSTANT_CLASS_P (operand))
7867 *dt = vect_constant_def;
7868 return true;
7871 if (is_gimple_min_invariant (operand))
7873 *def = operand;
7874 *dt = vect_external_def;
7875 return true;
7878 if (TREE_CODE (operand) == PAREN_EXPR)
7880 if (dump_enabled_p ())
7881 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7882 operand = TREE_OPERAND (operand, 0);
7885 if (TREE_CODE (operand) != SSA_NAME)
7887 if (dump_enabled_p ())
7888 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7889 "not ssa-name.\n");
7890 return false;
7893 *def_stmt = SSA_NAME_DEF_STMT (operand);
7894 if (*def_stmt == NULL)
7896 if (dump_enabled_p ())
7897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7898 "no def_stmt.\n");
7899 return false;
7902 if (dump_enabled_p ())
7904 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7905 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7908 /* An empty stmt is expected only in the case of a function argument
7909 (otherwise we expect a phi_node or a GIMPLE_ASSIGN). */
7910 if (gimple_nop_p (*def_stmt))
7912 *def = operand;
7913 *dt = vect_external_def;
7914 return true;
7917 bb = gimple_bb (*def_stmt);
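/* A def from outside the loop (or outside the basic block, or a PHI result
 in the basic block) is classified as external; otherwise use the def type
 recorded in its stmt_vec_info. */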
7919 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7920 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7921 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7922 *dt = vect_external_def;
7923 else
7925 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7926 if (!loop && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
7927 *dt = vect_external_def;
7928 else
7929 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7932 if (dump_enabled_p ())
7934 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
7935 switch (*dt)
7937 case vect_uninitialized_def:
7938 dump_printf (MSG_NOTE, "uninitialized\n");
7939 break;
7940 case vect_constant_def:
7941 dump_printf (MSG_NOTE, "constant\n");
7942 break;
7943 case vect_external_def:
7944 dump_printf (MSG_NOTE, "external\n");
7945 break;
7946 case vect_internal_def:
7947 dump_printf (MSG_NOTE, "internal\n");
7948 break;
7949 case vect_induction_def:
7950 dump_printf (MSG_NOTE, "induction\n");
7951 break;
7952 case vect_reduction_def:
7953 dump_printf (MSG_NOTE, "reduction\n");
7954 break;
7955 case vect_double_reduction_def:
7956 dump_printf (MSG_NOTE, "double reduction\n");
7957 break;
7958 case vect_nested_cycle:
7959 dump_printf (MSG_NOTE, "nested cycle\n");
7960 break;
7961 case vect_unknown_def_type:
7962 dump_printf (MSG_NOTE, "unknown\n");
7963 break;
7967 if (*dt == vect_unknown_def_type
7968 || (stmt
7969 && *dt == vect_double_reduction_def
7970 && gimple_code (stmt) != GIMPLE_PHI))
7972 if (dump_enabled_p ())
7973 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7974 "Unsupported pattern.\n");
7975 return false;
7978 switch (gimple_code (*def_stmt))
7980 case GIMPLE_PHI:
7981 *def = gimple_phi_result (*def_stmt);
7982 break;
7984 case GIMPLE_ASSIGN:
7985 *def = gimple_assign_lhs (*def_stmt);
7986 break;
7988 case GIMPLE_CALL:
7989 *def = gimple_call_lhs (*def_stmt);
7990 if (*def != NULL)
7991 break;
7992 /* FALLTHRU */
7993 default:
7994 if (dump_enabled_p ())
7995 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7996 "unsupported defining stmt:\n");
7997 return false;
8000 return true;
8003 /* Function vect_is_simple_use_1.
8005 Same as vect_is_simple_use but also determines the vector operand
8006 type of OPERAND and stores it to *VECTYPE. If the definition of
8007 OPERAND is vect_uninitialized_def, vect_constant_def or
8008 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
8009 is responsible for computing the best suited vector type for the
8010 scalar operand. */
8012 bool
8013 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
8014 bb_vec_info bb_vinfo, gimple *def_stmt,
8015 tree *def, enum vect_def_type *dt, tree *vectype)
8017 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
8018 def, dt))
8019 return false;
8021 /* Now get a vector type if the def is internal, otherwise supply
8022 NULL_TREE and leave it up to the caller to figure out a proper
8023 type for the use stmt. */
8024 if (*dt == vect_internal_def
8025 || *dt == vect_induction_def
8026 || *dt == vect_reduction_def
8027 || *dt == vect_double_reduction_def
8028 || *dt == vect_nested_cycle)
8030 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8032 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8033 && !STMT_VINFO_RELEVANT (stmt_info)
8034 && !STMT_VINFO_LIVE_P (stmt_info))
8035 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8037 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8038 gcc_assert (*vectype != NULL_TREE);
8040 else if (*dt == vect_uninitialized_def
8041 || *dt == vect_constant_def
8042 || *dt == vect_external_def)
8043 *vectype = NULL_TREE;
8044 else
8045 gcc_unreachable ();
8047 return true;
8051 /* Function supportable_widening_operation
8053 Check whether an operation represented by the code CODE is a
8054 widening operation that is supported by the target platform in
8055 vector form (i.e., when operating on arguments of type VECTYPE_IN
8056 producing a result of type VECTYPE_OUT).
8058 Widening operations we currently support are NOP (CONVERT), FLOAT,
8059 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations are supported
8060 by the target platform either directly (via vector tree-codes), or via
8061 target builtins.
8063 Output:
8064 - CODE1 and CODE2 are codes of vector operations to be used when
8065 vectorizing the operation, if available.
8066 - MULTI_STEP_CVT determines the number of required intermediate steps in
8067 case of multi-step conversion (like char->short->int - in that case
8068 MULTI_STEP_CVT will be 1).
8069 - INTERM_TYPES contains the intermediate type required to perform the
8070 widening operation (short in the above example). */
8072 bool
8073 supportable_widening_operation (enum tree_code code, gimple stmt,
8074 tree vectype_out, tree vectype_in,
8075 enum tree_code *code1, enum tree_code *code2,
8076 int *multi_step_cvt,
8077 vec<tree> *interm_types)
8079 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8080 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8081 struct loop *vect_loop = NULL;
8082 machine_mode vec_mode;
8083 enum insn_code icode1, icode2;
8084 optab optab1, optab2;
8085 tree vectype = vectype_in;
8086 tree wide_vectype = vectype_out;
8087 enum tree_code c1, c2;
8088 int i;
8089 tree prev_type, intermediate_type;
8090 machine_mode intermediate_mode, prev_mode;
8091 optab optab3, optab4;
8093 *multi_step_cvt = 0;
8094 if (loop_info)
8095 vect_loop = LOOP_VINFO_LOOP (loop_info);
8097 switch (code)
8099 case WIDEN_MULT_EXPR:
8100 /* The result of a vectorized widening operation usually requires
8101 two vectors (because the widened results do not fit into one vector).
8102 The generated vector results would normally be expected to be
8103 generated in the same order as in the original scalar computation,
8104 i.e. if 8 results are generated in each vector iteration, they are
8105 to be organized as follows:
8106 vect1: [res1,res2,res3,res4],
8107 vect2: [res5,res6,res7,res8].
8109 However, in the special case that the result of the widening
8110 operation is used in a reduction computation only, the order doesn't
8111 matter (because when vectorizing a reduction we change the order of
8112 the computation). Some targets can take advantage of this and
8113 generate more efficient code. For example, targets like Altivec,
8114 that support widen_mult using a sequence of {mult_even,mult_odd}
8115 generate the following vectors:
8116 vect1: [res1,res3,res5,res7],
8117 vect2: [res2,res4,res6,res8].
8119 When vectorizing outer-loops, we execute the inner-loop sequentially
8120 (each vectorized inner-loop iteration contributes to VF outer-loop
8121 iterations in parallel). We therefore don't allow changing the
8122 order of the computation in the inner-loop during outer-loop
8123 vectorization. */
8124 /* TODO: Another case in which order doesn't *really* matter is when we
8125 widen and then contract again, e.g. (short)((int)x * y >> 8).
8126 Normally, pack_trunc performs an even/odd permute, whereas the
8127 repack from an even/odd expansion would be an interleave, which
8128 would be significantly simpler for e.g. AVX2. */
8129 /* In any case, in order to avoid duplicating the code below, recurse
8130 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8131 are properly set up for the caller. If we fail, we'll continue with
8132 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8133 if (vect_loop
8134 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8135 && !nested_in_vect_loop_p (vect_loop, stmt)
8136 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8137 stmt, vectype_out, vectype_in,
8138 code1, code2, multi_step_cvt,
8139 interm_types))
8141 /* Elements in a vector with vect_used_by_reduction property cannot
8142 be reordered if the use chain with this property does not have the
8143 same operation. One such example is s += a * b, where elements
8144 in a and b cannot be reordered. Here we check if the vector defined
8145 by STMT is only directly used in the reduction statement. */
8146 tree lhs = gimple_assign_lhs (stmt);
8147 use_operand_p dummy;
8148 gimple use_stmt;
8149 stmt_vec_info use_stmt_info = NULL;
8150 if (single_imm_use (lhs, &dummy, &use_stmt)
8151 && (use_stmt_info = vinfo_for_stmt (use_stmt))
8152 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
8153 return true;
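/* Otherwise fall back to the LO/HI pair, which keeps the widened results
 in their original order. */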
8155 c1 = VEC_WIDEN_MULT_LO_EXPR;
8156 c2 = VEC_WIDEN_MULT_HI_EXPR;
8157 break;
8159 case VEC_WIDEN_MULT_EVEN_EXPR:
8160 /* Support the recursion induced just above. */
8161 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
8162 c2 = VEC_WIDEN_MULT_ODD_EXPR;
8163 break;
8165 case WIDEN_LSHIFT_EXPR:
8166 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
8167 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
8168 break;
8170 CASE_CONVERT:
8171 c1 = VEC_UNPACK_LO_EXPR;
8172 c2 = VEC_UNPACK_HI_EXPR;
8173 break;
8175 case FLOAT_EXPR:
8176 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
8177 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
8178 break;
8180 case FIX_TRUNC_EXPR:
8181 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8182 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8183 computing the operation. */
8184 return false;
8186 default:
8187 gcc_unreachable ();
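/* On big-endian targets the roles of the HI and LO variants are swapped;
 the even/odd pair needs no adjustment. */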
8190 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
8192 enum tree_code ctmp = c1;
8193 c1 = c2;
8194 c2 = ctmp;
8197 if (code == FIX_TRUNC_EXPR)
8199 /* The signedness is determined from the output operand. */
8200 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8201 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8203 else
8205 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8206 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8209 if (!optab1 || !optab2)
8210 return false;
8212 vec_mode = TYPE_MODE (vectype);
8213 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8214 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8215 return false;
8217 *code1 = c1;
8218 *code2 = c2;
8220 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8221 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8222 return true;
8224 /* Check if it's a multi-step conversion that can be done using intermediate
8225 types. */
8227 prev_type = vectype;
8228 prev_mode = vec_mode;
8230 if (!CONVERT_EXPR_CODE_P (code))
8231 return false;
8233 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8234 intermediate steps in the promotion sequence. We try
8235 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
8236 not. */
8237 interm_types->create (MAX_INTERM_CVT_STEPS);
8238 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8240 intermediate_mode = insn_data[icode1].operand[0].mode;
8241 intermediate_type
8242 = lang_hooks.types.type_for_mode (intermediate_mode,
8243 TYPE_UNSIGNED (prev_type));
8244 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8245 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8247 if (!optab3 || !optab4
8248 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8249 || insn_data[icode1].operand[0].mode != intermediate_mode
8250 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8251 || insn_data[icode2].operand[0].mode != intermediate_mode
8252 || ((icode1 = optab_handler (optab3, intermediate_mode))
8253 == CODE_FOR_nothing)
8254 || ((icode2 = optab_handler (optab4, intermediate_mode))
8255 == CODE_FOR_nothing))
8256 break;
8258 interm_types->quick_push (intermediate_type);
8259 (*multi_step_cvt)++;
8261 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8262 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8263 return true;
8265 prev_type = intermediate_type;
8266 prev_mode = intermediate_mode;
8269 interm_types->release ();
8270 return false;
8274 /* Function supportable_narrowing_operation
8276 Check whether an operation represented by the code CODE is a
8277 narrowing operation that is supported by the target platform in
8278 vector form (i.e., when operating on arguments of type VECTYPE_IN
8279 and producing a result of type VECTYPE_OUT).
8281 Narrowing operations we currently support are NOP (CONVERT) and
8282 FIX_TRUNC. This function checks if these operations are supported by
8283 the target platform directly via vector tree-codes.
8285 Output:
8286 - CODE1 is the code of a vector operation to be used when
8287 vectorizing the operation, if available.
8288 - MULTI_STEP_CVT determines the number of required intermediate steps in
8289 case of multi-step conversion (like int->short->char - in that case
8290 MULTI_STEP_CVT will be 1).
8291 - INTERM_TYPES contains the intermediate type required to perform the
8292 narrowing operation (short in the above example). */
8294 bool
8295 supportable_narrowing_operation (enum tree_code code,
8296 tree vectype_out, tree vectype_in,
8297 enum tree_code *code1, int *multi_step_cvt,
8298 vec<tree> *interm_types)
8300 machine_mode vec_mode;
8301 enum insn_code icode1;
8302 optab optab1, interm_optab;
8303 tree vectype = vectype_in;
8304 tree narrow_vectype = vectype_out;
8305 enum tree_code c1;
8306 tree intermediate_type;
8307 machine_mode intermediate_mode, prev_mode;
8308 int i;
8309 bool uns;
8311 *multi_step_cvt = 0;
8312 switch (code)
8314 CASE_CONVERT:
8315 c1 = VEC_PACK_TRUNC_EXPR;
8316 break;
8318 case FIX_TRUNC_EXPR:
8319 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8320 break;
8322 case FLOAT_EXPR:
8323 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8324 tree code and optabs used for computing the operation. */
8325 return false;
8327 default:
8328 gcc_unreachable ();
8331 if (code == FIX_TRUNC_EXPR)
8332 /* The signedness is determined from the output operand. */
8333 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8334 else
8335 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8337 if (!optab1)
8338 return false;
8340 vec_mode = TYPE_MODE (vectype);
8341 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8342 return false;
8344 *code1 = c1;
8346 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8347 return true;
8349 /* Check if it's a multi-step conversion that can be done using intermediate
8350 types. */
8351 prev_mode = vec_mode;
8352 if (code == FIX_TRUNC_EXPR)
8353 uns = TYPE_UNSIGNED (vectype_out);
8354 else
8355 uns = TYPE_UNSIGNED (vectype);
8357 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8358 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8359 costly than signed. */
8360 if (code == FIX_TRUNC_EXPR && uns)
8362 enum insn_code icode2;
8364 intermediate_type
8365 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8366 interm_optab
8367 = optab_for_tree_code (c1, intermediate_type, optab_default);
8368 if (interm_optab != unknown_optab
8369 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8370 && insn_data[icode1].operand[0].mode
8371 == insn_data[icode2].operand[0].mode)
8373 uns = false;
8374 optab1 = interm_optab;
8375 icode1 = icode2;
8379 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8380 intermediate steps in the narrowing sequence. We try
8381 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8382 interm_types->create (MAX_INTERM_CVT_STEPS);
8383 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8385 intermediate_mode = insn_data[icode1].operand[0].mode;
8386 intermediate_type
8387 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8388 interm_optab
8389 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8390 optab_default);
8391 if (!interm_optab
8392 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8393 || insn_data[icode1].operand[0].mode != intermediate_mode
8394 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8395 == CODE_FOR_nothing))
8396 break;
8398 interm_types->quick_push (intermediate_type);
8399 (*multi_step_cvt)++;
8401 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8402 return true;
8404 prev_mode = intermediate_mode;
8405 optab1 = interm_optab;
8408 interm_types->release ();
8409 return false;