gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "hash-set.h"
28 #include "machmode.h"
29 #include "vec.h"
30 #include "double-int.h"
31 #include "input.h"
32 #include "alias.h"
33 #include "symtab.h"
34 #include "wide-int.h"
35 #include "inchash.h"
36 #include "tree.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "target.h"
40 #include "predict.h"
41 #include "hard-reg-set.h"
42 #include "function.h"
43 #include "dominance.h"
44 #include "cfg.h"
45 #include "basic-block.h"
46 #include "gimple-pretty-print.h"
47 #include "tree-ssa-alias.h"
48 #include "internal-fn.h"
49 #include "tree-eh.h"
50 #include "gimple-expr.h"
51 #include "is-a.h"
52 #include "gimple.h"
53 #include "gimplify.h"
54 #include "gimple-iterator.h"
55 #include "gimplify-me.h"
56 #include "gimple-ssa.h"
57 #include "tree-cfg.h"
58 #include "tree-phinodes.h"
59 #include "ssa-iterators.h"
60 #include "stringpool.h"
61 #include "tree-ssanames.h"
62 #include "tree-ssa-loop-manip.h"
63 #include "cfgloop.h"
64 #include "tree-ssa-loop.h"
65 #include "tree-scalar-evolution.h"
66 #include "hashtab.h"
67 #include "rtl.h"
68 #include "flags.h"
69 #include "statistics.h"
70 #include "real.h"
71 #include "fixed-value.h"
72 #include "insn-config.h"
73 #include "expmed.h"
74 #include "dojump.h"
75 #include "explow.h"
76 #include "calls.h"
77 #include "emit-rtl.h"
78 #include "varasm.h"
79 #include "stmt.h"
80 #include "expr.h"
81 #include "recog.h" /* FIXME: for insn_data */
82 #include "insn-codes.h"
83 #include "optabs.h"
84 #include "diagnostic-core.h"
85 #include "tree-vectorizer.h"
86 #include "hash-map.h"
87 #include "plugin-api.h"
88 #include "ipa-ref.h"
89 #include "cgraph.h"
90 #include "builtins.h"
92 /* For lang_hooks.types.type_for_mode. */
93 #include "langhooks.h"
95 /* Return the vectorized type for the given statement. */
97 tree
98 stmt_vectype (struct _stmt_vec_info *stmt_info)
100 return STMT_VINFO_VECTYPE (stmt_info);
103 /* Return TRUE iff the given statement is in an inner loop relative to
104 the loop being vectorized. */
105 bool
106 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
108 gimple stmt = STMT_VINFO_STMT (stmt_info);
109 basic_block bb = gimple_bb (stmt);
110 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
111 struct loop* loop;
113 if (!loop_vinfo)
114 return false;
116 loop = LOOP_VINFO_LOOP (loop_vinfo);
118 return (bb->loop_father == loop->inner);
121 /* Record the cost of a statement, either by directly informing the
122 target model or by saving it in a vector for later processing.
123 Return a preliminary estimate of the statement's cost. */
125 unsigned
126 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
127 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
128 int misalign, enum vect_cost_model_location where)
130 if (body_cost_vec)
132 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
133 add_stmt_info_to_vec (body_cost_vec, count, kind,
134 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
135 misalign);
136 return (unsigned)
137 (builtin_vectorization_cost (kind, vectype, misalign) * count);
140 else
142 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
143 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
144 void *target_cost_data;
146 if (loop_vinfo)
147 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
148 else
149 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
151 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
152 misalign, where);
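/* Illustrative only (added sketch, not part of the original source): a
   caller that wants the cost kept for later processing passes a cost
   vector, e.g.

     unsigned cost = record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
                                       stmt_info, 0, vect_prologue);

   while passing NULL for the vector hands the statement straight to the
   target's add_stmt_cost hook, as in the else branch above.  */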
156 /* Return a variable of type ELEM_TYPE[NELEMS]. */
158 static tree
159 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
161 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
162 "vect_array");
165 /* ARRAY is an array of vectors created by create_vector_array.
166 Return an SSA_NAME for the vector in index N. The reference
167 is part of the vectorization of STMT and the vector is associated
168 with scalar destination SCALAR_DEST. */
170 static tree
171 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
172 tree array, unsigned HOST_WIDE_INT n)
174 tree vect_type, vect, vect_name, array_ref;
175 gimple new_stmt;
177 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
178 vect_type = TREE_TYPE (TREE_TYPE (array));
179 vect = vect_create_destination_var (scalar_dest, vect_type);
180 array_ref = build4 (ARRAY_REF, vect_type, array,
181 build_int_cst (size_type_node, n),
182 NULL_TREE, NULL_TREE);
184 new_stmt = gimple_build_assign (vect, array_ref);
185 vect_name = make_ssa_name (vect, new_stmt);
186 gimple_assign_set_lhs (new_stmt, vect_name);
187 vect_finish_stmt_generation (stmt, new_stmt, gsi);
189 return vect_name;
192 /* ARRAY is an array of vectors created by create_vector_array.
193 Emit code to store SSA_NAME VECT in index N of the array.
194 The store is part of the vectorization of STMT. */
196 static void
197 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
198 tree array, unsigned HOST_WIDE_INT n)
200 tree array_ref;
201 gimple new_stmt;
203 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
204 build_int_cst (size_type_node, n),
205 NULL_TREE, NULL_TREE);
207 new_stmt = gimple_build_assign (array_ref, vect);
208 vect_finish_stmt_generation (stmt, new_stmt, gsi);
211 /* PTR is a pointer to an array of type TYPE. Return a representation
212 of *PTR. The memory reference replaces those in FIRST_DR
213 (and its group). */
215 static tree
216 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
218 tree mem_ref, alias_ptr_type;
220 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
221 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
222 /* Arrays have the same alignment as their type. */
223 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
224 return mem_ref;
227 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
229 /* Function vect_mark_relevant.
231 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
233 static void
234 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
235 enum vect_relevant relevant, bool live_p,
236 bool used_in_pattern)
238 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
239 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
240 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
241 gimple pattern_stmt;
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "mark relevant %d, live %d.\n", relevant, live_p);
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
 249          may have their own uses that are not in any pattern; in such cases the
250 stmt itself should be marked. */
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
253 bool found = false;
254 if (!used_in_pattern)
256 imm_use_iterator imm_iter;
257 use_operand_p use_p;
258 gimple use_stmt;
259 tree lhs;
260 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
261 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
263 if (is_gimple_assign (stmt))
264 lhs = gimple_assign_lhs (stmt);
265 else
266 lhs = gimple_call_lhs (stmt);
 268       /* This use is a non-pattern use; if LHS has other uses that are
269 pattern uses, we should mark the stmt itself, and not the pattern
270 stmt. */
271 if (lhs && TREE_CODE (lhs) == SSA_NAME)
272 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
274 if (is_gimple_debug (USE_STMT (use_p)))
275 continue;
276 use_stmt = USE_STMT (use_p);
278 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
279 continue;
281 if (vinfo_for_stmt (use_stmt)
282 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
284 found = true;
285 break;
290 if (!found)
292 /* This is the last stmt in a sequence that was detected as a
293 pattern that can potentially be vectorized. Don't mark the stmt
294 as relevant/live because it's not going to be vectorized.
295 Instead mark the pattern-stmt that replaces it. */
297 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
299 if (dump_enabled_p ())
300 dump_printf_loc (MSG_NOTE, vect_location,
301 "last stmt in pattern. don't mark"
302 " relevant/live.\n");
303 stmt_info = vinfo_for_stmt (pattern_stmt);
304 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
305 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
306 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
307 stmt = pattern_stmt;
311 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
312 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
313 STMT_VINFO_RELEVANT (stmt_info) = relevant;
315 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
316 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
318 if (dump_enabled_p ())
319 dump_printf_loc (MSG_NOTE, vect_location,
320 "already marked relevant/live.\n");
321 return;
324 worklist->safe_push (stmt);
328 /* Function vect_stmt_relevant_p.
330 Return true if STMT in loop that is represented by LOOP_VINFO is
331 "relevant for vectorization".
333 A stmt is considered "relevant for vectorization" if:
334 - it has uses outside the loop.
335 - it has vdefs (it alters memory).
 336    - it is a control stmt in the loop (other than the exit condition).
338 CHECKME: what other side effects would the vectorizer allow? */
340 static bool
341 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
342 enum vect_relevant *relevant, bool *live_p)
344 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
345 ssa_op_iter op_iter;
346 imm_use_iterator imm_iter;
347 use_operand_p use_p;
348 def_operand_p def_p;
350 *relevant = vect_unused_in_scope;
351 *live_p = false;
353 /* cond stmt other than loop exit cond. */
354 if (is_ctrl_stmt (stmt)
355 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
356 != loop_exit_ctrl_vec_info_type)
357 *relevant = vect_used_in_scope;
359 /* changing memory. */
360 if (gimple_code (stmt) != GIMPLE_PHI)
361 if (gimple_vdef (stmt)
362 && !gimple_clobber_p (stmt))
364 if (dump_enabled_p ())
365 dump_printf_loc (MSG_NOTE, vect_location,
366 "vec_stmt_relevant_p: stmt has vdefs.\n");
367 *relevant = vect_used_in_scope;
370 /* uses outside the loop. */
371 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
373 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
375 basic_block bb = gimple_bb (USE_STMT (use_p));
376 if (!flow_bb_inside_loop_p (loop, bb))
378 if (dump_enabled_p ())
379 dump_printf_loc (MSG_NOTE, vect_location,
380 "vec_stmt_relevant_p: used out of loop.\n");
382 if (is_gimple_debug (USE_STMT (use_p)))
383 continue;
385 /* We expect all such uses to be in the loop exit phis
 386             (because of loop-closed SSA form).  */
387 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
388 gcc_assert (bb == single_exit (loop)->dest);
390 *live_p = true;
395 return (*live_p || *relevant);
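/* Illustrative only (added, not original source): two ways a statement
   becomes "relevant" or "live" under the checks above.

     int last;
     void f (int *out, int *in, int n)
     {
       int t = 0;
       for (int i = 0; i < n; i++)
         {
           out[i] = in[i] * 2;
           t = in[i] + 1;
         }
       last = t;
     }

   The store to out[i] has a vdef, so *relevant is set; t is used after the
   loop (through the exit phi), so *live_p is set for its defining stmt.  */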
399 /* Function exist_non_indexing_operands_for_use_p
401 USE is one of the uses attached to STMT. Check if USE is
402 used in STMT for anything other than indexing an array. */
404 static bool
405 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
407 tree operand;
408 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
410 /* USE corresponds to some operand in STMT. If there is no data
411 reference in STMT, then any operand that corresponds to USE
412 is not indexing an array. */
413 if (!STMT_VINFO_DATA_REF (stmt_info))
414 return true;
 416   /* STMT has a data_ref. FORNOW this means that it is of one of
417 the following forms:
418 -1- ARRAY_REF = var
419 -2- var = ARRAY_REF
420 (This should have been verified in analyze_data_refs).
422 'var' in the second case corresponds to a def, not a use,
423 so USE cannot correspond to any operands that are not used
424 for array indexing.
426 Therefore, all we need to check is if STMT falls into the
427 first case, and whether var corresponds to USE. */
429 if (!gimple_assign_copy_p (stmt))
431 if (is_gimple_call (stmt)
432 && gimple_call_internal_p (stmt))
433 switch (gimple_call_internal_fn (stmt))
435 case IFN_MASK_STORE:
436 operand = gimple_call_arg (stmt, 3);
437 if (operand == use)
438 return true;
439 /* FALLTHRU */
440 case IFN_MASK_LOAD:
441 operand = gimple_call_arg (stmt, 2);
442 if (operand == use)
443 return true;
444 break;
445 default:
446 break;
448 return false;
451 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
452 return false;
453 operand = gimple_assign_rhs1 (stmt);
454 if (TREE_CODE (operand) != SSA_NAME)
455 return false;
457 if (operand == use)
458 return true;
460 return false;
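/* Illustrative only (added): for a store of the form "a[i] = x_1" the use
   of 'i' exists purely to index the array, so the function above returns
   false for it, while the use of the stored value x_1 (the copy RHS)
   returns true.  */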
465 Function process_use.
467 Inputs:
468 - a USE in STMT in a loop represented by LOOP_VINFO
469 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
470 that defined USE. This is done by calling mark_relevant and passing it
471 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
472 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
473 be performed.
475 Outputs:
476 Generally, LIVE_P and RELEVANT are used to define the liveness and
477 relevance info of the DEF_STMT of this USE:
478 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
479 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
480 Exceptions:
481 - case 1: If USE is used only for address computations (e.g. array indexing),
482 which does not need to be directly vectorized, then the liveness/relevance
483 of the respective DEF_STMT is left unchanged.
484 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 485      skip DEF_STMT because it has already been processed.
486 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
487 be modified accordingly.
489 Return true if everything is as expected. Return false otherwise. */
491 static bool
492 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
493 enum vect_relevant relevant, vec<gimple> *worklist,
494 bool force)
496 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
497 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
498 stmt_vec_info dstmt_vinfo;
499 basic_block bb, def_bb;
500 tree def;
501 gimple def_stmt;
502 enum vect_def_type dt;
504 /* case 1: we are only interested in uses that need to be vectorized. Uses
505 that are used for address computation are not considered relevant. */
506 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
507 return true;
509 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
511 if (dump_enabled_p ())
512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
513 "not vectorized: unsupported use in stmt.\n");
514 return false;
517 if (!def_stmt || gimple_nop_p (def_stmt))
518 return true;
520 def_bb = gimple_bb (def_stmt);
521 if (!flow_bb_inside_loop_p (loop, def_bb))
523 if (dump_enabled_p ())
524 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
525 return true;
528 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
529 DEF_STMT must have already been processed, because this should be the
530 only way that STMT, which is a reduction-phi, was put in the worklist,
531 as there should be no other uses for DEF_STMT in the loop. So we just
532 check that everything is as expected, and we are done. */
533 dstmt_vinfo = vinfo_for_stmt (def_stmt);
534 bb = gimple_bb (stmt);
535 if (gimple_code (stmt) == GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
537 && gimple_code (def_stmt) != GIMPLE_PHI
538 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
539 && bb->loop_father == def_bb->loop_father)
541 if (dump_enabled_p ())
542 dump_printf_loc (MSG_NOTE, vect_location,
543 "reduc-stmt defining reduc-phi in the same nest.\n");
544 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
545 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
546 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
547 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
548 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
549 return true;
552 /* case 3a: outer-loop stmt defining an inner-loop stmt:
553 outer-loop-header-bb:
554 d = def_stmt
555 inner-loop:
556 stmt # use (d)
557 outer-loop-tail-bb:
558 ... */
559 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
561 if (dump_enabled_p ())
562 dump_printf_loc (MSG_NOTE, vect_location,
563 "outer-loop def-stmt defining inner-loop stmt.\n");
565 switch (relevant)
567 case vect_unused_in_scope:
568 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
569 vect_used_in_scope : vect_unused_in_scope;
570 break;
572 case vect_used_in_outer_by_reduction:
573 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
574 relevant = vect_used_by_reduction;
575 break;
577 case vect_used_in_outer:
578 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
579 relevant = vect_used_in_scope;
580 break;
582 case vect_used_in_scope:
583 break;
585 default:
586 gcc_unreachable ();
590 /* case 3b: inner-loop stmt defining an outer-loop stmt:
591 outer-loop-header-bb:
593 inner-loop:
594 d = def_stmt
595 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
596 stmt # use (d) */
597 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
599 if (dump_enabled_p ())
600 dump_printf_loc (MSG_NOTE, vect_location,
601 "inner-loop def-stmt defining outer-loop stmt.\n");
603 switch (relevant)
605 case vect_unused_in_scope:
606 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
607 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
608 vect_used_in_outer_by_reduction : vect_unused_in_scope;
609 break;
611 case vect_used_by_reduction:
612 relevant = vect_used_in_outer_by_reduction;
613 break;
615 case vect_used_in_scope:
616 relevant = vect_used_in_outer;
617 break;
619 default:
620 gcc_unreachable ();
624 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
625 is_pattern_stmt_p (stmt_vinfo));
626 return true;
630 /* Function vect_mark_stmts_to_be_vectorized.
632 Not all stmts in the loop need to be vectorized. For example:
634 for i...
635 for j...
636 1. T0 = i + j
637 2. T1 = a[T0]
639 3. j = j + 1
 641    Stmts 1 and 3 do not need to be vectorized, because loop control and
642 addressing of vectorized data-refs are handled differently.
644 This pass detects such stmts. */
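/* Illustrative only (added, not original source): the loop above written as
   plain C.  Only the memory accesses need vector statements; the index
   computation i + j (stmt 1) and the induction update j = j + 1 (stmt 3)
   become addressing and loop-control code instead.

     void use_rows (int *a, int *r, int n)
     {
       for (int i = 0; i < n; i++)
         for (int j = 0; j < n; j++)
           r[j] = a[i + j];
     }
*/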
646 bool
647 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
649 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
650 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
651 unsigned int nbbs = loop->num_nodes;
652 gimple_stmt_iterator si;
653 gimple stmt;
654 unsigned int i;
655 stmt_vec_info stmt_vinfo;
656 basic_block bb;
657 gimple phi;
658 bool live_p;
659 enum vect_relevant relevant, tmp_relevant;
660 enum vect_def_type def_type;
662 if (dump_enabled_p ())
663 dump_printf_loc (MSG_NOTE, vect_location,
664 "=== vect_mark_stmts_to_be_vectorized ===\n");
666 auto_vec<gimple, 64> worklist;
668 /* 1. Init worklist. */
669 for (i = 0; i < nbbs; i++)
671 bb = bbs[i];
672 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
674 phi = gsi_stmt (si);
675 if (dump_enabled_p ())
677 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
678 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
681 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
682 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
684 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
686 stmt = gsi_stmt (si);
687 if (dump_enabled_p ())
689 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
690 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
693 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
694 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
698 /* 2. Process_worklist */
699 while (worklist.length () > 0)
701 use_operand_p use_p;
702 ssa_op_iter iter;
704 stmt = worklist.pop ();
705 if (dump_enabled_p ())
707 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
708 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
711 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
712 (DEF_STMT) as relevant/irrelevant and live/dead according to the
713 liveness and relevance properties of STMT. */
714 stmt_vinfo = vinfo_for_stmt (stmt);
715 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
716 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
718 /* Generally, the liveness and relevance properties of STMT are
719 propagated as is to the DEF_STMTs of its USEs:
720 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
721 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
723 One exception is when STMT has been identified as defining a reduction
724 variable; in this case we set the liveness/relevance as follows:
725 live_p = false
726 relevant = vect_used_by_reduction
727 This is because we distinguish between two kinds of relevant stmts -
728 those that are used by a reduction computation, and those that are
729 (also) used by a regular computation. This allows us later on to
730 identify stmts that are used solely by a reduction, and therefore the
731 order of the results that they produce does not have to be kept. */
733 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
734 tmp_relevant = relevant;
735 switch (def_type)
737 case vect_reduction_def:
738 switch (tmp_relevant)
740 case vect_unused_in_scope:
741 relevant = vect_used_by_reduction;
742 break;
744 case vect_used_by_reduction:
745 if (gimple_code (stmt) == GIMPLE_PHI)
746 break;
747 /* fall through */
749 default:
750 if (dump_enabled_p ())
751 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
752 "unsupported use of reduction.\n");
753 return false;
756 live_p = false;
757 break;
759 case vect_nested_cycle:
760 if (tmp_relevant != vect_unused_in_scope
761 && tmp_relevant != vect_used_in_outer_by_reduction
762 && tmp_relevant != vect_used_in_outer)
764 if (dump_enabled_p ())
765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
766 "unsupported use of nested cycle.\n");
768 return false;
771 live_p = false;
772 break;
774 case vect_double_reduction_def:
775 if (tmp_relevant != vect_unused_in_scope
776 && tmp_relevant != vect_used_by_reduction)
778 if (dump_enabled_p ())
779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
780 "unsupported use of double reduction.\n");
782 return false;
785 live_p = false;
786 break;
788 default:
789 break;
792 if (is_pattern_stmt_p (stmt_vinfo))
794 /* Pattern statements are not inserted into the code, so
795 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
796 have to scan the RHS or function arguments instead. */
797 if (is_gimple_assign (stmt))
799 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
800 tree op = gimple_assign_rhs1 (stmt);
802 i = 1;
803 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
805 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
806 live_p, relevant, &worklist, false)
807 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
808 live_p, relevant, &worklist, false))
809 return false;
810 i = 2;
812 for (; i < gimple_num_ops (stmt); i++)
814 op = gimple_op (stmt, i);
815 if (TREE_CODE (op) == SSA_NAME
816 && !process_use (stmt, op, loop_vinfo, live_p, relevant,
817 &worklist, false))
818 return false;
821 else if (is_gimple_call (stmt))
823 for (i = 0; i < gimple_call_num_args (stmt); i++)
825 tree arg = gimple_call_arg (stmt, i);
826 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
827 &worklist, false))
828 return false;
832 else
833 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
835 tree op = USE_FROM_PTR (use_p);
836 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
837 &worklist, false))
838 return false;
841 if (STMT_VINFO_GATHER_P (stmt_vinfo))
843 tree off;
844 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
845 gcc_assert (decl);
846 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
847 &worklist, true))
848 return false;
850 } /* while worklist */
852 return true;
856 /* Function vect_model_simple_cost.
858 Models cost for simple operations, i.e. those that only emit ncopies of a
859 single op. Right now, this does not account for multiple insns that could
860 be generated for the single vector op. We will handle that shortly. */
862 void
863 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
864 enum vect_def_type *dt,
865 stmt_vector_for_cost *prologue_cost_vec,
866 stmt_vector_for_cost *body_cost_vec)
868 int i;
869 int inside_cost = 0, prologue_cost = 0;
871 /* The SLP costs were already calculated during SLP tree build. */
872 if (PURE_SLP_STMT (stmt_info))
873 return;
875 /* FORNOW: Assuming maximum 2 args per stmts. */
876 for (i = 0; i < 2; i++)
877 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
878 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
879 stmt_info, 0, vect_prologue);
881 /* Pass the inside-of-loop statements to the target-specific cost model. */
882 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
883 stmt_info, 0, vect_body);
885 if (dump_enabled_p ())
886 dump_printf_loc (MSG_NOTE, vect_location,
887 "vect_model_simple_cost: inside_cost = %d, "
888 "prologue_cost = %d .\n", inside_cost, prologue_cost);
892 /* Model cost for type demotion and promotion operations. PWR is normally
893 zero for single-step promotions and demotions. It will be one if
894 two-step promotion/demotion is required, and so on. Each additional
895 step doubles the number of instructions required. */
897 static void
898 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
899 enum vect_def_type *dt, int pwr)
901 int i, tmp;
902 int inside_cost = 0, prologue_cost = 0;
903 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
904 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
905 void *target_cost_data;
907 /* The SLP costs were already calculated during SLP tree build. */
908 if (PURE_SLP_STMT (stmt_info))
909 return;
911 if (loop_vinfo)
912 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
913 else
914 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
916 for (i = 0; i < pwr + 1; i++)
918 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
919 (i + 1) : i;
920 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
921 vec_promote_demote, stmt_info, 0,
922 vect_body);
925 /* FORNOW: Assuming maximum 2 args per stmts. */
926 for (i = 0; i < 2; i++)
927 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
928 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
929 stmt_info, 0, vect_prologue);
931 if (dump_enabled_p ())
932 dump_printf_loc (MSG_NOTE, vect_location,
933 "vect_model_promotion_demotion_cost: inside_cost = %d, "
934 "prologue_cost = %d .\n", inside_cost, prologue_cost);
937 /* Function vect_cost_group_size
939 For grouped load or store, return the group_size only if it is the first
940 load or store of a group, else return 1. This ensures that group size is
941 only returned once per group. */
943 static int
944 vect_cost_group_size (stmt_vec_info stmt_info)
946 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
948 if (first_stmt == STMT_VINFO_STMT (stmt_info))
949 return GROUP_SIZE (stmt_info);
951 return 1;
955 /* Function vect_model_store_cost
957 Models cost for stores. In the case of grouped accesses, one access
958 has the overhead of the grouped access attributed to it. */
960 void
961 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
962 bool store_lanes_p, enum vect_def_type dt,
963 slp_tree slp_node,
964 stmt_vector_for_cost *prologue_cost_vec,
965 stmt_vector_for_cost *body_cost_vec)
967 int group_size;
968 unsigned int inside_cost = 0, prologue_cost = 0;
969 struct data_reference *first_dr;
970 gimple first_stmt;
972 if (dt == vect_constant_def || dt == vect_external_def)
973 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
974 stmt_info, 0, vect_prologue);
976 /* Grouped access? */
977 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
979 if (slp_node)
981 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
982 group_size = 1;
984 else
986 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
987 group_size = vect_cost_group_size (stmt_info);
990 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
992 /* Not a grouped access. */
993 else
995 group_size = 1;
996 first_dr = STMT_VINFO_DATA_REF (stmt_info);
999 /* We assume that the cost of a single store-lanes instruction is
1000 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
1001 access is instead being provided by a permute-and-store operation,
1002 include the cost of the permutes. */
1003 if (!store_lanes_p && group_size > 1
1004 && !STMT_VINFO_STRIDED_P (stmt_info))
1006       /* Uses high and low interleave or shuffle operations for each
1007 needed permute. */
1008 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1009 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1010 stmt_info, 0, vect_body);
1012 if (dump_enabled_p ())
1013 dump_printf_loc (MSG_NOTE, vect_location,
1014 "vect_model_store_cost: strided group_size = %d .\n",
1015 group_size);
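/* Worked example (added): for a permute-and-store of a group of 4 vectors
   with ncopies == 1, the formula above gives
   nstmts = 1 * ceil_log2 (4) * 4 = 1 * 2 * 4 = 8 vec_perm statements.  */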
1018 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1019 /* Costs of the stores. */
1020 if (STMT_VINFO_STRIDED_P (stmt_info)
1021 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1023 /* N scalar stores plus extracting the elements. */
1024 inside_cost += record_stmt_cost (body_cost_vec,
1025 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1026 scalar_store, stmt_info, 0, vect_body);
1028 else
1029 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1031 if (STMT_VINFO_STRIDED_P (stmt_info))
1032 inside_cost += record_stmt_cost (body_cost_vec,
1033 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1034 vec_to_scalar, stmt_info, 0, vect_body);
1036 if (dump_enabled_p ())
1037 dump_printf_loc (MSG_NOTE, vect_location,
1038 "vect_model_store_cost: inside_cost = %d, "
1039 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1043 /* Calculate cost of DR's memory access. */
1044 void
1045 vect_get_store_cost (struct data_reference *dr, int ncopies,
1046 unsigned int *inside_cost,
1047 stmt_vector_for_cost *body_cost_vec)
1049 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1050 gimple stmt = DR_STMT (dr);
1051 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1053 switch (alignment_support_scheme)
1055 case dr_aligned:
1057 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1058 vector_store, stmt_info, 0,
1059 vect_body);
1061 if (dump_enabled_p ())
1062 dump_printf_loc (MSG_NOTE, vect_location,
1063 "vect_model_store_cost: aligned.\n");
1064 break;
1067 case dr_unaligned_supported:
1069 /* Here, we assign an additional cost for the unaligned store. */
1070 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1071 unaligned_store, stmt_info,
1072 DR_MISALIGNMENT (dr), vect_body);
1073 if (dump_enabled_p ())
1074 dump_printf_loc (MSG_NOTE, vect_location,
1075 "vect_model_store_cost: unaligned supported by "
1076 "hardware.\n");
1077 break;
1080 case dr_unaligned_unsupported:
1082 *inside_cost = VECT_MAX_COST;
1084 if (dump_enabled_p ())
1085 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1086 "vect_model_store_cost: unsupported access.\n");
1087 break;
1090 default:
1091 gcc_unreachable ();
1096 /* Function vect_model_load_cost
1098 Models cost for loads. In the case of grouped accesses, the last access
1099 has the overhead of the grouped access attributed to it. Since unaligned
1100 accesses are supported for loads, we also account for the costs of the
1101 access scheme chosen. */
1103 void
1104 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1105 bool load_lanes_p, slp_tree slp_node,
1106 stmt_vector_for_cost *prologue_cost_vec,
1107 stmt_vector_for_cost *body_cost_vec)
1109 int group_size;
1110 gimple first_stmt;
1111 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1112 unsigned int inside_cost = 0, prologue_cost = 0;
1114 /* Grouped accesses? */
1115 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1116 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1118 group_size = vect_cost_group_size (stmt_info);
1119 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1121 /* Not a grouped access. */
1122 else
1124 group_size = 1;
1125 first_dr = dr;
1128 /* We assume that the cost of a single load-lanes instruction is
1129 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1130 access is instead being provided by a load-and-permute operation,
1131 include the cost of the permutes. */
1132 if (!load_lanes_p && group_size > 1
1133 && !STMT_VINFO_STRIDED_P (stmt_info))
1135       /* Uses even and odd extract operations or shuffle operations
1136 for each needed permute. */
1137 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1138 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1139 stmt_info, 0, vect_body);
1141 if (dump_enabled_p ())
1142 dump_printf_loc (MSG_NOTE, vect_location,
1143 "vect_model_load_cost: strided group_size = %d .\n",
1144 group_size);
1147 /* The loads themselves. */
1148 if (STMT_VINFO_STRIDED_P (stmt_info)
1149 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1151 /* N scalar loads plus gathering them into a vector. */
1152 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1153 inside_cost += record_stmt_cost (body_cost_vec,
1154 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1155 scalar_load, stmt_info, 0, vect_body);
1157 else
1158 vect_get_load_cost (first_dr, ncopies,
1159 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1160 || group_size > 1 || slp_node),
1161 &inside_cost, &prologue_cost,
1162 prologue_cost_vec, body_cost_vec, true);
1163 if (STMT_VINFO_STRIDED_P (stmt_info))
1164 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1165 stmt_info, 0, vect_body);
1167 if (dump_enabled_p ())
1168 dump_printf_loc (MSG_NOTE, vect_location,
1169 "vect_model_load_cost: inside_cost = %d, "
1170 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1174 /* Calculate cost of DR's memory access. */
1175 void
1176 vect_get_load_cost (struct data_reference *dr, int ncopies,
1177 bool add_realign_cost, unsigned int *inside_cost,
1178 unsigned int *prologue_cost,
1179 stmt_vector_for_cost *prologue_cost_vec,
1180 stmt_vector_for_cost *body_cost_vec,
1181 bool record_prologue_costs)
1183 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1184 gimple stmt = DR_STMT (dr);
1185 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1187 switch (alignment_support_scheme)
1189 case dr_aligned:
1191 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1192 stmt_info, 0, vect_body);
1194 if (dump_enabled_p ())
1195 dump_printf_loc (MSG_NOTE, vect_location,
1196 "vect_model_load_cost: aligned.\n");
1198 break;
1200 case dr_unaligned_supported:
1202 /* Here, we assign an additional cost for the unaligned load. */
1203 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1204 unaligned_load, stmt_info,
1205 DR_MISALIGNMENT (dr), vect_body);
1207 if (dump_enabled_p ())
1208 dump_printf_loc (MSG_NOTE, vect_location,
1209 "vect_model_load_cost: unaligned supported by "
1210 "hardware.\n");
1212 break;
1214 case dr_explicit_realign:
1216 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1217 vector_load, stmt_info, 0, vect_body);
1218 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1219 vec_perm, stmt_info, 0, vect_body);
1221 /* FIXME: If the misalignment remains fixed across the iterations of
1222 the containing loop, the following cost should be added to the
1223 prologue costs. */
1224 if (targetm.vectorize.builtin_mask_for_load)
1225 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1226 stmt_info, 0, vect_body);
1228 if (dump_enabled_p ())
1229 dump_printf_loc (MSG_NOTE, vect_location,
1230 "vect_model_load_cost: explicit realign\n");
1232 break;
1234 case dr_explicit_realign_optimized:
1236 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_NOTE, vect_location,
1238 "vect_model_load_cost: unaligned software "
1239 "pipelined.\n");
1241 /* Unaligned software pipeline has a load of an address, an initial
1242 load, and possibly a mask operation to "prime" the loop. However,
1243 if this is an access in a group of loads, which provide grouped
1244 access, then the above cost should only be considered for one
1245 access in the group. Inside the loop, there is a load op
1246 and a realignment op. */
1248 if (add_realign_cost && record_prologue_costs)
1250 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1251 vector_stmt, stmt_info,
1252 0, vect_prologue);
1253 if (targetm.vectorize.builtin_mask_for_load)
1254 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1255 vector_stmt, stmt_info,
1256 0, vect_prologue);
1259 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1260 stmt_info, 0, vect_body);
1261 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1262 stmt_info, 0, vect_body);
1264 if (dump_enabled_p ())
1265 dump_printf_loc (MSG_NOTE, vect_location,
1266 "vect_model_load_cost: explicit realign optimized"
1267 "\n");
1269 break;
1272 case dr_unaligned_unsupported:
1274 *inside_cost = VECT_MAX_COST;
1276 if (dump_enabled_p ())
1277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1278 "vect_model_load_cost: unsupported access.\n");
1279 break;
1282 default:
1283 gcc_unreachable ();
1287 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1288 the loop preheader for the vectorized stmt STMT. */
1290 static void
1291 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1293 if (gsi)
1294 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1295 else
1297 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1298 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1300 if (loop_vinfo)
1302 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1303 basic_block new_bb;
1304 edge pe;
1306 if (nested_in_vect_loop_p (loop, stmt))
1307 loop = loop->inner;
1309 pe = loop_preheader_edge (loop);
1310 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1311 gcc_assert (!new_bb);
1313 else
1315 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1316 basic_block bb;
1317 gimple_stmt_iterator gsi_bb_start;
1319 gcc_assert (bb_vinfo);
1320 bb = BB_VINFO_BB (bb_vinfo);
1321 gsi_bb_start = gsi_after_labels (bb);
1322 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1326 if (dump_enabled_p ())
1328 dump_printf_loc (MSG_NOTE, vect_location,
1329 "created new init_stmt: ");
1330 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1334 /* Function vect_init_vector.
1336 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1337 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1338    a vector type, a vector with all elements equal to VAL is created first.
1339    Place the initialization at GSI if it is not NULL. Otherwise, place the
1340 initialization at the loop preheader.
1341 Return the DEF of INIT_STMT.
1342 It will be used in the vectorization of STMT. */
1344 tree
1345 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1347 tree new_var;
1348 gimple init_stmt;
1349 tree vec_oprnd;
1350 tree new_temp;
1352 if (TREE_CODE (type) == VECTOR_TYPE
1353 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1355 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1357 if (CONSTANT_CLASS_P (val))
1358 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1359 else
1361 new_temp = make_ssa_name (TREE_TYPE (type));
1362 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1363 vect_init_vector_1 (stmt, init_stmt, gsi);
1364 val = new_temp;
1367 val = build_vector_from_val (type, val);
1370 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1371 init_stmt = gimple_build_assign (new_var, val);
1372 new_temp = make_ssa_name (new_var, init_stmt);
1373 gimple_assign_set_lhs (init_stmt, new_temp);
1374 vect_init_vector_1 (stmt, init_stmt, gsi);
1375 vec_oprnd = gimple_assign_lhs (init_stmt);
1376 return vec_oprnd;
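/* Illustrative only (added; the exact SSA names are made up): for a V4SI
   TYPE and VAL == 3 the function above emits something like

     cst__1 = { 3, 3, 3, 3 };

   in the loop preheader (or at GSI when it is non-NULL) and returns the
   SSA name cst__1.  */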
1380 /* Function vect_get_vec_def_for_operand.
1382 OP is an operand in STMT. This function returns a (vector) def that will be
1383 used in the vectorized stmt for STMT.
1385 In the case that OP is an SSA_NAME which is defined in the loop, then
1386 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1388 In case OP is an invariant or constant, a new stmt that creates a vector def
1389 needs to be introduced. */
1391 tree
1392 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1394 tree vec_oprnd;
1395 gimple vec_stmt;
1396 gimple def_stmt;
1397 stmt_vec_info def_stmt_info = NULL;
1398 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1399 unsigned int nunits;
1400 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1401 tree def;
1402 enum vect_def_type dt;
1403 bool is_simple_use;
1404 tree vector_type;
1406 if (dump_enabled_p ())
1408 dump_printf_loc (MSG_NOTE, vect_location,
1409 "vect_get_vec_def_for_operand: ");
1410 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1411 dump_printf (MSG_NOTE, "\n");
1414 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1415 &def_stmt, &def, &dt);
1416 gcc_assert (is_simple_use);
1417 if (dump_enabled_p ())
1419 int loc_printed = 0;
1420 if (def)
1422 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1423 loc_printed = 1;
1424 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1425 dump_printf (MSG_NOTE, "\n");
1427 if (def_stmt)
1429 if (loc_printed)
1430 dump_printf (MSG_NOTE, " def_stmt = ");
1431 else
1432 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1433 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1437 switch (dt)
1439 /* Case 1: operand is a constant. */
1440 case vect_constant_def:
1442 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1443 gcc_assert (vector_type);
1444 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1446 if (scalar_def)
1447 *scalar_def = op;
1449 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1450 if (dump_enabled_p ())
1451 dump_printf_loc (MSG_NOTE, vect_location,
1452 "Create vector_cst. nunits = %d\n", nunits);
1454 return vect_init_vector (stmt, op, vector_type, NULL);
1457 /* Case 2: operand is defined outside the loop - loop invariant. */
1458 case vect_external_def:
1460 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1461 gcc_assert (vector_type);
1463 if (scalar_def)
1464 *scalar_def = def;
1466 /* Create 'vec_inv = {inv,inv,..,inv}' */
1467 if (dump_enabled_p ())
1468 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1470 return vect_init_vector (stmt, def, vector_type, NULL);
1473 /* Case 3: operand is defined inside the loop. */
1474 case vect_internal_def:
1476 if (scalar_def)
1477 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1479 /* Get the def from the vectorized stmt. */
1480 def_stmt_info = vinfo_for_stmt (def_stmt);
1482 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1483 /* Get vectorized pattern statement. */
1484 if (!vec_stmt
1485 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1486 && !STMT_VINFO_RELEVANT (def_stmt_info))
1487 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1488 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1489 gcc_assert (vec_stmt);
1490 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1491 vec_oprnd = PHI_RESULT (vec_stmt);
1492 else if (is_gimple_call (vec_stmt))
1493 vec_oprnd = gimple_call_lhs (vec_stmt);
1494 else
1495 vec_oprnd = gimple_assign_lhs (vec_stmt);
1496 return vec_oprnd;
1499 /* Case 4: operand is defined by a loop header phi - reduction */
1500 case vect_reduction_def:
1501 case vect_double_reduction_def:
1502 case vect_nested_cycle:
1504 struct loop *loop;
1506 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1507 loop = (gimple_bb (def_stmt))->loop_father;
1509 /* Get the def before the loop */
1510 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1511 return get_initial_def_for_reduction (stmt, op, scalar_def);
1514 /* Case 5: operand is defined by loop-header phi - induction. */
1515 case vect_induction_def:
1517 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1519 /* Get the def from the vectorized stmt. */
1520 def_stmt_info = vinfo_for_stmt (def_stmt);
1521 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1522 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1523 vec_oprnd = PHI_RESULT (vec_stmt);
1524 else
1525 vec_oprnd = gimple_get_lhs (vec_stmt);
1526 return vec_oprnd;
1529 default:
1530 gcc_unreachable ();
1535 /* Function vect_get_vec_def_for_stmt_copy
1537 Return a vector-def for an operand. This function is used when the
1538 vectorized stmt to be created (by the caller to this function) is a "copy"
1539 created in case the vectorized result cannot fit in one vector, and several
1540 copies of the vector-stmt are required. In this case the vector-def is
1541 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1542 of the stmt that defines VEC_OPRND.
1543 DT is the type of the vector def VEC_OPRND.
1545 Context:
1546 In case the vectorization factor (VF) is bigger than the number
1547 of elements that can fit in a vectype (nunits), we have to generate
1548 more than one vector stmt to vectorize the scalar stmt. This situation
1549 arises when there are multiple data-types operated upon in the loop; the
1550 smallest data-type determines the VF, and as a result, when vectorizing
1551 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1552 vector stmt (each computing a vector of 'nunits' results, and together
1553 computing 'VF' results in each iteration). This function is called when
1554 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1555 which VF=16 and nunits=4, so the number of copies required is 4):
1557 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1559 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1560 VS1.1: vx.1 = memref1 VS1.2
1561 VS1.2: vx.2 = memref2 VS1.3
1562 VS1.3: vx.3 = memref3
1564 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1565 VSnew.1: vz1 = vx.1 + ... VSnew.2
1566 VSnew.2: vz2 = vx.2 + ... VSnew.3
1567 VSnew.3: vz3 = vx.3 + ...
1569 The vectorization of S1 is explained in vectorizable_load.
1570 The vectorization of S2:
1571 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1572 the function 'vect_get_vec_def_for_operand' is called to
1573 get the relevant vector-def for each operand of S2. For operand x it
1574 returns the vector-def 'vx.0'.
1576 To create the remaining copies of the vector-stmt (VSnew.j), this
1577 function is called to get the relevant vector-def for each operand. It is
1578 obtained from the respective VS1.j stmt, which is recorded in the
1579 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1581 For example, to obtain the vector-def 'vx.1' in order to create the
1582 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1583 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1584 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1585 and return its def ('vx.1').
1586 Overall, to create the above sequence this function will be called 3 times:
1587 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1588 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1589 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1591 tree
1592 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1594 gimple vec_stmt_for_operand;
1595 stmt_vec_info def_stmt_info;
1597 /* Do nothing; can reuse same def. */
1598 if (dt == vect_external_def || dt == vect_constant_def )
1599 return vec_oprnd;
1601 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1602 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1603 gcc_assert (def_stmt_info);
1604 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1605 gcc_assert (vec_stmt_for_operand);
1606 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1607 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1608 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1609 else
1610 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1611 return vec_oprnd;
1615 /* Get vectorized definitions for the operands to create a copy of an original
1616 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1618 static void
1619 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1620 vec<tree> *vec_oprnds0,
1621 vec<tree> *vec_oprnds1)
1623 tree vec_oprnd = vec_oprnds0->pop ();
1625 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1626 vec_oprnds0->quick_push (vec_oprnd);
1628 if (vec_oprnds1 && vec_oprnds1->length ())
1630 vec_oprnd = vec_oprnds1->pop ();
1631 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1632 vec_oprnds1->quick_push (vec_oprnd);
1637 /* Get vectorized definitions for OP0 and OP1.
1638 REDUC_INDEX is the index of reduction operand in case of reduction,
1639 and -1 otherwise. */
1641 void
1642 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1643 vec<tree> *vec_oprnds0,
1644 vec<tree> *vec_oprnds1,
1645 slp_tree slp_node, int reduc_index)
1647 if (slp_node)
1649 int nops = (op1 == NULL_TREE) ? 1 : 2;
1650 auto_vec<tree> ops (nops);
1651 auto_vec<vec<tree> > vec_defs (nops);
1653 ops.quick_push (op0);
1654 if (op1)
1655 ops.quick_push (op1);
1657 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1659 *vec_oprnds0 = vec_defs[0];
1660 if (op1)
1661 *vec_oprnds1 = vec_defs[1];
1663 else
1665 tree vec_oprnd;
1667 vec_oprnds0->create (1);
1668 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1669 vec_oprnds0->quick_push (vec_oprnd);
1671 if (op1)
1673 vec_oprnds1->create (1);
1674 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1675 vec_oprnds1->quick_push (vec_oprnd);
1681 /* Function vect_finish_stmt_generation.
1683 Insert a new stmt. */
1685 void
1686 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1687 gimple_stmt_iterator *gsi)
1689 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1690 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1691 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1693 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1695 if (!gsi_end_p (*gsi)
1696 && gimple_has_mem_ops (vec_stmt))
1698 gimple at_stmt = gsi_stmt (*gsi);
1699 tree vuse = gimple_vuse (at_stmt);
1700 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1702 tree vdef = gimple_vdef (at_stmt);
1703 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1704 /* If we have an SSA vuse and insert a store, update virtual
1705 SSA form to avoid triggering the renamer. Do so only
1706 if we can easily see all uses - which is what almost always
1707 happens with the way vectorized stmts are inserted. */
1708 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1709 && ((is_gimple_assign (vec_stmt)
1710 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1711 || (is_gimple_call (vec_stmt)
1712 && !(gimple_call_flags (vec_stmt)
1713 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1715 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1716 gimple_set_vdef (vec_stmt, new_vdef);
1717 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1721 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1723 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1724 bb_vinfo));
1726 if (dump_enabled_p ())
1728 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1729 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1732 gimple_set_location (vec_stmt, gimple_location (stmt));
1734 /* While EH edges will generally prevent vectorization, stmt might
1735 e.g. be in a must-not-throw region. Ensure newly created stmts
1736 that could throw are part of the same region. */
1737 int lp_nr = lookup_stmt_eh_lp (stmt);
1738 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1739 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1742 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1743 a function declaration if the target has a vectorized version
1744 of the function, or NULL_TREE if the function cannot be vectorized. */
1746 tree
1747 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1749 tree fndecl = gimple_call_fndecl (call);
1751 /* We only handle functions that do not read or clobber memory -- i.e.
1752 const or novops ones. */
1753 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1754 return NULL_TREE;
1756 if (!fndecl
1757 || TREE_CODE (fndecl) != FUNCTION_DECL
1758 || !DECL_BUILT_IN (fndecl))
1759 return NULL_TREE;
1761 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1762 vectype_in);
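/* Illustrative only (added): for a call such as __builtin_sqrtf (x) with
   V4SF input and output vector types, the target hook consulted above may
   return the declaration of a vector square-root builtin; if the target
   provides no vectorized version, NULL_TREE is returned and the call is not
   vectorized through this path.  */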
1766 static tree permute_vec_elements (tree, tree, tree, gimple,
1767 gimple_stmt_iterator *);
1770 /* Function vectorizable_mask_load_store.
1772 Check if STMT performs a conditional load or store that can be vectorized.
1773 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1774 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1775 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1777 static bool
1778 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1779 gimple *vec_stmt, slp_tree slp_node)
1781 tree vec_dest = NULL;
1782 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1783 stmt_vec_info prev_stmt_info;
1784 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1785 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1786 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1787 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1788 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1789 tree elem_type;
1790 gimple new_stmt;
1791 tree dummy;
1792 tree dataref_ptr = NULL_TREE;
1793 gimple ptr_incr;
1794 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1795 int ncopies;
1796 int i, j;
1797 bool inv_p;
1798 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1799 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1800 int gather_scale = 1;
1801 enum vect_def_type gather_dt = vect_unknown_def_type;
1802 bool is_store;
1803 tree mask;
1804 gimple def_stmt;
1805 tree def;
1806 enum vect_def_type dt;
1808 if (slp_node != NULL)
1809 return false;
1811 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1812 gcc_assert (ncopies >= 1);
1814 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1815 mask = gimple_call_arg (stmt, 2);
1816 if (TYPE_PRECISION (TREE_TYPE (mask))
1817 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1818 return false;
1820 /* FORNOW. This restriction should be relaxed. */
1821 if (nested_in_vect_loop && ncopies > 1)
1823 if (dump_enabled_p ())
1824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1825 "multiple types in nested loop.");
1826 return false;
1829 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1830 return false;
1832 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1833 return false;
1835 if (!STMT_VINFO_DATA_REF (stmt_info))
1836 return false;
1838 elem_type = TREE_TYPE (vectype);
1840 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1841 return false;
1843 if (STMT_VINFO_STRIDED_P (stmt_info))
1844 return false;
1846 if (STMT_VINFO_GATHER_P (stmt_info))
1848 gimple def_stmt;
1849 tree def;
1850 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1851 &gather_off, &gather_scale);
1852 gcc_assert (gather_decl);
1853 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1854 &def_stmt, &def, &gather_dt,
1855 &gather_off_vectype))
1857 if (dump_enabled_p ())
1858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1859 "gather index use not simple.");
1860 return false;
1863 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1864 tree masktype
1865 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1866 if (TREE_CODE (masktype) == INTEGER_TYPE)
1868 if (dump_enabled_p ())
1869 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1870 "masked gather with integer mask not supported.");
1871 return false;
1874 else if (tree_int_cst_compare (nested_in_vect_loop
1875 ? STMT_VINFO_DR_STEP (stmt_info)
1876 : DR_STEP (dr), size_zero_node) <= 0)
1877 return false;
1878 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1879 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1880 return false;
1882 if (TREE_CODE (mask) != SSA_NAME)
1883 return false;
1885 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1886 &def_stmt, &def, &dt))
1887 return false;
1889 if (is_store)
1891 tree rhs = gimple_call_arg (stmt, 3);
1892 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1893 &def_stmt, &def, &dt))
1894 return false;
1897 if (!vec_stmt) /* transformation not required. */
1899 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1900 if (is_store)
1901 vect_model_store_cost (stmt_info, ncopies, false, dt,
1902 NULL, NULL, NULL);
1903 else
1904 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1905 return true;
1908 /** Transform. **/
1910 if (STMT_VINFO_GATHER_P (stmt_info))
1912 tree vec_oprnd0 = NULL_TREE, op;
1913 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1914 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1915 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1916 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1917 tree mask_perm_mask = NULL_TREE;
1918 edge pe = loop_preheader_edge (loop);
1919 gimple_seq seq;
1920 basic_block new_bb;
1921 enum { NARROW, NONE, WIDEN } modifier;
1922 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1924 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1925 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1926 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1927 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1928 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1929 scaletype = TREE_VALUE (arglist);
1930 gcc_checking_assert (types_compatible_p (srctype, rettype)
1931 && types_compatible_p (srctype, masktype));
1933 if (nunits == gather_off_nunits)
1934 modifier = NONE;
1935 else if (nunits == gather_off_nunits / 2)
1937 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1938 modifier = WIDEN;
1940 for (i = 0; i < gather_off_nunits; ++i)
1941 sel[i] = i | nunits;
1943 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1945 else if (nunits == gather_off_nunits * 2)
1947 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1948 modifier = NARROW;
1950 for (i = 0; i < nunits; ++i)
1951 sel[i] = i < gather_off_nunits
1952 ? i : i + nunits - gather_off_nunits;
1954 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1955 ncopies *= 2;
1956 for (i = 0; i < nunits; ++i)
1957 sel[i] = i | gather_off_nunits;
1958 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1960 else
1961 gcc_unreachable ();
1963 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1965 ptr = fold_convert (ptrtype, gather_base);
1966 if (!is_gimple_min_invariant (ptr))
1968 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1969 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1970 gcc_assert (!new_bb);
1973 scale = build_int_cst (scaletype, gather_scale);
1975 prev_stmt_info = NULL;
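  /* Generate NCOPIES gather calls: fetch (and, when needed, permute and
     view-convert) the offset and mask vectors, emit the call to GATHER_DECL
     and view-convert its result back to VECTYPE.  For NARROW, pairs of
     results are merged using PERM_MASK.  */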
1976 for (j = 0; j < ncopies; ++j)
1978 if (modifier == WIDEN && (j & 1))
1979 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1980 perm_mask, stmt, gsi);
1981 else if (j == 0)
1982 op = vec_oprnd0
1983 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1984 else
1985 op = vec_oprnd0
1986 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1988 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1990 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1991 == TYPE_VECTOR_SUBPARTS (idxtype));
1992 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1993 var = make_ssa_name (var);
1994 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1995 new_stmt
1996 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1997 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1998 op = var;
2001 if (mask_perm_mask && (j & 1))
2002 mask_op = permute_vec_elements (mask_op, mask_op,
2003 mask_perm_mask, stmt, gsi);
2004 else
2006 if (j == 0)
2007 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2008 else
2010 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
2011 &def_stmt, &def, &dt);
2012 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2015 mask_op = vec_mask;
2016 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2018 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2019 == TYPE_VECTOR_SUBPARTS (masktype));
2020 var = vect_get_new_vect_var (masktype, vect_simple_var,
2021 NULL);
2022 var = make_ssa_name (var);
2023 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2024 new_stmt
2025 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2026 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2027 mask_op = var;
2031 new_stmt
2032 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2033 scale);
2035 if (!useless_type_conversion_p (vectype, rettype))
2037 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2038 == TYPE_VECTOR_SUBPARTS (rettype));
2039 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2040 op = make_ssa_name (var, new_stmt);
2041 gimple_call_set_lhs (new_stmt, op);
2042 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2043 var = make_ssa_name (vec_dest);
2044 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2045 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2047 else
2049 var = make_ssa_name (vec_dest, new_stmt);
2050 gimple_call_set_lhs (new_stmt, var);
2053 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2055 if (modifier == NARROW)
2057 if ((j & 1) == 0)
2059 prev_res = var;
2060 continue;
2062 var = permute_vec_elements (prev_res, var,
2063 perm_mask, stmt, gsi);
2064 new_stmt = SSA_NAME_DEF_STMT (var);
2067 if (prev_stmt_info == NULL)
2068 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2069 else
2070 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2071 prev_stmt_info = vinfo_for_stmt (new_stmt);
2074 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2075 from the IL. */
2076 tree lhs = gimple_call_lhs (stmt);
2077 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2078 set_vinfo_for_stmt (new_stmt, stmt_info);
2079 set_vinfo_for_stmt (stmt, NULL);
2080 STMT_VINFO_STMT (stmt_info) = new_stmt;
2081 gsi_replace (gsi, new_stmt, true);
2082 return true;
2084 else if (is_store)
2086 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2087 prev_stmt_info = NULL;
2088 for (i = 0; i < ncopies; i++)
2090 unsigned align, misalign;
2092 if (i == 0)
2094 tree rhs = gimple_call_arg (stmt, 3);
2095 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2096 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2097 /* We should have caught mismatched types earlier. */
2098 gcc_assert (useless_type_conversion_p (vectype,
2099 TREE_TYPE (vec_rhs)));
2100 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2101 NULL_TREE, &dummy, gsi,
2102 &ptr_incr, false, &inv_p);
2103 gcc_assert (!inv_p);
2105 else
2107 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2108 &def, &dt);
2109 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2110 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2111 &def, &dt);
2112 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2113 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2114 TYPE_SIZE_UNIT (vectype));
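      /* Record what alignment may be assumed for the data pointer: the full
	 vector alignment if the access is known to be aligned, the element
	 alignment if the misalignment is unknown, otherwise the known
	 misalignment.  */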
2117 align = TYPE_ALIGN_UNIT (vectype);
2118 if (aligned_access_p (dr))
2119 misalign = 0;
2120 else if (DR_MISALIGNMENT (dr) == -1)
2122 align = TYPE_ALIGN_UNIT (elem_type);
2123 misalign = 0;
2125 else
2126 misalign = DR_MISALIGNMENT (dr);
2127 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2128 misalign);
2129 new_stmt
2130 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2131 gimple_call_arg (stmt, 1),
2132 vec_mask, vec_rhs);
2133 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2134 if (i == 0)
2135 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2136 else
2137 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2138 prev_stmt_info = vinfo_for_stmt (new_stmt);
2141 else
2143 tree vec_mask = NULL_TREE;
2144 prev_stmt_info = NULL;
2145 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2146 for (i = 0; i < ncopies; i++)
2148 unsigned align, misalign;
2150 if (i == 0)
2152 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2153 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2154 NULL_TREE, &dummy, gsi,
2155 &ptr_incr, false, &inv_p);
2156 gcc_assert (!inv_p);
2158 else
2160 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2161 &def, &dt);
2162 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2163 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2164 TYPE_SIZE_UNIT (vectype));
2167 align = TYPE_ALIGN_UNIT (vectype);
2168 if (aligned_access_p (dr))
2169 misalign = 0;
2170 else if (DR_MISALIGNMENT (dr) == -1)
2172 align = TYPE_ALIGN_UNIT (elem_type);
2173 misalign = 0;
2175 else
2176 misalign = DR_MISALIGNMENT (dr);
2177 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2178 misalign);
2179 new_stmt
2180 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2181 gimple_call_arg (stmt, 1),
2182 vec_mask);
2183 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2184 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2185 if (i == 0)
2186 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2187 else
2188 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2189 prev_stmt_info = vinfo_for_stmt (new_stmt);
2193 if (!is_store)
2195 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2196 from the IL. */
2197 tree lhs = gimple_call_lhs (stmt);
2198 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2199 set_vinfo_for_stmt (new_stmt, stmt_info);
2200 set_vinfo_for_stmt (stmt, NULL);
2201 STMT_VINFO_STMT (stmt_info) = new_stmt;
2202 gsi_replace (gsi, new_stmt, true);
2205 return true;
2209 /* Function vectorizable_call.
2211 Check if GS performs a function call that can be vectorized.
2212 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2213 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2214 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2216 static bool
2217 vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2218 slp_tree slp_node)
2220 gcall *stmt;
2221 tree vec_dest;
2222 tree scalar_dest;
2223 tree op, type;
2224 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2225 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2226 tree vectype_out, vectype_in;
2227 int nunits_in;
2228 int nunits_out;
2229 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2230 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2231 tree fndecl, new_temp, def, rhs_type;
2232 gimple def_stmt;
2233 enum vect_def_type dt[3]
2234 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2235 gimple new_stmt = NULL;
2236 int ncopies, j;
2237 vec<tree> vargs = vNULL;
2238 enum { NARROW, NONE, WIDEN } modifier;
2239 size_t i, nargs;
2240 tree lhs;
2242 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2243 return false;
2245 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2246 return false;
2248 /* Is GS a vectorizable call? */
2249 stmt = dyn_cast <gcall *> (gs);
2250 if (!stmt)
2251 return false;
2253 if (gimple_call_internal_p (stmt)
2254 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2255 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2256 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2257 slp_node);
2259 if (gimple_call_lhs (stmt) == NULL_TREE
2260 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2261 return false;
2263 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2265 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2267 /* Process function arguments. */
2268 rhs_type = NULL_TREE;
2269 vectype_in = NULL_TREE;
2270 nargs = gimple_call_num_args (stmt);
2272 /* Bail out if the function has more than three arguments; we do not have
2273 interesting builtin functions to vectorize with more than two arguments
2274 except for fma.  A call with no arguments is not vectorizable either. */
2275 if (nargs == 0 || nargs > 3)
2276 return false;
2278 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2279 if (gimple_call_internal_p (stmt)
2280 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2282 nargs = 0;
2283 rhs_type = unsigned_type_node;
2286 for (i = 0; i < nargs; i++)
2288 tree opvectype;
2290 op = gimple_call_arg (stmt, i);
2292 /* We can only handle calls with arguments of the same type. */
2293 if (rhs_type
2294 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2298 "argument types differ.\n");
2299 return false;
2301 if (!rhs_type)
2302 rhs_type = TREE_TYPE (op);
2304 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2305 &def_stmt, &def, &dt[i], &opvectype))
2307 if (dump_enabled_p ())
2308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2309 "use not simple.\n");
2310 return false;
2313 if (!vectype_in)
2314 vectype_in = opvectype;
2315 else if (opvectype
2316 && opvectype != vectype_in)
2318 if (dump_enabled_p ())
2319 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2320 "argument vector types differ.\n");
2321 return false;
2324 /* If all arguments are external or constant defs, use a vector type with
2325 the same size as the output vector type. */
2326 if (!vectype_in)
2327 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2328 if (vec_stmt)
2329 gcc_assert (vectype_in);
2330 if (!vectype_in)
2332 if (dump_enabled_p ())
2334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2335 "no vectype for scalar type ");
2336 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2337 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2340 return false;
2343 /* FORNOW */
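  /* Determine whether the call narrows (the output vector has twice as many
     elements as the input vectors), widens (half as many) or keeps the same
     number of elements; other ratios are not handled.  */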
2344 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2345 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2346 if (nunits_in == nunits_out / 2)
2347 modifier = NARROW;
2348 else if (nunits_out == nunits_in)
2349 modifier = NONE;
2350 else if (nunits_out == nunits_in / 2)
2351 modifier = WIDEN;
2352 else
2353 return false;
2355 /* For now, we only vectorize functions if a target specific builtin
2356 is available. TODO -- in some cases, it might be profitable to
2357 insert the calls for pieces of the vector, in order to be able
2358 to vectorize other operations in the loop. */
2359 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2360 if (fndecl == NULL_TREE)
2362 if (gimple_call_internal_p (stmt)
2363 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2364 && !slp_node
2365 && loop_vinfo
2366 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2367 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2368 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2369 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2371 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2372 { 0, 1, 2, ... vf - 1 } vector. */
2373 gcc_assert (nargs == 0);
2375 else
2377 if (dump_enabled_p ())
2378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2379 "function is not vectorizable.\n");
2380 return false;
2384 gcc_assert (!gimple_vuse (stmt));
2386 if (slp_node || PURE_SLP_STMT (stmt_info))
2387 ncopies = 1;
2388 else if (modifier == NARROW)
2389 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2390 else
2391 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2393 /* Sanity check: make sure that at least one copy of the vectorized stmt
2394 needs to be generated. */
2395 gcc_assert (ncopies >= 1);
2397 if (!vec_stmt) /* transformation not required. */
2399 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2400 if (dump_enabled_p ())
2401 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2402 "\n");
2403 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2404 return true;
2407 /** Transform. **/
2409 if (dump_enabled_p ())
2410 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2412 /* Handle def. */
2413 scalar_dest = gimple_call_lhs (stmt);
2414 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2416 prev_stmt_info = NULL;
2417 switch (modifier)
2419 case NONE:
2420 for (j = 0; j < ncopies; ++j)
2422 /* Build argument list for the vectorized call. */
2423 if (j == 0)
2424 vargs.create (nargs);
2425 else
2426 vargs.truncate (0);
2428 if (slp_node)
2430 auto_vec<vec<tree> > vec_defs (nargs);
2431 vec<tree> vec_oprnds0;
2433 for (i = 0; i < nargs; i++)
2434 vargs.quick_push (gimple_call_arg (stmt, i));
2435 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2436 vec_oprnds0 = vec_defs[0];
2438 /* Arguments are ready. Create the new vector stmt. */
2439 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2441 size_t k;
2442 for (k = 0; k < nargs; k++)
2444 vec<tree> vec_oprndsk = vec_defs[k];
2445 vargs[k] = vec_oprndsk[i];
2447 new_stmt = gimple_build_call_vec (fndecl, vargs);
2448 new_temp = make_ssa_name (vec_dest, new_stmt);
2449 gimple_call_set_lhs (new_stmt, new_temp);
2450 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2451 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2454 for (i = 0; i < nargs; i++)
2456 vec<tree> vec_oprndsi = vec_defs[i];
2457 vec_oprndsi.release ();
2459 continue;
2462 for (i = 0; i < nargs; i++)
2464 op = gimple_call_arg (stmt, i);
2465 if (j == 0)
2466 vec_oprnd0
2467 = vect_get_vec_def_for_operand (op, stmt, NULL);
2468 else
2470 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2471 vec_oprnd0
2472 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2475 vargs.quick_push (vec_oprnd0);
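	  /* IFN_GOMP_SIMD_LANE needs no call; materialize the lane-id vector
	     { j * nunits_out, ..., j * nunits_out + nunits_out - 1 }
	     directly.  */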
2478 if (gimple_call_internal_p (stmt)
2479 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2481 tree *v = XALLOCAVEC (tree, nunits_out);
2482 int k;
2483 for (k = 0; k < nunits_out; ++k)
2484 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2485 tree cst = build_vector (vectype_out, v);
2486 tree new_var
2487 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2488 gimple init_stmt = gimple_build_assign (new_var, cst);
2489 new_temp = make_ssa_name (new_var, init_stmt);
2490 gimple_assign_set_lhs (init_stmt, new_temp);
2491 vect_init_vector_1 (stmt, init_stmt, NULL);
2492 new_temp = make_ssa_name (vec_dest);
2493 new_stmt = gimple_build_assign (new_temp,
2494 gimple_assign_lhs (init_stmt));
2496 else
2498 new_stmt = gimple_build_call_vec (fndecl, vargs);
2499 new_temp = make_ssa_name (vec_dest, new_stmt);
2500 gimple_call_set_lhs (new_stmt, new_temp);
2502 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2504 if (j == 0)
2505 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2506 else
2507 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2509 prev_stmt_info = vinfo_for_stmt (new_stmt);
2512 break;
2514 case NARROW:
2515 for (j = 0; j < ncopies; ++j)
2517 /* Build argument list for the vectorized call. */
2518 if (j == 0)
2519 vargs.create (nargs * 2);
2520 else
2521 vargs.truncate (0);
2523 if (slp_node)
2525 auto_vec<vec<tree> > vec_defs (nargs);
2526 vec<tree> vec_oprnds0;
2528 for (i = 0; i < nargs; i++)
2529 vargs.quick_push (gimple_call_arg (stmt, i));
2530 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2531 vec_oprnds0 = vec_defs[0];
2533 /* Arguments are ready. Create the new vector stmt. */
2534 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2536 size_t k;
2537 vargs.truncate (0);
2538 for (k = 0; k < nargs; k++)
2540 vec<tree> vec_oprndsk = vec_defs[k];
2541 vargs.quick_push (vec_oprndsk[i]);
2542 vargs.quick_push (vec_oprndsk[i + 1]);
2544 new_stmt = gimple_build_call_vec (fndecl, vargs);
2545 new_temp = make_ssa_name (vec_dest, new_stmt);
2546 gimple_call_set_lhs (new_stmt, new_temp);
2547 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2548 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2551 for (i = 0; i < nargs; i++)
2553 vec<tree> vec_oprndsi = vec_defs[i];
2554 vec_oprndsi.release ();
2556 continue;
2559 for (i = 0; i < nargs; i++)
2561 op = gimple_call_arg (stmt, i);
2562 if (j == 0)
2564 vec_oprnd0
2565 = vect_get_vec_def_for_operand (op, stmt, NULL);
2566 vec_oprnd1
2567 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2569 else
2571 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2572 vec_oprnd0
2573 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2574 vec_oprnd1
2575 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2578 vargs.quick_push (vec_oprnd0);
2579 vargs.quick_push (vec_oprnd1);
2582 new_stmt = gimple_build_call_vec (fndecl, vargs);
2583 new_temp = make_ssa_name (vec_dest, new_stmt);
2584 gimple_call_set_lhs (new_stmt, new_temp);
2585 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2587 if (j == 0)
2588 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2589 else
2590 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2592 prev_stmt_info = vinfo_for_stmt (new_stmt);
2595 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2597 break;
2599 case WIDEN:
2600 /* No current target implements this case. */
2601 return false;
2604 vargs.release ();
2606 /* The call in STMT might prevent it from being removed in dce.
2607 We however cannot remove it here, due to the way the ssa name
2608 it defines is mapped to the new definition. So just replace the
2609 rhs of the statement with something harmless. */
2611 if (slp_node)
2612 return true;
2614 type = TREE_TYPE (scalar_dest);
2615 if (is_pattern_stmt_p (stmt_info))
2616 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2617 else
2618 lhs = gimple_call_lhs (stmt);
2619 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2620 set_vinfo_for_stmt (new_stmt, stmt_info);
2621 set_vinfo_for_stmt (stmt, NULL);
2622 STMT_VINFO_STMT (stmt_info) = new_stmt;
2623 gsi_replace (gsi, new_stmt, false);
2625 return true;
2629 struct simd_call_arg_info
2631 tree vectype;
2632 tree op;
2633 enum vect_def_type dt;
2634 HOST_WIDE_INT linear_step;
2635 unsigned int align;
2638 /* Function vectorizable_simd_clone_call.
2640 Check if STMT performs a function call that can be vectorized
2641 by calling a simd clone of the function.
2642 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2643 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2644 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2646 static bool
2647 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2648 gimple *vec_stmt, slp_tree slp_node)
2650 tree vec_dest;
2651 tree scalar_dest;
2652 tree op, type;
2653 tree vec_oprnd0 = NULL_TREE;
2654 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2655 tree vectype;
2656 unsigned int nunits;
2657 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2658 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2659 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2660 tree fndecl, new_temp, def;
2661 gimple def_stmt;
2662 gimple new_stmt = NULL;
2663 int ncopies, j;
2664 vec<simd_call_arg_info> arginfo = vNULL;
2665 vec<tree> vargs = vNULL;
2666 size_t i, nargs;
2667 tree lhs, rtype, ratype;
2668 vec<constructor_elt, va_gc> *ret_ctor_elts;
2670 /* Is STMT a vectorizable call? */
2671 if (!is_gimple_call (stmt))
2672 return false;
2674 fndecl = gimple_call_fndecl (stmt);
2675 if (fndecl == NULL_TREE)
2676 return false;
2678 struct cgraph_node *node = cgraph_node::get (fndecl);
2679 if (node == NULL || node->simd_clones == NULL)
2680 return false;
2682 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2683 return false;
2685 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2686 return false;
2688 if (gimple_call_lhs (stmt)
2689 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2690 return false;
2692 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2694 vectype = STMT_VINFO_VECTYPE (stmt_info);
2696 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2697 return false;
2699 /* FORNOW */
2700 if (slp_node || PURE_SLP_STMT (stmt_info))
2701 return false;
2703 /* Process function arguments. */
2704 nargs = gimple_call_num_args (stmt);
2706 /* Bail out if the function has zero arguments. */
2707 if (nargs == 0)
2708 return false;
2710 arginfo.create (nargs);
2712 for (i = 0; i < nargs; i++)
2714 simd_call_arg_info thisarginfo;
2715 affine_iv iv;
2717 thisarginfo.linear_step = 0;
2718 thisarginfo.align = 0;
2719 thisarginfo.op = NULL_TREE;
2721 op = gimple_call_arg (stmt, i);
2722 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2723 &def_stmt, &def, &thisarginfo.dt,
2724 &thisarginfo.vectype)
2725 || thisarginfo.dt == vect_uninitialized_def)
2727 if (dump_enabled_p ())
2728 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2729 "use not simple.\n");
2730 arginfo.release ();
2731 return false;
2734 if (thisarginfo.dt == vect_constant_def
2735 || thisarginfo.dt == vect_external_def)
2736 gcc_assert (thisarginfo.vectype == NULL_TREE);
2737 else
2738 gcc_assert (thisarginfo.vectype != NULL_TREE);
2740 /* For linear arguments, the analyze phase should have saved
2741 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2742 if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2743 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2745 gcc_assert (vec_stmt);
2746 thisarginfo.linear_step
2747 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2748 thisarginfo.op
2749 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2750 /* If the loop has been peeled for alignment, we need to adjust it. */
2751 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2752 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2753 if (n1 != n2)
2755 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2756 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
2757 tree opt = TREE_TYPE (thisarginfo.op);
2758 bias = fold_convert (TREE_TYPE (step), bias);
2759 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2760 thisarginfo.op
2761 = fold_build2 (POINTER_TYPE_P (opt)
2762 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2763 thisarginfo.op, bias);
2766 else if (!vec_stmt
2767 && thisarginfo.dt != vect_constant_def
2768 && thisarginfo.dt != vect_external_def
2769 && loop_vinfo
2770 && TREE_CODE (op) == SSA_NAME
2771 && simple_iv (loop, loop_containing_stmt (stmt), op,
2772 &iv, false)
2773 && tree_fits_shwi_p (iv.step))
2775 thisarginfo.linear_step = tree_to_shwi (iv.step);
2776 thisarginfo.op = iv.base;
2778 else if ((thisarginfo.dt == vect_constant_def
2779 || thisarginfo.dt == vect_external_def)
2780 && POINTER_TYPE_P (TREE_TYPE (op)))
2781 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2783 arginfo.quick_push (thisarginfo);
2786 unsigned int badness = 0;
2787 struct cgraph_node *bestn = NULL;
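  /* Pick the SIMD clone to use: either the one recorded during analysis in
     STMT_VINFO_SIMD_CLONE_INFO, or otherwise the usable clone with the lowest
     badness score (penalizing shorter simdlen, inbranch clones, target costs
     and argument mismatches).  */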
2788 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2789 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2790 else
2791 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2792 n = n->simdclone->next_clone)
2794 unsigned int this_badness = 0;
2795 if (n->simdclone->simdlen
2796 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2797 || n->simdclone->nargs != nargs)
2798 continue;
2799 if (n->simdclone->simdlen
2800 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2801 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2802 - exact_log2 (n->simdclone->simdlen)) * 1024;
2803 if (n->simdclone->inbranch)
2804 this_badness += 2048;
2805 int target_badness = targetm.simd_clone.usable (n);
2806 if (target_badness < 0)
2807 continue;
2808 this_badness += target_badness * 512;
2809 /* FORNOW: Have to add code to add the mask argument. */
2810 if (n->simdclone->inbranch)
2811 continue;
2812 for (i = 0; i < nargs; i++)
2814 switch (n->simdclone->args[i].arg_type)
2816 case SIMD_CLONE_ARG_TYPE_VECTOR:
2817 if (!useless_type_conversion_p
2818 (n->simdclone->args[i].orig_type,
2819 TREE_TYPE (gimple_call_arg (stmt, i))))
2820 i = -1;
2821 else if (arginfo[i].dt == vect_constant_def
2822 || arginfo[i].dt == vect_external_def
2823 || arginfo[i].linear_step)
2824 this_badness += 64;
2825 break;
2826 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2827 if (arginfo[i].dt != vect_constant_def
2828 && arginfo[i].dt != vect_external_def)
2829 i = -1;
2830 break;
2831 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2832 if (arginfo[i].dt == vect_constant_def
2833 || arginfo[i].dt == vect_external_def
2834 || (arginfo[i].linear_step
2835 != n->simdclone->args[i].linear_step))
2836 i = -1;
2837 break;
2838 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2839 /* FORNOW */
2840 i = -1;
2841 break;
2842 case SIMD_CLONE_ARG_TYPE_MASK:
2843 gcc_unreachable ();
2845 if (i == (size_t) -1)
2846 break;
2847 if (n->simdclone->args[i].alignment > arginfo[i].align)
2849 i = -1;
2850 break;
2852 if (arginfo[i].align)
2853 this_badness += (exact_log2 (arginfo[i].align)
2854 - exact_log2 (n->simdclone->args[i].alignment));
2856 if (i == (size_t) -1)
2857 continue;
2858 if (bestn == NULL || this_badness < badness)
2860 bestn = n;
2861 badness = this_badness;
2865 if (bestn == NULL)
2867 arginfo.release ();
2868 return false;
2871 for (i = 0; i < nargs; i++)
2872 if ((arginfo[i].dt == vect_constant_def
2873 || arginfo[i].dt == vect_external_def)
2874 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2876 arginfo[i].vectype
2877 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2878 i)));
2879 if (arginfo[i].vectype == NULL
2880 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2881 > bestn->simdclone->simdlen))
2883 arginfo.release ();
2884 return false;
2888 fndecl = bestn->decl;
2889 nunits = bestn->simdclone->simdlen;
2890 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2892 /* If the function isn't const, only allow it in simd loops where the user
2893 has asserted that at least nunits consecutive iterations can be
2894 performed using SIMD instructions. */
2895 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2896 && gimple_vuse (stmt))
2898 arginfo.release ();
2899 return false;
2902 /* Sanity check: make sure that at least one copy of the vectorized stmt
2903 needs to be generated. */
2904 gcc_assert (ncopies >= 1);
2906 if (!vec_stmt) /* transformation not required. */
2908 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2909 for (i = 0; i < nargs; i++)
2910 if (bestn->simdclone->args[i].arg_type
2911 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2913 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
2914 + 1);
2915 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2916 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2917 ? size_type_node : TREE_TYPE (arginfo[i].op);
2918 tree ls = build_int_cst (lst, arginfo[i].linear_step);
2919 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2921 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2922 if (dump_enabled_p ())
2923 dump_printf_loc (MSG_NOTE, vect_location,
2924 "=== vectorizable_simd_clone_call ===\n");
2925 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2926 arginfo.release ();
2927 return true;
2930 /** Transform. **/
2932 if (dump_enabled_p ())
2933 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2935 /* Handle def. */
2936 scalar_dest = gimple_call_lhs (stmt);
2937 vec_dest = NULL_TREE;
2938 rtype = NULL_TREE;
2939 ratype = NULL_TREE;
2940 if (scalar_dest)
2942 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2943 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2944 if (TREE_CODE (rtype) == ARRAY_TYPE)
2946 ratype = rtype;
2947 rtype = TREE_TYPE (ratype);
2951 prev_stmt_info = NULL;
2952 for (j = 0; j < ncopies; ++j)
2954 /* Build argument list for the vectorized call. */
2955 if (j == 0)
2956 vargs.create (nargs);
2957 else
2958 vargs.truncate (0);
2960 for (i = 0; i < nargs; i++)
2962 unsigned int k, l, m, o;
2963 tree atype;
2964 op = gimple_call_arg (stmt, i);
2965 switch (bestn->simdclone->args[i].arg_type)
2967 case SIMD_CLONE_ARG_TYPE_VECTOR:
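	      /* The clone's vector argument type ATYPE may be narrower or
		 wider than the vector definition of OP; extract pieces with
		 BIT_FIELD_REF or combine several defs with a CONSTRUCTOR so
		 that every pushed argument has type ATYPE.  */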
2968 atype = bestn->simdclone->args[i].vector_type;
2969 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2970 for (m = j * o; m < (j + 1) * o; m++)
2972 if (TYPE_VECTOR_SUBPARTS (atype)
2973 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2975 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2976 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2977 / TYPE_VECTOR_SUBPARTS (atype));
2978 gcc_assert ((k & (k - 1)) == 0);
2979 if (m == 0)
2980 vec_oprnd0
2981 = vect_get_vec_def_for_operand (op, stmt, NULL);
2982 else
2984 vec_oprnd0 = arginfo[i].op;
2985 if ((m & (k - 1)) == 0)
2986 vec_oprnd0
2987 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2988 vec_oprnd0);
2990 arginfo[i].op = vec_oprnd0;
2991 vec_oprnd0
2992 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2993 size_int (prec),
2994 bitsize_int ((m & (k - 1)) * prec));
2995 new_stmt
2996 = gimple_build_assign (make_ssa_name (atype),
2997 vec_oprnd0);
2998 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2999 vargs.safe_push (gimple_assign_lhs (new_stmt));
3001 else
3003 k = (TYPE_VECTOR_SUBPARTS (atype)
3004 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3005 gcc_assert ((k & (k - 1)) == 0);
3006 vec<constructor_elt, va_gc> *ctor_elts;
3007 if (k != 1)
3008 vec_alloc (ctor_elts, k);
3009 else
3010 ctor_elts = NULL;
3011 for (l = 0; l < k; l++)
3013 if (m == 0 && l == 0)
3014 vec_oprnd0
3015 = vect_get_vec_def_for_operand (op, stmt, NULL);
3016 else
3017 vec_oprnd0
3018 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3019 arginfo[i].op);
3020 arginfo[i].op = vec_oprnd0;
3021 if (k == 1)
3022 break;
3023 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3024 vec_oprnd0);
3026 if (k == 1)
3027 vargs.safe_push (vec_oprnd0);
3028 else
3030 vec_oprnd0 = build_constructor (atype, ctor_elts);
3031 new_stmt
3032 = gimple_build_assign (make_ssa_name (atype),
3033 vec_oprnd0);
3034 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3035 vargs.safe_push (gimple_assign_lhs (new_stmt));
3039 break;
3040 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3041 vargs.safe_push (op);
3042 break;
3043 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
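	      /* For a linear argument, the first copy creates a PHI in the
		 loop header whose latch value advances by
		 LINEAR_STEP * NCOPIES * NUNITS per iteration; later copies
		 simply add J * NUNITS * LINEAR_STEP to the PHI result.  */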
3044 if (j == 0)
3046 gimple_seq stmts;
3047 arginfo[i].op
3048 = force_gimple_operand (arginfo[i].op, &stmts, true,
3049 NULL_TREE);
3050 if (stmts != NULL)
3052 basic_block new_bb;
3053 edge pe = loop_preheader_edge (loop);
3054 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3055 gcc_assert (!new_bb);
3057 tree phi_res = copy_ssa_name (op);
3058 gphi *new_phi = create_phi_node (phi_res, loop->header);
3059 set_vinfo_for_stmt (new_phi,
3060 new_stmt_vec_info (new_phi, loop_vinfo,
3061 NULL));
3062 add_phi_arg (new_phi, arginfo[i].op,
3063 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3064 enum tree_code code
3065 = POINTER_TYPE_P (TREE_TYPE (op))
3066 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3067 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3068 ? sizetype : TREE_TYPE (op);
3069 widest_int cst
3070 = wi::mul (bestn->simdclone->args[i].linear_step,
3071 ncopies * nunits);
3072 tree tcst = wide_int_to_tree (type, cst);
3073 tree phi_arg = copy_ssa_name (op);
3074 new_stmt
3075 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3076 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3077 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3078 set_vinfo_for_stmt (new_stmt,
3079 new_stmt_vec_info (new_stmt, loop_vinfo,
3080 NULL));
3081 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3082 UNKNOWN_LOCATION);
3083 arginfo[i].op = phi_res;
3084 vargs.safe_push (phi_res);
3086 else
3088 enum tree_code code
3089 = POINTER_TYPE_P (TREE_TYPE (op))
3090 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3091 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3092 ? sizetype : TREE_TYPE (op);
3093 widest_int cst
3094 = wi::mul (bestn->simdclone->args[i].linear_step,
3095 j * nunits);
3096 tree tcst = wide_int_to_tree (type, cst);
3097 new_temp = make_ssa_name (TREE_TYPE (op));
3098 new_stmt = gimple_build_assign (new_temp, code,
3099 arginfo[i].op, tcst);
3100 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3101 vargs.safe_push (new_temp);
3103 break;
3104 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3105 default:
3106 gcc_unreachable ();
3110 new_stmt = gimple_build_call_vec (fndecl, vargs);
3111 if (vec_dest)
3113 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3114 if (ratype)
3115 new_temp = create_tmp_var (ratype);
3116 else if (TYPE_VECTOR_SUBPARTS (vectype)
3117 == TYPE_VECTOR_SUBPARTS (rtype))
3118 new_temp = make_ssa_name (vec_dest, new_stmt);
3119 else
3120 new_temp = make_ssa_name (rtype, new_stmt);
3121 gimple_call_set_lhs (new_stmt, new_temp);
3123 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3125 if (vec_dest)
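      /* The clone may return its result in an array (RATYPE) or in a vector
	 that is narrower or wider than VECTYPE; split it with BIT_FIELD_REFs
	 or MEM_REFs, or accumulate pieces into a CONSTRUCTOR, as needed.  */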
3127 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3129 unsigned int k, l;
3130 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3131 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3132 gcc_assert ((k & (k - 1)) == 0);
3133 for (l = 0; l < k; l++)
3135 tree t;
3136 if (ratype)
3138 t = build_fold_addr_expr (new_temp);
3139 t = build2 (MEM_REF, vectype, t,
3140 build_int_cst (TREE_TYPE (t),
3141 l * prec / BITS_PER_UNIT));
3143 else
3144 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3145 size_int (prec), bitsize_int (l * prec));
3146 new_stmt
3147 = gimple_build_assign (make_ssa_name (vectype), t);
3148 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3149 if (j == 0 && l == 0)
3150 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3151 else
3152 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3154 prev_stmt_info = vinfo_for_stmt (new_stmt);
3157 if (ratype)
3159 tree clobber = build_constructor (ratype, NULL);
3160 TREE_THIS_VOLATILE (clobber) = 1;
3161 new_stmt = gimple_build_assign (new_temp, clobber);
3162 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3164 continue;
3166 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3168 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3169 / TYPE_VECTOR_SUBPARTS (rtype));
3170 gcc_assert ((k & (k - 1)) == 0);
3171 if ((j & (k - 1)) == 0)
3172 vec_alloc (ret_ctor_elts, k);
3173 if (ratype)
3175 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3176 for (m = 0; m < o; m++)
3178 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3179 size_int (m), NULL_TREE, NULL_TREE);
3180 new_stmt
3181 = gimple_build_assign (make_ssa_name (rtype), tem);
3182 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3183 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3184 gimple_assign_lhs (new_stmt));
3186 tree clobber = build_constructor (ratype, NULL);
3187 TREE_THIS_VOLATILE (clobber) = 1;
3188 new_stmt = gimple_build_assign (new_temp, clobber);
3189 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3191 else
3192 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3193 if ((j & (k - 1)) != k - 1)
3194 continue;
3195 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3196 new_stmt
3197 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3198 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3200 if ((unsigned) j == k - 1)
3201 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3202 else
3203 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3205 prev_stmt_info = vinfo_for_stmt (new_stmt);
3206 continue;
3208 else if (ratype)
3210 tree t = build_fold_addr_expr (new_temp);
3211 t = build2 (MEM_REF, vectype, t,
3212 build_int_cst (TREE_TYPE (t), 0));
3213 new_stmt
3214 = gimple_build_assign (make_ssa_name (vec_dest), t);
3215 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3216 tree clobber = build_constructor (ratype, NULL);
3217 TREE_THIS_VOLATILE (clobber) = 1;
3218 vect_finish_stmt_generation (stmt,
3219 gimple_build_assign (new_temp,
3220 clobber), gsi);
3224 if (j == 0)
3225 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3226 else
3227 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3229 prev_stmt_info = vinfo_for_stmt (new_stmt);
3232 vargs.release ();
3234 /* The call in STMT might prevent it from being removed in dce.
3235 We however cannot remove it here, due to the way the ssa name
3236 it defines is mapped to the new definition. So just replace the
3237 rhs of the statement with something harmless. */
3239 if (slp_node)
3240 return true;
3242 if (scalar_dest)
3244 type = TREE_TYPE (scalar_dest);
3245 if (is_pattern_stmt_p (stmt_info))
3246 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3247 else
3248 lhs = gimple_call_lhs (stmt);
3249 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3251 else
3252 new_stmt = gimple_build_nop ();
3253 set_vinfo_for_stmt (new_stmt, stmt_info);
3254 set_vinfo_for_stmt (stmt, NULL);
3255 STMT_VINFO_STMT (stmt_info) = new_stmt;
3256 gsi_replace (gsi, new_stmt, true);
3257 unlink_stmt_vdef (stmt);
3259 return true;
3263 /* Function vect_gen_widened_results_half
3265 Create a vector stmt whose code, number of arguments, and result
3266 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3267 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3268 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3269 needs to be created (DECL is a function-decl of a target-builtin).
3270 STMT is the original scalar stmt that we are vectorizing. */
3272 static gimple
3273 vect_gen_widened_results_half (enum tree_code code,
3274 tree decl,
3275 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3276 tree vec_dest, gimple_stmt_iterator *gsi,
3277 gimple stmt)
3279 gimple new_stmt;
3280 tree new_temp;
3282 /* Generate half of the widened result: */
3283 if (code == CALL_EXPR)
3285 /* Target specific support */
3286 if (op_type == binary_op)
3287 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3288 else
3289 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3290 new_temp = make_ssa_name (vec_dest, new_stmt);
3291 gimple_call_set_lhs (new_stmt, new_temp);
3293 else
3295 /* Generic support */
3296 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3297 if (op_type != binary_op)
3298 vec_oprnd1 = NULL;
3299 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3300 new_temp = make_ssa_name (vec_dest, new_stmt);
3301 gimple_assign_set_lhs (new_stmt, new_temp);
3303 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3305 return new_stmt;
3309 /* Get vectorized definitions for loop-based vectorization. For the first
3310 operand we call vect_get_vec_def_for_operand() (with OPRND containing the
3311 scalar operand), and for the rest we get a copy with
3312 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3313 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3314 The vectors are collected into VEC_OPRNDS. */
3316 static void
3317 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3318 vec<tree> *vec_oprnds, int multi_step_cvt)
3320 tree vec_oprnd;
3322 /* Get first vector operand. */
3323 /* All the vector operands except the very first one (which is the scalar
3324 operand) are stmt copies. */
3325 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3326 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3327 else
3328 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3330 vec_oprnds->quick_push (vec_oprnd);
3332 /* Get second vector operand. */
3333 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3334 vec_oprnds->quick_push (vec_oprnd);
3336 *oprnd = vec_oprnd;
3338 /* For conversion in multiple steps, continue to get operands
3339 recursively. */
3340 if (multi_step_cvt)
3341 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3345 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3346 For multi-step conversions store the resulting vectors and call the function
3347 recursively. */
3349 static void
3350 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3351 int multi_step_cvt, gimple stmt,
3352 vec<tree> vec_dsts,
3353 gimple_stmt_iterator *gsi,
3354 slp_tree slp_node, enum tree_code code,
3355 stmt_vec_info *prev_stmt_info)
3357 unsigned int i;
3358 tree vop0, vop1, new_tmp, vec_dest;
3359 gimple new_stmt;
3360 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3362 vec_dest = vec_dsts.pop ();
3364 for (i = 0; i < vec_oprnds->length (); i += 2)
3366 /* Create demotion operation. */
3367 vop0 = (*vec_oprnds)[i];
3368 vop1 = (*vec_oprnds)[i + 1];
3369 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3370 new_tmp = make_ssa_name (vec_dest, new_stmt);
3371 gimple_assign_set_lhs (new_stmt, new_tmp);
3372 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3374 if (multi_step_cvt)
3375 /* Store the resulting vector for next recursive call. */
3376 (*vec_oprnds)[i/2] = new_tmp;
3377 else
3379 /* This is the last step of the conversion sequence. Store the
3380 vectors in SLP_NODE or in the vector info of the scalar statement
3381 (or in the STMT_VINFO_RELATED_STMT chain). */
3382 if (slp_node)
3383 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3385 if (!*prev_stmt_info)
3386 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3387 else
3388 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3390 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3394 /* For multi-step demotion operations we first generate demotion operations
3395 from the source type to the intermediate types, and then combine the
3396 results (stored in VEC_OPRNDS) with a demotion operation to the destination
3397 type. */
3398 if (multi_step_cvt)
3400 /* At each level of recursion we have half of the operands we had at the
3401 previous level. */
3402 vec_oprnds->truncate ((i+1)/2);
3403 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3404 stmt, vec_dsts, gsi, slp_node,
3405 VEC_PACK_TRUNC_EXPR,
3406 prev_stmt_info);
3409 vec_dsts.quick_push (vec_dest);
3413 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3414 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3415 the resulting vectors and call the function recursively. */
3417 static void
3418 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3419 vec<tree> *vec_oprnds1,
3420 gimple stmt, tree vec_dest,
3421 gimple_stmt_iterator *gsi,
3422 enum tree_code code1,
3423 enum tree_code code2, tree decl1,
3424 tree decl2, int op_type)
3426 int i;
3427 tree vop0, vop1, new_tmp1, new_tmp2;
3428 gimple new_stmt1, new_stmt2;
3429 vec<tree> vec_tmp = vNULL;
3431 vec_tmp.create (vec_oprnds0->length () * 2);
3432 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3434 if (op_type == binary_op)
3435 vop1 = (*vec_oprnds1)[i];
3436 else
3437 vop1 = NULL_TREE;
3439 /* Generate the two halves of promotion operation. */
3440 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3441 op_type, vec_dest, gsi, stmt);
3442 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3443 op_type, vec_dest, gsi, stmt);
3444 if (is_gimple_call (new_stmt1))
3446 new_tmp1 = gimple_call_lhs (new_stmt1);
3447 new_tmp2 = gimple_call_lhs (new_stmt2);
3449 else
3451 new_tmp1 = gimple_assign_lhs (new_stmt1);
3452 new_tmp2 = gimple_assign_lhs (new_stmt2);
3455 /* Store the results for the next step. */
3456 vec_tmp.quick_push (new_tmp1);
3457 vec_tmp.quick_push (new_tmp2);
3460 vec_oprnds0->release ();
3461 *vec_oprnds0 = vec_tmp;
3465 /* Check if STMT performs a conversion operation that can be vectorized.
3466 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3467 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3468 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3470 static bool
3471 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3472 gimple *vec_stmt, slp_tree slp_node)
3474 tree vec_dest;
3475 tree scalar_dest;
3476 tree op0, op1 = NULL_TREE;
3477 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3478 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3479 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3480 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3481 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3482 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3483 tree new_temp;
3484 tree def;
3485 gimple def_stmt;
3486 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3487 gimple new_stmt = NULL;
3488 stmt_vec_info prev_stmt_info;
3489 int nunits_in;
3490 int nunits_out;
3491 tree vectype_out, vectype_in;
3492 int ncopies, i, j;
3493 tree lhs_type, rhs_type;
3494 enum { NARROW, NONE, WIDEN } modifier;
3495 vec<tree> vec_oprnds0 = vNULL;
3496 vec<tree> vec_oprnds1 = vNULL;
3497 tree vop0;
3498 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3499 int multi_step_cvt = 0;
3500 vec<tree> vec_dsts = vNULL;
3501 vec<tree> interm_types = vNULL;
3502 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3503 int op_type;
3504 machine_mode rhs_mode;
3505 unsigned short fltsz;
3507 /* Is STMT a vectorizable conversion? */
3509 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3510 return false;
3512 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3513 return false;
3515 if (!is_gimple_assign (stmt))
3516 return false;
3518 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3519 return false;
3521 code = gimple_assign_rhs_code (stmt);
3522 if (!CONVERT_EXPR_CODE_P (code)
3523 && code != FIX_TRUNC_EXPR
3524 && code != FLOAT_EXPR
3525 && code != WIDEN_MULT_EXPR
3526 && code != WIDEN_LSHIFT_EXPR)
3527 return false;
3529 op_type = TREE_CODE_LENGTH (code);
3531 /* Check types of lhs and rhs. */
3532 scalar_dest = gimple_assign_lhs (stmt);
3533 lhs_type = TREE_TYPE (scalar_dest);
3534 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3536 op0 = gimple_assign_rhs1 (stmt);
3537 rhs_type = TREE_TYPE (op0);
3539 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3540 && !((INTEGRAL_TYPE_P (lhs_type)
3541 && INTEGRAL_TYPE_P (rhs_type))
3542 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3543 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3544 return false;
3546 if ((INTEGRAL_TYPE_P (lhs_type)
3547 && (TYPE_PRECISION (lhs_type)
3548 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3549 || (INTEGRAL_TYPE_P (rhs_type)
3550 && (TYPE_PRECISION (rhs_type)
3551 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3553 if (dump_enabled_p ())
3554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3555 "type conversion to/from bit-precision unsupported."
3556 "\n");
3557 return false;
3560 /* Check the operands of the operation. */
3561 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3562 &def_stmt, &def, &dt[0], &vectype_in))
3564 if (dump_enabled_p ())
3565 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3566 "use not simple.\n");
3567 return false;
3569 if (op_type == binary_op)
3571 bool ok;
3573 op1 = gimple_assign_rhs2 (stmt);
3574 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3575 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3576 OP1. */
3577 if (CONSTANT_CLASS_P (op0))
3578 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3579 &def_stmt, &def, &dt[1], &vectype_in);
3580 else
3581 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3582 &def, &dt[1]);
3584 if (!ok)
3586 if (dump_enabled_p ())
3587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3588 "use not simple.\n");
3589 return false;
3593 /* If op0 is an external or constant def, use a vector type of
3594 the same size as the output vector type. */
3595 if (!vectype_in)
3596 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3597 if (vec_stmt)
3598 gcc_assert (vectype_in);
3599 if (!vectype_in)
3601 if (dump_enabled_p ())
3603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3604 "no vectype for scalar type ");
3605 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3606 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3609 return false;
3612 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3613 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3614 if (nunits_in < nunits_out)
3615 modifier = NARROW;
3616 else if (nunits_out == nunits_in)
3617 modifier = NONE;
3618 else
3619 modifier = WIDEN;
3621 /* Multiple types in SLP are handled by creating the appropriate number of
3622 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3623 case of SLP. */
3624 if (slp_node || PURE_SLP_STMT (stmt_info))
3625 ncopies = 1;
3626 else if (modifier == NARROW)
3627 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3628 else
3629 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3631 /* Sanity check: make sure that at least one copy of the vectorized stmt
3632 needs to be generated. */
3633 gcc_assert (ncopies >= 1);
3635 /* Supportable by target? */
3636 switch (modifier)
3638 case NONE:
3639 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3640 return false;
3641 if (supportable_convert_operation (code, vectype_out, vectype_in,
3642 &decl1, &code1))
3643 break;
3644 /* FALLTHRU */
3645 unsupported:
3646 if (dump_enabled_p ())
3647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3648 "conversion not supported by target.\n");
3649 return false;
3651 case WIDEN:
3652 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3653 &code1, &code2, &multi_step_cvt,
3654 &interm_types))
3656 /* Binary widening operation can only be supported directly by the
3657 architecture. */
3658 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3659 break;
3662 if (code != FLOAT_EXPR
3663 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3664 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3665 goto unsupported;
3667 rhs_mode = TYPE_MODE (rhs_type);
3668 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
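      /* There is no direct widening support for this conversion.  For an
	 integer-to-float conversion try a two-step scheme: first widen the
	 integer input to a wider integer type CVT_TYPE and then convert that
	 to VECTYPE_OUT, probing successively wider integer modes up to the
	 size of LHS_TYPE.  */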
3669 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3670 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3671 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3673 cvt_type
3674 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3675 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3676 if (cvt_type == NULL_TREE)
3677 goto unsupported;
3679 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3681 if (!supportable_convert_operation (code, vectype_out,
3682 cvt_type, &decl1, &codecvt1))
3683 goto unsupported;
3685 else if (!supportable_widening_operation (code, stmt, vectype_out,
3686 cvt_type, &codecvt1,
3687 &codecvt2, &multi_step_cvt,
3688 &interm_types))
3689 continue;
3690 else
3691 gcc_assert (multi_step_cvt == 0);
3693 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3694 vectype_in, &code1, &code2,
3695 &multi_step_cvt, &interm_types))
3696 break;
3699 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3700 goto unsupported;
3702 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3703 codecvt2 = ERROR_MARK;
3704 else
3706 multi_step_cvt++;
3707 interm_types.safe_push (cvt_type);
3708 cvt_type = NULL_TREE;
3710 break;
3712 case NARROW:
3713 gcc_assert (op_type == unary_op);
3714 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3715 &code1, &multi_step_cvt,
3716 &interm_types))
3717 break;
3719 if (code != FIX_TRUNC_EXPR
3720 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3721 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3722 goto unsupported;
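      /* There is no direct narrowing support for this conversion.  For a
	 float-to-integer conversion try converting first to an integer type
	 of the same width as RHS_TYPE (CVT_TYPE) and then narrowing that
	 intermediate result to VECTYPE_OUT.  */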
3724 rhs_mode = TYPE_MODE (rhs_type);
3725 cvt_type
3726 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3727 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3728 if (cvt_type == NULL_TREE)
3729 goto unsupported;
3730 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3731 &decl1, &codecvt1))
3732 goto unsupported;
3733 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3734 &code1, &multi_step_cvt,
3735 &interm_types))
3736 break;
3737 goto unsupported;
3739 default:
3740 gcc_unreachable ();
3743 if (!vec_stmt) /* transformation not required. */
3745 if (dump_enabled_p ())
3746 dump_printf_loc (MSG_NOTE, vect_location,
3747 "=== vectorizable_conversion ===\n");
3748 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3750 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3751 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3753 else if (modifier == NARROW)
3755 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3756 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3758 else
3760 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3761 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3763 interm_types.release ();
3764 return true;
3767 /** Transform. **/
3768 if (dump_enabled_p ())
3769 dump_printf_loc (MSG_NOTE, vect_location,
3770 "transform conversion. ncopies = %d.\n", ncopies);
3772 if (op_type == binary_op)
3774 if (CONSTANT_CLASS_P (op0))
3775 op0 = fold_convert (TREE_TYPE (op1), op0);
3776 else if (CONSTANT_CLASS_P (op1))
3777 op1 = fold_convert (TREE_TYPE (op0), op1);
3780 /* In case of multi-step conversion, we first generate conversion operations
3781 to the intermediate types, and then from those types to the final one.
3782 We create vector destinations for the intermediate type (TYPES) received
3783 from supportable_*_operation, and store them in the correct order
3784 for future use in vect_create_vectorized_*_stmts (). */
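/* For instance (an illustrative sketch, not a case taken from the code
   above): widening a V16QI vector of chars to four V4SI vectors of ints
   on a target that widens only one step at a time goes through V8HI
   intermediates, so MULTI_STEP_CVT is 1 and INTERM_TYPES holds V8HI.  */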
3785 vec_dsts.create (multi_step_cvt + 1);
3786 vec_dest = vect_create_destination_var (scalar_dest,
3787 (cvt_type && modifier == WIDEN)
3788 ? cvt_type : vectype_out);
3789 vec_dsts.quick_push (vec_dest);
3791 if (multi_step_cvt)
3793 for (i = interm_types.length () - 1;
3794 interm_types.iterate (i, &intermediate_type); i--)
3796 vec_dest = vect_create_destination_var (scalar_dest,
3797 intermediate_type);
3798 vec_dsts.quick_push (vec_dest);
3802 if (cvt_type)
3803 vec_dest = vect_create_destination_var (scalar_dest,
3804 modifier == WIDEN
3805 ? vectype_out : cvt_type);
3807 if (!slp_node)
3809 if (modifier == WIDEN)
3811 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3812 if (op_type == binary_op)
3813 vec_oprnds1.create (1);
3815 else if (modifier == NARROW)
3816 vec_oprnds0.create (
3817 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3819 else if (code == WIDEN_LSHIFT_EXPR)
3820 vec_oprnds1.create (slp_node->vec_stmts_size);
3822 last_oprnd = op0;
3823 prev_stmt_info = NULL;
3824 switch (modifier)
3826 case NONE:
3827 for (j = 0; j < ncopies; j++)
3829 if (j == 0)
3830 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3831 -1);
3832 else
3833 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3835 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3837 /* Arguments are ready.  Create the new vector stmt.  */
3838 if (code1 == CALL_EXPR)
3840 new_stmt = gimple_build_call (decl1, 1, vop0);
3841 new_temp = make_ssa_name (vec_dest, new_stmt);
3842 gimple_call_set_lhs (new_stmt, new_temp);
3844 else
3846 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3847 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3848 new_temp = make_ssa_name (vec_dest, new_stmt);
3849 gimple_assign_set_lhs (new_stmt, new_temp);
3852 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3853 if (slp_node)
3854 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3857 if (j == 0)
3858 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3859 else
3860 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3861 prev_stmt_info = vinfo_for_stmt (new_stmt);
3863 break;
3865 case WIDEN:
3866 /* In case the vectorization factor (VF) is bigger than the number
3867 of elements that we can fit in a vectype (nunits), we have to
3868 generate more than one vector stmt - i.e., we need to "unroll"
3869 the vector stmt by a factor VF/nunits. */
3870 for (j = 0; j < ncopies; j++)
3872 /* Handle uses. */
3873 if (j == 0)
3875 if (slp_node)
3877 if (code == WIDEN_LSHIFT_EXPR)
3879 unsigned int k;
3881 vec_oprnd1 = op1;
3882 /* Store vec_oprnd1 for every vector stmt to be created
3883 for SLP_NODE. We check during the analysis that all
3884 the shift arguments are the same. */
3885 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3886 vec_oprnds1.quick_push (vec_oprnd1);
3888 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3889 slp_node, -1);
3891 else
3892 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3893 &vec_oprnds1, slp_node, -1);
3895 else
3897 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3898 vec_oprnds0.quick_push (vec_oprnd0);
3899 if (op_type == binary_op)
3901 if (code == WIDEN_LSHIFT_EXPR)
3902 vec_oprnd1 = op1;
3903 else
3904 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3905 NULL);
3906 vec_oprnds1.quick_push (vec_oprnd1);
3910 else
3912 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3913 vec_oprnds0.truncate (0);
3914 vec_oprnds0.quick_push (vec_oprnd0);
3915 if (op_type == binary_op)
3917 if (code == WIDEN_LSHIFT_EXPR)
3918 vec_oprnd1 = op1;
3919 else
3920 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3921 vec_oprnd1);
3922 vec_oprnds1.truncate (0);
3923 vec_oprnds1.quick_push (vec_oprnd1);
3927 /* Arguments are ready. Create the new vector stmts. */
3928 for (i = multi_step_cvt; i >= 0; i--)
3930 tree this_dest = vec_dsts[i];
3931 enum tree_code c1 = code1, c2 = code2;
3932 if (i == 0 && codecvt2 != ERROR_MARK)
3934 c1 = codecvt1;
3935 c2 = codecvt2;
3937 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3938 &vec_oprnds1,
3939 stmt, this_dest, gsi,
3940 c1, c2, decl1, decl2,
3941 op_type);
3944 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3946 if (cvt_type)
3948 if (codecvt1 == CALL_EXPR)
3950 new_stmt = gimple_build_call (decl1, 1, vop0);
3951 new_temp = make_ssa_name (vec_dest, new_stmt);
3952 gimple_call_set_lhs (new_stmt, new_temp);
3954 else
3956 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3957 new_temp = make_ssa_name (vec_dest);
3958 new_stmt = gimple_build_assign (new_temp, codecvt1,
3959 vop0);
3962 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3964 else
3965 new_stmt = SSA_NAME_DEF_STMT (vop0);
3967 if (slp_node)
3968 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3970 if (!prev_stmt_info)
3971 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3972 else
3973 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3974 prev_stmt_info = vinfo_for_stmt (new_stmt);
3978 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3979 break;
3981 case NARROW:
3982 /* In case the vectorization factor (VF) is bigger than the number
3983 of elements that we can fit in a vectype (nunits), we have to
3984 generate more than one vector stmt - i.e., we need to "unroll"
3985 the vector stmt by a factor VF/nunits. */
3986 for (j = 0; j < ncopies; j++)
3988 /* Handle uses. */
3989 if (slp_node)
3990 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3991 slp_node, -1);
3992 else
3994 vec_oprnds0.truncate (0);
3995 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3996 vect_pow2 (multi_step_cvt) - 1);
3999 /* Arguments are ready. Create the new vector stmts. */
4000 if (cvt_type)
4001 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4003 if (codecvt1 == CALL_EXPR)
4005 new_stmt = gimple_build_call (decl1, 1, vop0);
4006 new_temp = make_ssa_name (vec_dest, new_stmt);
4007 gimple_call_set_lhs (new_stmt, new_temp);
4009 else
4011 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4012 new_temp = make_ssa_name (vec_dest);
4013 new_stmt = gimple_build_assign (new_temp, codecvt1,
4014 vop0);
4017 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4018 vec_oprnds0[i] = new_temp;
4021 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4022 stmt, vec_dsts, gsi,
4023 slp_node, code1,
4024 &prev_stmt_info);
4027 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4028 break;
4031 vec_oprnds0.release ();
4032 vec_oprnds1.release ();
4033 vec_dsts.release ();
4034 interm_types.release ();
4036 return true;
4040 /* Function vectorizable_assignment.
4042 Check if STMT performs an assignment (copy) that can be vectorized.
4043 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4044 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4045 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4047 static bool
4048 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4049 gimple *vec_stmt, slp_tree slp_node)
4051 tree vec_dest;
4052 tree scalar_dest;
4053 tree op;
4054 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4055 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4056 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4057 tree new_temp;
4058 tree def;
4059 gimple def_stmt;
4060 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4061 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4062 int ncopies;
4063 int i, j;
4064 vec<tree> vec_oprnds = vNULL;
4065 tree vop;
4066 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4067 gimple new_stmt = NULL;
4068 stmt_vec_info prev_stmt_info = NULL;
4069 enum tree_code code;
4070 tree vectype_in;
4072 /* Multiple types in SLP are handled by creating the appropriate number of
4073 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4074 case of SLP. */
4075 if (slp_node || PURE_SLP_STMT (stmt_info))
4076 ncopies = 1;
4077 else
4078 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4080 gcc_assert (ncopies >= 1);
4082 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4083 return false;
4085 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4086 return false;
4088 /* Is vectorizable assignment? */
4089 if (!is_gimple_assign (stmt))
4090 return false;
4092 scalar_dest = gimple_assign_lhs (stmt);
4093 if (TREE_CODE (scalar_dest) != SSA_NAME)
4094 return false;
4096 code = gimple_assign_rhs_code (stmt);
4097 if (gimple_assign_single_p (stmt)
4098 || code == PAREN_EXPR
4099 || CONVERT_EXPR_CODE_P (code))
4100 op = gimple_assign_rhs1 (stmt);
4101 else
4102 return false;
4104 if (code == VIEW_CONVERT_EXPR)
4105 op = TREE_OPERAND (op, 0);
4107 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4108 &def_stmt, &def, &dt[0], &vectype_in))
4110 if (dump_enabled_p ())
4111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4112 "use not simple.\n");
4113 return false;
4116 /* We can handle NOP_EXPR conversions that do not change the number
4117 of elements or the vector size. */
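/* E.g. a conversion between signed and unsigned integer vectors of the
   same width is accepted and turned into a plain copy through a
   VIEW_CONVERT_EXPR below, while an int -> short conversion changes the
   number of elements and is rejected here.  */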
4118 if ((CONVERT_EXPR_CODE_P (code)
4119 || code == VIEW_CONVERT_EXPR)
4120 && (!vectype_in
4121 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4122 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4123 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4124 return false;
4126 /* We do not handle bit-precision changes. */
4127 if ((CONVERT_EXPR_CODE_P (code)
4128 || code == VIEW_CONVERT_EXPR)
4129 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4130 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4131 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4132 || ((TYPE_PRECISION (TREE_TYPE (op))
4133 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4134 /* But a conversion that does not change the bit-pattern is ok. */
4135 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4136 > TYPE_PRECISION (TREE_TYPE (op)))
4137 && TYPE_UNSIGNED (TREE_TYPE (op))))
4139 if (dump_enabled_p ())
4140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4141 "type conversion to/from bit-precision "
4142 "unsupported.\n");
4143 return false;
4146 if (!vec_stmt) /* transformation not required. */
4148 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4149 if (dump_enabled_p ())
4150 dump_printf_loc (MSG_NOTE, vect_location,
4151 "=== vectorizable_assignment ===\n");
4152 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4153 return true;
4156 /** Transform. **/
4157 if (dump_enabled_p ())
4158 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4160 /* Handle def. */
4161 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4163 /* Handle use. */
4164 for (j = 0; j < ncopies; j++)
4166 /* Handle uses. */
4167 if (j == 0)
4168 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4169 else
4170 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4172 /* Arguments are ready.  Create the new vector stmt.  */
4173 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4175 if (CONVERT_EXPR_CODE_P (code)
4176 || code == VIEW_CONVERT_EXPR)
4177 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4178 new_stmt = gimple_build_assign (vec_dest, vop);
4179 new_temp = make_ssa_name (vec_dest, new_stmt);
4180 gimple_assign_set_lhs (new_stmt, new_temp);
4181 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4182 if (slp_node)
4183 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4186 if (slp_node)
4187 continue;
4189 if (j == 0)
4190 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4191 else
4192 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4194 prev_stmt_info = vinfo_for_stmt (new_stmt);
4197 vec_oprnds.release ();
4198 return true;
4202 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4203 either as shift by a scalar or by a vector. */
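/* The lookup below mirrors the strategy used when vectorizing shifts:
   first try the optab for a vector shifted by a scalar amount, and only
   fall back to the vector-by-vector optab if the target lacks it.  */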
4205 bool
4206 vect_supportable_shift (enum tree_code code, tree scalar_type)
4209 machine_mode vec_mode;
4210 optab optab;
4211 int icode;
4212 tree vectype;
4214 vectype = get_vectype_for_scalar_type (scalar_type);
4215 if (!vectype)
4216 return false;
4218 optab = optab_for_tree_code (code, vectype, optab_scalar);
4219 if (!optab
4220 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4222 optab = optab_for_tree_code (code, vectype, optab_vector);
4223 if (!optab
4224 || (optab_handler (optab, TYPE_MODE (vectype))
4225 == CODE_FOR_nothing))
4226 return false;
4229 vec_mode = TYPE_MODE (vectype);
4230 icode = (int) optab_handler (optab, vec_mode);
4231 if (icode == CODE_FOR_nothing)
4232 return false;
4234 return true;
4238 /* Function vectorizable_shift.
4240 Check if STMT performs a shift operation that can be vectorized.
4241 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4242 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4243 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4245 static bool
4246 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4247 gimple *vec_stmt, slp_tree slp_node)
4249 tree vec_dest;
4250 tree scalar_dest;
4251 tree op0, op1 = NULL;
4252 tree vec_oprnd1 = NULL_TREE;
4253 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4254 tree vectype;
4255 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4256 enum tree_code code;
4257 machine_mode vec_mode;
4258 tree new_temp;
4259 optab optab;
4260 int icode;
4261 machine_mode optab_op2_mode;
4262 tree def;
4263 gimple def_stmt;
4264 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4265 gimple new_stmt = NULL;
4266 stmt_vec_info prev_stmt_info;
4267 int nunits_in;
4268 int nunits_out;
4269 tree vectype_out;
4270 tree op1_vectype;
4271 int ncopies;
4272 int j, i;
4273 vec<tree> vec_oprnds0 = vNULL;
4274 vec<tree> vec_oprnds1 = vNULL;
4275 tree vop0, vop1;
4276 unsigned int k;
4277 bool scalar_shift_arg = true;
4278 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4279 int vf;
4281 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4282 return false;
4284 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4285 return false;
4287 /* Is STMT a vectorizable binary/unary operation? */
4288 if (!is_gimple_assign (stmt))
4289 return false;
4291 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4292 return false;
4294 code = gimple_assign_rhs_code (stmt);
4296 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4297 || code == RROTATE_EXPR))
4298 return false;
4300 scalar_dest = gimple_assign_lhs (stmt);
4301 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4302 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4303 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4305 if (dump_enabled_p ())
4306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4307 "bit-precision shifts not supported.\n");
4308 return false;
4311 op0 = gimple_assign_rhs1 (stmt);
4312 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4313 &def_stmt, &def, &dt[0], &vectype))
4315 if (dump_enabled_p ())
4316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4317 "use not simple.\n");
4318 return false;
4320 /* If op0 is an external or constant def, use a vector type with
4321 the same size as the output vector type. */
4322 if (!vectype)
4323 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4324 if (vec_stmt)
4325 gcc_assert (vectype);
4326 if (!vectype)
4328 if (dump_enabled_p ())
4329 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4330 "no vectype for scalar type\n");
4331 return false;
4334 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4335 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4336 if (nunits_out != nunits_in)
4337 return false;
4339 op1 = gimple_assign_rhs2 (stmt);
4340 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4341 &def, &dt[1], &op1_vectype))
4343 if (dump_enabled_p ())
4344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4345 "use not simple.\n");
4346 return false;
4349 if (loop_vinfo)
4350 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4351 else
4352 vf = 1;
4354 /* Multiple types in SLP are handled by creating the appropriate number of
4355 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4356 case of SLP. */
4357 if (slp_node || PURE_SLP_STMT (stmt_info))
4358 ncopies = 1;
4359 else
4360 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4362 gcc_assert (ncopies >= 1);
4364 /* Determine whether the shift amount is a vector or a scalar.  If the
4365 shift/rotate amount is a vector, use the vector/vector shift optabs. */
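/* For instance, a shift count defined by a statement inside the loop
   (vect_internal_def) can differ from element to element and therefore
   needs the vector/vector form, whereas a constant or loop-invariant
   count can stay a single scalar operand.  */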
4367 if (dt[1] == vect_internal_def && !slp_node)
4368 scalar_shift_arg = false;
4369 else if (dt[1] == vect_constant_def
4370 || dt[1] == vect_external_def
4371 || dt[1] == vect_internal_def)
4373 /* In SLP, we need to check whether the shift count is the same for
4374 all statements; in loops, a constant or invariant shift count is
4375 always a scalar shift.  */
4376 if (slp_node)
4378 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4379 gimple slpstmt;
4381 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4382 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4383 scalar_shift_arg = false;
4386 else
4388 if (dump_enabled_p ())
4389 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4390 "operand mode requires invariant argument.\n");
4391 return false;
4394 /* Vector shifted by vector. */
4395 if (!scalar_shift_arg)
4397 optab = optab_for_tree_code (code, vectype, optab_vector);
4398 if (dump_enabled_p ())
4399 dump_printf_loc (MSG_NOTE, vect_location,
4400 "vector/vector shift/rotate found.\n");
4402 if (!op1_vectype)
4403 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4404 if (op1_vectype == NULL_TREE
4405 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4407 if (dump_enabled_p ())
4408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4409 "unusable type for last operand in"
4410 " vector/vector shift/rotate.\n");
4411 return false;
4414 /* See if the machine has a vector shifted by scalar insn and if not
4415 then see if it has a vector shifted by vector insn. */
4416 else
4418 optab = optab_for_tree_code (code, vectype, optab_scalar);
4419 if (optab
4420 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4422 if (dump_enabled_p ())
4423 dump_printf_loc (MSG_NOTE, vect_location,
4424 "vector/scalar shift/rotate found.\n");
4426 else
4428 optab = optab_for_tree_code (code, vectype, optab_vector);
4429 if (optab
4430 && (optab_handler (optab, TYPE_MODE (vectype))
4431 != CODE_FOR_nothing))
4433 scalar_shift_arg = false;
4435 if (dump_enabled_p ())
4436 dump_printf_loc (MSG_NOTE, vect_location,
4437 "vector/vector shift/rotate found.\n");
4439 /* Unlike the other binary operators, shifts/rotates have
4440 an rhs of type int rather than the same type as the lhs,
4441 so make sure the scalar is the right type if we are
4442 dealing with vectors of long long/long/short/char. */
4443 if (dt[1] == vect_constant_def)
4444 op1 = fold_convert (TREE_TYPE (vectype), op1);
4445 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4446 TREE_TYPE (op1)))
4448 if (slp_node
4449 && TYPE_MODE (TREE_TYPE (vectype))
4450 != TYPE_MODE (TREE_TYPE (op1)))
4452 if (dump_enabled_p ())
4453 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4454 "unusable type for last operand in"
4455 " vector/vector shift/rotate.\n");
4456 return false;
4458 if (vec_stmt && !slp_node)
4460 op1 = fold_convert (TREE_TYPE (vectype), op1);
4461 op1 = vect_init_vector (stmt, op1,
4462 TREE_TYPE (vectype), NULL);
4469 /* Supportable by target? */
4470 if (!optab)
4472 if (dump_enabled_p ())
4473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4474 "no optab.\n");
4475 return false;
4477 vec_mode = TYPE_MODE (vectype);
4478 icode = (int) optab_handler (optab, vec_mode);
4479 if (icode == CODE_FOR_nothing)
4481 if (dump_enabled_p ())
4482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4483 "op not supported by target.\n");
4484 /* Check only during analysis. */
4485 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4486 || (vf < vect_min_worthwhile_factor (code)
4487 && !vec_stmt))
4488 return false;
4489 if (dump_enabled_p ())
4490 dump_printf_loc (MSG_NOTE, vect_location,
4491 "proceeding using word mode.\n");
4494 /* Worthwhile without SIMD support? Check only during analysis. */
4495 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4496 && vf < vect_min_worthwhile_factor (code)
4497 && !vec_stmt)
4499 if (dump_enabled_p ())
4500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4501 "not worthwhile without SIMD support.\n");
4502 return false;
4505 if (!vec_stmt) /* transformation not required. */
4507 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4508 if (dump_enabled_p ())
4509 dump_printf_loc (MSG_NOTE, vect_location,
4510 "=== vectorizable_shift ===\n");
4511 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4512 return true;
4515 /** Transform. **/
4517 if (dump_enabled_p ())
4518 dump_printf_loc (MSG_NOTE, vect_location,
4519 "transform binary/unary operation.\n");
4521 /* Handle def. */
4522 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4524 prev_stmt_info = NULL;
4525 for (j = 0; j < ncopies; j++)
4527 /* Handle uses. */
4528 if (j == 0)
4530 if (scalar_shift_arg)
4532 /* Vector shl and shr insn patterns can be defined with scalar
4533 operand 2 (shift operand). In this case, use constant or loop
4534 invariant op1 directly, without extending it to vector mode
4535 first. */
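/* With such a pattern the same scalar count is applied to every element,
   so OP1 is reused directly and simply pushed once per vector stmt below
   rather than being broadcast into a vector first.  */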
4536 optab_op2_mode = insn_data[icode].operand[2].mode;
4537 if (!VECTOR_MODE_P (optab_op2_mode))
4539 if (dump_enabled_p ())
4540 dump_printf_loc (MSG_NOTE, vect_location,
4541 "operand 1 using scalar mode.\n");
4542 vec_oprnd1 = op1;
4543 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4544 vec_oprnds1.quick_push (vec_oprnd1);
4545 if (slp_node)
4547 /* Store vec_oprnd1 for every vector stmt to be created
4548 for SLP_NODE. We check during the analysis that all
4549 the shift arguments are the same.
4550 TODO: Allow different constants for different vector
4551 stmts generated for an SLP instance. */
4552 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4553 vec_oprnds1.quick_push (vec_oprnd1);
4558 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4559 (a special case for certain kinds of vector shifts); otherwise,
4560 operand 1 should be of a vector type (the usual case). */
4561 if (vec_oprnd1)
4562 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4563 slp_node, -1);
4564 else
4565 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4566 slp_node, -1);
4568 else
4569 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4571 /* Arguments are ready. Create the new vector stmt. */
4572 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4574 vop1 = vec_oprnds1[i];
4575 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4576 new_temp = make_ssa_name (vec_dest, new_stmt);
4577 gimple_assign_set_lhs (new_stmt, new_temp);
4578 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4579 if (slp_node)
4580 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4583 if (slp_node)
4584 continue;
4586 if (j == 0)
4587 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4588 else
4589 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4590 prev_stmt_info = vinfo_for_stmt (new_stmt);
4593 vec_oprnds0.release ();
4594 vec_oprnds1.release ();
4596 return true;
4600 /* Function vectorizable_operation.
4602 Check if STMT performs a binary, unary or ternary operation that can
4603 be vectorized.
4604 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4605 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4606 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4608 static bool
4609 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4610 gimple *vec_stmt, slp_tree slp_node)
4612 tree vec_dest;
4613 tree scalar_dest;
4614 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4615 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4616 tree vectype;
4617 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4618 enum tree_code code;
4619 machine_mode vec_mode;
4620 tree new_temp;
4621 int op_type;
4622 optab optab;
4623 int icode;
4624 tree def;
4625 gimple def_stmt;
4626 enum vect_def_type dt[3]
4627 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4628 gimple new_stmt = NULL;
4629 stmt_vec_info prev_stmt_info;
4630 int nunits_in;
4631 int nunits_out;
4632 tree vectype_out;
4633 int ncopies;
4634 int j, i;
4635 vec<tree> vec_oprnds0 = vNULL;
4636 vec<tree> vec_oprnds1 = vNULL;
4637 vec<tree> vec_oprnds2 = vNULL;
4638 tree vop0, vop1, vop2;
4639 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4640 int vf;
4642 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4643 return false;
4645 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4646 return false;
4648 /* Is STMT a vectorizable binary/unary operation? */
4649 if (!is_gimple_assign (stmt))
4650 return false;
4652 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4653 return false;
4655 code = gimple_assign_rhs_code (stmt);
4657 /* For pointer addition, we should use the normal plus for
4658 the vector addition. */
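/* A POINTER_PLUS_EXPR has a pointer first operand and a sizetype offset;
   the vectorized statement works on integer vectors, for which a plain
   PLUS_EXPR is the appropriate code.  */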
4659 if (code == POINTER_PLUS_EXPR)
4660 code = PLUS_EXPR;
4662 /* Support only unary, binary or ternary operations.  */
4663 op_type = TREE_CODE_LENGTH (code);
4664 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4666 if (dump_enabled_p ())
4667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4668 "num. args = %d (not unary/binary/ternary op).\n",
4669 op_type);
4670 return false;
4673 scalar_dest = gimple_assign_lhs (stmt);
4674 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4676 /* Most operations cannot handle bit-precision types without extra
4677 truncations. */
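/* E.g. adding two values of a 3-bit type would require truncating every
   vector result back to 3 bits, which is not modelled here; IOR, XOR and
   AND never produce bits outside their operands' precision, so they are
   exempt below.  */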
4678 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4679 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4680 /* Exceptions are bitwise binary operations.  */
4681 && code != BIT_IOR_EXPR
4682 && code != BIT_XOR_EXPR
4683 && code != BIT_AND_EXPR)
4685 if (dump_enabled_p ())
4686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4687 "bit-precision arithmetic not supported.\n");
4688 return false;
4691 op0 = gimple_assign_rhs1 (stmt);
4692 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4693 &def_stmt, &def, &dt[0], &vectype))
4695 if (dump_enabled_p ())
4696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4697 "use not simple.\n");
4698 return false;
4700 /* If op0 is an external or constant def, use a vector type with
4701 the same size as the output vector type. */
4702 if (!vectype)
4703 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4704 if (vec_stmt)
4705 gcc_assert (vectype);
4706 if (!vectype)
4708 if (dump_enabled_p ())
4710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4711 "no vectype for scalar type ");
4712 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4713 TREE_TYPE (op0));
4714 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4717 return false;
4720 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4721 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4722 if (nunits_out != nunits_in)
4723 return false;
4725 if (op_type == binary_op || op_type == ternary_op)
4727 op1 = gimple_assign_rhs2 (stmt);
4728 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4729 &def, &dt[1]))
4731 if (dump_enabled_p ())
4732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4733 "use not simple.\n");
4734 return false;
4737 if (op_type == ternary_op)
4739 op2 = gimple_assign_rhs3 (stmt);
4740 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4741 &def, &dt[2]))
4743 if (dump_enabled_p ())
4744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4745 "use not simple.\n");
4746 return false;
4750 if (loop_vinfo)
4751 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4752 else
4753 vf = 1;
4755 /* Multiple types in SLP are handled by creating the appropriate number of
4756 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4757 case of SLP. */
4758 if (slp_node || PURE_SLP_STMT (stmt_info))
4759 ncopies = 1;
4760 else
4761 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4763 gcc_assert (ncopies >= 1);
4765 /* Shifts are handled in vectorizable_shift (). */
4766 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4767 || code == RROTATE_EXPR)
4768 return false;
4770 /* Supportable by target? */
4772 vec_mode = TYPE_MODE (vectype);
4773 if (code == MULT_HIGHPART_EXPR)
4775 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4776 icode = LAST_INSN_CODE;
4777 else
4778 icode = CODE_FOR_nothing;
4780 else
4782 optab = optab_for_tree_code (code, vectype, optab_default);
4783 if (!optab)
4785 if (dump_enabled_p ())
4786 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4787 "no optab.\n");
4788 return false;
4790 icode = (int) optab_handler (optab, vec_mode);
4793 if (icode == CODE_FOR_nothing)
4795 if (dump_enabled_p ())
4796 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4797 "op not supported by target.\n");
4798 /* Check only during analysis. */
4799 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4800 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4801 return false;
4802 if (dump_enabled_p ())
4803 dump_printf_loc (MSG_NOTE, vect_location,
4804 "proceeding using word mode.\n");
4807 /* Worthwhile without SIMD support? Check only during analysis. */
4808 if (!VECTOR_MODE_P (vec_mode)
4809 && !vec_stmt
4810 && vf < vect_min_worthwhile_factor (code))
4812 if (dump_enabled_p ())
4813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4814 "not worthwhile without SIMD support.\n");
4815 return false;
4818 if (!vec_stmt) /* transformation not required. */
4820 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4821 if (dump_enabled_p ())
4822 dump_printf_loc (MSG_NOTE, vect_location,
4823 "=== vectorizable_operation ===\n");
4824 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4825 return true;
4828 /** Transform. **/
4830 if (dump_enabled_p ())
4831 dump_printf_loc (MSG_NOTE, vect_location,
4832 "transform binary/unary operation.\n");
4834 /* Handle def. */
4835 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4837 /* In case the vectorization factor (VF) is bigger than the number
4838 of elements that we can fit in a vectype (nunits), we have to generate
4839 more than one vector stmt - i.e., we need to "unroll" the
4840 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4841 from one copy of the vector stmt to the next, in the field
4842 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4843 stages to find the correct vector defs to be used when vectorizing
4844 stmts that use the defs of the current stmt. The example below
4845 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4846 we need to create 4 vectorized stmts):
4848 before vectorization:
4849 RELATED_STMT VEC_STMT
4850 S1: x = memref - -
4851 S2: z = x + 1 - -
4853 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4854 there):
4855 RELATED_STMT VEC_STMT
4856 VS1_0: vx0 = memref0 VS1_1 -
4857 VS1_1: vx1 = memref1 VS1_2 -
4858 VS1_2: vx2 = memref2 VS1_3 -
4859 VS1_3: vx3 = memref3 - -
4860 S1: x = load - VS1_0
4861 S2: z = x + 1 - -
4863 step2: vectorize stmt S2 (done here):
4864 To vectorize stmt S2 we first need to find the relevant vector
4865 def for the first operand 'x'. This is, as usual, obtained from
4866 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4867 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4868 relevant vector def 'vx0'. Having found 'vx0' we can generate
4869 the vector stmt VS2_0, and as usual, record it in the
4870 STMT_VINFO_VEC_STMT of stmt S2.
4871 When creating the second copy (VS2_1), we obtain the relevant vector
4872 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4873 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4874 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4875 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4876 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4877 chain of stmts and pointers:
4878 RELATED_STMT VEC_STMT
4879 VS1_0: vx0 = memref0 VS1_1 -
4880 VS1_1: vx1 = memref1 VS1_2 -
4881 VS1_2: vx2 = memref2 VS1_3 -
4882 VS1_3: vx3 = memref3 - -
4883 S1: x = load - VS1_0
4884 VS2_0: vz0 = vx0 + v1 VS2_1 -
4885 VS2_1: vz1 = vx1 + v1 VS2_2 -
4886 VS2_2: vz2 = vx2 + v1 VS2_3 -
4887 VS2_3: vz3 = vx3 + v1 - -
4888 S2: z = x + 1 - VS2_0 */
4890 prev_stmt_info = NULL;
4891 for (j = 0; j < ncopies; j++)
4893 /* Handle uses. */
4894 if (j == 0)
4896 if (op_type == binary_op || op_type == ternary_op)
4897 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4898 slp_node, -1);
4899 else
4900 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4901 slp_node, -1);
4902 if (op_type == ternary_op)
4904 vec_oprnds2.create (1);
4905 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4906 stmt,
4907 NULL));
4910 else
4912 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4913 if (op_type == ternary_op)
4915 tree vec_oprnd = vec_oprnds2.pop ();
4916 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4917 vec_oprnd));
4921 /* Arguments are ready. Create the new vector stmt. */
4922 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4924 vop1 = ((op_type == binary_op || op_type == ternary_op)
4925 ? vec_oprnds1[i] : NULL_TREE);
4926 vop2 = ((op_type == ternary_op)
4927 ? vec_oprnds2[i] : NULL_TREE);
4928 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4929 new_temp = make_ssa_name (vec_dest, new_stmt);
4930 gimple_assign_set_lhs (new_stmt, new_temp);
4931 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4932 if (slp_node)
4933 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4936 if (slp_node)
4937 continue;
4939 if (j == 0)
4940 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4941 else
4942 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4943 prev_stmt_info = vinfo_for_stmt (new_stmt);
4946 vec_oprnds0.release ();
4947 vec_oprnds1.release ();
4948 vec_oprnds2.release ();
4950 return true;
4953 /* A helper function to ensure data reference DR's base alignment
4954 for STMT_INFO. */
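/* If the alignment analysis flagged the base declaration as misaligned
   for VECTYPE, its alignment is raised here: via the symbol table node
   for declarations known to the symbol table, or directly through
   DECL_ALIGN for local variables.  */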
4956 static void
4957 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4959 if (!dr->aux)
4960 return;
4962 if (((dataref_aux *)dr->aux)->base_misaligned)
4964 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4965 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4967 if (decl_in_symtab_p (base_decl))
4968 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
4969 else
4971 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4972 DECL_USER_ALIGN (base_decl) = 1;
4974 ((dataref_aux *)dr->aux)->base_misaligned = false;
4979 /* Given a vector type VECTYPE, returns the VECTOR_CST mask that implements
4980 reversal of the vector elements. If that is impossible to do,
4981 returns NULL. */
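/* For example, for a four-element vector the selector built below is
   { 3, 2, 1, 0 }.  */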
4983 static tree
4984 perm_mask_for_reverse (tree vectype)
4986 int i, nunits;
4987 unsigned char *sel;
4989 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4990 sel = XALLOCAVEC (unsigned char, nunits);
4992 for (i = 0; i < nunits; ++i)
4993 sel[i] = nunits - 1 - i;
4995 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4996 return NULL_TREE;
4997 return vect_gen_perm_mask_checked (vectype, sel);
5000 /* Function vectorizable_store.
5002 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5003 can be vectorized.
5004 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5005 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5006 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5008 static bool
5009 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5010 slp_tree slp_node)
5012 tree scalar_dest;
5013 tree data_ref;
5014 tree op;
5015 tree vec_oprnd = NULL_TREE;
5016 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5017 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5018 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5019 tree elem_type;
5020 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5021 struct loop *loop = NULL;
5022 machine_mode vec_mode;
5023 tree dummy;
5024 enum dr_alignment_support alignment_support_scheme;
5025 tree def;
5026 gimple def_stmt;
5027 enum vect_def_type dt;
5028 stmt_vec_info prev_stmt_info = NULL;
5029 tree dataref_ptr = NULL_TREE;
5030 tree dataref_offset = NULL_TREE;
5031 gimple ptr_incr = NULL;
5032 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5033 int ncopies;
5034 int j;
5035 gimple next_stmt, first_stmt = NULL;
5036 bool grouped_store = false;
5037 bool store_lanes_p = false;
5038 unsigned int group_size, i;
5039 vec<tree> dr_chain = vNULL;
5040 vec<tree> oprnds = vNULL;
5041 vec<tree> result_chain = vNULL;
5042 bool inv_p;
5043 bool negative = false;
5044 tree offset = NULL_TREE;
5045 vec<tree> vec_oprnds = vNULL;
5046 bool slp = (slp_node != NULL);
5047 unsigned int vec_num;
5048 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5049 tree aggr_type;
5051 if (loop_vinfo)
5052 loop = LOOP_VINFO_LOOP (loop_vinfo);
5054 /* Multiple types in SLP are handled by creating the appropriate number of
5055 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5056 case of SLP. */
5057 if (slp || PURE_SLP_STMT (stmt_info))
5058 ncopies = 1;
5059 else
5060 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5062 gcc_assert (ncopies >= 1);
5064 /* FORNOW. This restriction should be relaxed. */
5065 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5069 "multiple types in nested loop.\n");
5070 return false;
5073 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5074 return false;
5076 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5077 return false;
5079 /* Is vectorizable store? */
5081 if (!is_gimple_assign (stmt))
5082 return false;
5084 scalar_dest = gimple_assign_lhs (stmt);
5085 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5086 && is_pattern_stmt_p (stmt_info))
5087 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5088 if (TREE_CODE (scalar_dest) != ARRAY_REF
5089 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5090 && TREE_CODE (scalar_dest) != INDIRECT_REF
5091 && TREE_CODE (scalar_dest) != COMPONENT_REF
5092 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5093 && TREE_CODE (scalar_dest) != REALPART_EXPR
5094 && TREE_CODE (scalar_dest) != MEM_REF)
5095 return false;
5097 gcc_assert (gimple_assign_single_p (stmt));
5098 op = gimple_assign_rhs1 (stmt);
5099 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5100 &def, &dt))
5102 if (dump_enabled_p ())
5103 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5104 "use not simple.\n");
5105 return false;
5108 elem_type = TREE_TYPE (vectype);
5109 vec_mode = TYPE_MODE (vectype);
5111 /* FORNOW.  In some cases we can vectorize even if the data-type is not
5112 supported (e.g. array initialization with 0).  */
5113 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5114 return false;
5116 if (!STMT_VINFO_DATA_REF (stmt_info))
5117 return false;
5119 if (!STMT_VINFO_STRIDED_P (stmt_info))
5121 negative =
5122 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5123 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5124 size_zero_node) < 0;
5125 if (negative && ncopies > 1)
5127 if (dump_enabled_p ())
5128 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5129 "multiple types with negative step.\n");
5130 return false;
5132 if (negative)
5134 gcc_assert (!grouped_store);
5135 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5136 if (alignment_support_scheme != dr_aligned
5137 && alignment_support_scheme != dr_unaligned_supported)
5139 if (dump_enabled_p ())
5140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5141 "negative step but alignment required.\n");
5142 return false;
5144 if (dt != vect_constant_def
5145 && dt != vect_external_def
5146 && !perm_mask_for_reverse (vectype))
5148 if (dump_enabled_p ())
5149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5150 "negative step and reversing not supported.\n");
5151 return false;
5156 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5158 grouped_store = true;
5159 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5160 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5161 if (!slp
5162 && !PURE_SLP_STMT (stmt_info)
5163 && !STMT_VINFO_STRIDED_P (stmt_info))
5165 if (vect_store_lanes_supported (vectype, group_size))
5166 store_lanes_p = true;
5167 else if (!vect_grouped_store_supported (vectype, group_size))
5168 return false;
5171 if (STMT_VINFO_STRIDED_P (stmt_info)
5172 && (slp || PURE_SLP_STMT (stmt_info))
5173 && (group_size > nunits
5174 || nunits % group_size != 0))
5176 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5177 "unhandled strided group store\n");
5178 return false;
5181 if (first_stmt == stmt)
5183 /* STMT is the leader of the group. Check the operands of all the
5184 stmts of the group. */
5185 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5186 while (next_stmt)
5188 gcc_assert (gimple_assign_single_p (next_stmt));
5189 op = gimple_assign_rhs1 (next_stmt);
5190 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5191 &def_stmt, &def, &dt))
5193 if (dump_enabled_p ())
5194 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5195 "use not simple.\n");
5196 return false;
5198 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5203 if (!vec_stmt) /* transformation not required. */
5205 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5206 /* The SLP costs are calculated during SLP analysis. */
5207 if (!PURE_SLP_STMT (stmt_info))
5208 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5209 NULL, NULL, NULL);
5210 return true;
5213 /** Transform. **/
5215 ensure_base_align (stmt_info, dr);
5217 if (grouped_store)
5219 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5220 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5222 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5224 /* FORNOW */
5225 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5227 /* We vectorize all the stmts of the interleaving group when we
5228 reach the last stmt in the group. */
5229 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5230 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5231 && !slp)
5233 *vec_stmt = NULL;
5234 return true;
5237 if (slp)
5239 grouped_store = false;
5240 /* VEC_NUM is the number of vect stmts to be created for this
5241 group. */
5242 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5243 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5244 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5245 op = gimple_assign_rhs1 (first_stmt);
5247 else
5248 /* VEC_NUM is the number of vect stmts to be created for this
5249 group. */
5250 vec_num = group_size;
5252 else
5254 first_stmt = stmt;
5255 first_dr = dr;
5256 group_size = vec_num = 1;
5259 if (dump_enabled_p ())
5260 dump_printf_loc (MSG_NOTE, vect_location,
5261 "transform store. ncopies = %d\n", ncopies);
5263 if (STMT_VINFO_STRIDED_P (stmt_info))
5265 gimple_stmt_iterator incr_gsi;
5266 bool insert_after;
5267 gimple incr;
5268 tree offvar;
5269 tree ivstep;
5270 tree running_off;
5271 gimple_seq stmts = NULL;
5272 tree stride_base, stride_step, alias_off;
5273 tree vec_oprnd;
5275 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5277 stride_base
5278 = fold_build_pointer_plus
5279 (unshare_expr (DR_BASE_ADDRESS (dr)),
5280 size_binop (PLUS_EXPR,
5281 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
5282 convert_to_ptrofftype (DR_INIT (dr))));
5283 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
5285 /* For a store with loop-invariant (but other than power-of-2)
5286 stride (i.e. not a grouped access) like so:
5288 for (i = 0; i < n; i += stride)
5289 array[i] = ...;
5291 we generate a new induction variable and new stores from
5292 the components of the (vectorized) rhs:
5294 for (j = 0; ; j += VF*stride)
5295 vectemp = ...;
5296 tmp1 = vectemp[0];
5297 array[j] = tmp1;
5298 tmp2 = vectemp[1];
5299 array[j + stride] = tmp2;
5303 unsigned nstores = nunits;
5304 tree ltype = elem_type;
5305 if (slp)
5307 nstores = nunits / group_size;
5308 if (group_size < nunits)
5309 ltype = build_vector_type (elem_type, group_size);
5310 else
5311 ltype = vectype;
5312 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5313 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5316 ivstep = stride_step;
5317 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5318 build_int_cst (TREE_TYPE (ivstep),
5319 ncopies * nstores));
5321 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5323 create_iv (stride_base, ivstep, NULL,
5324 loop, &incr_gsi, insert_after,
5325 &offvar, NULL);
5326 incr = gsi_stmt (incr_gsi);
5327 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
5329 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5330 if (stmts)
5331 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5333 prev_stmt_info = NULL;
5334 running_off = offvar;
5335 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
5336 for (j = 0; j < ncopies; j++)
5338 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5339 and first_stmt == stmt. */
5340 if (j == 0)
5341 vec_oprnd = vect_get_vec_def_for_operand (op, first_stmt, NULL);
5342 else
5343 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5345 for (i = 0; i < nstores; i++)
5347 tree newref, newoff;
5348 gimple incr, assign;
5349 tree size = TYPE_SIZE (ltype);
5350 /* Extract the i'th component. */
5351 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
5352 size);
5353 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5354 size, pos);
5356 elem = force_gimple_operand_gsi (gsi, elem, true,
5357 NULL_TREE, true,
5358 GSI_SAME_STMT);
5360 newref = build2 (MEM_REF, ltype,
5361 running_off, alias_off);
5363 /* And store it to *running_off. */
5364 assign = gimple_build_assign (newref, elem);
5365 vect_finish_stmt_generation (stmt, assign, gsi);
5367 newoff = copy_ssa_name (running_off, NULL);
5368 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5369 running_off, stride_step);
5370 vect_finish_stmt_generation (stmt, incr, gsi);
5372 running_off = newoff;
5373 if (j == 0 && i == 0)
5374 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
5375 else
5376 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5377 prev_stmt_info = vinfo_for_stmt (assign);
5380 return true;
5383 dr_chain.create (group_size);
5384 oprnds.create (group_size);
5386 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5387 gcc_assert (alignment_support_scheme);
5388 /* Targets with store-lane instructions must not require explicit
5389 realignment. */
5390 gcc_assert (!store_lanes_p
5391 || alignment_support_scheme == dr_aligned
5392 || alignment_support_scheme == dr_unaligned_supported);
5394 if (negative)
5395 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5397 if (store_lanes_p)
5398 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5399 else
5400 aggr_type = vectype;
5402 /* In case the vectorization factor (VF) is bigger than the number
5403 of elements that we can fit in a vectype (nunits), we have to generate
5404 more than one vector stmt - i.e., we need to "unroll" the
5405 vector stmt by a factor VF/nunits. For more details see documentation in
5406 vect_get_vec_def_for_copy_stmt. */
5408 /* In case of interleaving (non-unit grouped access):
5410 S1: &base + 2 = x2
5411 S2: &base = x0
5412 S3: &base + 1 = x1
5413 S4: &base + 3 = x3
5415 We create vectorized stores starting from the base address (the access of
5416 the first stmt in the chain - S2 in the above example) when the last store
5417 stmt of the chain (S4) is reached:
5419 VS1: &base = vx2
5420 VS2: &base + vec_size*1 = vx0
5421 VS3: &base + vec_size*2 = vx1
5422 VS4: &base + vec_size*3 = vx3
5424 Then permutation statements are generated:
5426 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5427 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5430 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5431 (the order of the data-refs in the output of vect_permute_store_chain
5432 corresponds to the order of scalar stmts in the interleaving chain - see
5433 the documentation of vect_permute_store_chain()).
5435 In case of both multiple types and interleaving, the above vector stores and
5436 permutation stmts are created for every copy. The result vector stmts are
5437 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5438 STMT_VINFO_RELATED_STMT for the next copies.
5441 prev_stmt_info = NULL;
5442 for (j = 0; j < ncopies; j++)
5444 gimple new_stmt;
5446 if (j == 0)
5448 if (slp)
5450 /* Get vectorized arguments for SLP_NODE. */
5451 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5452 NULL, slp_node, -1);
5454 vec_oprnd = vec_oprnds[0];
5456 else
5458 /* For interleaved stores we collect vectorized defs for all the
5459 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5460 used as an input to vect_permute_store_chain(), and OPRNDS as
5461 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5463 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5464 OPRNDS are of size 1. */
5465 next_stmt = first_stmt;
5466 for (i = 0; i < group_size; i++)
5468 /* Since gaps are not supported for interleaved stores,
5469 GROUP_SIZE is the exact number of stmts in the chain.
5470 Therefore, NEXT_STMT can't be NULL_TREE.  If there is
5471 no interleaving, GROUP_SIZE is 1, and only one
5472 iteration of the loop will be executed. */
5473 gcc_assert (next_stmt
5474 && gimple_assign_single_p (next_stmt));
5475 op = gimple_assign_rhs1 (next_stmt);
5477 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5478 NULL);
5479 dr_chain.quick_push (vec_oprnd);
5480 oprnds.quick_push (vec_oprnd);
5481 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5485 /* We should have caught mismatched types earlier.  */
5486 gcc_assert (useless_type_conversion_p (vectype,
5487 TREE_TYPE (vec_oprnd)));
5488 bool simd_lane_access_p
5489 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5490 if (simd_lane_access_p
5491 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5492 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5493 && integer_zerop (DR_OFFSET (first_dr))
5494 && integer_zerop (DR_INIT (first_dr))
5495 && alias_sets_conflict_p (get_alias_set (aggr_type),
5496 get_alias_set (DR_REF (first_dr))))
5498 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5499 dataref_offset = build_int_cst (reference_alias_ptr_type
5500 (DR_REF (first_dr)), 0);
5501 inv_p = false;
5503 else
5504 dataref_ptr
5505 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5506 simd_lane_access_p ? loop : NULL,
5507 offset, &dummy, gsi, &ptr_incr,
5508 simd_lane_access_p, &inv_p);
5509 gcc_assert (bb_vinfo || !inv_p);
5511 else
5513 /* For interleaved stores we created vectorized defs for all the
5514 defs stored in OPRNDS in the previous iteration (previous copy).
5515 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5516 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5517 next copy.
5518 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5519 OPRNDS are of size 1. */
5520 for (i = 0; i < group_size; i++)
5522 op = oprnds[i];
5523 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5524 &def, &dt);
5525 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5526 dr_chain[i] = vec_oprnd;
5527 oprnds[i] = vec_oprnd;
5529 if (dataref_offset)
5530 dataref_offset
5531 = int_const_binop (PLUS_EXPR, dataref_offset,
5532 TYPE_SIZE_UNIT (aggr_type));
5533 else
5534 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5535 TYPE_SIZE_UNIT (aggr_type));
5538 if (store_lanes_p)
5540 tree vec_array;
5542 /* Combine all the vectors into an array. */
5543 vec_array = create_vector_array (vectype, vec_num);
5544 for (i = 0; i < vec_num; i++)
5546 vec_oprnd = dr_chain[i];
5547 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5550 /* Emit:
5551 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5552 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5553 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5554 gimple_call_set_lhs (new_stmt, data_ref);
5555 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5557 else
5559 new_stmt = NULL;
5560 if (grouped_store)
5562 if (j == 0)
5563 result_chain.create (group_size);
5564 /* Permute. */
5565 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5566 &result_chain);
5569 next_stmt = first_stmt;
5570 for (i = 0; i < vec_num; i++)
5572 unsigned align, misalign;
5574 if (i > 0)
5575 /* Bump the vector pointer. */
5576 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5577 stmt, NULL_TREE);
5579 if (slp)
5580 vec_oprnd = vec_oprnds[i];
5581 else if (grouped_store)
5582 /* For grouped stores vectorized defs are interleaved in
5583 vect_permute_store_chain(). */
5584 vec_oprnd = result_chain[i];
5586 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5587 dataref_offset
5588 ? dataref_offset
5589 : build_int_cst (reference_alias_ptr_type
5590 (DR_REF (first_dr)), 0));
5591 align = TYPE_ALIGN_UNIT (vectype);
5592 if (aligned_access_p (first_dr))
5593 misalign = 0;
5594 else if (DR_MISALIGNMENT (first_dr) == -1)
5596 TREE_TYPE (data_ref)
5597 = build_aligned_type (TREE_TYPE (data_ref),
5598 TYPE_ALIGN (elem_type));
5599 align = TYPE_ALIGN_UNIT (elem_type);
5600 misalign = 0;
5602 else
5604 TREE_TYPE (data_ref)
5605 = build_aligned_type (TREE_TYPE (data_ref),
5606 TYPE_ALIGN (elem_type));
5607 misalign = DR_MISALIGNMENT (first_dr);
5609 if (dataref_offset == NULL_TREE)
5610 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5611 misalign);
5613 if (negative
5614 && dt != vect_constant_def
5615 && dt != vect_external_def)
5617 tree perm_mask = perm_mask_for_reverse (vectype);
5618 tree perm_dest
5619 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5620 vectype);
5621 tree new_temp = make_ssa_name (perm_dest);
5623 /* Generate the permute statement. */
5624 gimple perm_stmt
5625 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5626 vec_oprnd, perm_mask);
5627 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5629 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5630 vec_oprnd = new_temp;
5633 /* Arguments are ready. Create the new vector stmt. */
5634 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5635 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5637 if (slp)
5638 continue;
5640 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5641 if (!next_stmt)
5642 break;
5645 if (!slp)
5647 if (j == 0)
5648 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5649 else
5650 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5651 prev_stmt_info = vinfo_for_stmt (new_stmt);
5655 dr_chain.release ();
5656 oprnds.release ();
5657 result_chain.release ();
5658 vec_oprnds.release ();
5660 return true;
5663 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5664 VECTOR_CST mask. No checks are made that the target platform supports the
5665 mask, so callers may wish to test can_vec_perm_p separately, or use
5666 vect_gen_perm_mask_checked. */
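/* The mask has the same number of elements as VECTYPE, with an integer
   element type of the same width; e.g. for a V4SI VECTYPE and
   SEL = { 0, 4, 1, 5 } the result is the integer vector constant
   { 0, 4, 1, 5 }, ready to be used as the third operand of a
   VEC_PERM_EXPR.  */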
5668 tree
5669 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5671 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5672 int i, nunits;
5674 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5676 mask_elt_type = lang_hooks.types.type_for_mode
5677 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5678 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5680 mask_elts = XALLOCAVEC (tree, nunits);
5681 for (i = nunits - 1; i >= 0; i--)
5682 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5683 mask_vec = build_vector (mask_type, mask_elts);
5685 return mask_vec;
5688 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5689 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5691 tree
5692 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5694 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5695 return vect_gen_perm_mask_any (vectype, sel);
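/* Illustrative sketch (not part of the original sources): how a caller
   could use the helpers above to build a mask that reverses the elements
   of VECTYPE, similar to what perm_mask_for_reverse provides for the
   negative-step load/store paths.  The function name is hypothetical.  */

static tree
example_reverse_perm_mask (tree vectype)
{
  int i, nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned char *sel = XALLOCAVEC (unsigned char, nunits);

  /* sel[i] = nunits - 1 - i selects the elements in reverse order.  */
  for (i = 0; i < nunits; i++)
    sel[i] = nunits - 1 - i;

  /* Verify target support before using the unchecked variant.  */
  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
    return NULL_TREE;

  return vect_gen_perm_mask_any (vectype, sel);
}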
5698 /* Given vector variables X and Y that were generated for the scalar
5699 STMT, generate instructions to permute the vector elements of X and Y
5700 using permutation mask MASK_VEC, insert them at *GSI and return the
5701 permuted vector variable. */
5703 static tree
5704 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5705 gimple_stmt_iterator *gsi)
5707 tree vectype = TREE_TYPE (x);
5708 tree perm_dest, data_ref;
5709 gimple perm_stmt;
5711 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5712 data_ref = make_ssa_name (perm_dest);
5714 /* Generate the permute statement. */
5715 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
5716 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5718 return data_ref;
5721 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5722 inserting them on the loop's preheader edge.  Returns true if we
5723 were successful in doing so (and thus STMT can then be moved),
5724 otherwise returns false. */
5726 static bool
5727 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5729 ssa_op_iter i;
5730 tree op;
5731 bool any = false;
5733 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5735 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5736 if (!gimple_nop_p (def_stmt)
5737 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5739 /* Make sure we don't need to recurse. While we could do
5740 so in simple cases, when there are more complex use webs
5741 we don't have an easy way to preserve stmt order to fulfil
5742 dependencies within them. */
5743 tree op2;
5744 ssa_op_iter i2;
5745 if (gimple_code (def_stmt) == GIMPLE_PHI)
5746 return false;
5747 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5749 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5750 if (!gimple_nop_p (def_stmt2)
5751 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5752 return false;
5754 any = true;
5758 if (!any)
5759 return true;
5761 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5763 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5764 if (!gimple_nop_p (def_stmt)
5765 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5767 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5768 gsi_remove (&gsi, false);
5769 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5773 return true;
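/* Illustrative note (not from the original sources): if STMT is the load
   x_1 = MEM[p_2] and p_2 is defined inside the loop by p_2 = &a + 16,
   whose own operands are all defined outside the loop, the definition of
   p_2 is moved to the preheader.  This is what allows the invariant-load
   handling in vectorizable_load to emit the scalar load on the preheader
   edge as well.  */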
5776 /* vectorizable_load.
5778 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5779 can be vectorized.
5780 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5781 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5782 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5784 static bool
5785 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5786 slp_tree slp_node, slp_instance slp_node_instance)
5788 tree scalar_dest;
5789 tree vec_dest = NULL;
5790 tree data_ref = NULL;
5791 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5792 stmt_vec_info prev_stmt_info;
5793 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5794 struct loop *loop = NULL;
5795 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5796 bool nested_in_vect_loop = false;
5797 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5798 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5799 tree elem_type;
5800 tree new_temp;
5801 machine_mode mode;
5802 gimple new_stmt = NULL;
5803 tree dummy;
5804 enum dr_alignment_support alignment_support_scheme;
5805 tree dataref_ptr = NULL_TREE;
5806 tree dataref_offset = NULL_TREE;
5807 gimple ptr_incr = NULL;
5808 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5809 int ncopies;
5810 int i, j, group_size = -1, group_gap;
5811 tree msq = NULL_TREE, lsq;
5812 tree offset = NULL_TREE;
5813 tree byte_offset = NULL_TREE;
5814 tree realignment_token = NULL_TREE;
5815 gphi *phi = NULL;
5816 vec<tree> dr_chain = vNULL;
5817 bool grouped_load = false;
5818 bool load_lanes_p = false;
5819 gimple first_stmt;
5820 bool inv_p;
5821 bool negative = false;
5822 bool compute_in_loop = false;
5823 struct loop *at_loop;
5824 int vec_num;
5825 bool slp = (slp_node != NULL);
5826 bool slp_perm = false;
5827 enum tree_code code;
5828 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5829 int vf;
5830 tree aggr_type;
5831 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5832 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5833 int gather_scale = 1;
5834 enum vect_def_type gather_dt = vect_unknown_def_type;
5836 if (loop_vinfo)
5838 loop = LOOP_VINFO_LOOP (loop_vinfo);
5839 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5840 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5842 else
5843 vf = 1;
5845 /* Multiple types in SLP are handled by creating the appropriate number of
5846 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5847 case of SLP. */
5848 if (slp || PURE_SLP_STMT (stmt_info))
5849 ncopies = 1;
5850 else
5851 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5853 gcc_assert (ncopies >= 1);
5855 /* FORNOW. This restriction should be relaxed. */
5856 if (nested_in_vect_loop && ncopies > 1)
5858 if (dump_enabled_p ())
5859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5860 "multiple types in nested loop.\n");
5861 return false;
5864 /* Invalidate assumptions made by dependence analysis when vectorization
5865 on the unrolled body effectively re-orders stmts. */
5866 if (ncopies > 1
5867 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5868 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5869 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5871 if (dump_enabled_p ())
5872 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5873 "cannot perform implicit CSE when unrolling "
5874 "with negative dependence distance\n");
5875 return false;
5878 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5879 return false;
5881 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5882 return false;
5884 /* Is vectorizable load? */
5885 if (!is_gimple_assign (stmt))
5886 return false;
5888 scalar_dest = gimple_assign_lhs (stmt);
5889 if (TREE_CODE (scalar_dest) != SSA_NAME)
5890 return false;
5892 code = gimple_assign_rhs_code (stmt);
5893 if (code != ARRAY_REF
5894 && code != BIT_FIELD_REF
5895 && code != INDIRECT_REF
5896 && code != COMPONENT_REF
5897 && code != IMAGPART_EXPR
5898 && code != REALPART_EXPR
5899 && code != MEM_REF
5900 && TREE_CODE_CLASS (code) != tcc_declaration)
5901 return false;
5903 if (!STMT_VINFO_DATA_REF (stmt_info))
5904 return false;
5906 elem_type = TREE_TYPE (vectype);
5907 mode = TYPE_MODE (vectype);
5909 /* FORNOW. In some cases we can vectorize even if the data type is not
5910 supported (e.g. data copies). */
5911 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5913 if (dump_enabled_p ())
5914 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5915 "Aligned load, but unsupported type.\n");
5916 return false;
5919 /* Check if the load is a part of an interleaving chain. */
5920 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5922 grouped_load = true;
5923 /* FORNOW */
5924 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5926 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5928 /* If this is single-element interleaving with an element distance
5929 that leaves unused vector loads around, punt - we at least create
5930 very sub-optimal code in that case (and blow up memory,
5931 see PR65518). */
5932 if (first_stmt == stmt
5933 && !GROUP_NEXT_ELEMENT (stmt_info)
5934 && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
5936 if (dump_enabled_p ())
5937 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5938 "single-element interleaving not supported "
5939 "for not adjacent vector loads\n");
5940 return false;
5943 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
5944 slp_perm = true;
5946 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5947 if (!slp
5948 && !PURE_SLP_STMT (stmt_info)
5949 && !STMT_VINFO_STRIDED_P (stmt_info))
5951 if (vect_load_lanes_supported (vectype, group_size))
5952 load_lanes_p = true;
5953 else if (!vect_grouped_load_supported (vectype, group_size))
5954 return false;
5957 /* Invalidate assumptions made by dependence analysis when vectorization
5958 on the unrolled body effectively re-orders stmts. */
5959 if (!PURE_SLP_STMT (stmt_info)
5960 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5961 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5962 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5964 if (dump_enabled_p ())
5965 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5966 "cannot perform implicit CSE when performing "
5967 "group loads with negative dependence distance\n");
5968 return false;
5971 /* Similarly, when the stmt is a load that is both part of an SLP
5972 instance and a loop-vectorized stmt via the same-dr mechanism,
5973 we have to give up. */
5974 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
5975 && (STMT_SLP_TYPE (stmt_info)
5976 != STMT_SLP_TYPE (vinfo_for_stmt
5977 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
5979 if (dump_enabled_p ())
5980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5981 "conflicting SLP types for CSEd load\n");
5982 return false;
5987 if (STMT_VINFO_GATHER_P (stmt_info))
5989 gimple def_stmt;
5990 tree def;
5991 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5992 &gather_off, &gather_scale);
5993 gcc_assert (gather_decl);
5994 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5995 &def_stmt, &def, &gather_dt,
5996 &gather_off_vectype))
5998 if (dump_enabled_p ())
5999 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6000 "gather index use not simple.\n");
6001 return false;
6004 else if (STMT_VINFO_STRIDED_P (stmt_info))
6006 if ((grouped_load
6007 && (slp || PURE_SLP_STMT (stmt_info)))
6008 && (group_size > nunits
6009 || nunits % group_size != 0
6010 /* We don't support load permutations. */
6011 || slp_perm))
6013 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6014 "unhandled strided group load\n");
6015 return false;
6018 else
6020 negative = tree_int_cst_compare (nested_in_vect_loop
6021 ? STMT_VINFO_DR_STEP (stmt_info)
6022 : DR_STEP (dr),
6023 size_zero_node) < 0;
6024 if (negative && ncopies > 1)
6026 if (dump_enabled_p ())
6027 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6028 "multiple types with negative step.\n");
6029 return false;
6032 if (negative)
6034 if (grouped_load)
6036 if (dump_enabled_p ())
6037 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6038 "negative step for group load not supported"
6039 "\n");
6040 return false;
6042 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6043 if (alignment_support_scheme != dr_aligned
6044 && alignment_support_scheme != dr_unaligned_supported)
6046 if (dump_enabled_p ())
6047 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6048 "negative step but alignment required.\n");
6049 return false;
6051 if (!perm_mask_for_reverse (vectype))
6053 if (dump_enabled_p ())
6054 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6055 "negative step and reversing not supported."
6056 "\n");
6057 return false;
6062 if (!vec_stmt) /* transformation not required. */
6064 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6065 /* The SLP costs are calculated during SLP analysis. */
6066 if (!PURE_SLP_STMT (stmt_info))
6067 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6068 NULL, NULL, NULL);
6069 return true;
6072 if (dump_enabled_p ())
6073 dump_printf_loc (MSG_NOTE, vect_location,
6074 "transform load. ncopies = %d\n", ncopies);
6076 /** Transform. **/
6078 ensure_base_align (stmt_info, dr);
6080 if (STMT_VINFO_GATHER_P (stmt_info))
6082 tree vec_oprnd0 = NULL_TREE, op;
6083 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6084 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6085 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6086 edge pe = loop_preheader_edge (loop);
6087 gimple_seq seq;
6088 basic_block new_bb;
6089 enum { NARROW, NONE, WIDEN } modifier;
6090 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
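/* The vector of gather offsets and the result vector may have different
   element counts.  If the offset vector has twice as many elements
   (modifier WIDEN), odd-numbered copies reuse the high half of the
   previous offset vector through a permutation; if it has half as many
   (modifier NARROW), two gather results are combined into one result
   vector with a permutation and NCOPIES is doubled.  */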
6092 if (nunits == gather_off_nunits)
6093 modifier = NONE;
6094 else if (nunits == gather_off_nunits / 2)
6096 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6097 modifier = WIDEN;
6099 for (i = 0; i < gather_off_nunits; ++i)
6100 sel[i] = i | nunits;
6102 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6104 else if (nunits == gather_off_nunits * 2)
6106 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6107 modifier = NARROW;
6109 for (i = 0; i < nunits; ++i)
6110 sel[i] = i < gather_off_nunits
6111 ? i : i + nunits - gather_off_nunits;
6113 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6114 ncopies *= 2;
6116 else
6117 gcc_unreachable ();
6119 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6120 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6121 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6122 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6123 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6124 scaletype = TREE_VALUE (arglist);
6125 gcc_checking_assert (types_compatible_p (srctype, rettype));
6127 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6129 ptr = fold_convert (ptrtype, gather_base);
6130 if (!is_gimple_min_invariant (ptr))
6132 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6133 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6134 gcc_assert (!new_bb);
6137 /* Currently we support only unconditional gather loads,
6138 so mask should be all ones. */
6139 if (TREE_CODE (masktype) == INTEGER_TYPE)
6140 mask = build_int_cst (masktype, -1);
6141 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6143 mask = build_int_cst (TREE_TYPE (masktype), -1);
6144 mask = build_vector_from_val (masktype, mask);
6145 mask = vect_init_vector (stmt, mask, masktype, NULL);
6147 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
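/* Build a floating-point value whose bit pattern is all ones by
   interpreting an all-ones buffer as a target real, then splat it
   across the mask vector.  */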
6149 REAL_VALUE_TYPE r;
6150 long tmp[6];
6151 for (j = 0; j < 6; ++j)
6152 tmp[j] = -1;
6153 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6154 mask = build_real (TREE_TYPE (masktype), r);
6155 mask = build_vector_from_val (masktype, mask);
6156 mask = vect_init_vector (stmt, mask, masktype, NULL);
6158 else
6159 gcc_unreachable ();
6161 scale = build_int_cst (scaletype, gather_scale);
6163 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6164 merge = build_int_cst (TREE_TYPE (rettype), 0);
6165 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6167 REAL_VALUE_TYPE r;
6168 long tmp[6];
6169 for (j = 0; j < 6; ++j)
6170 tmp[j] = 0;
6171 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6172 merge = build_real (TREE_TYPE (rettype), r);
6174 else
6175 gcc_unreachable ();
6176 merge = build_vector_from_val (rettype, merge);
6177 merge = vect_init_vector (stmt, merge, rettype, NULL);
6179 prev_stmt_info = NULL;
6180 for (j = 0; j < ncopies; ++j)
6182 if (modifier == WIDEN && (j & 1))
6183 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6184 perm_mask, stmt, gsi);
6185 else if (j == 0)
6186 op = vec_oprnd0
6187 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
6188 else
6189 op = vec_oprnd0
6190 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6192 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6194 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6195 == TYPE_VECTOR_SUBPARTS (idxtype));
6196 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
6197 var = make_ssa_name (var);
6198 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6199 new_stmt
6200 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6201 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6202 op = var;
6205 new_stmt
6206 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6208 if (!useless_type_conversion_p (vectype, rettype))
6210 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6211 == TYPE_VECTOR_SUBPARTS (rettype));
6212 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
6213 op = make_ssa_name (var, new_stmt);
6214 gimple_call_set_lhs (new_stmt, op);
6215 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6216 var = make_ssa_name (vec_dest);
6217 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6218 new_stmt
6219 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6221 else
6223 var = make_ssa_name (vec_dest, new_stmt);
6224 gimple_call_set_lhs (new_stmt, var);
6227 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6229 if (modifier == NARROW)
6231 if ((j & 1) == 0)
6233 prev_res = var;
6234 continue;
6236 var = permute_vec_elements (prev_res, var,
6237 perm_mask, stmt, gsi);
6238 new_stmt = SSA_NAME_DEF_STMT (var);
6241 if (prev_stmt_info == NULL)
6242 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6243 else
6244 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6245 prev_stmt_info = vinfo_for_stmt (new_stmt);
6247 return true;
6249 else if (STMT_VINFO_STRIDED_P (stmt_info))
6251 gimple_stmt_iterator incr_gsi;
6252 bool insert_after;
6253 gimple incr;
6254 tree offvar;
6255 tree ivstep;
6256 tree running_off;
6257 vec<constructor_elt, va_gc> *v = NULL;
6258 gimple_seq stmts = NULL;
6259 tree stride_base, stride_step, alias_off;
6261 gcc_assert (!nested_in_vect_loop);
6263 stride_base
6264 = fold_build_pointer_plus
6265 (unshare_expr (DR_BASE_ADDRESS (dr)),
6266 size_binop (PLUS_EXPR,
6267 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
6268 convert_to_ptrofftype (DR_INIT (dr))));
6269 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
6271 /* For a load with loop-invariant (but other than power-of-2)
6272 stride (i.e. not a grouped access) like so:
6274 for (i = 0; i < n; i += stride)
6275 ... = array[i];
6277 we generate a new induction variable and new accesses to
6278 form a new vector (or vectors, depending on ncopies):
6280 for (j = 0; ; j += VF*stride)
6281 tmp1 = array[j];
6282 tmp2 = array[j + stride];
6284 vectemp = {tmp1, tmp2, ...}
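/* Illustrative instance (not from the original sources): with
   VF == nunits == 4 and a runtime stride of 3 elements, one vector
   iteration loads array[j], array[j+3], array[j+6] and array[j+9]
   into a CONSTRUCTOR and then advances j by 12 elements (VF * stride).  */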
6287 ivstep = stride_step;
6288 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6289 build_int_cst (TREE_TYPE (ivstep), vf));
6291 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6293 create_iv (stride_base, ivstep, NULL,
6294 loop, &incr_gsi, insert_after,
6295 &offvar, NULL);
6296 incr = gsi_stmt (incr_gsi);
6297 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6299 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6300 if (stmts)
6301 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6303 prev_stmt_info = NULL;
6304 running_off = offvar;
6305 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
6306 int nloads = nunits;
6307 tree ltype = TREE_TYPE (vectype);
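/* By default each vector is assembled from NLOADS == NUNITS scalar
   loads.  For SLP, each group of GROUP_SIZE contiguous elements can
   instead be fetched by a single sub-vector load of type LTYPE
   (requiring only element alignment), so NUNITS / GROUP_SIZE loads
   suffice per vector.  */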
6308 if (slp)
6310 nloads = nunits / group_size;
6311 if (group_size < nunits)
6312 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6313 else
6314 ltype = vectype;
6315 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6316 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6317 gcc_assert (!slp_perm);
6319 for (j = 0; j < ncopies; j++)
6321 tree vec_inv;
6323 if (nloads > 1)
6325 vec_alloc (v, nloads);
6326 for (i = 0; i < nloads; i++)
6328 tree newref, newoff;
6329 gimple incr;
6330 newref = build2 (MEM_REF, ltype, running_off, alias_off);
6332 newref = force_gimple_operand_gsi (gsi, newref, true,
6333 NULL_TREE, true,
6334 GSI_SAME_STMT);
6335 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6336 newoff = copy_ssa_name (running_off);
6337 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6338 running_off, stride_step);
6339 vect_finish_stmt_generation (stmt, incr, gsi);
6341 running_off = newoff;
6344 vec_inv = build_constructor (vectype, v);
6345 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6346 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6348 else
6350 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6351 build2 (MEM_REF, ltype,
6352 running_off, alias_off));
6353 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6355 tree newoff = copy_ssa_name (running_off);
6356 gimple incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6357 running_off, stride_step);
6358 vect_finish_stmt_generation (stmt, incr, gsi);
6360 running_off = newoff;
6363 if (slp)
6364 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6365 if (j == 0)
6366 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6367 else
6368 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6369 prev_stmt_info = vinfo_for_stmt (new_stmt);
6371 return true;
6374 if (grouped_load)
6376 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6377 if (slp
6378 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6379 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6380 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6382 /* Check if the chain of loads is already vectorized. */
6383 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6384 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6385 ??? But we can only do so if there is exactly one
6386 as we have no way to get at the rest. Leave the CSE
6387 opportunity alone.
6388 ??? With the group load eventually participating
6389 in multiple different permutations (having multiple
6390 slp nodes which refer to the same group) the CSE
6391 is even wrong code. See PR56270. */
6392 && !slp)
6394 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6395 return true;
6397 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6398 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6400 /* VEC_NUM is the number of vect stmts to be created for this group. */
6401 if (slp)
6403 grouped_load = false;
6404 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6405 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6407 else
6409 vec_num = group_size;
6410 group_gap = 0;
6413 else
6415 first_stmt = stmt;
6416 first_dr = dr;
6417 group_size = vec_num = 1;
6418 group_gap = 0;
6421 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6422 gcc_assert (alignment_support_scheme);
6423 /* Targets with load-lane instructions must not require explicit
6424 realignment. */
6425 gcc_assert (!load_lanes_p
6426 || alignment_support_scheme == dr_aligned
6427 || alignment_support_scheme == dr_unaligned_supported);
6429 /* In case the vectorization factor (VF) is bigger than the number
6430 of elements that we can fit in a vectype (nunits), we have to generate
6431 more than one vector stmt - i.e., we need to "unroll" the
6432 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6433 from one copy of the vector stmt to the next, in the field
6434 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6435 stages to find the correct vector defs to be used when vectorizing
6436 stmts that use the defs of the current stmt. The example below
6437 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6438 need to create 4 vectorized stmts):
6440 before vectorization:
6441 RELATED_STMT VEC_STMT
6442 S1: x = memref - -
6443 S2: z = x + 1 - -
6445 step 1: vectorize stmt S1:
6446 We first create the vector stmt VS1_0, and, as usual, record a
6447 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6448 Next, we create the vector stmt VS1_1, and record a pointer to
6449 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6450 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6451 stmts and pointers:
6452 RELATED_STMT VEC_STMT
6453 VS1_0: vx0 = memref0 VS1_1 -
6454 VS1_1: vx1 = memref1 VS1_2 -
6455 VS1_2: vx2 = memref2 VS1_3 -
6456 VS1_3: vx3 = memref3 - -
6457 S1: x = load - VS1_0
6458 S2: z = x + 1 - -
6460 See the documentation of vect_get_vec_def_for_stmt_copy for how the
6461 information we recorded in RELATED_STMT field is used to vectorize
6462 stmt S2. */
6464 /* In case of interleaving (non-unit grouped access):
6466 S1: x2 = &base + 2
6467 S2: x0 = &base
6468 S3: x1 = &base + 1
6469 S4: x3 = &base + 3
6471 Vectorized loads are created in the order of memory accesses
6472 starting from the access of the first stmt of the chain:
6474 VS1: vx0 = &base
6475 VS2: vx1 = &base + vec_size*1
6476 VS3: vx3 = &base + vec_size*2
6477 VS4: vx4 = &base + vec_size*3
6479 Then permutation statements are generated:
6481 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6482 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6485 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6486 (the order of the data-refs in the output of vect_permute_load_chain
6487 corresponds to the order of scalar stmts in the interleaving chain - see
6488 the documentation of vect_permute_load_chain()).
6489 The generation of permutation stmts and recording them in
6490 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6492 In case of both multiple types and interleaving, the vector loads and
6493 permutation stmts above are created for every copy. The result vector
6494 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6495 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6497 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6498 on a target that supports unaligned accesses (dr_unaligned_supported)
6499 we generate the following code:
6500 p = initial_addr;
6501 indx = 0;
6502 loop {
6503 p = p + indx * vectype_size;
6504 vec_dest = *(p);
6505 indx = indx + 1;
6508 Otherwise, the data reference is potentially unaligned on a target that
6509 does not support unaligned accesses (dr_explicit_realign_optimized) -
6510 then generate the following code, in which the data in each iteration is
6511 obtained by two vector loads, one from the previous iteration, and one
6512 from the current iteration:
6513 p1 = initial_addr;
6514 msq_init = *(floor(p1))
6515 p2 = initial_addr + VS - 1;
6516 realignment_token = call target_builtin;
6517 indx = 0;
6518 loop {
6519 p2 = p2 + indx * vectype_size
6520 lsq = *(floor(p2))
6521 vec_dest = realign_load (msq, lsq, realignment_token)
6522 indx = indx + 1;
6523 msq = lsq;
6524 } */
6526 /* If the misalignment remains the same throughout the execution of the
6527 loop, we can create the init_addr and permutation mask at the loop
6528 preheader. Otherwise, they need to be created inside the loop.
6529 This can only occur when vectorizing memory accesses in the inner-loop
6530 nested within an outer-loop that is being vectorized. */
6532 if (nested_in_vect_loop
6533 && (TREE_INT_CST_LOW (DR_STEP (dr))
6534 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6536 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6537 compute_in_loop = true;
6540 if ((alignment_support_scheme == dr_explicit_realign_optimized
6541 || alignment_support_scheme == dr_explicit_realign)
6542 && !compute_in_loop)
6544 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6545 alignment_support_scheme, NULL_TREE,
6546 &at_loop);
6547 if (alignment_support_scheme == dr_explicit_realign_optimized)
6549 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
6550 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6551 size_one_node);
6554 else
6555 at_loop = loop;
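/* For a negative-step access the initial address is biased back by
   NUNITS - 1 elements so that each vector load covers the elements
   touched by this iteration; the loaded vector is reversed afterwards
   using perm_mask_for_reverse.  */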
6557 if (negative)
6558 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6560 if (load_lanes_p)
6561 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6562 else
6563 aggr_type = vectype;
6565 prev_stmt_info = NULL;
6566 for (j = 0; j < ncopies; j++)
6568 /* 1. Create the vector or array pointer update chain. */
6569 if (j == 0)
6571 bool simd_lane_access_p
6572 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6573 if (simd_lane_access_p
6574 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6575 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6576 && integer_zerop (DR_OFFSET (first_dr))
6577 && integer_zerop (DR_INIT (first_dr))
6578 && alias_sets_conflict_p (get_alias_set (aggr_type),
6579 get_alias_set (DR_REF (first_dr)))
6580 && (alignment_support_scheme == dr_aligned
6581 || alignment_support_scheme == dr_unaligned_supported))
6583 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6584 dataref_offset = build_int_cst (reference_alias_ptr_type
6585 (DR_REF (first_dr)), 0);
6586 inv_p = false;
6588 else
6589 dataref_ptr
6590 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6591 offset, &dummy, gsi, &ptr_incr,
6592 simd_lane_access_p, &inv_p,
6593 byte_offset);
6595 else if (dataref_offset)
6596 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6597 TYPE_SIZE_UNIT (aggr_type));
6598 else
6599 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6600 TYPE_SIZE_UNIT (aggr_type));
6602 if (grouped_load || slp_perm)
6603 dr_chain.create (vec_num);
6605 if (load_lanes_p)
6607 tree vec_array;
6609 vec_array = create_vector_array (vectype, vec_num);
6611 /* Emit:
6612 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6613 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6614 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6615 gimple_call_set_lhs (new_stmt, vec_array);
6616 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6618 /* Extract each vector into an SSA_NAME. */
6619 for (i = 0; i < vec_num; i++)
6621 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6622 vec_array, i);
6623 dr_chain.quick_push (new_temp);
6626 /* Record the mapping between SSA_NAMEs and statements. */
6627 vect_record_grouped_load_vectors (stmt, dr_chain);
6629 else
6631 for (i = 0; i < vec_num; i++)
6633 if (i > 0)
6634 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6635 stmt, NULL_TREE);
6637 /* 2. Create the vector-load in the loop. */
6638 switch (alignment_support_scheme)
6640 case dr_aligned:
6641 case dr_unaligned_supported:
6643 unsigned int align, misalign;
6645 data_ref
6646 = build2 (MEM_REF, vectype, dataref_ptr,
6647 dataref_offset
6648 ? dataref_offset
6649 : build_int_cst (reference_alias_ptr_type
6650 (DR_REF (first_dr)), 0));
6651 align = TYPE_ALIGN_UNIT (vectype);
6652 if (alignment_support_scheme == dr_aligned)
6654 gcc_assert (aligned_access_p (first_dr));
6655 misalign = 0;
6657 else if (DR_MISALIGNMENT (first_dr) == -1)
6659 TREE_TYPE (data_ref)
6660 = build_aligned_type (TREE_TYPE (data_ref),
6661 TYPE_ALIGN (elem_type));
6662 align = TYPE_ALIGN_UNIT (elem_type);
6663 misalign = 0;
6665 else
6667 TREE_TYPE (data_ref)
6668 = build_aligned_type (TREE_TYPE (data_ref),
6669 TYPE_ALIGN (elem_type));
6670 misalign = DR_MISALIGNMENT (first_dr);
6672 if (dataref_offset == NULL_TREE)
6673 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6674 align, misalign);
6675 break;
6677 case dr_explicit_realign:
6679 tree ptr, bump;
6681 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
6683 if (compute_in_loop)
6684 msq = vect_setup_realignment (first_stmt, gsi,
6685 &realignment_token,
6686 dr_explicit_realign,
6687 dataref_ptr, NULL);
6689 ptr = copy_ssa_name (dataref_ptr);
6690 new_stmt = gimple_build_assign
6691 (ptr, BIT_AND_EXPR, dataref_ptr,
6692 build_int_cst
6693 (TREE_TYPE (dataref_ptr),
6694 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6695 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6696 data_ref
6697 = build2 (MEM_REF, vectype, ptr,
6698 build_int_cst (reference_alias_ptr_type
6699 (DR_REF (first_dr)), 0));
6700 vec_dest = vect_create_destination_var (scalar_dest,
6701 vectype);
6702 new_stmt = gimple_build_assign (vec_dest, data_ref);
6703 new_temp = make_ssa_name (vec_dest, new_stmt);
6704 gimple_assign_set_lhs (new_stmt, new_temp);
6705 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6706 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6707 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6708 msq = new_temp;
6710 bump = size_binop (MULT_EXPR, vs,
6711 TYPE_SIZE_UNIT (elem_type));
6712 bump = size_binop (MINUS_EXPR, bump, size_one_node);
6713 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6714 new_stmt = gimple_build_assign
6715 (NULL_TREE, BIT_AND_EXPR, ptr,
6716 build_int_cst
6717 (TREE_TYPE (ptr),
6718 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6719 ptr = copy_ssa_name (dataref_ptr, new_stmt);
6720 gimple_assign_set_lhs (new_stmt, ptr);
6721 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6722 data_ref
6723 = build2 (MEM_REF, vectype, ptr,
6724 build_int_cst (reference_alias_ptr_type
6725 (DR_REF (first_dr)), 0));
6726 break;
6728 case dr_explicit_realign_optimized:
6729 new_temp = copy_ssa_name (dataref_ptr);
6730 new_stmt = gimple_build_assign
6731 (new_temp, BIT_AND_EXPR, dataref_ptr,
6732 build_int_cst
6733 (TREE_TYPE (dataref_ptr),
6734 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6735 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6736 data_ref
6737 = build2 (MEM_REF, vectype, new_temp,
6738 build_int_cst (reference_alias_ptr_type
6739 (DR_REF (first_dr)), 0));
6740 break;
6741 default:
6742 gcc_unreachable ();
6744 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6745 new_stmt = gimple_build_assign (vec_dest, data_ref);
6746 new_temp = make_ssa_name (vec_dest, new_stmt);
6747 gimple_assign_set_lhs (new_stmt, new_temp);
6748 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6750 /* 3. Handle explicit realignment if necessary/supported.
6751 Create in loop:
6752 vec_dest = realign_load (msq, lsq, realignment_token) */
6753 if (alignment_support_scheme == dr_explicit_realign_optimized
6754 || alignment_support_scheme == dr_explicit_realign)
6756 lsq = gimple_assign_lhs (new_stmt);
6757 if (!realignment_token)
6758 realignment_token = dataref_ptr;
6759 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6760 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
6761 msq, lsq, realignment_token);
6762 new_temp = make_ssa_name (vec_dest, new_stmt);
6763 gimple_assign_set_lhs (new_stmt, new_temp);
6764 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6766 if (alignment_support_scheme == dr_explicit_realign_optimized)
6768 gcc_assert (phi);
6769 if (i == vec_num - 1 && j == ncopies - 1)
6770 add_phi_arg (phi, lsq,
6771 loop_latch_edge (containing_loop),
6772 UNKNOWN_LOCATION);
6773 msq = lsq;
6777 /* 4. Handle invariant-load. */
6778 if (inv_p && !bb_vinfo)
6780 gcc_assert (!grouped_load);
6781 /* If we have versioned for aliasing or the loop doesn't
6782 have any data dependencies that would preclude this,
6783 then we are sure this is a loop invariant load and
6784 thus we can insert it on the preheader edge. */
6785 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6786 && !nested_in_vect_loop
6787 && hoist_defs_of_uses (stmt, loop))
6789 if (dump_enabled_p ())
6791 dump_printf_loc (MSG_NOTE, vect_location,
6792 "hoisting out of the vectorized "
6793 "loop: ");
6794 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6796 tree tem = copy_ssa_name (scalar_dest);
6797 gsi_insert_on_edge_immediate
6798 (loop_preheader_edge (loop),
6799 gimple_build_assign (tem,
6800 unshare_expr
6801 (gimple_assign_rhs1 (stmt))));
6802 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6804 else
6806 gimple_stmt_iterator gsi2 = *gsi;
6807 gsi_next (&gsi2);
6808 new_temp = vect_init_vector (stmt, scalar_dest,
6809 vectype, &gsi2);
6811 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6812 set_vinfo_for_stmt (new_stmt,
6813 new_stmt_vec_info (new_stmt, loop_vinfo,
6814 bb_vinfo));
6817 if (negative)
6819 tree perm_mask = perm_mask_for_reverse (vectype);
6820 new_temp = permute_vec_elements (new_temp, new_temp,
6821 perm_mask, stmt, gsi);
6822 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6825 /* Collect vector loads and later create their permutation in
6826 vect_transform_grouped_load (). */
6827 if (grouped_load || slp_perm)
6828 dr_chain.quick_push (new_temp);
6830 /* Store vector loads in the corresponding SLP_NODE. */
6831 if (slp && !slp_perm)
6832 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6834 /* Bump the vector pointer to account for a gap. */
6835 if (slp && group_gap != 0)
6837 tree bump = size_binop (MULT_EXPR,
6838 TYPE_SIZE_UNIT (elem_type),
6839 size_int (group_gap));
6840 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6841 stmt, bump);
6845 if (slp && !slp_perm)
6846 continue;
6848 if (slp_perm)
6850 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6851 slp_node_instance, false))
6853 dr_chain.release ();
6854 return false;
6857 else
6859 if (grouped_load)
6861 if (!load_lanes_p)
6862 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6863 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6865 else
6867 if (j == 0)
6868 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6869 else
6870 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6871 prev_stmt_info = vinfo_for_stmt (new_stmt);
6874 dr_chain.release ();
6877 return true;
6880 /* Function vect_is_simple_cond.
6882 Input:
6883 LOOP - the loop that is being vectorized.
6884 COND - Condition that is checked for simple use.
6886 Output:
6887 *COMP_VECTYPE - the vector type for the comparison.
6889 Returns whether a COND can be vectorized. Checks whether
6890 condition operands are supportable using vect_is_simple_use. */
6892 static bool
6893 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6894 bb_vec_info bb_vinfo, tree *comp_vectype)
6896 tree lhs, rhs;
6897 tree def;
6898 enum vect_def_type dt;
6899 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6901 if (!COMPARISON_CLASS_P (cond))
6902 return false;
6904 lhs = TREE_OPERAND (cond, 0);
6905 rhs = TREE_OPERAND (cond, 1);
6907 if (TREE_CODE (lhs) == SSA_NAME)
6909 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6910 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6911 &lhs_def_stmt, &def, &dt, &vectype1))
6912 return false;
6914 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6915 && TREE_CODE (lhs) != FIXED_CST)
6916 return false;
6918 if (TREE_CODE (rhs) == SSA_NAME)
6920 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6921 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6922 &rhs_def_stmt, &def, &dt, &vectype2))
6923 return false;
6925 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6926 && TREE_CODE (rhs) != FIXED_CST)
6927 return false;
6929 *comp_vectype = vectype1 ? vectype1 : vectype2;
6930 return true;
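/* Illustrative note (not from the original sources): for a condition like
   a_1 < b_2 where both operands are SSA names, each operand is checked
   with vect_is_simple_use_1 and *COMP_VECTYPE is taken from whichever
   operand supplies a vector type; constant operands contribute none.  */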
6933 /* vectorizable_condition.
6935 Check if STMT is a conditional modify expression that can be vectorized.
6936 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6937 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6938 at GSI.
6940 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
6941 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
6942 the else clause if it is 2).
6944 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6946 bool
6947 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6948 gimple *vec_stmt, tree reduc_def, int reduc_index,
6949 slp_tree slp_node)
6951 tree scalar_dest = NULL_TREE;
6952 tree vec_dest = NULL_TREE;
6953 tree cond_expr, then_clause, else_clause;
6954 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6955 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6956 tree comp_vectype = NULL_TREE;
6957 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6958 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6959 tree vec_compare, vec_cond_expr;
6960 tree new_temp;
6961 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6962 tree def;
6963 enum vect_def_type dt, dts[4];
6964 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6965 int ncopies;
6966 enum tree_code code;
6967 stmt_vec_info prev_stmt_info = NULL;
6968 int i, j;
6969 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6970 vec<tree> vec_oprnds0 = vNULL;
6971 vec<tree> vec_oprnds1 = vNULL;
6972 vec<tree> vec_oprnds2 = vNULL;
6973 vec<tree> vec_oprnds3 = vNULL;
6974 tree vec_cmp_type;
6976 if (slp_node || PURE_SLP_STMT (stmt_info))
6977 ncopies = 1;
6978 else
6979 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6981 gcc_assert (ncopies >= 1);
6982 if (reduc_index && ncopies > 1)
6983 return false; /* FORNOW */
6985 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6986 return false;
6988 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6989 return false;
6991 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6992 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6993 && reduc_def))
6994 return false;
6996 /* FORNOW: not yet supported. */
6997 if (STMT_VINFO_LIVE_P (stmt_info))
6999 if (dump_enabled_p ())
7000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7001 "value used after loop.\n");
7002 return false;
7005 /* Is vectorizable conditional operation? */
7006 if (!is_gimple_assign (stmt))
7007 return false;
7009 code = gimple_assign_rhs_code (stmt);
7011 if (code != COND_EXPR)
7012 return false;
7014 cond_expr = gimple_assign_rhs1 (stmt);
7015 then_clause = gimple_assign_rhs2 (stmt);
7016 else_clause = gimple_assign_rhs3 (stmt);
7018 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
7019 &comp_vectype)
7020 || !comp_vectype)
7021 return false;
7023 if (TREE_CODE (then_clause) == SSA_NAME)
7025 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
7026 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
7027 &then_def_stmt, &def, &dt))
7028 return false;
7030 else if (TREE_CODE (then_clause) != INTEGER_CST
7031 && TREE_CODE (then_clause) != REAL_CST
7032 && TREE_CODE (then_clause) != FIXED_CST)
7033 return false;
7035 if (TREE_CODE (else_clause) == SSA_NAME)
7037 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
7038 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
7039 &else_def_stmt, &def, &dt))
7040 return false;
7042 else if (TREE_CODE (else_clause) != INTEGER_CST
7043 && TREE_CODE (else_clause) != REAL_CST
7044 && TREE_CODE (else_clause) != FIXED_CST)
7045 return false;
7047 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
7048 /* The result of a vector comparison should be of signed type. */
7049 tree cmp_type = build_nonstandard_integer_type (prec, 0);
7050 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
7051 if (vec_cmp_type == NULL_TREE)
7052 return false;
7054 if (!vec_stmt)
7056 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7057 return expand_vec_cond_expr_p (vectype, comp_vectype);
7060 /* Transform. */
7062 if (!slp_node)
7064 vec_oprnds0.create (1);
7065 vec_oprnds1.create (1);
7066 vec_oprnds2.create (1);
7067 vec_oprnds3.create (1);
7070 /* Handle def. */
7071 scalar_dest = gimple_assign_lhs (stmt);
7072 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7074 /* Handle cond expr. */
7075 for (j = 0; j < ncopies; j++)
7077 gassign *new_stmt = NULL;
7078 if (j == 0)
7080 if (slp_node)
7082 auto_vec<tree, 4> ops;
7083 auto_vec<vec<tree>, 4> vec_defs;
7085 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7086 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7087 ops.safe_push (then_clause);
7088 ops.safe_push (else_clause);
7089 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7090 vec_oprnds3 = vec_defs.pop ();
7091 vec_oprnds2 = vec_defs.pop ();
7092 vec_oprnds1 = vec_defs.pop ();
7093 vec_oprnds0 = vec_defs.pop ();
7095 ops.release ();
7096 vec_defs.release ();
7098 else
7100 gimple gtemp;
7101 vec_cond_lhs =
7102 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7103 stmt, NULL);
7104 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
7105 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
7107 vec_cond_rhs =
7108 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7109 stmt, NULL);
7110 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
7111 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
7112 if (reduc_index == 1)
7113 vec_then_clause = reduc_def;
7114 else
7116 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7117 stmt, NULL);
7118 vect_is_simple_use (then_clause, stmt, loop_vinfo,
7119 NULL, &gtemp, &def, &dts[2]);
7121 if (reduc_index == 2)
7122 vec_else_clause = reduc_def;
7123 else
7125 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7126 stmt, NULL);
7127 vect_is_simple_use (else_clause, stmt, loop_vinfo,
7128 NULL, &gtemp, &def, &dts[3]);
7132 else
7134 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
7135 vec_oprnds0.pop ());
7136 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
7137 vec_oprnds1.pop ());
7138 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7139 vec_oprnds2.pop ());
7140 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7141 vec_oprnds3.pop ());
7144 if (!slp_node)
7146 vec_oprnds0.quick_push (vec_cond_lhs);
7147 vec_oprnds1.quick_push (vec_cond_rhs);
7148 vec_oprnds2.quick_push (vec_then_clause);
7149 vec_oprnds3.quick_push (vec_else_clause);
7152 /* Arguments are ready. Create the new vector stmt. */
7153 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7155 vec_cond_rhs = vec_oprnds1[i];
7156 vec_then_clause = vec_oprnds2[i];
7157 vec_else_clause = vec_oprnds3[i];
7159 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7160 vec_cond_lhs, vec_cond_rhs);
7161 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
7162 vec_compare, vec_then_clause, vec_else_clause);
7164 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
7165 new_temp = make_ssa_name (vec_dest, new_stmt);
7166 gimple_assign_set_lhs (new_stmt, new_temp);
7167 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7168 if (slp_node)
7169 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7172 if (slp_node)
7173 continue;
7175 if (j == 0)
7176 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7177 else
7178 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7180 prev_stmt_info = vinfo_for_stmt (new_stmt);
7183 vec_oprnds0.release ();
7184 vec_oprnds1.release ();
7185 vec_oprnds2.release ();
7186 vec_oprnds3.release ();
7188 return true;
7192 /* Make sure the statement is vectorizable. */
7194 bool
7195 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
7197 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7198 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7199 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7200 bool ok;
7201 tree scalar_type, vectype;
7202 gimple pattern_stmt;
7203 gimple_seq pattern_def_seq;
7205 if (dump_enabled_p ())
7207 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7208 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7211 if (gimple_has_volatile_ops (stmt))
7213 if (dump_enabled_p ())
7214 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7215 "not vectorized: stmt has volatile operands\n");
7217 return false;
7220 /* Skip stmts that do not need to be vectorized. In loops this is expected
7221 to include:
7222 - the COND_EXPR which is the loop exit condition
7223 - any LABEL_EXPRs in the loop
7224 - computations that are used only for array indexing or loop control.
7225 In basic blocks we only analyze statements that are a part of some SLP
7226 instance, therefore, all the statements are relevant.
7228 A pattern statement needs to be analyzed instead of the original statement
7229 if the original statement is not relevant. Otherwise, we analyze both
7230 statements. In basic blocks we are called from some SLP instance
7231 traversal; in that case don't analyze pattern stmts, since the pattern
7232 stmts will already be part of an SLP instance. */
7234 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7235 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7236 && !STMT_VINFO_LIVE_P (stmt_info))
7238 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7239 && pattern_stmt
7240 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7241 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7243 /* Analyze PATTERN_STMT instead of the original stmt. */
7244 stmt = pattern_stmt;
7245 stmt_info = vinfo_for_stmt (pattern_stmt);
7246 if (dump_enabled_p ())
7248 dump_printf_loc (MSG_NOTE, vect_location,
7249 "==> examining pattern statement: ");
7250 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7253 else
7255 if (dump_enabled_p ())
7256 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7258 return true;
7261 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7262 && node == NULL
7263 && pattern_stmt
7264 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7265 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7267 /* Analyze PATTERN_STMT too. */
7268 if (dump_enabled_p ())
7270 dump_printf_loc (MSG_NOTE, vect_location,
7271 "==> examining pattern statement: ");
7272 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7275 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7276 return false;
7279 if (is_pattern_stmt_p (stmt_info)
7280 && node == NULL
7281 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7283 gimple_stmt_iterator si;
7285 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7287 gimple pattern_def_stmt = gsi_stmt (si);
7288 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7289 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7291 /* Analyze def stmt of STMT if it's a pattern stmt. */
7292 if (dump_enabled_p ())
7294 dump_printf_loc (MSG_NOTE, vect_location,
7295 "==> examining pattern def statement: ");
7296 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7299 if (!vect_analyze_stmt (pattern_def_stmt,
7300 need_to_vectorize, node))
7301 return false;
7306 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7308 case vect_internal_def:
7309 break;
7311 case vect_reduction_def:
7312 case vect_nested_cycle:
7313 gcc_assert (!bb_vinfo
7314 && (relevance == vect_used_in_outer
7315 || relevance == vect_used_in_outer_by_reduction
7316 || relevance == vect_used_by_reduction
7317 || relevance == vect_unused_in_scope));
7318 break;
7320 case vect_induction_def:
7321 case vect_constant_def:
7322 case vect_external_def:
7323 case vect_unknown_def_type:
7324 default:
7325 gcc_unreachable ();
7328 if (bb_vinfo)
7330 gcc_assert (PURE_SLP_STMT (stmt_info));
7332 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7333 if (dump_enabled_p ())
7335 dump_printf_loc (MSG_NOTE, vect_location,
7336 "get vectype for scalar type: ");
7337 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7338 dump_printf (MSG_NOTE, "\n");
7341 vectype = get_vectype_for_scalar_type (scalar_type);
7342 if (!vectype)
7344 if (dump_enabled_p ())
7346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7347 "not SLPed: unsupported data-type ");
7348 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7349 scalar_type);
7350 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7352 return false;
7355 if (dump_enabled_p ())
7357 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7358 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7359 dump_printf (MSG_NOTE, "\n");
7362 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7365 if (STMT_VINFO_RELEVANT_P (stmt_info))
7367 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7368 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7369 || (is_gimple_call (stmt)
7370 && gimple_call_lhs (stmt) == NULL_TREE));
7371 *need_to_vectorize = true;
7374 if (PURE_SLP_STMT (stmt_info) && !node)
7376 dump_printf_loc (MSG_NOTE, vect_location,
7377 "handled only by SLP analysis\n");
7378 return true;
7381 ok = true;
7382 if (!bb_vinfo
7383 && (STMT_VINFO_RELEVANT_P (stmt_info)
7384 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7385 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7386 || vectorizable_conversion (stmt, NULL, NULL, node)
7387 || vectorizable_shift (stmt, NULL, NULL, node)
7388 || vectorizable_operation (stmt, NULL, NULL, node)
7389 || vectorizable_assignment (stmt, NULL, NULL, node)
7390 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7391 || vectorizable_call (stmt, NULL, NULL, node)
7392 || vectorizable_store (stmt, NULL, NULL, node)
7393 || vectorizable_reduction (stmt, NULL, NULL, node)
7394 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7395 else
7397 if (bb_vinfo)
7398 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7399 || vectorizable_conversion (stmt, NULL, NULL, node)
7400 || vectorizable_shift (stmt, NULL, NULL, node)
7401 || vectorizable_operation (stmt, NULL, NULL, node)
7402 || vectorizable_assignment (stmt, NULL, NULL, node)
7403 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7404 || vectorizable_call (stmt, NULL, NULL, node)
7405 || vectorizable_store (stmt, NULL, NULL, node)
7406 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7409 if (!ok)
7411 if (dump_enabled_p ())
7413 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7414 "not vectorized: relevant stmt not ");
7415 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7416 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7419 return false;
7422 if (bb_vinfo)
7423 return true;
7425 /* Stmts that are (also) "live" (i.e. that are used outside of the loop)
7426 need extra handling, except for vectorizable reductions. */
7427 if (STMT_VINFO_LIVE_P (stmt_info)
7428 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7429 ok = vectorizable_live_operation (stmt, NULL, NULL);
7431 if (!ok)
7433 if (dump_enabled_p ())
7435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7436 "not vectorized: live stmt not ");
7437 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7438 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7441 return false;
7444 return true;
7448 /* Function vect_transform_stmt.
7450 Create a vectorized stmt to replace STMT, and insert it at GSI. */
7452 bool
7453 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7454 bool *grouped_store, slp_tree slp_node,
7455 slp_instance slp_node_instance)
7457 bool is_store = false;
7458 gimple vec_stmt = NULL;
7459 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7460 bool done;
7462 switch (STMT_VINFO_TYPE (stmt_info))
7464 case type_demotion_vec_info_type:
7465 case type_promotion_vec_info_type:
7466 case type_conversion_vec_info_type:
7467 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7468 gcc_assert (done);
7469 break;
7471 case induc_vec_info_type:
7472 gcc_assert (!slp_node);
7473 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7474 gcc_assert (done);
7475 break;
7477 case shift_vec_info_type:
7478 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7479 gcc_assert (done);
7480 break;
7482 case op_vec_info_type:
7483 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7484 gcc_assert (done);
7485 break;
7487 case assignment_vec_info_type:
7488 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7489 gcc_assert (done);
7490 break;
7492 case load_vec_info_type:
7493 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7494 slp_node_instance);
7495 gcc_assert (done);
7496 break;
7498 case store_vec_info_type:
7499 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7500 gcc_assert (done);
7501 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7503 /* In case of interleaving, the whole chain is vectorized when the
7504 last store in the chain is reached. Store stmts before the last
7505 one are skipped, and their vec_stmt_info shouldn't be freed
7506 meanwhile. */
7507 *grouped_store = true;
7508 if (STMT_VINFO_VEC_STMT (stmt_info))
7509 is_store = true;
7511 else
7512 is_store = true;
7513 break;
7515 case condition_vec_info_type:
7516 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7517 gcc_assert (done);
7518 break;
7520 case call_vec_info_type:
7521 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7522 stmt = gsi_stmt (*gsi);
7523 if (is_gimple_call (stmt)
7524 && gimple_call_internal_p (stmt)
7525 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7526 is_store = true;
7527 break;
7529 case call_simd_clone_vec_info_type:
7530 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7531 stmt = gsi_stmt (*gsi);
7532 break;
7534 case reduc_vec_info_type:
7535 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7536 gcc_assert (done);
7537 break;
7539 default:
7540 if (!STMT_VINFO_LIVE_P (stmt_info))
7542 if (dump_enabled_p ())
7543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7544 "stmt not supported.\n");
7545 gcc_unreachable ();
7549 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7550 is being vectorized, but outside the immediately enclosing loop. */
7551 if (vec_stmt
7552 && STMT_VINFO_LOOP_VINFO (stmt_info)
7553 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7554 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7555 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7556 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7557 || STMT_VINFO_RELEVANT (stmt_info) ==
7558 vect_used_in_outer_by_reduction))
7560 struct loop *innerloop = LOOP_VINFO_LOOP (
7561 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7562 imm_use_iterator imm_iter;
7563 use_operand_p use_p;
7564 tree scalar_dest;
7565 gimple exit_phi;
7567 if (dump_enabled_p ())
7568 dump_printf_loc (MSG_NOTE, vect_location,
7569 "Record the vdef for outer-loop vectorization.\n");
7571       /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7572 (to be used when vectorizing outer-loop stmts that use the DEF of
7573 STMT). */
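      /* Illustrative sketch (not from the sources) of the situation handled
	 here:

	   outer loop:
	     for (i = ...)
	       {
		 inner loop:
		 for (j = ...)
		   d_j = ...;            <-- STMT
		 d_exit = PHI <d_j>      <-- inner-loop exit PHI
		 ... = d_exit;           <-- outer-loop use of STMT's DEF
	       }

	 The vec_stmt created for STMT is recorded on the stmt_vec_info of
	 the exit PHI so that vectorization of the outer-loop use can find
	 it.  */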
7574 if (gimple_code (stmt) == GIMPLE_PHI)
7575 scalar_dest = PHI_RESULT (stmt);
7576 else
7577 scalar_dest = gimple_assign_lhs (stmt);
7579 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7581 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7583 exit_phi = USE_STMT (use_p);
7584 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7589 /* Handle stmts whose DEF is used outside the loop-nest that is
7590 being vectorized. */
7591 if (STMT_VINFO_LIVE_P (stmt_info)
7592 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7594 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7595 gcc_assert (done);
7598 if (vec_stmt)
7599 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7601 return is_store;
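/* Simplified usage sketch, assuming the rough shape of the loop-transform
   driver (the real callers in tree-vect-loop.c and tree-vect-slp.c differ
   in detail):

     bool grouped_store = false;
     if (vect_transform_stmt (stmt, &gsi, &grouped_store, NULL, NULL))
       {
	 // A store: the scalar store group is removed separately, e.g. via
	 // vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info)).
       }
     else
       gsi_next (&gsi);

   i.e. the return value tells the caller whether scalar stores remain to be
   cleaned up rather than simply stepped over.  */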
7605 /* Remove a group of stores (for SLP or interleaving), free their
7606 stmt_vec_info. */
7608 void
7609 vect_remove_stores (gimple first_stmt)
7611 gimple next = first_stmt;
7612 gimple tmp;
7613 gimple_stmt_iterator next_si;
7615 while (next)
7617 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7619 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7620 if (is_pattern_stmt_p (stmt_info))
7621 next = STMT_VINFO_RELATED_STMT (stmt_info);
7622 /* Free the attached stmt_vec_info and remove the stmt. */
7623 next_si = gsi_for_stmt (next);
7624 unlink_stmt_vdef (next);
7625 gsi_remove (&next_si, true);
7626 release_defs (next);
7627 free_stmt_vec_info (next);
7628 next = tmp;
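/* Illustrative example: for the interleaved store group

     S1: a[2*i]   = x;    <-- GROUP_FIRST_ELEMENT
     S2: a[2*i+1] = y;    <-- GROUP_NEXT_ELEMENT of S1

   vect_remove_stores (S1) walks S1 -> S2 -> NULL, unlinking each scalar
   store (or its original stmt when the group member is a pattern stmt) and
   freeing the attached stmt_vec_info.  */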
7633 /* Function new_stmt_vec_info.
7635 Create and initialize a new stmt_vec_info struct for STMT. */
7637 stmt_vec_info
7638 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7639 bb_vec_info bb_vinfo)
7641 stmt_vec_info res;
7642 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7644 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7645 STMT_VINFO_STMT (res) = stmt;
7646 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7647 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7648 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7649 STMT_VINFO_LIVE_P (res) = false;
7650 STMT_VINFO_VECTYPE (res) = NULL;
7651 STMT_VINFO_VEC_STMT (res) = NULL;
7652 STMT_VINFO_VECTORIZABLE (res) = true;
7653 STMT_VINFO_IN_PATTERN_P (res) = false;
7654 STMT_VINFO_RELATED_STMT (res) = NULL;
7655 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7656 STMT_VINFO_DATA_REF (res) = NULL;
7658 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7659 STMT_VINFO_DR_OFFSET (res) = NULL;
7660 STMT_VINFO_DR_INIT (res) = NULL;
7661 STMT_VINFO_DR_STEP (res) = NULL;
7662 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
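  /* Loop-header PHIs may later be classified as inductions, reductions or
     nested cycles by the scalar-cycle analysis, so their def type starts
     out as unknown; everything else defaults to an internal def.  */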
7664 if (gimple_code (stmt) == GIMPLE_PHI
7665 && is_loop_header_bb_p (gimple_bb (stmt)))
7666 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7667 else
7668 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7670 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7671 STMT_SLP_TYPE (res) = loop_vect;
7672 GROUP_FIRST_ELEMENT (res) = NULL;
7673 GROUP_NEXT_ELEMENT (res) = NULL;
7674 GROUP_SIZE (res) = 0;
7675 GROUP_STORE_COUNT (res) = 0;
7676 GROUP_GAP (res) = 0;
7677 GROUP_SAME_DR_STMT (res) = NULL;
7679 return res;
7683 /* Create the vector that holds the stmt_vec_info structs, indexed by stmt UID.  */
7685 void
7686 init_stmt_vec_info_vec (void)
7688 gcc_assert (!stmt_vec_info_vec.exists ());
7689 stmt_vec_info_vec.create (50);
7693 /* Free the stmt_vec_info vector, including the stmt_vec_info structs it holds.  */
7695 void
7696 free_stmt_vec_info_vec (void)
7698 unsigned int i;
7699 vec_void_p info;
7700 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7701 if (info != NULL)
7702 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7703 gcc_assert (stmt_vec_info_vec.exists ());
7704 stmt_vec_info_vec.release ();
7708 /* Free stmt vectorization related info. */
7710 void
7711 free_stmt_vec_info (gimple stmt)
7713 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7715 if (!stmt_info)
7716 return;
7718 /* Check if this statement has a related "pattern stmt"
7719 (introduced by the vectorizer during the pattern recognition
7720      pass).  Free the pattern stmt's stmt_vec_info and the stmt_vec_infos
7721      of its pattern def-sequence stmts too.  */
7722 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7724 stmt_vec_info patt_info
7725 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7726 if (patt_info)
7728 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7729 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7730 gimple_set_bb (patt_stmt, NULL);
7731 tree lhs = gimple_get_lhs (patt_stmt);
7732 	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
7733 release_ssa_name (lhs);
7734 if (seq)
7736 gimple_stmt_iterator si;
7737 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7739 gimple seq_stmt = gsi_stmt (si);
7740 gimple_set_bb (seq_stmt, NULL);
7741 	      lhs = gimple_get_lhs (seq_stmt);
7742 	      if (lhs && TREE_CODE (lhs) == SSA_NAME)
7743 release_ssa_name (lhs);
7744 free_stmt_vec_info (seq_stmt);
7747 free_stmt_vec_info (patt_stmt);
7751 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7752 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
7753 set_vinfo_for_stmt (stmt, NULL);
7754 free (stmt_info);
7758 /* Function get_vectype_for_scalar_type_and_size.
7760 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7761 by the target. */
7763 static tree
7764 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7766 machine_mode inner_mode = TYPE_MODE (scalar_type);
7767 machine_mode simd_mode;
7768 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7769 int nunits;
7770 tree vectype;
7772 if (nbytes == 0)
7773 return NULL_TREE;
7775 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7776 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7777 return NULL_TREE;
7779 /* For vector types of elements whose mode precision doesn't
7780      match their type's precision we use an element type of mode
7781 precision. The vectorization routines will have to make sure
7782 they support the proper result truncation/extension.
7783 We also make sure to build vector types with INTEGER_TYPE
7784 component type only. */
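  /* Illustrative example (assuming a QImode boolean type): a _Bool has
     TYPE_PRECISION 1 but an 8-bit mode, so the element type is replaced by
     a nonstandard 8-bit INTEGER_TYPE and the vectorizable_* routines take
     care of any needed truncation/extension of the element values.  */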
7785 if (INTEGRAL_TYPE_P (scalar_type)
7786 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7787 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7788 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7789 TYPE_UNSIGNED (scalar_type));
7791 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7792 When the component mode passes the above test simply use a type
7793 corresponding to that mode. The theory is that any use that
7794 would cause problems with this will disable vectorization anyway. */
7795 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7796 && !INTEGRAL_TYPE_P (scalar_type))
7797 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7799 /* We can't build a vector type of elements with alignment bigger than
7800 their size. */
7801 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7802 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7803 TYPE_UNSIGNED (scalar_type));
7805   /* If we fell back to using the mode, fail if there was
7806 no scalar type for it. */
7807 if (scalar_type == NULL_TREE)
7808 return NULL_TREE;
7810 /* If no size was supplied use the mode the target prefers. Otherwise
7811      look up a vector mode of the specified size.  */
7812 if (size == 0)
7813 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7814 else
7815 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7816 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7817 if (nunits <= 1)
7818 return NULL_TREE;
7820 vectype = build_vector_type (scalar_type, nunits);
7822 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7823 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7824 return NULL_TREE;
7826 return vectype;
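/* Hypothetical behaviour sketch, assuming a target whose preferred SIMD
   width is 16 bytes:

     get_vectype_for_scalar_type_and_size (int,   0)  -> vector(4) int
     get_vectype_for_scalar_type_and_size (short, 16) -> vector(8) short
     get_vectype_for_scalar_type_and_size (char,  1)  -> NULL_TREE (nunits <= 1)

   SIZE is in bytes; 0 means "use the target's preferred SIMD mode for this
   element mode".  */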
7829 unsigned int current_vector_size;
7831 /* Function get_vectype_for_scalar_type.
7833 Returns the vector type corresponding to SCALAR_TYPE as supported
7834 by the target. */
7836 tree
7837 get_vectype_for_scalar_type (tree scalar_type)
7839 tree vectype;
7840 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7841 current_vector_size);
7842 if (vectype
7843 && current_vector_size == 0)
7844 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7845 return vectype;
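/* Note that CURRENT_VECTOR_SIZE is latched by the first successful query
   above, so later queries during the same vectorization attempt all use the
   same vector size; e.g. once INT yields a 16-byte vector(4) int, a later
   query for SHORT yields vector(8) short rather than a wider mode.  */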
7848 /* Function get_same_sized_vectype
7850    Returns a vector type corresponding to SCALAR_TYPE with the same size
7851    as VECTOR_TYPE, if supported by the target.  */
7853 tree
7854 get_same_sized_vectype (tree scalar_type, tree vector_type)
7856 return get_vectype_for_scalar_type_and_size
7857 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7860 /* Function vect_is_simple_use.
7862 Input:
7863 LOOP_VINFO - the vect info of the loop that is being vectorized.
7864 BB_VINFO - the vect info of the basic block that is being vectorized.
7865 OPERAND - operand of STMT in the loop or bb.
7866 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7868 Returns whether a stmt with OPERAND can be vectorized.
7869 For loops, supportable operands are constants, loop invariants, and operands
7870 that are defined by the current iteration of the loop. Unsupportable
7871 operands are those that are defined by a previous iteration of the loop (as
7872 is the case in reduction/induction computations).
7873 For basic blocks, supportable operands are constants and bb invariants.
7874 For now, operands defined outside the basic block are not supported. */
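/* Illustrative examples of the classification below (not from the sources),
   for the loop

     for (i = 0; i < n; i++)
       a[i] = b[i] * c + 5;

   - C, defined before the loop         -> vect_external_def
   - the load of b[i], an in-loop stmt  -> vect_internal_def
   - the constant 5                     -> vect_constant_def
   - the PHI node of I                  -> vect_induction_def  */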
7876 bool
7877 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7878 bb_vec_info bb_vinfo, gimple *def_stmt,
7879 tree *def, enum vect_def_type *dt)
7881 basic_block bb;
7882 stmt_vec_info stmt_vinfo;
7883 struct loop *loop = NULL;
7885 if (loop_vinfo)
7886 loop = LOOP_VINFO_LOOP (loop_vinfo);
7888 *def_stmt = NULL;
7889 *def = NULL_TREE;
7891 if (dump_enabled_p ())
7893 dump_printf_loc (MSG_NOTE, vect_location,
7894 "vect_is_simple_use: operand ");
7895 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7896 dump_printf (MSG_NOTE, "\n");
7899 if (CONSTANT_CLASS_P (operand))
7901 *dt = vect_constant_def;
7902 return true;
7905 if (is_gimple_min_invariant (operand))
7907 *def = operand;
7908 *dt = vect_external_def;
7909 return true;
7912 if (TREE_CODE (operand) == PAREN_EXPR)
7914 if (dump_enabled_p ())
7915 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7916 operand = TREE_OPERAND (operand, 0);
7919 if (TREE_CODE (operand) != SSA_NAME)
7921 if (dump_enabled_p ())
7922 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7923 "not ssa-name.\n");
7924 return false;
7927 *def_stmt = SSA_NAME_DEF_STMT (operand);
7928 if (*def_stmt == NULL)
7930 if (dump_enabled_p ())
7931 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7932 "no def_stmt.\n");
7933 return false;
7936 if (dump_enabled_p ())
7938 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7939 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7942   /* An empty (nop) def stmt is expected only in case of a function argument
7943      (otherwise we expect a PHI node or a GIMPLE_ASSIGN).  */
7944 if (gimple_nop_p (*def_stmt))
7946 *def = operand;
7947 *dt = vect_external_def;
7948 return true;
7951 bb = gimple_bb (*def_stmt);
7953 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7954 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7955 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7956 *dt = vect_external_def;
7957 else
7959 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7960 if (!loop && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
7961 *dt = vect_external_def;
7962 else
7963 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7966 if (dump_enabled_p ())
7968 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
7969 switch (*dt)
7971 case vect_uninitialized_def:
7972 dump_printf (MSG_NOTE, "uninitialized\n");
7973 break;
7974 case vect_constant_def:
7975 dump_printf (MSG_NOTE, "constant\n");
7976 break;
7977 case vect_external_def:
7978 dump_printf (MSG_NOTE, "external\n");
7979 break;
7980 case vect_internal_def:
7981 dump_printf (MSG_NOTE, "internal\n");
7982 break;
7983 case vect_induction_def:
7984 dump_printf (MSG_NOTE, "induction\n");
7985 break;
7986 case vect_reduction_def:
7987 dump_printf (MSG_NOTE, "reduction\n");
7988 break;
7989 case vect_double_reduction_def:
7990 dump_printf (MSG_NOTE, "double reduction\n");
7991 break;
7992 case vect_nested_cycle:
7993 dump_printf (MSG_NOTE, "nested cycle\n");
7994 break;
7995 case vect_unknown_def_type:
7996 dump_printf (MSG_NOTE, "unknown\n");
7997 break;
8001 if (*dt == vect_unknown_def_type
8002 || (stmt
8003 && *dt == vect_double_reduction_def
8004 && gimple_code (stmt) != GIMPLE_PHI))
8006 if (dump_enabled_p ())
8007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8008 "Unsupported pattern.\n");
8009 return false;
8012 switch (gimple_code (*def_stmt))
8014 case GIMPLE_PHI:
8015 *def = gimple_phi_result (*def_stmt);
8016 break;
8018 case GIMPLE_ASSIGN:
8019 *def = gimple_assign_lhs (*def_stmt);
8020 break;
8022 case GIMPLE_CALL:
8023 *def = gimple_call_lhs (*def_stmt);
8024 if (*def != NULL)
8025 break;
8026 /* FALLTHRU */
8027 default:
8028 if (dump_enabled_p ())
8029 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8030 "unsupported defining stmt:\n");
8031 return false;
8034 return true;
8037 /* Function vect_is_simple_use_1.
8039    Same as vect_is_simple_use but also determines the vector operand
8040 type of OPERAND and stores it to *VECTYPE. If the definition of
8041 OPERAND is vect_uninitialized_def, vect_constant_def or
8042    vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
8043    is responsible for computing the best-suited vector type for the
8044 scalar operand. */
8046 bool
8047 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
8048 bb_vec_info bb_vinfo, gimple *def_stmt,
8049 tree *def, enum vect_def_type *dt, tree *vectype)
8051 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
8052 def, dt))
8053 return false;
8055 /* Now get a vector type if the def is internal, otherwise supply
8056 NULL_TREE and leave it up to the caller to figure out a proper
8057 type for the use stmt. */
8058 if (*dt == vect_internal_def
8059 || *dt == vect_induction_def
8060 || *dt == vect_reduction_def
8061 || *dt == vect_double_reduction_def
8062 || *dt == vect_nested_cycle)
8064 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8066 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8067 && !STMT_VINFO_RELEVANT (stmt_info)
8068 && !STMT_VINFO_LIVE_P (stmt_info))
8069 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8071 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8072 gcc_assert (*vectype != NULL_TREE);
8074 else if (*dt == vect_uninitialized_def
8075 || *dt == vect_constant_def
8076 || *dt == vect_external_def)
8077 *vectype = NULL_TREE;
8078 else
8079 gcc_unreachable ();
8081 return true;
8085 /* Function supportable_widening_operation
8087 Check whether an operation represented by the code CODE is a
8088 widening operation that is supported by the target platform in
8089 vector form (i.e., when operating on arguments of type VECTYPE_IN
8090 producing a result of type VECTYPE_OUT).
8092 Widening operations we currently support are NOP (CONVERT), FLOAT
8093 and WIDEN_MULT. This function checks if these operations are supported
8094 by the target platform either directly (via vector tree-codes), or via
8095 target builtins.
8097 Output:
8098 - CODE1 and CODE2 are codes of vector operations to be used when
8099 vectorizing the operation, if available.
8100 - MULTI_STEP_CVT determines the number of required intermediate steps in
8101 case of multi-step conversion (like char->short->int - in that case
8102 MULTI_STEP_CVT will be 1).
8103 - INTERM_TYPES contains the intermediate type required to perform the
8104 widening operation (short in the above example). */
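/* Illustrative example, assuming a target that can only unpack
   char -> short and short -> int: for a char -> int widening conversion the
   expected result is

     *CODE1 = VEC_UNPACK_LO_EXPR, *CODE2 = VEC_UNPACK_HI_EXPR
     *MULTI_STEP_CVT = 1
     *INTERM_TYPES   = { vector of short }

   and the caller performs the conversion in two unpack stages
   (char -> short, then short -> int).  */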
8106 bool
8107 supportable_widening_operation (enum tree_code code, gimple stmt,
8108 tree vectype_out, tree vectype_in,
8109 enum tree_code *code1, enum tree_code *code2,
8110 int *multi_step_cvt,
8111 vec<tree> *interm_types)
8113 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8114 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8115 struct loop *vect_loop = NULL;
8116 machine_mode vec_mode;
8117 enum insn_code icode1, icode2;
8118 optab optab1, optab2;
8119 tree vectype = vectype_in;
8120 tree wide_vectype = vectype_out;
8121 enum tree_code c1, c2;
8122 int i;
8123 tree prev_type, intermediate_type;
8124 machine_mode intermediate_mode, prev_mode;
8125 optab optab3, optab4;
8127 *multi_step_cvt = 0;
8128 if (loop_info)
8129 vect_loop = LOOP_VINFO_LOOP (loop_info);
8131 switch (code)
8133 case WIDEN_MULT_EXPR:
8134 /* The result of a vectorized widening operation usually requires
8135 two vectors (because the widened results do not fit into one vector).
8136 The generated vector results would normally be expected to be
8137 generated in the same order as in the original scalar computation,
8138 i.e. if 8 results are generated in each vector iteration, they are
8139 to be organized as follows:
8140 vect1: [res1,res2,res3,res4],
8141 vect2: [res5,res6,res7,res8].
8143 However, in the special case that the result of the widening
8144 operation is used in a reduction computation only, the order doesn't
8145 matter (because when vectorizing a reduction we change the order of
8146 the computation). Some targets can take advantage of this and
8147 generate more efficient code. For example, targets like Altivec,
8148 that support widen_mult using a sequence of {mult_even,mult_odd}
8149 generate the following vectors:
8150 vect1: [res1,res3,res5,res7],
8151 vect2: [res2,res4,res6,res8].
8153 When vectorizing outer-loops, we execute the inner-loop sequentially
8154 (each vectorized inner-loop iteration contributes to VF outer-loop
8155         iterations in parallel).  We therefore don't allow changing the
8156 order of the computation in the inner-loop during outer-loop
8157 vectorization. */
8158 /* TODO: Another case in which order doesn't *really* matter is when we
8159 widen and then contract again, e.g. (short)((int)x * y >> 8).
8160 Normally, pack_trunc performs an even/odd permute, whereas the
8161 repack from an even/odd expansion would be an interleave, which
8162 would be significantly simpler for e.g. AVX2. */
8163 /* In any case, in order to avoid duplicating the code below, recurse
8164 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8165 are properly set up for the caller. If we fail, we'll continue with
8166 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8167 if (vect_loop
8168 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8169 && !nested_in_vect_loop_p (vect_loop, stmt)
8170 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8171 stmt, vectype_out, vectype_in,
8172 code1, code2, multi_step_cvt,
8173 interm_types))
8175 /* Elements in a vector with vect_used_by_reduction property cannot
8176 be reordered if the use chain with this property does not have the
8177 	     same operation.  One such example is s += a * b, where elements
8178 in a and b cannot be reordered. Here we check if the vector defined
8179 by STMT is only directly used in the reduction statement. */
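	  /* Illustrative example: in

	       s += (int) a[i] * (int) b[i];

	     the widened products feed only the reduction, so the even/odd
	     order is acceptable; if the products had any other direct use,
	     the in-order LO/HI variant checked below would be required
	     instead.  */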
8180 tree lhs = gimple_assign_lhs (stmt);
8181 use_operand_p dummy;
8182 gimple use_stmt;
8183 stmt_vec_info use_stmt_info = NULL;
8184 if (single_imm_use (lhs, &dummy, &use_stmt)
8185 && (use_stmt_info = vinfo_for_stmt (use_stmt))
8186 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
8187 return true;
8189 c1 = VEC_WIDEN_MULT_LO_EXPR;
8190 c2 = VEC_WIDEN_MULT_HI_EXPR;
8191 break;
8193 case VEC_WIDEN_MULT_EVEN_EXPR:
8194 /* Support the recursion induced just above. */
8195 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
8196 c2 = VEC_WIDEN_MULT_ODD_EXPR;
8197 break;
8199 case WIDEN_LSHIFT_EXPR:
8200 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
8201 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
8202 break;
8204 CASE_CONVERT:
8205 c1 = VEC_UNPACK_LO_EXPR;
8206 c2 = VEC_UNPACK_HI_EXPR;
8207 break;
8209 case FLOAT_EXPR:
8210 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
8211 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
8212 break;
8214 case FIX_TRUNC_EXPR:
8215 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8216 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8217 computing the operation. */
8218 return false;
8220 default:
8221 gcc_unreachable ();
8224 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
8226 enum tree_code ctmp = c1;
8227 c1 = c2;
8228 c2 = ctmp;
8231 if (code == FIX_TRUNC_EXPR)
8233 /* The signedness is determined from output operand. */
8234 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8235 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8237 else
8239 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8240 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8243 if (!optab1 || !optab2)
8244 return false;
8246 vec_mode = TYPE_MODE (vectype);
8247 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8248 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8249 return false;
8251 *code1 = c1;
8252 *code2 = c2;
8254 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8255 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8256 return true;
8258 /* Check if it's a multi-step conversion that can be done using intermediate
8259 types. */
8261 prev_type = vectype;
8262 prev_mode = vec_mode;
8264 if (!CONVERT_EXPR_CODE_P (code))
8265 return false;
8267   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8268      intermediate steps in the promotion sequence.  We try
8269      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
8270      not.  */
8271 interm_types->create (MAX_INTERM_CVT_STEPS);
8272 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8274 intermediate_mode = insn_data[icode1].operand[0].mode;
8275 intermediate_type
8276 = lang_hooks.types.type_for_mode (intermediate_mode,
8277 TYPE_UNSIGNED (prev_type));
8278 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8279 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8281 if (!optab3 || !optab4
8282 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8283 || insn_data[icode1].operand[0].mode != intermediate_mode
8284 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8285 || insn_data[icode2].operand[0].mode != intermediate_mode
8286 || ((icode1 = optab_handler (optab3, intermediate_mode))
8287 == CODE_FOR_nothing)
8288 || ((icode2 = optab_handler (optab4, intermediate_mode))
8289 == CODE_FOR_nothing))
8290 break;
8292 interm_types->quick_push (intermediate_type);
8293 (*multi_step_cvt)++;
8295 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8296 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8297 return true;
8299 prev_type = intermediate_type;
8300 prev_mode = intermediate_mode;
8303 interm_types->release ();
8304 return false;
8308 /* Function supportable_narrowing_operation
8310 Check whether an operation represented by the code CODE is a
8311 narrowing operation that is supported by the target platform in
8312 vector form (i.e., when operating on arguments of type VECTYPE_IN
8313 and producing a result of type VECTYPE_OUT).
8315 Narrowing operations we currently support are NOP (CONVERT) and
8316 FIX_TRUNC. This function checks if these operations are supported by
8317 the target platform directly via vector tree-codes.
8319 Output:
8320 - CODE1 is the code of a vector operation to be used when
8321 vectorizing the operation, if available.
8322 - MULTI_STEP_CVT determines the number of required intermediate steps in
8323 case of multi-step conversion (like int->short->char - in that case
8324 MULTI_STEP_CVT will be 1).
8325 - INTERM_TYPES contains the intermediate type required to perform the
8326 narrowing operation (short in the above example). */
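/* Illustrative example, assuming a target that can only pack
   int -> short and short -> char: for an int -> char narrowing conversion
   the expected result is

     *CODE1 = VEC_PACK_TRUNC_EXPR
     *MULTI_STEP_CVT = 1
     *INTERM_TYPES   = { vector of short }

   with each pack step combining two input vectors into one narrower one.  */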
8328 bool
8329 supportable_narrowing_operation (enum tree_code code,
8330 tree vectype_out, tree vectype_in,
8331 enum tree_code *code1, int *multi_step_cvt,
8332 vec<tree> *interm_types)
8334 machine_mode vec_mode;
8335 enum insn_code icode1;
8336 optab optab1, interm_optab;
8337 tree vectype = vectype_in;
8338 tree narrow_vectype = vectype_out;
8339 enum tree_code c1;
8340 tree intermediate_type;
8341 machine_mode intermediate_mode, prev_mode;
8342 int i;
8343 bool uns;
8345 *multi_step_cvt = 0;
8346 switch (code)
8348 CASE_CONVERT:
8349 c1 = VEC_PACK_TRUNC_EXPR;
8350 break;
8352 case FIX_TRUNC_EXPR:
8353 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8354 break;
8356 case FLOAT_EXPR:
8357 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8358 tree code and optabs used for computing the operation. */
8359 return false;
8361 default:
8362 gcc_unreachable ();
8365 if (code == FIX_TRUNC_EXPR)
8366 /* The signedness is determined from output operand. */
8367 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8368 else
8369 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8371 if (!optab1)
8372 return false;
8374 vec_mode = TYPE_MODE (vectype);
8375 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8376 return false;
8378 *code1 = c1;
8380 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8381 return true;
8383 /* Check if it's a multi-step conversion that can be done using intermediate
8384 types. */
8385 prev_mode = vec_mode;
8386 if (code == FIX_TRUNC_EXPR)
8387 uns = TYPE_UNSIGNED (vectype_out);
8388 else
8389 uns = TYPE_UNSIGNED (vectype);
8391   /* For a multi-step FIX_TRUNC_EXPR, prefer a signed floating-to-integer
8392      conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often more
8393 costly than signed. */
8394 if (code == FIX_TRUNC_EXPR && uns)
8396 enum insn_code icode2;
8398 intermediate_type
8399 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8400 interm_optab
8401 = optab_for_tree_code (c1, intermediate_type, optab_default);
8402 if (interm_optab != unknown_optab
8403 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8404 && insn_data[icode1].operand[0].mode
8405 == insn_data[icode2].operand[0].mode)
8407 uns = false;
8408 optab1 = interm_optab;
8409 icode1 = icode2;
8413 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8414      intermediate steps in the narrowing sequence.  We try
8415 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8416 interm_types->create (MAX_INTERM_CVT_STEPS);
8417 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8419 intermediate_mode = insn_data[icode1].operand[0].mode;
8420 intermediate_type
8421 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8422 interm_optab
8423 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8424 optab_default);
8425 if (!interm_optab
8426 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8427 || insn_data[icode1].operand[0].mode != intermediate_mode
8428 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8429 == CODE_FOR_nothing))
8430 break;
8432 interm_types->quick_push (intermediate_type);
8433 (*multi_step_cvt)++;
8435 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8436 return true;
8438 prev_mode = intermediate_mode;
8439 optab1 = interm_optab;
8442 interm_types->release ();
8443 return false;