[official-gcc.git] gcc/tree-vect-stmts.c, blob 1f68ff52698e035378c25224237643251371ed84 (2015-05-22, Richard Biener <rguenther@suse.de>)
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "hash-set.h"
28 #include "machmode.h"
29 #include "vec.h"
30 #include "double-int.h"
31 #include "input.h"
32 #include "alias.h"
33 #include "symtab.h"
34 #include "wide-int.h"
35 #include "inchash.h"
36 #include "tree.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "target.h"
40 #include "predict.h"
41 #include "hard-reg-set.h"
42 #include "function.h"
43 #include "dominance.h"
44 #include "cfg.h"
45 #include "basic-block.h"
46 #include "gimple-pretty-print.h"
47 #include "tree-ssa-alias.h"
48 #include "internal-fn.h"
49 #include "tree-eh.h"
50 #include "gimple-expr.h"
51 #include "is-a.h"
52 #include "gimple.h"
53 #include "gimplify.h"
54 #include "gimple-iterator.h"
55 #include "gimplify-me.h"
56 #include "gimple-ssa.h"
57 #include "tree-cfg.h"
58 #include "tree-phinodes.h"
59 #include "ssa-iterators.h"
60 #include "stringpool.h"
61 #include "tree-ssanames.h"
62 #include "tree-ssa-loop-manip.h"
63 #include "cfgloop.h"
64 #include "tree-ssa-loop.h"
65 #include "tree-scalar-evolution.h"
66 #include "hashtab.h"
67 #include "rtl.h"
68 #include "flags.h"
69 #include "statistics.h"
70 #include "real.h"
71 #include "fixed-value.h"
72 #include "insn-config.h"
73 #include "expmed.h"
74 #include "dojump.h"
75 #include "explow.h"
76 #include "calls.h"
77 #include "emit-rtl.h"
78 #include "varasm.h"
79 #include "stmt.h"
80 #include "expr.h"
81 #include "recog.h" /* FIXME: for insn_data */
82 #include "insn-codes.h"
83 #include "optabs.h"
84 #include "diagnostic-core.h"
85 #include "tree-vectorizer.h"
86 #include "hash-map.h"
87 #include "plugin-api.h"
88 #include "ipa-ref.h"
89 #include "cgraph.h"
90 #include "builtins.h"
92 /* For lang_hooks.types.type_for_mode. */
93 #include "langhooks.h"
95 /* Return the vectorized type for the given statement. */
97 tree
98 stmt_vectype (struct _stmt_vec_info *stmt_info)
100 return STMT_VINFO_VECTYPE (stmt_info);
103 /* Return TRUE iff the given statement is in an inner loop relative to
104 the loop being vectorized. */
105 bool
106 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
108 gimple stmt = STMT_VINFO_STMT (stmt_info);
109 basic_block bb = gimple_bb (stmt);
110 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
111 struct loop* loop;
113 if (!loop_vinfo)
114 return false;
116 loop = LOOP_VINFO_LOOP (loop_vinfo);
118 return (bb->loop_father == loop->inner);
121 /* Record the cost of a statement, either by directly informing the
122 target model or by saving it in a vector for later processing.
123 Return a preliminary estimate of the statement's cost. */
125 unsigned
126 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
127 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
128 int misalign, enum vect_cost_model_location where)
130 if (body_cost_vec)
132 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
133 add_stmt_info_to_vec (body_cost_vec, count, kind,
134 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
135 misalign);
136 return (unsigned)
137 (builtin_vectorization_cost (kind, vectype, misalign) * count);
140 else
142 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
143 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
144 void *target_cost_data;
146 if (loop_vinfo)
147 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
148 else
149 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
151 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
152 misalign, where);
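   /* Illustrative example (not from the original source): callers such as
      vect_model_simple_cost below use
        record_stmt_cost (body_cost_vec, ncopies, vector_stmt, stmt_info, 0, vect_body);
      so with ncopies == 4 and a target whose builtin_vectorization_cost
      for vector_stmt is 1, the preliminary estimate returned here (and
      later summed into the body cost) is 4.  */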
156 /* Return a variable of type ELEM_TYPE[NELEMS]. */
158 static tree
159 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
161 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
162 "vect_array");
165 /* ARRAY is an array of vectors created by create_vector_array.
166 Return an SSA_NAME for the vector in index N. The reference
167 is part of the vectorization of STMT and the vector is associated
168 with scalar destination SCALAR_DEST. */
170 static tree
171 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
172 tree array, unsigned HOST_WIDE_INT n)
174 tree vect_type, vect, vect_name, array_ref;
175 gimple new_stmt;
177 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
178 vect_type = TREE_TYPE (TREE_TYPE (array));
179 vect = vect_create_destination_var (scalar_dest, vect_type);
180 array_ref = build4 (ARRAY_REF, vect_type, array,
181 build_int_cst (size_type_node, n),
182 NULL_TREE, NULL_TREE);
184 new_stmt = gimple_build_assign (vect, array_ref);
185 vect_name = make_ssa_name (vect, new_stmt);
186 gimple_assign_set_lhs (new_stmt, vect_name);
187 vect_finish_stmt_generation (stmt, new_stmt, gsi);
189 return vect_name;
192 /* ARRAY is an array of vectors created by create_vector_array.
193 Emit code to store SSA_NAME VECT in index N of the array.
194 The store is part of the vectorization of STMT. */
196 static void
197 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
198 tree array, unsigned HOST_WIDE_INT n)
200 tree array_ref;
201 gimple new_stmt;
203 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
204 build_int_cst (size_type_node, n),
205 NULL_TREE, NULL_TREE);
207 new_stmt = gimple_build_assign (array_ref, vect);
208 vect_finish_stmt_generation (stmt, new_stmt, gsi);
211 /* PTR is a pointer to an array of type TYPE. Return a representation
212 of *PTR. The memory reference replaces those in FIRST_DR
213 (and its group). */
215 static tree
216 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
218 tree mem_ref, alias_ptr_type;
220 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
221 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
222 /* Arrays have the same alignment as their type. */
223 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
224 return mem_ref;
227 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
229 /* Function vect_mark_relevant.
231 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
233 static void
234 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
235 enum vect_relevant relevant, bool live_p,
236 bool used_in_pattern)
238 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
239 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
240 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
241 gimple pattern_stmt;
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "mark relevant %d, live %d.\n", relevant, live_p);
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
 249    may have their own uses that are not in any pattern; in such cases the
250 stmt itself should be marked. */
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
253 bool found = false;
254 if (!used_in_pattern)
256 imm_use_iterator imm_iter;
257 use_operand_p use_p;
258 gimple use_stmt;
259 tree lhs;
260 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
261 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
263 if (is_gimple_assign (stmt))
264 lhs = gimple_assign_lhs (stmt);
265 else
266 lhs = gimple_call_lhs (stmt);
268 /* This use is out of pattern use, if LHS has other uses that are
269 pattern uses, we should mark the stmt itself, and not the pattern
270 stmt. */
271 if (lhs && TREE_CODE (lhs) == SSA_NAME)
272 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
274 if (is_gimple_debug (USE_STMT (use_p)))
275 continue;
276 use_stmt = USE_STMT (use_p);
278 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
279 continue;
281 if (vinfo_for_stmt (use_stmt)
282 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
284 found = true;
285 break;
290 if (!found)
292 /* This is the last stmt in a sequence that was detected as a
293 pattern that can potentially be vectorized. Don't mark the stmt
294 as relevant/live because it's not going to be vectorized.
295 Instead mark the pattern-stmt that replaces it. */
297 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
299 if (dump_enabled_p ())
300 dump_printf_loc (MSG_NOTE, vect_location,
301 "last stmt in pattern. don't mark"
302 " relevant/live.\n");
303 stmt_info = vinfo_for_stmt (pattern_stmt);
304 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
305 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
306 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
307 stmt = pattern_stmt;
311 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
312 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
313 STMT_VINFO_RELEVANT (stmt_info) = relevant;
315 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
316 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
318 if (dump_enabled_p ())
319 dump_printf_loc (MSG_NOTE, vect_location,
320 "already marked relevant/live.\n");
321 return;
324 worklist->safe_push (stmt);
328 /* Function vect_stmt_relevant_p.
330 Return true if STMT in loop that is represented by LOOP_VINFO is
331 "relevant for vectorization".
333 A stmt is considered "relevant for vectorization" if:
334 - it has uses outside the loop.
335 - it has vdefs (it alters memory).
 336    - it is a control stmt in the loop (except for the exit condition).
338 CHECKME: what other side effects would the vectorizer allow? */
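/* Illustrative example (not from the original source): in
     for (i = 0; i < n; i++)
       a[i] = b[i] + c;
   the store to a[i] has a vdef and is therefore relevant, whereas the
   induction increment i = i + 1 satisfies none of the criteria above:
   it has no vdef, no use outside the loop, and is not a control stmt.  */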
340 static bool
341 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
342 enum vect_relevant *relevant, bool *live_p)
344 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
345 ssa_op_iter op_iter;
346 imm_use_iterator imm_iter;
347 use_operand_p use_p;
348 def_operand_p def_p;
350 *relevant = vect_unused_in_scope;
351 *live_p = false;
353 /* cond stmt other than loop exit cond. */
354 if (is_ctrl_stmt (stmt)
355 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
356 != loop_exit_ctrl_vec_info_type)
357 *relevant = vect_used_in_scope;
359 /* changing memory. */
360 if (gimple_code (stmt) != GIMPLE_PHI)
361 if (gimple_vdef (stmt)
362 && !gimple_clobber_p (stmt))
364 if (dump_enabled_p ())
365 dump_printf_loc (MSG_NOTE, vect_location,
366 "vec_stmt_relevant_p: stmt has vdefs.\n");
367 *relevant = vect_used_in_scope;
370 /* uses outside the loop. */
371 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
373 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
375 basic_block bb = gimple_bb (USE_STMT (use_p));
376 if (!flow_bb_inside_loop_p (loop, bb))
378 if (dump_enabled_p ())
379 dump_printf_loc (MSG_NOTE, vect_location,
380 "vec_stmt_relevant_p: used out of loop.\n");
382 if (is_gimple_debug (USE_STMT (use_p)))
383 continue;
385 /* We expect all such uses to be in the loop exit phis
386 (because of loop closed form) */
387 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
388 gcc_assert (bb == single_exit (loop)->dest);
390 *live_p = true;
395 return (*live_p || *relevant);
399 /* Function exist_non_indexing_operands_for_use_p
401 USE is one of the uses attached to STMT. Check if USE is
402 used in STMT for anything other than indexing an array. */
404 static bool
405 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
407 tree operand;
408 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
410 /* USE corresponds to some operand in STMT. If there is no data
411 reference in STMT, then any operand that corresponds to USE
412 is not indexing an array. */
413 if (!STMT_VINFO_DATA_REF (stmt_info))
414 return true;
 416   /* STMT has a data_ref.  FORNOW this means that it is one of
417 the following forms:
418 -1- ARRAY_REF = var
419 -2- var = ARRAY_REF
420 (This should have been verified in analyze_data_refs).
422 'var' in the second case corresponds to a def, not a use,
423 so USE cannot correspond to any operands that are not used
424 for array indexing.
426 Therefore, all we need to check is if STMT falls into the
427 first case, and whether var corresponds to USE. */
429 if (!gimple_assign_copy_p (stmt))
431 if (is_gimple_call (stmt)
432 && gimple_call_internal_p (stmt))
433 switch (gimple_call_internal_fn (stmt))
435 case IFN_MASK_STORE:
436 operand = gimple_call_arg (stmt, 3);
437 if (operand == use)
438 return true;
439 /* FALLTHRU */
440 case IFN_MASK_LOAD:
441 operand = gimple_call_arg (stmt, 2);
442 if (operand == use)
443 return true;
444 break;
445 default:
446 break;
448 return false;
451 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
452 return false;
453 operand = gimple_assign_rhs1 (stmt);
454 if (TREE_CODE (operand) != SSA_NAME)
455 return false;
457 if (operand == use)
458 return true;
460 return false;
 465 /* Function process_use.
467 Inputs:
468 - a USE in STMT in a loop represented by LOOP_VINFO
469 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
470 that defined USE. This is done by calling mark_relevant and passing it
471 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
472 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
473 be performed.
475 Outputs:
476 Generally, LIVE_P and RELEVANT are used to define the liveness and
477 relevance info of the DEF_STMT of this USE:
478 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
479 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
480 Exceptions:
481 - case 1: If USE is used only for address computations (e.g. array indexing),
482 which does not need to be directly vectorized, then the liveness/relevance
483 of the respective DEF_STMT is left unchanged.
484 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 485    skip DEF_STMT because it has already been processed.
486 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
487 be modified accordingly.
489 Return true if everything is as expected. Return false otherwise. */
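/* Illustrative example (not from the original source): if DEF_STMT lives
   in the outer loop of a nest while STMT uses it inside the inner loop
   (case 3a below), a use that arrives here as vect_used_in_outer is
   re-marked vect_used_in_scope before DEF_STMT is added to the
   worklist.  */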
491 static bool
492 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
493 enum vect_relevant relevant, vec<gimple> *worklist,
494 bool force)
496 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
497 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
498 stmt_vec_info dstmt_vinfo;
499 basic_block bb, def_bb;
500 tree def;
501 gimple def_stmt;
502 enum vect_def_type dt;
504 /* case 1: we are only interested in uses that need to be vectorized. Uses
505 that are used for address computation are not considered relevant. */
506 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
507 return true;
509 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
511 if (dump_enabled_p ())
512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
513 "not vectorized: unsupported use in stmt.\n");
514 return false;
517 if (!def_stmt || gimple_nop_p (def_stmt))
518 return true;
520 def_bb = gimple_bb (def_stmt);
521 if (!flow_bb_inside_loop_p (loop, def_bb))
523 if (dump_enabled_p ())
524 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
525 return true;
528 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
529 DEF_STMT must have already been processed, because this should be the
530 only way that STMT, which is a reduction-phi, was put in the worklist,
531 as there should be no other uses for DEF_STMT in the loop. So we just
532 check that everything is as expected, and we are done. */
533 dstmt_vinfo = vinfo_for_stmt (def_stmt);
534 bb = gimple_bb (stmt);
535 if (gimple_code (stmt) == GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
537 && gimple_code (def_stmt) != GIMPLE_PHI
538 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
539 && bb->loop_father == def_bb->loop_father)
541 if (dump_enabled_p ())
542 dump_printf_loc (MSG_NOTE, vect_location,
543 "reduc-stmt defining reduc-phi in the same nest.\n");
544 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
545 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
546 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
547 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
548 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
549 return true;
552 /* case 3a: outer-loop stmt defining an inner-loop stmt:
553 outer-loop-header-bb:
554 d = def_stmt
555 inner-loop:
556 stmt # use (d)
557 outer-loop-tail-bb:
558 ... */
559 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
561 if (dump_enabled_p ())
562 dump_printf_loc (MSG_NOTE, vect_location,
563 "outer-loop def-stmt defining inner-loop stmt.\n");
565 switch (relevant)
567 case vect_unused_in_scope:
568 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
569 vect_used_in_scope : vect_unused_in_scope;
570 break;
572 case vect_used_in_outer_by_reduction:
573 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
574 relevant = vect_used_by_reduction;
575 break;
577 case vect_used_in_outer:
578 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
579 relevant = vect_used_in_scope;
580 break;
582 case vect_used_in_scope:
583 break;
585 default:
586 gcc_unreachable ();
590 /* case 3b: inner-loop stmt defining an outer-loop stmt:
591 outer-loop-header-bb:
593 inner-loop:
594 d = def_stmt
595 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
596 stmt # use (d) */
597 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
599 if (dump_enabled_p ())
600 dump_printf_loc (MSG_NOTE, vect_location,
601 "inner-loop def-stmt defining outer-loop stmt.\n");
603 switch (relevant)
605 case vect_unused_in_scope:
606 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
607 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
608 vect_used_in_outer_by_reduction : vect_unused_in_scope;
609 break;
611 case vect_used_by_reduction:
612 relevant = vect_used_in_outer_by_reduction;
613 break;
615 case vect_used_in_scope:
616 relevant = vect_used_in_outer;
617 break;
619 default:
620 gcc_unreachable ();
624 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
625 is_pattern_stmt_p (stmt_vinfo));
626 return true;
630 /* Function vect_mark_stmts_to_be_vectorized.
632 Not all stmts in the loop need to be vectorized. For example:
634 for i...
635 for j...
636 1. T0 = i + j
637 2. T1 = a[T0]
639 3. j = j + 1
641 Stmt 1 and 3 do not need to be vectorized, because loop control and
642 addressing of vectorized data-refs are handled differently.
644 This pass detects such stmts. */
646 bool
647 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
649 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
650 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
651 unsigned int nbbs = loop->num_nodes;
652 gimple_stmt_iterator si;
653 gimple stmt;
654 unsigned int i;
655 stmt_vec_info stmt_vinfo;
656 basic_block bb;
657 gimple phi;
658 bool live_p;
659 enum vect_relevant relevant, tmp_relevant;
660 enum vect_def_type def_type;
662 if (dump_enabled_p ())
663 dump_printf_loc (MSG_NOTE, vect_location,
664 "=== vect_mark_stmts_to_be_vectorized ===\n");
666 auto_vec<gimple, 64> worklist;
668 /* 1. Init worklist. */
669 for (i = 0; i < nbbs; i++)
671 bb = bbs[i];
672 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
674 phi = gsi_stmt (si);
675 if (dump_enabled_p ())
677 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
678 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
681 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
682 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
684 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
686 stmt = gsi_stmt (si);
687 if (dump_enabled_p ())
689 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
690 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
693 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
694 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
698 /* 2. Process_worklist */
699 while (worklist.length () > 0)
701 use_operand_p use_p;
702 ssa_op_iter iter;
704 stmt = worklist.pop ();
705 if (dump_enabled_p ())
707 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
708 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
711 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
712 (DEF_STMT) as relevant/irrelevant and live/dead according to the
713 liveness and relevance properties of STMT. */
714 stmt_vinfo = vinfo_for_stmt (stmt);
715 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
716 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
718 /* Generally, the liveness and relevance properties of STMT are
719 propagated as is to the DEF_STMTs of its USEs:
720 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
721 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
723 One exception is when STMT has been identified as defining a reduction
724 variable; in this case we set the liveness/relevance as follows:
725 live_p = false
726 relevant = vect_used_by_reduction
727 This is because we distinguish between two kinds of relevant stmts -
728 those that are used by a reduction computation, and those that are
729 (also) used by a regular computation. This allows us later on to
730 identify stmts that are used solely by a reduction, and therefore the
731 order of the results that they produce does not have to be kept. */
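	  /* Illustrative example (not from the original source): for a sum
	     reduction  s += a[i],  the stmt computing s reaches this point
	     with def_type == vect_reduction_def, so the switch below sets
	     relevant = vect_used_by_reduction and live_p = false before its
	     operands (e.g. the load of a[i]) are processed.  */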
733 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
734 tmp_relevant = relevant;
735 switch (def_type)
737 case vect_reduction_def:
738 switch (tmp_relevant)
740 case vect_unused_in_scope:
741 relevant = vect_used_by_reduction;
742 break;
744 case vect_used_by_reduction:
745 if (gimple_code (stmt) == GIMPLE_PHI)
746 break;
747 /* fall through */
749 default:
750 if (dump_enabled_p ())
751 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
752 "unsupported use of reduction.\n");
753 return false;
756 live_p = false;
757 break;
759 case vect_nested_cycle:
760 if (tmp_relevant != vect_unused_in_scope
761 && tmp_relevant != vect_used_in_outer_by_reduction
762 && tmp_relevant != vect_used_in_outer)
764 if (dump_enabled_p ())
765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
766 "unsupported use of nested cycle.\n");
768 return false;
771 live_p = false;
772 break;
774 case vect_double_reduction_def:
775 if (tmp_relevant != vect_unused_in_scope
776 && tmp_relevant != vect_used_by_reduction)
778 if (dump_enabled_p ())
779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
780 "unsupported use of double reduction.\n");
782 return false;
785 live_p = false;
786 break;
788 default:
789 break;
792 if (is_pattern_stmt_p (stmt_vinfo))
794 /* Pattern statements are not inserted into the code, so
795 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
796 have to scan the RHS or function arguments instead. */
797 if (is_gimple_assign (stmt))
799 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
800 tree op = gimple_assign_rhs1 (stmt);
802 i = 1;
803 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
805 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
806 live_p, relevant, &worklist, false)
807 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
808 live_p, relevant, &worklist, false))
809 return false;
810 i = 2;
812 for (; i < gimple_num_ops (stmt); i++)
814 op = gimple_op (stmt, i);
815 if (TREE_CODE (op) == SSA_NAME
816 && !process_use (stmt, op, loop_vinfo, live_p, relevant,
817 &worklist, false))
818 return false;
821 else if (is_gimple_call (stmt))
823 for (i = 0; i < gimple_call_num_args (stmt); i++)
825 tree arg = gimple_call_arg (stmt, i);
826 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
827 &worklist, false))
828 return false;
832 else
833 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
835 tree op = USE_FROM_PTR (use_p);
836 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
837 &worklist, false))
838 return false;
841 if (STMT_VINFO_GATHER_P (stmt_vinfo))
843 tree off;
844 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
845 gcc_assert (decl);
846 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
847 &worklist, true))
848 return false;
850 } /* while worklist */
852 return true;
856 /* Function vect_model_simple_cost.
858 Models cost for simple operations, i.e. those that only emit ncopies of a
859 single op. Right now, this does not account for multiple insns that could
860 be generated for the single vector op. We will handle that shortly. */
862 void
863 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
864 enum vect_def_type *dt,
865 stmt_vector_for_cost *prologue_cost_vec,
866 stmt_vector_for_cost *body_cost_vec)
868 int i;
869 int inside_cost = 0, prologue_cost = 0;
871 /* The SLP costs were already calculated during SLP tree build. */
872 if (PURE_SLP_STMT (stmt_info))
873 return;
875 /* FORNOW: Assuming maximum 2 args per stmts. */
876 for (i = 0; i < 2; i++)
877 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
878 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
879 stmt_info, 0, vect_prologue);
881 /* Pass the inside-of-loop statements to the target-specific cost model. */
882 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
883 stmt_info, 0, vect_body);
885 if (dump_enabled_p ())
886 dump_printf_loc (MSG_NOTE, vect_location,
887 "vect_model_simple_cost: inside_cost = %d, "
888 "prologue_cost = %d .\n", inside_cost, prologue_cost);
892 /* Model cost for type demotion and promotion operations. PWR is normally
893 zero for single-step promotions and demotions. It will be one if
894 two-step promotion/demotion is required, and so on. Each additional
895 step doubles the number of instructions required. */
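/* Worked example (not from the original source), assuming vect_pow2 (x)
   computes 2**x: with PWR == 1 (a two-step conversion) the loop below
   charges vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6 vec_promote_demote
   stmts for a promotion, and vect_pow2 (0) + vect_pow2 (1) = 1 + 2 = 3
   for a demotion.  */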
897 static void
898 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
899 enum vect_def_type *dt, int pwr)
901 int i, tmp;
902 int inside_cost = 0, prologue_cost = 0;
903 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
904 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
905 void *target_cost_data;
907 /* The SLP costs were already calculated during SLP tree build. */
908 if (PURE_SLP_STMT (stmt_info))
909 return;
911 if (loop_vinfo)
912 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
913 else
914 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
916 for (i = 0; i < pwr + 1; i++)
918 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
919 (i + 1) : i;
920 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
921 vec_promote_demote, stmt_info, 0,
922 vect_body);
925 /* FORNOW: Assuming maximum 2 args per stmts. */
926 for (i = 0; i < 2; i++)
927 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
928 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
929 stmt_info, 0, vect_prologue);
931 if (dump_enabled_p ())
932 dump_printf_loc (MSG_NOTE, vect_location,
933 "vect_model_promotion_demotion_cost: inside_cost = %d, "
934 "prologue_cost = %d .\n", inside_cost, prologue_cost);
937 /* Function vect_cost_group_size
939 For grouped load or store, return the group_size only if it is the first
940 load or store of a group, else return 1. This ensures that group size is
941 only returned once per group. */
943 static int
944 vect_cost_group_size (stmt_vec_info stmt_info)
946 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
948 if (first_stmt == STMT_VINFO_STMT (stmt_info))
949 return GROUP_SIZE (stmt_info);
951 return 1;
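   /* Illustrative example (not from the original source): for an
      interleaved group of four stores, the call on the first store
      returns 4 and the calls on the remaining three return 1, so the
      group overhead is counted exactly once.  */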
955 /* Function vect_model_store_cost
957 Models cost for stores. In the case of grouped accesses, one access
958 has the overhead of the grouped access attributed to it. */
960 void
961 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
962 bool store_lanes_p, enum vect_def_type dt,
963 slp_tree slp_node,
964 stmt_vector_for_cost *prologue_cost_vec,
965 stmt_vector_for_cost *body_cost_vec)
967 int group_size;
968 unsigned int inside_cost = 0, prologue_cost = 0;
969 struct data_reference *first_dr;
970 gimple first_stmt;
972 if (dt == vect_constant_def || dt == vect_external_def)
973 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
974 stmt_info, 0, vect_prologue);
976 /* Grouped access? */
977 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
979 if (slp_node)
981 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
982 group_size = 1;
984 else
986 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
987 group_size = vect_cost_group_size (stmt_info);
990 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
992 /* Not a grouped access. */
993 else
995 group_size = 1;
996 first_dr = STMT_VINFO_DATA_REF (stmt_info);
999 /* We assume that the cost of a single store-lanes instruction is
1000 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
1001 access is instead being provided by a permute-and-store operation,
1002 include the cost of the permutes. */
1003 if (!store_lanes_p && group_size > 1)
 1005      /* Uses high and low interleave or shuffle operations for each
1006 needed permute. */
1007 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1008 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1009 stmt_info, 0, vect_body);
1011 if (dump_enabled_p ())
1012 dump_printf_loc (MSG_NOTE, vect_location,
1013 "vect_model_store_cost: strided group_size = %d .\n",
1014 group_size);
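      /* Worked example (not from the original source): a permute-and-store
	 of an interleaved store group of size 4 with ncopies == 1 records
	 nstmts = 1 * ceil_log2 (4) * 4 = 8 vec_perm stmts above.  */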
1017 /* Costs of the stores. */
1018 if (STMT_VINFO_STRIDED_P (stmt_info))
1020 /* N scalar stores plus extracting the elements. */
1021 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1022 inside_cost += record_stmt_cost (body_cost_vec,
1023 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1024 scalar_store, stmt_info, 0, vect_body);
1025 inside_cost += record_stmt_cost (body_cost_vec,
1026 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1027 vec_to_scalar, stmt_info, 0, vect_body);
1029 else
1030 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1032 if (dump_enabled_p ())
1033 dump_printf_loc (MSG_NOTE, vect_location,
1034 "vect_model_store_cost: inside_cost = %d, "
1035 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1039 /* Calculate cost of DR's memory access. */
1040 void
1041 vect_get_store_cost (struct data_reference *dr, int ncopies,
1042 unsigned int *inside_cost,
1043 stmt_vector_for_cost *body_cost_vec)
1045 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1046 gimple stmt = DR_STMT (dr);
1047 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1049 switch (alignment_support_scheme)
1051 case dr_aligned:
1053 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1054 vector_store, stmt_info, 0,
1055 vect_body);
1057 if (dump_enabled_p ())
1058 dump_printf_loc (MSG_NOTE, vect_location,
1059 "vect_model_store_cost: aligned.\n");
1060 break;
1063 case dr_unaligned_supported:
1065 /* Here, we assign an additional cost for the unaligned store. */
1066 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1067 unaligned_store, stmt_info,
1068 DR_MISALIGNMENT (dr), vect_body);
1069 if (dump_enabled_p ())
1070 dump_printf_loc (MSG_NOTE, vect_location,
1071 "vect_model_store_cost: unaligned supported by "
1072 "hardware.\n");
1073 break;
1076 case dr_unaligned_unsupported:
1078 *inside_cost = VECT_MAX_COST;
1080 if (dump_enabled_p ())
1081 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1082 "vect_model_store_cost: unsupported access.\n");
1083 break;
1086 default:
1087 gcc_unreachable ();
1092 /* Function vect_model_load_cost
1094 Models cost for loads. In the case of grouped accesses, the last access
1095 has the overhead of the grouped access attributed to it. Since unaligned
1096 accesses are supported for loads, we also account for the costs of the
1097 access scheme chosen. */
1099 void
1100 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1101 bool load_lanes_p, slp_tree slp_node,
1102 stmt_vector_for_cost *prologue_cost_vec,
1103 stmt_vector_for_cost *body_cost_vec)
1105 int group_size;
1106 gimple first_stmt;
1107 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1108 unsigned int inside_cost = 0, prologue_cost = 0;
1110 /* Grouped accesses? */
1111 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1112 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1114 group_size = vect_cost_group_size (stmt_info);
1115 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1117 /* Not a grouped access. */
1118 else
1120 group_size = 1;
1121 first_dr = dr;
1124 /* We assume that the cost of a single load-lanes instruction is
1125 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1126 access is instead being provided by a load-and-permute operation,
1127 include the cost of the permutes. */
1128 if (!load_lanes_p && group_size > 1
1129 && !STMT_VINFO_STRIDED_P (stmt_info))
 1131      /* Uses even and odd extract operations or shuffle operations
1132 for each needed permute. */
1133 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1134 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1135 stmt_info, 0, vect_body);
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE, vect_location,
1139 "vect_model_load_cost: strided group_size = %d .\n",
1140 group_size);
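      /* Worked example (not from the original source): a load-and-permute
	 of an interleaved load group of size 4 with ncopies == 1 likewise
	 records nstmts = 1 * ceil_log2 (4) * 4 = 8 vec_perm stmts above.  */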
1143 /* The loads themselves. */
1144 if (STMT_VINFO_STRIDED_P (stmt_info)
1145 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1147 /* N scalar loads plus gathering them into a vector. */
1148 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1149 inside_cost += record_stmt_cost (body_cost_vec,
1150 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1151 scalar_load, stmt_info, 0, vect_body);
1153 else
1154 vect_get_load_cost (first_dr, ncopies,
1155 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1156 || group_size > 1 || slp_node),
1157 &inside_cost, &prologue_cost,
1158 prologue_cost_vec, body_cost_vec, true);
1159 if (STMT_VINFO_STRIDED_P (stmt_info))
1160 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1161 stmt_info, 0, vect_body);
1163 if (dump_enabled_p ())
1164 dump_printf_loc (MSG_NOTE, vect_location,
1165 "vect_model_load_cost: inside_cost = %d, "
1166 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1170 /* Calculate cost of DR's memory access. */
1171 void
1172 vect_get_load_cost (struct data_reference *dr, int ncopies,
1173 bool add_realign_cost, unsigned int *inside_cost,
1174 unsigned int *prologue_cost,
1175 stmt_vector_for_cost *prologue_cost_vec,
1176 stmt_vector_for_cost *body_cost_vec,
1177 bool record_prologue_costs)
1179 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1180 gimple stmt = DR_STMT (dr);
1181 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1183 switch (alignment_support_scheme)
1185 case dr_aligned:
1187 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1188 stmt_info, 0, vect_body);
1190 if (dump_enabled_p ())
1191 dump_printf_loc (MSG_NOTE, vect_location,
1192 "vect_model_load_cost: aligned.\n");
1194 break;
1196 case dr_unaligned_supported:
1198 /* Here, we assign an additional cost for the unaligned load. */
1199 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1200 unaligned_load, stmt_info,
1201 DR_MISALIGNMENT (dr), vect_body);
1203 if (dump_enabled_p ())
1204 dump_printf_loc (MSG_NOTE, vect_location,
1205 "vect_model_load_cost: unaligned supported by "
1206 "hardware.\n");
1208 break;
1210 case dr_explicit_realign:
1212 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1213 vector_load, stmt_info, 0, vect_body);
1214 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1215 vec_perm, stmt_info, 0, vect_body);
1217 /* FIXME: If the misalignment remains fixed across the iterations of
1218 the containing loop, the following cost should be added to the
1219 prologue costs. */
1220 if (targetm.vectorize.builtin_mask_for_load)
1221 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1222 stmt_info, 0, vect_body);
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE, vect_location,
1226 "vect_model_load_cost: explicit realign\n");
1228 break;
1230 case dr_explicit_realign_optimized:
1232 if (dump_enabled_p ())
1233 dump_printf_loc (MSG_NOTE, vect_location,
1234 "vect_model_load_cost: unaligned software "
1235 "pipelined.\n");
1237 /* Unaligned software pipeline has a load of an address, an initial
1238 load, and possibly a mask operation to "prime" the loop. However,
1239 if this is an access in a group of loads, which provide grouped
1240 access, then the above cost should only be considered for one
1241 access in the group. Inside the loop, there is a load op
1242 and a realignment op. */
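      /* Illustrative breakdown (not from the original source): per the code
	 below, when ADD_REALIGN_COST and RECORD_PROLOGUE_COSTS hold the
	 prologue is charged 2 vector_stmt costs (plus one more if the target
	 provides builtin_mask_for_load), while each copy in the body is
	 charged one vector_load plus one vec_perm.  */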
1244 if (add_realign_cost && record_prologue_costs)
1246 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1247 vector_stmt, stmt_info,
1248 0, vect_prologue);
1249 if (targetm.vectorize.builtin_mask_for_load)
1250 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1251 vector_stmt, stmt_info,
1252 0, vect_prologue);
1255 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1256 stmt_info, 0, vect_body);
1257 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1258 stmt_info, 0, vect_body);
1260 if (dump_enabled_p ())
1261 dump_printf_loc (MSG_NOTE, vect_location,
1262 "vect_model_load_cost: explicit realign optimized"
1263 "\n");
1265 break;
1268 case dr_unaligned_unsupported:
1270 *inside_cost = VECT_MAX_COST;
1272 if (dump_enabled_p ())
1273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1274 "vect_model_load_cost: unsupported access.\n");
1275 break;
1278 default:
1279 gcc_unreachable ();
1283 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1284 the loop preheader for the vectorized stmt STMT. */
1286 static void
1287 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1289 if (gsi)
1290 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1291 else
1293 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1294 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1296 if (loop_vinfo)
1298 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1299 basic_block new_bb;
1300 edge pe;
1302 if (nested_in_vect_loop_p (loop, stmt))
1303 loop = loop->inner;
1305 pe = loop_preheader_edge (loop);
1306 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1307 gcc_assert (!new_bb);
1309 else
1311 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1312 basic_block bb;
1313 gimple_stmt_iterator gsi_bb_start;
1315 gcc_assert (bb_vinfo);
1316 bb = BB_VINFO_BB (bb_vinfo);
1317 gsi_bb_start = gsi_after_labels (bb);
1318 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1322 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_NOTE, vect_location,
1325 "created new init_stmt: ");
1326 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1330 /* Function vect_init_vector.
1332 Insert a new stmt (INIT_STMT) that initializes a new variable of type
 1333    TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
 1334    a vector type, a vector with all elements equal to VAL is created first.
1335 Place the initialization at BSI if it is not NULL. Otherwise, place the
1336 initialization at the loop preheader.
1337 Return the DEF of INIT_STMT.
1338 It will be used in the vectorization of STMT. */
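/* Illustrative example (not from the original source): a call such as
     vect_init_vector (stmt, build_int_cst (integer_type_node, 5),
                       v4si_type, NULL)
   (v4si_type being a hypothetical 4 x int vector type) materializes
   vect_cst_ = { 5, 5, 5, 5 } in the loop preheader, since GSI is NULL,
   and returns the new SSA name.  */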
1340 tree
1341 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1343 tree new_var;
1344 gimple init_stmt;
1345 tree vec_oprnd;
1346 tree new_temp;
1348 if (TREE_CODE (type) == VECTOR_TYPE
1349 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1351 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1353 if (CONSTANT_CLASS_P (val))
1354 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1355 else
1357 new_temp = make_ssa_name (TREE_TYPE (type));
1358 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1359 vect_init_vector_1 (stmt, init_stmt, gsi);
1360 val = new_temp;
1363 val = build_vector_from_val (type, val);
1366 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1367 init_stmt = gimple_build_assign (new_var, val);
1368 new_temp = make_ssa_name (new_var, init_stmt);
1369 gimple_assign_set_lhs (init_stmt, new_temp);
1370 vect_init_vector_1 (stmt, init_stmt, gsi);
1371 vec_oprnd = gimple_assign_lhs (init_stmt);
1372 return vec_oprnd;
1376 /* Function vect_get_vec_def_for_operand.
1378 OP is an operand in STMT. This function returns a (vector) def that will be
1379 used in the vectorized stmt for STMT.
1381 In the case that OP is an SSA_NAME which is defined in the loop, then
1382 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1384 In case OP is an invariant or constant, a new stmt that creates a vector def
1385 needs to be introduced. */
1387 tree
1388 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1390 tree vec_oprnd;
1391 gimple vec_stmt;
1392 gimple def_stmt;
1393 stmt_vec_info def_stmt_info = NULL;
1394 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1395 unsigned int nunits;
1396 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1397 tree def;
1398 enum vect_def_type dt;
1399 bool is_simple_use;
1400 tree vector_type;
1402 if (dump_enabled_p ())
1404 dump_printf_loc (MSG_NOTE, vect_location,
1405 "vect_get_vec_def_for_operand: ");
1406 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1407 dump_printf (MSG_NOTE, "\n");
1410 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1411 &def_stmt, &def, &dt);
1412 gcc_assert (is_simple_use);
1413 if (dump_enabled_p ())
1415 int loc_printed = 0;
1416 if (def)
1418 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1419 loc_printed = 1;
1420 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1421 dump_printf (MSG_NOTE, "\n");
1423 if (def_stmt)
1425 if (loc_printed)
1426 dump_printf (MSG_NOTE, " def_stmt = ");
1427 else
1428 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1429 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1433 switch (dt)
1435 /* Case 1: operand is a constant. */
1436 case vect_constant_def:
1438 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1439 gcc_assert (vector_type);
1440 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1442 if (scalar_def)
1443 *scalar_def = op;
1445 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1446 if (dump_enabled_p ())
1447 dump_printf_loc (MSG_NOTE, vect_location,
1448 "Create vector_cst. nunits = %d\n", nunits);
1450 return vect_init_vector (stmt, op, vector_type, NULL);
1453 /* Case 2: operand is defined outside the loop - loop invariant. */
1454 case vect_external_def:
1456 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1457 gcc_assert (vector_type);
1459 if (scalar_def)
1460 *scalar_def = def;
1462 /* Create 'vec_inv = {inv,inv,..,inv}' */
1463 if (dump_enabled_p ())
1464 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1466 return vect_init_vector (stmt, def, vector_type, NULL);
1469 /* Case 3: operand is defined inside the loop. */
1470 case vect_internal_def:
1472 if (scalar_def)
1473 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1475 /* Get the def from the vectorized stmt. */
1476 def_stmt_info = vinfo_for_stmt (def_stmt);
1478 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1479 /* Get vectorized pattern statement. */
1480 if (!vec_stmt
1481 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1482 && !STMT_VINFO_RELEVANT (def_stmt_info))
1483 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1484 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1485 gcc_assert (vec_stmt);
1486 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1487 vec_oprnd = PHI_RESULT (vec_stmt);
1488 else if (is_gimple_call (vec_stmt))
1489 vec_oprnd = gimple_call_lhs (vec_stmt);
1490 else
1491 vec_oprnd = gimple_assign_lhs (vec_stmt);
1492 return vec_oprnd;
1495 /* Case 4: operand is defined by a loop header phi - reduction */
1496 case vect_reduction_def:
1497 case vect_double_reduction_def:
1498 case vect_nested_cycle:
1500 struct loop *loop;
1502 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1503 loop = (gimple_bb (def_stmt))->loop_father;
1505 /* Get the def before the loop */
1506 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1507 return get_initial_def_for_reduction (stmt, op, scalar_def);
1510 /* Case 5: operand is defined by loop-header phi - induction. */
1511 case vect_induction_def:
1513 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1515 /* Get the def from the vectorized stmt. */
1516 def_stmt_info = vinfo_for_stmt (def_stmt);
1517 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1518 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1519 vec_oprnd = PHI_RESULT (vec_stmt);
1520 else
1521 vec_oprnd = gimple_get_lhs (vec_stmt);
1522 return vec_oprnd;
1525 default:
1526 gcc_unreachable ();
1531 /* Function vect_get_vec_def_for_stmt_copy
1533 Return a vector-def for an operand. This function is used when the
1534 vectorized stmt to be created (by the caller to this function) is a "copy"
1535 created in case the vectorized result cannot fit in one vector, and several
1536 copies of the vector-stmt are required. In this case the vector-def is
1537 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1538 of the stmt that defines VEC_OPRND.
1539 DT is the type of the vector def VEC_OPRND.
1541 Context:
1542 In case the vectorization factor (VF) is bigger than the number
1543 of elements that can fit in a vectype (nunits), we have to generate
1544 more than one vector stmt to vectorize the scalar stmt. This situation
1545 arises when there are multiple data-types operated upon in the loop; the
1546 smallest data-type determines the VF, and as a result, when vectorizing
1547 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1548 vector stmt (each computing a vector of 'nunits' results, and together
1549 computing 'VF' results in each iteration). This function is called when
1550 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1551 which VF=16 and nunits=4, so the number of copies required is 4):
1553 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1555 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1556 VS1.1: vx.1 = memref1 VS1.2
1557 VS1.2: vx.2 = memref2 VS1.3
1558 VS1.3: vx.3 = memref3
1560 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1561 VSnew.1: vz1 = vx.1 + ... VSnew.2
1562 VSnew.2: vz2 = vx.2 + ... VSnew.3
1563 VSnew.3: vz3 = vx.3 + ...
1565 The vectorization of S1 is explained in vectorizable_load.
1566 The vectorization of S2:
1567 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1568 the function 'vect_get_vec_def_for_operand' is called to
1569 get the relevant vector-def for each operand of S2. For operand x it
1570 returns the vector-def 'vx.0'.
1572 To create the remaining copies of the vector-stmt (VSnew.j), this
1573 function is called to get the relevant vector-def for each operand. It is
1574 obtained from the respective VS1.j stmt, which is recorded in the
1575 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1577 For example, to obtain the vector-def 'vx.1' in order to create the
1578 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1579 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1580 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1581 and return its def ('vx.1').
1582 Overall, to create the above sequence this function will be called 3 times:
1583 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1584 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1585 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1587 tree
1588 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1590 gimple vec_stmt_for_operand;
1591 stmt_vec_info def_stmt_info;
1593 /* Do nothing; can reuse same def. */
1594 if (dt == vect_external_def || dt == vect_constant_def )
1595 return vec_oprnd;
1597 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1598 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1599 gcc_assert (def_stmt_info);
1600 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1601 gcc_assert (vec_stmt_for_operand);
1602 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1603 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1604 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1605 else
1606 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1607 return vec_oprnd;
1611 /* Get vectorized definitions for the operands to create a copy of an original
1612 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1614 static void
1615 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1616 vec<tree> *vec_oprnds0,
1617 vec<tree> *vec_oprnds1)
1619 tree vec_oprnd = vec_oprnds0->pop ();
1621 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1622 vec_oprnds0->quick_push (vec_oprnd);
1624 if (vec_oprnds1 && vec_oprnds1->length ())
1626 vec_oprnd = vec_oprnds1->pop ();
1627 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1628 vec_oprnds1->quick_push (vec_oprnd);
1633 /* Get vectorized definitions for OP0 and OP1.
1634 REDUC_INDEX is the index of reduction operand in case of reduction,
1635 and -1 otherwise. */
1637 void
1638 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1639 vec<tree> *vec_oprnds0,
1640 vec<tree> *vec_oprnds1,
1641 slp_tree slp_node, int reduc_index)
1643 if (slp_node)
1645 int nops = (op1 == NULL_TREE) ? 1 : 2;
1646 auto_vec<tree> ops (nops);
1647 auto_vec<vec<tree> > vec_defs (nops);
1649 ops.quick_push (op0);
1650 if (op1)
1651 ops.quick_push (op1);
1653 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1655 *vec_oprnds0 = vec_defs[0];
1656 if (op1)
1657 *vec_oprnds1 = vec_defs[1];
1659 else
1661 tree vec_oprnd;
1663 vec_oprnds0->create (1);
1664 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1665 vec_oprnds0->quick_push (vec_oprnd);
1667 if (op1)
1669 vec_oprnds1->create (1);
1670 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1671 vec_oprnds1->quick_push (vec_oprnd);
1677 /* Function vect_finish_stmt_generation.
1679 Insert a new stmt. */
1681 void
1682 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1683 gimple_stmt_iterator *gsi)
1685 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1686 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1687 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1689 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1691 if (!gsi_end_p (*gsi)
1692 && gimple_has_mem_ops (vec_stmt))
1694 gimple at_stmt = gsi_stmt (*gsi);
1695 tree vuse = gimple_vuse (at_stmt);
1696 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1698 tree vdef = gimple_vdef (at_stmt);
1699 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1700 /* If we have an SSA vuse and insert a store, update virtual
1701 SSA form to avoid triggering the renamer. Do so only
1702 if we can easily see all uses - which is what almost always
1703 happens with the way vectorized stmts are inserted. */
1704 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1705 && ((is_gimple_assign (vec_stmt)
1706 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1707 || (is_gimple_call (vec_stmt)
1708 && !(gimple_call_flags (vec_stmt)
1709 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1711 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1712 gimple_set_vdef (vec_stmt, new_vdef);
1713 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1717 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1719 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1720 bb_vinfo));
1722 if (dump_enabled_p ())
1724 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1725 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1728 gimple_set_location (vec_stmt, gimple_location (stmt));
1730 /* While EH edges will generally prevent vectorization, stmt might
1731 e.g. be in a must-not-throw region. Ensure newly created stmts
1732 that could throw are part of the same region. */
1733 int lp_nr = lookup_stmt_eh_lp (stmt);
1734 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1735 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1738 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1739 a function declaration if the target has a vectorized version
1740 of the function, or NULL_TREE if the function cannot be vectorized. */
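/* Illustrative example (not from the original source): a call to a
   const math builtin may be answered with the decl of a target-provided
   vector routine via targetm.vectorize.builtin_vectorized_function,
   whereas a call that may write memory fails the
   ECF_CONST | ECF_NOVOPS check below and yields NULL_TREE.  */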
1742 tree
1743 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1745 tree fndecl = gimple_call_fndecl (call);
1747 /* We only handle functions that do not read or clobber memory -- i.e.
1748 const or novops ones. */
1749 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1750 return NULL_TREE;
1752 if (!fndecl
1753 || TREE_CODE (fndecl) != FUNCTION_DECL
1754 || !DECL_BUILT_IN (fndecl))
1755 return NULL_TREE;
1757 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1758 vectype_in);
1762 static tree permute_vec_elements (tree, tree, tree, gimple,
1763 gimple_stmt_iterator *);
1766 /* Function vectorizable_mask_load_store.
1768 Check if STMT performs a conditional load or store that can be vectorized.
1769 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1770 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1771 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1773 static bool
1774 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1775 gimple *vec_stmt, slp_tree slp_node)
1777 tree vec_dest = NULL;
1778 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1779 stmt_vec_info prev_stmt_info;
1780 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1781 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1782 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1783 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1784 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1785 tree elem_type;
1786 gimple new_stmt;
1787 tree dummy;
1788 tree dataref_ptr = NULL_TREE;
1789 gimple ptr_incr;
1790 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1791 int ncopies;
1792 int i, j;
1793 bool inv_p;
1794 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1795 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1796 int gather_scale = 1;
1797 enum vect_def_type gather_dt = vect_unknown_def_type;
1798 bool is_store;
1799 tree mask;
1800 gimple def_stmt;
1801 tree def;
1802 enum vect_def_type dt;
1804 if (slp_node != NULL)
1805 return false;
1807 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1808 gcc_assert (ncopies >= 1);
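  /* Illustrative example (not from the original source): with a
     vectorization factor of 16 and a vector type holding nunits == 4
     elements, ncopies == 4, matching the VF/nunits "copies" scheme
     described in the comment above vect_get_vec_def_for_stmt_copy.  */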
1810 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1811 mask = gimple_call_arg (stmt, 2);
1812 if (TYPE_PRECISION (TREE_TYPE (mask))
1813 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1814 return false;
1816 /* FORNOW. This restriction should be relaxed. */
1817 if (nested_in_vect_loop && ncopies > 1)
1819 if (dump_enabled_p ())
1820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1821 "multiple types in nested loop.");
1822 return false;
1825 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1826 return false;
1828 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1829 return false;
1831 if (!STMT_VINFO_DATA_REF (stmt_info))
1832 return false;
1834 elem_type = TREE_TYPE (vectype);
1836 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1837 return false;
1839 if (STMT_VINFO_STRIDED_P (stmt_info))
1840 return false;
1842 if (STMT_VINFO_GATHER_P (stmt_info))
1844 gimple def_stmt;
1845 tree def;
1846 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1847 &gather_off, &gather_scale);
1848 gcc_assert (gather_decl);
1849 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1850 &def_stmt, &def, &gather_dt,
1851 &gather_off_vectype))
1853 if (dump_enabled_p ())
1854 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1855 "gather index use not simple.");
1856 return false;
1859 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1860 tree masktype
1861 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1862 if (TREE_CODE (masktype) == INTEGER_TYPE)
1864 if (dump_enabled_p ())
1865 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1866 "masked gather with integer mask not supported.");
1867 return false;
1870 else if (tree_int_cst_compare (nested_in_vect_loop
1871 ? STMT_VINFO_DR_STEP (stmt_info)
1872 : DR_STEP (dr), size_zero_node) <= 0)
1873 return false;
1874 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1875 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1876 return false;
1878 if (TREE_CODE (mask) != SSA_NAME)
1879 return false;
1881 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1882 &def_stmt, &def, &dt))
1883 return false;
1885 if (is_store)
1887 tree rhs = gimple_call_arg (stmt, 3);
1888 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1889 &def_stmt, &def, &dt))
1890 return false;
1893 if (!vec_stmt) /* transformation not required. */
1895 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1896 if (is_store)
1897 vect_model_store_cost (stmt_info, ncopies, false, dt,
1898 NULL, NULL, NULL);
1899 else
1900 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1901 return true;
1904 /** Transform. **/
1906 if (STMT_VINFO_GATHER_P (stmt_info))
1908 tree vec_oprnd0 = NULL_TREE, op;
1909 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1910 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1911 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1912 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1913 tree mask_perm_mask = NULL_TREE;
1914 edge pe = loop_preheader_edge (loop);
1915 gimple_seq seq;
1916 basic_block new_bb;
1917 enum { NARROW, NONE, WIDEN } modifier;
1918 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1920 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1921 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1922 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1923 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1924 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1925 scaletype = TREE_VALUE (arglist);
1926 gcc_checking_assert (types_compatible_p (srctype, rettype)
1927 && types_compatible_p (srctype, masktype));
1929 if (nunits == gather_off_nunits)
1930 modifier = NONE;
1931 else if (nunits == gather_off_nunits / 2)
1933 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1934 modifier = WIDEN;
1936 for (i = 0; i < gather_off_nunits; ++i)
1937 sel[i] = i | nunits;
1939 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1941 else if (nunits == gather_off_nunits * 2)
1943 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1944 modifier = NARROW;
1946 for (i = 0; i < nunits; ++i)
1947 sel[i] = i < gather_off_nunits
1948 ? i : i + nunits - gather_off_nunits;
1950 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1951 ncopies *= 2;
1952 for (i = 0; i < nunits; ++i)
1953 sel[i] = i | gather_off_nunits;
1954 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1956 else
1957 gcc_unreachable ();
1959 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1961 ptr = fold_convert (ptrtype, gather_base);
1962 if (!is_gimple_min_invariant (ptr))
1964 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1965 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1966 gcc_assert (!new_bb);
1969 scale = build_int_cst (scaletype, gather_scale);
1971 prev_stmt_info = NULL;
1972 for (j = 0; j < ncopies; ++j)
1974 if (modifier == WIDEN && (j & 1))
1975 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1976 perm_mask, stmt, gsi);
1977 else if (j == 0)
1978 op = vec_oprnd0
1979 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1980 else
1981 op = vec_oprnd0
1982 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1984 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1986 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1987 == TYPE_VECTOR_SUBPARTS (idxtype));
1988 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1989 var = make_ssa_name (var);
1990 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1991 new_stmt
1992 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1993 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1994 op = var;
1997 if (mask_perm_mask && (j & 1))
1998 mask_op = permute_vec_elements (mask_op, mask_op,
1999 mask_perm_mask, stmt, gsi);
2000 else
2002 if (j == 0)
2003 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2004 else
2006 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
2007 &def_stmt, &def, &dt);
2008 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2011 mask_op = vec_mask;
2012 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2014 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2015 == TYPE_VECTOR_SUBPARTS (masktype));
2016 var = vect_get_new_vect_var (masktype, vect_simple_var,
2017 NULL);
2018 var = make_ssa_name (var);
2019 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2020 new_stmt
2021 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2022 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2023 mask_op = var;
2027 new_stmt
2028 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2029 scale);
2031 if (!useless_type_conversion_p (vectype, rettype))
2033 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2034 == TYPE_VECTOR_SUBPARTS (rettype));
2035 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2036 op = make_ssa_name (var, new_stmt);
2037 gimple_call_set_lhs (new_stmt, op);
2038 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2039 var = make_ssa_name (vec_dest);
2040 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2041 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2043 else
2045 var = make_ssa_name (vec_dest, new_stmt);
2046 gimple_call_set_lhs (new_stmt, var);
2049 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2051 if (modifier == NARROW)
2053 if ((j & 1) == 0)
2055 prev_res = var;
2056 continue;
2058 var = permute_vec_elements (prev_res, var,
2059 perm_mask, stmt, gsi);
2060 new_stmt = SSA_NAME_DEF_STMT (var);
2063 if (prev_stmt_info == NULL)
2064 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2065 else
2066 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2067 prev_stmt_info = vinfo_for_stmt (new_stmt);
2070 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2071 from the IL. */
2072 tree lhs = gimple_call_lhs (stmt);
2073 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2074 set_vinfo_for_stmt (new_stmt, stmt_info);
2075 set_vinfo_for_stmt (stmt, NULL);
2076 STMT_VINFO_STMT (stmt_info) = new_stmt;
2077 gsi_replace (gsi, new_stmt, true);
2078 return true;
2080 else if (is_store)
2082 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2083 prev_stmt_info = NULL;
2084 for (i = 0; i < ncopies; i++)
2086 unsigned align, misalign;
2088 if (i == 0)
2090 tree rhs = gimple_call_arg (stmt, 3);
2091 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2092 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2093 /* We should have caught mismatched types earlier. */
2094 gcc_assert (useless_type_conversion_p (vectype,
2095 TREE_TYPE (vec_rhs)));
2096 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2097 NULL_TREE, &dummy, gsi,
2098 &ptr_incr, false, &inv_p);
2099 gcc_assert (!inv_p);
2101 else
2103 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2104 &def, &dt);
2105 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2106 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2107 &def, &dt);
2108 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2109 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2110 TYPE_SIZE_UNIT (vectype));
2113 align = TYPE_ALIGN_UNIT (vectype);
2114 if (aligned_access_p (dr))
2115 misalign = 0;
2116 else if (DR_MISALIGNMENT (dr) == -1)
2118 align = TYPE_ALIGN_UNIT (elem_type);
2119 misalign = 0;
2121 else
2122 misalign = DR_MISALIGNMENT (dr);
2123 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2124 misalign);
2125 new_stmt
2126 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2127 gimple_call_arg (stmt, 1),
2128 vec_mask, vec_rhs);
2129 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2130 if (i == 0)
2131 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2132 else
2133 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2134 prev_stmt_info = vinfo_for_stmt (new_stmt);
2137 else
2139 tree vec_mask = NULL_TREE;
2140 prev_stmt_info = NULL;
2141 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2142 for (i = 0; i < ncopies; i++)
2144 unsigned align, misalign;
2146 if (i == 0)
2148 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2149 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2150 NULL_TREE, &dummy, gsi,
2151 &ptr_incr, false, &inv_p);
2152 gcc_assert (!inv_p);
2154 else
2156 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2157 &def, &dt);
2158 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2159 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2160 TYPE_SIZE_UNIT (vectype));
2163 align = TYPE_ALIGN_UNIT (vectype);
2164 if (aligned_access_p (dr))
2165 misalign = 0;
2166 else if (DR_MISALIGNMENT (dr) == -1)
2168 align = TYPE_ALIGN_UNIT (elem_type);
2169 misalign = 0;
2171 else
2172 misalign = DR_MISALIGNMENT (dr);
2173 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2174 misalign);
2175 new_stmt
2176 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2177 gimple_call_arg (stmt, 1),
2178 vec_mask);
2179 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2180 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2181 if (i == 0)
2182 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2183 else
2184 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2185 prev_stmt_info = vinfo_for_stmt (new_stmt);
2189 if (!is_store)
2191 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2192 from the IL. */
2193 tree lhs = gimple_call_lhs (stmt);
2194 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2195 set_vinfo_for_stmt (new_stmt, stmt_info);
2196 set_vinfo_for_stmt (stmt, NULL);
2197 STMT_VINFO_STMT (stmt_info) = new_stmt;
2198 gsi_replace (gsi, new_stmt, true);
2201 return true;
2205 /* Function vectorizable_call.
2207 Check if GS performs a function call that can be vectorized.
2208 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2209 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2210 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
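/* As a rough illustration (hypothetical example): a loop such as

     for (i = 0; i < n; i++)
       a[i] = sqrtf (b[i]);

   can be handled here when the target advertises a vector version of the
   scalar builtin through targetm.vectorize.builtin_vectorized_function
   (see vectorizable_function above); on x86 with SSE this would typically
   resolve to a builtin such as __builtin_ia32_sqrtps.  */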
2212 static bool
2213 vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2214 slp_tree slp_node)
2216 gcall *stmt;
2217 tree vec_dest;
2218 tree scalar_dest;
2219 tree op, type;
2220 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2221 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2222 tree vectype_out, vectype_in;
2223 int nunits_in;
2224 int nunits_out;
2225 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2226 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2227 tree fndecl, new_temp, def, rhs_type;
2228 gimple def_stmt;
2229 enum vect_def_type dt[3]
2230 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2231 gimple new_stmt = NULL;
2232 int ncopies, j;
2233 vec<tree> vargs = vNULL;
2234 enum { NARROW, NONE, WIDEN } modifier;
2235 size_t i, nargs;
2236 tree lhs;
2238 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2239 return false;
2241 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2242 return false;
2244 /* Is GS a vectorizable call? */
2245 stmt = dyn_cast <gcall *> (gs);
2246 if (!stmt)
2247 return false;
2249 if (gimple_call_internal_p (stmt)
2250 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2251 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2252 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2253 slp_node);
2255 if (gimple_call_lhs (stmt) == NULL_TREE
2256 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2257 return false;
2259 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2261 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2263 /* Process function arguments. */
2264 rhs_type = NULL_TREE;
2265 vectype_in = NULL_TREE;
2266 nargs = gimple_call_num_args (stmt);
2268 /* Bail out if the function has more than three arguments; we do not have
2269 interesting builtin functions to vectorize with more than two arguments
2270 except for fma. A call with no arguments is also not good. */
2271 if (nargs == 0 || nargs > 3)
2272 return false;
2274 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2275 if (gimple_call_internal_p (stmt)
2276 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2278 nargs = 0;
2279 rhs_type = unsigned_type_node;
2282 for (i = 0; i < nargs; i++)
2284 tree opvectype;
2286 op = gimple_call_arg (stmt, i);
2288 /* We can only handle calls with arguments of the same type. */
2289 if (rhs_type
2290 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2294 "argument types differ.\n");
2295 return false;
2297 if (!rhs_type)
2298 rhs_type = TREE_TYPE (op);
2300 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2301 &def_stmt, &def, &dt[i], &opvectype))
2303 if (dump_enabled_p ())
2304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2305 "use not simple.\n");
2306 return false;
2309 if (!vectype_in)
2310 vectype_in = opvectype;
2311 else if (opvectype
2312 && opvectype != vectype_in)
2314 if (dump_enabled_p ())
2315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2316 "argument vector types differ.\n");
2317 return false;
2320 /* If all arguments are external or constant defs use a vector type with
2321 the same size as the output vector type. */
2322 if (!vectype_in)
2323 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2324 if (vec_stmt)
2325 gcc_assert (vectype_in);
2326 if (!vectype_in)
2328 if (dump_enabled_p ())
2330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2331 "no vectype for scalar type ");
2332 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2333 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2336 return false;
2339 /* FORNOW */
2340 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2341 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2342 if (nunits_in == nunits_out / 2)
2343 modifier = NARROW;
2344 else if (nunits_out == nunits_in)
2345 modifier = NONE;
2346 else if (nunits_out == nunits_in / 2)
2347 modifier = WIDEN;
2348 else
2349 return false;
2351 /* For now, we only vectorize functions if a target specific builtin
2352 is available. TODO -- in some cases, it might be profitable to
2353 insert the calls for pieces of the vector, in order to be able
2354 to vectorize other operations in the loop. */
2355 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2356 if (fndecl == NULL_TREE)
2358 if (gimple_call_internal_p (stmt)
2359 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2360 && !slp_node
2361 && loop_vinfo
2362 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2363 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2364 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2365 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2367 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2368 { 0, 1, 2, ... vf - 1 } vector. */
2369 gcc_assert (nargs == 0);
2371 else
2373 if (dump_enabled_p ())
2374 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2375 "function is not vectorizable.\n");
2376 return false;
2380 gcc_assert (!gimple_vuse (stmt));
2382 if (slp_node || PURE_SLP_STMT (stmt_info))
2383 ncopies = 1;
2384 else if (modifier == NARROW)
2385 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2386 else
2387 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2389 /* Sanity check: make sure that at least one copy of the vectorized stmt
2390 needs to be generated. */
2391 gcc_assert (ncopies >= 1);
2393 if (!vec_stmt) /* transformation not required. */
2395 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2396 if (dump_enabled_p ())
2397 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2398 "\n");
2399 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2400 return true;
2403 /** Transform. **/
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2408 /* Handle def. */
2409 scalar_dest = gimple_call_lhs (stmt);
2410 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2412 prev_stmt_info = NULL;
2413 switch (modifier)
2415 case NONE:
2416 for (j = 0; j < ncopies; ++j)
2418 /* Build argument list for the vectorized call. */
2419 if (j == 0)
2420 vargs.create (nargs);
2421 else
2422 vargs.truncate (0);
2424 if (slp_node)
2426 auto_vec<vec<tree> > vec_defs (nargs);
2427 vec<tree> vec_oprnds0;
2429 for (i = 0; i < nargs; i++)
2430 vargs.quick_push (gimple_call_arg (stmt, i));
2431 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2432 vec_oprnds0 = vec_defs[0];
2434 /* Arguments are ready. Create the new vector stmt. */
2435 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2437 size_t k;
2438 for (k = 0; k < nargs; k++)
2440 vec<tree> vec_oprndsk = vec_defs[k];
2441 vargs[k] = vec_oprndsk[i];
2443 new_stmt = gimple_build_call_vec (fndecl, vargs);
2444 new_temp = make_ssa_name (vec_dest, new_stmt);
2445 gimple_call_set_lhs (new_stmt, new_temp);
2446 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2447 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2450 for (i = 0; i < nargs; i++)
2452 vec<tree> vec_oprndsi = vec_defs[i];
2453 vec_oprndsi.release ();
2455 continue;
2458 for (i = 0; i < nargs; i++)
2460 op = gimple_call_arg (stmt, i);
2461 if (j == 0)
2462 vec_oprnd0
2463 = vect_get_vec_def_for_operand (op, stmt, NULL);
2464 else
2466 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2467 vec_oprnd0
2468 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2471 vargs.quick_push (vec_oprnd0);
2474 if (gimple_call_internal_p (stmt)
2475 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2477 tree *v = XALLOCAVEC (tree, nunits_out);
2478 int k;
2479 for (k = 0; k < nunits_out; ++k)
2480 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2481 tree cst = build_vector (vectype_out, v);
2482 tree new_var
2483 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2484 gimple init_stmt = gimple_build_assign (new_var, cst);
2485 new_temp = make_ssa_name (new_var, init_stmt);
2486 gimple_assign_set_lhs (init_stmt, new_temp);
2487 vect_init_vector_1 (stmt, init_stmt, NULL);
2488 new_temp = make_ssa_name (vec_dest);
2489 new_stmt = gimple_build_assign (new_temp,
2490 gimple_assign_lhs (init_stmt));
2492 else
2494 new_stmt = gimple_build_call_vec (fndecl, vargs);
2495 new_temp = make_ssa_name (vec_dest, new_stmt);
2496 gimple_call_set_lhs (new_stmt, new_temp);
2498 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2500 if (j == 0)
2501 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2502 else
2503 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2505 prev_stmt_info = vinfo_for_stmt (new_stmt);
2508 break;
2510 case NARROW:
2511 for (j = 0; j < ncopies; ++j)
2513 /* Build argument list for the vectorized call. */
2514 if (j == 0)
2515 vargs.create (nargs * 2);
2516 else
2517 vargs.truncate (0);
2519 if (slp_node)
2521 auto_vec<vec<tree> > vec_defs (nargs);
2522 vec<tree> vec_oprnds0;
2524 for (i = 0; i < nargs; i++)
2525 vargs.quick_push (gimple_call_arg (stmt, i));
2526 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2527 vec_oprnds0 = vec_defs[0];
2529 /* Arguments are ready. Create the new vector stmt. */
2530 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2532 size_t k;
2533 vargs.truncate (0);
2534 for (k = 0; k < nargs; k++)
2536 vec<tree> vec_oprndsk = vec_defs[k];
2537 vargs.quick_push (vec_oprndsk[i]);
2538 vargs.quick_push (vec_oprndsk[i + 1]);
2540 new_stmt = gimple_build_call_vec (fndecl, vargs);
2541 new_temp = make_ssa_name (vec_dest, new_stmt);
2542 gimple_call_set_lhs (new_stmt, new_temp);
2543 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2544 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2547 for (i = 0; i < nargs; i++)
2549 vec<tree> vec_oprndsi = vec_defs[i];
2550 vec_oprndsi.release ();
2552 continue;
2555 for (i = 0; i < nargs; i++)
2557 op = gimple_call_arg (stmt, i);
2558 if (j == 0)
2560 vec_oprnd0
2561 = vect_get_vec_def_for_operand (op, stmt, NULL);
2562 vec_oprnd1
2563 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2565 else
2567 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2568 vec_oprnd0
2569 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2570 vec_oprnd1
2571 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2574 vargs.quick_push (vec_oprnd0);
2575 vargs.quick_push (vec_oprnd1);
2578 new_stmt = gimple_build_call_vec (fndecl, vargs);
2579 new_temp = make_ssa_name (vec_dest, new_stmt);
2580 gimple_call_set_lhs (new_stmt, new_temp);
2581 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2583 if (j == 0)
2584 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2585 else
2586 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2588 prev_stmt_info = vinfo_for_stmt (new_stmt);
2591 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2593 break;
2595 case WIDEN:
2596 /* No current target implements this case. */
2597 return false;
2600 vargs.release ();
2602 /* The call in STMT might prevent it from being removed in dce.
2603 We however cannot remove it here, due to the way the ssa name
2604 it defines is mapped to the new definition. So just replace
2605 the rhs of the statement with something harmless. */
2607 if (slp_node)
2608 return true;
2610 type = TREE_TYPE (scalar_dest);
2611 if (is_pattern_stmt_p (stmt_info))
2612 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2613 else
2614 lhs = gimple_call_lhs (stmt);
2615 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2616 set_vinfo_for_stmt (new_stmt, stmt_info);
2617 set_vinfo_for_stmt (stmt, NULL);
2618 STMT_VINFO_STMT (stmt_info) = new_stmt;
2619 gsi_replace (gsi, new_stmt, false);
2621 return true;
2625 struct simd_call_arg_info
2627 tree vectype;	/* Vector type of the argument, if any.  */
2628 tree op;	/* Base of a linear argument, or the current vector def during the transform.  */
2629 enum vect_def_type dt;	/* How the argument is defined.  */
2630 HOST_WIDE_INT linear_step;	/* Step of a linear argument, zero otherwise.  */
2631 unsigned int align;	/* Known pointer alignment in bytes, if any.  */
2634 /* Function vectorizable_simd_clone_call.
2636 Check if STMT performs a function call that can be vectorized
2637 by calling a simd clone of the function.
2638 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2639 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2640 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
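/* As a rough illustration (hypothetical example): given

     #pragma omp declare simd
     float foo (float x, float y);

   a call foo (b[i], c[i]) in a vectorizable loop can be replaced by a call
   to one of the simd clones generated for foo, e.g. a clone taking and
   returning whole vectors; the clone is chosen below by the "badness"
   heuristic over the available clones.  */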
2642 static bool
2643 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2644 gimple *vec_stmt, slp_tree slp_node)
2646 tree vec_dest;
2647 tree scalar_dest;
2648 tree op, type;
2649 tree vec_oprnd0 = NULL_TREE;
2650 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2651 tree vectype;
2652 unsigned int nunits;
2653 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2654 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2655 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2656 tree fndecl, new_temp, def;
2657 gimple def_stmt;
2658 gimple new_stmt = NULL;
2659 int ncopies, j;
2660 vec<simd_call_arg_info> arginfo = vNULL;
2661 vec<tree> vargs = vNULL;
2662 size_t i, nargs;
2663 tree lhs, rtype, ratype;
2664 vec<constructor_elt, va_gc> *ret_ctor_elts;
2666 /* Is STMT a vectorizable call? */
2667 if (!is_gimple_call (stmt))
2668 return false;
2670 fndecl = gimple_call_fndecl (stmt);
2671 if (fndecl == NULL_TREE)
2672 return false;
2674 struct cgraph_node *node = cgraph_node::get (fndecl);
2675 if (node == NULL || node->simd_clones == NULL)
2676 return false;
2678 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2679 return false;
2681 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2682 return false;
2684 if (gimple_call_lhs (stmt)
2685 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2686 return false;
2688 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2690 vectype = STMT_VINFO_VECTYPE (stmt_info);
2692 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2693 return false;
2695 /* FORNOW */
2696 if (slp_node || PURE_SLP_STMT (stmt_info))
2697 return false;
2699 /* Process function arguments. */
2700 nargs = gimple_call_num_args (stmt);
2702 /* Bail out if the function has zero arguments. */
2703 if (nargs == 0)
2704 return false;
2706 arginfo.create (nargs);
2708 for (i = 0; i < nargs; i++)
2710 simd_call_arg_info thisarginfo;
2711 affine_iv iv;
2713 thisarginfo.linear_step = 0;
2714 thisarginfo.align = 0;
2715 thisarginfo.op = NULL_TREE;
2717 op = gimple_call_arg (stmt, i);
2718 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2719 &def_stmt, &def, &thisarginfo.dt,
2720 &thisarginfo.vectype)
2721 || thisarginfo.dt == vect_uninitialized_def)
2723 if (dump_enabled_p ())
2724 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2725 "use not simple.\n");
2726 arginfo.release ();
2727 return false;
2730 if (thisarginfo.dt == vect_constant_def
2731 || thisarginfo.dt == vect_external_def)
2732 gcc_assert (thisarginfo.vectype == NULL_TREE);
2733 else
2734 gcc_assert (thisarginfo.vectype != NULL_TREE);
2736 /* For linear arguments, the analyze phase should have saved
2737 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2738 if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2739 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2741 gcc_assert (vec_stmt);
2742 thisarginfo.linear_step
2743 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2744 thisarginfo.op
2745 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2746 /* If the loop has been peeled for alignment, we need to adjust it. */
2747 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2748 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2749 if (n1 != n2)
2751 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2752 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
2753 tree opt = TREE_TYPE (thisarginfo.op);
2754 bias = fold_convert (TREE_TYPE (step), bias);
2755 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2756 thisarginfo.op
2757 = fold_build2 (POINTER_TYPE_P (opt)
2758 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2759 thisarginfo.op, bias);
2762 else if (!vec_stmt
2763 && thisarginfo.dt != vect_constant_def
2764 && thisarginfo.dt != vect_external_def
2765 && loop_vinfo
2766 && TREE_CODE (op) == SSA_NAME
2767 && simple_iv (loop, loop_containing_stmt (stmt), op,
2768 &iv, false)
2769 && tree_fits_shwi_p (iv.step))
2771 thisarginfo.linear_step = tree_to_shwi (iv.step);
2772 thisarginfo.op = iv.base;
2774 else if ((thisarginfo.dt == vect_constant_def
2775 || thisarginfo.dt == vect_external_def)
2776 && POINTER_TYPE_P (TREE_TYPE (op)))
2777 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2779 arginfo.quick_push (thisarginfo);
2782 unsigned int badness = 0;
2783 struct cgraph_node *bestn = NULL;
2784 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2785 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2786 else
2787 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2788 n = n->simdclone->next_clone)
2790 unsigned int this_badness = 0;
2791 if (n->simdclone->simdlen
2792 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2793 || n->simdclone->nargs != nargs)
2794 continue;
2795 if (n->simdclone->simdlen
2796 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2797 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2798 - exact_log2 (n->simdclone->simdlen)) * 1024;
2799 if (n->simdclone->inbranch)
2800 this_badness += 2048;
2801 int target_badness = targetm.simd_clone.usable (n);
2802 if (target_badness < 0)
2803 continue;
2804 this_badness += target_badness * 512;
2805 /* FORNOW: Have to add code to add the mask argument. */
2806 if (n->simdclone->inbranch)
2807 continue;
2808 for (i = 0; i < nargs; i++)
2810 switch (n->simdclone->args[i].arg_type)
2812 case SIMD_CLONE_ARG_TYPE_VECTOR:
2813 if (!useless_type_conversion_p
2814 (n->simdclone->args[i].orig_type,
2815 TREE_TYPE (gimple_call_arg (stmt, i))))
2816 i = -1;
2817 else if (arginfo[i].dt == vect_constant_def
2818 || arginfo[i].dt == vect_external_def
2819 || arginfo[i].linear_step)
2820 this_badness += 64;
2821 break;
2822 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2823 if (arginfo[i].dt != vect_constant_def
2824 && arginfo[i].dt != vect_external_def)
2825 i = -1;
2826 break;
2827 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2828 if (arginfo[i].dt == vect_constant_def
2829 || arginfo[i].dt == vect_external_def
2830 || (arginfo[i].linear_step
2831 != n->simdclone->args[i].linear_step))
2832 i = -1;
2833 break;
2834 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2835 /* FORNOW */
2836 i = -1;
2837 break;
2838 case SIMD_CLONE_ARG_TYPE_MASK:
2839 gcc_unreachable ();
2841 if (i == (size_t) -1)
2842 break;
2843 if (n->simdclone->args[i].alignment > arginfo[i].align)
2845 i = -1;
2846 break;
2848 if (arginfo[i].align)
2849 this_badness += (exact_log2 (arginfo[i].align)
2850 - exact_log2 (n->simdclone->args[i].alignment));
2852 if (i == (size_t) -1)
2853 continue;
2854 if (bestn == NULL || this_badness < badness)
2856 bestn = n;
2857 badness = this_badness;
2861 if (bestn == NULL)
2863 arginfo.release ();
2864 return false;
2867 for (i = 0; i < nargs; i++)
2868 if ((arginfo[i].dt == vect_constant_def
2869 || arginfo[i].dt == vect_external_def)
2870 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2872 arginfo[i].vectype
2873 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2874 i)));
2875 if (arginfo[i].vectype == NULL
2876 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2877 > bestn->simdclone->simdlen))
2879 arginfo.release ();
2880 return false;
2884 fndecl = bestn->decl;
2885 nunits = bestn->simdclone->simdlen;
2886 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2888 /* If the function isn't const, only allow it in simd loops where the user
2889 has asserted that at least nunits consecutive iterations can be
2890 performed using SIMD instructions. */
2891 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2892 && gimple_vuse (stmt))
2894 arginfo.release ();
2895 return false;
2898 /* Sanity check: make sure that at least one copy of the vectorized stmt
2899 needs to be generated. */
2900 gcc_assert (ncopies >= 1);
2902 if (!vec_stmt) /* transformation not required. */
2904 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2905 for (i = 0; i < nargs; i++)
2906 if (bestn->simdclone->args[i].arg_type
2907 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2909 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
2910 + 1);
2911 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2912 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2913 ? size_type_node : TREE_TYPE (arginfo[i].op);
2914 tree ls = build_int_cst (lst, arginfo[i].linear_step);
2915 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2917 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2918 if (dump_enabled_p ())
2919 dump_printf_loc (MSG_NOTE, vect_location,
2920 "=== vectorizable_simd_clone_call ===\n");
2921 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2922 arginfo.release ();
2923 return true;
2926 /** Transform. **/
2928 if (dump_enabled_p ())
2929 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2931 /* Handle def. */
2932 scalar_dest = gimple_call_lhs (stmt);
2933 vec_dest = NULL_TREE;
2934 rtype = NULL_TREE;
2935 ratype = NULL_TREE;
2936 if (scalar_dest)
2938 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2939 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2940 if (TREE_CODE (rtype) == ARRAY_TYPE)
2942 ratype = rtype;
2943 rtype = TREE_TYPE (ratype);
2947 prev_stmt_info = NULL;
2948 for (j = 0; j < ncopies; ++j)
2950 /* Build argument list for the vectorized call. */
2951 if (j == 0)
2952 vargs.create (nargs);
2953 else
2954 vargs.truncate (0);
2956 for (i = 0; i < nargs; i++)
2958 unsigned int k, l, m, o;
2959 tree atype;
2960 op = gimple_call_arg (stmt, i);
2961 switch (bestn->simdclone->args[i].arg_type)
2963 case SIMD_CLONE_ARG_TYPE_VECTOR:
2964 atype = bestn->simdclone->args[i].vector_type;
2965 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2966 for (m = j * o; m < (j + 1) * o; m++)
2968 if (TYPE_VECTOR_SUBPARTS (atype)
2969 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2971 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2972 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2973 / TYPE_VECTOR_SUBPARTS (atype));
2974 gcc_assert ((k & (k - 1)) == 0);
2975 if (m == 0)
2976 vec_oprnd0
2977 = vect_get_vec_def_for_operand (op, stmt, NULL);
2978 else
2980 vec_oprnd0 = arginfo[i].op;
2981 if ((m & (k - 1)) == 0)
2982 vec_oprnd0
2983 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2984 vec_oprnd0);
2986 arginfo[i].op = vec_oprnd0;
2987 vec_oprnd0
2988 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2989 size_int (prec),
2990 bitsize_int ((m & (k - 1)) * prec));
2991 new_stmt
2992 = gimple_build_assign (make_ssa_name (atype),
2993 vec_oprnd0);
2994 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2995 vargs.safe_push (gimple_assign_lhs (new_stmt));
2997 else
2999 k = (TYPE_VECTOR_SUBPARTS (atype)
3000 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3001 gcc_assert ((k & (k - 1)) == 0);
3002 vec<constructor_elt, va_gc> *ctor_elts;
3003 if (k != 1)
3004 vec_alloc (ctor_elts, k);
3005 else
3006 ctor_elts = NULL;
3007 for (l = 0; l < k; l++)
3009 if (m == 0 && l == 0)
3010 vec_oprnd0
3011 = vect_get_vec_def_for_operand (op, stmt, NULL);
3012 else
3013 vec_oprnd0
3014 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3015 arginfo[i].op);
3016 arginfo[i].op = vec_oprnd0;
3017 if (k == 1)
3018 break;
3019 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3020 vec_oprnd0);
3022 if (k == 1)
3023 vargs.safe_push (vec_oprnd0);
3024 else
3026 vec_oprnd0 = build_constructor (atype, ctor_elts);
3027 new_stmt
3028 = gimple_build_assign (make_ssa_name (atype),
3029 vec_oprnd0);
3030 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3031 vargs.safe_push (gimple_assign_lhs (new_stmt));
3035 break;
3036 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3037 vargs.safe_push (op);
3038 break;
3039 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3040 if (j == 0)
3042 gimple_seq stmts;
3043 arginfo[i].op
3044 = force_gimple_operand (arginfo[i].op, &stmts, true,
3045 NULL_TREE);
3046 if (stmts != NULL)
3048 basic_block new_bb;
3049 edge pe = loop_preheader_edge (loop);
3050 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3051 gcc_assert (!new_bb);
3053 tree phi_res = copy_ssa_name (op);
3054 gphi *new_phi = create_phi_node (phi_res, loop->header);
3055 set_vinfo_for_stmt (new_phi,
3056 new_stmt_vec_info (new_phi, loop_vinfo,
3057 NULL));
3058 add_phi_arg (new_phi, arginfo[i].op,
3059 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3060 enum tree_code code
3061 = POINTER_TYPE_P (TREE_TYPE (op))
3062 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3063 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3064 ? sizetype : TREE_TYPE (op);
3065 widest_int cst
3066 = wi::mul (bestn->simdclone->args[i].linear_step,
3067 ncopies * nunits);
3068 tree tcst = wide_int_to_tree (type, cst);
3069 tree phi_arg = copy_ssa_name (op);
3070 new_stmt
3071 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3072 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3073 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3074 set_vinfo_for_stmt (new_stmt,
3075 new_stmt_vec_info (new_stmt, loop_vinfo,
3076 NULL));
3077 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3078 UNKNOWN_LOCATION);
3079 arginfo[i].op = phi_res;
3080 vargs.safe_push (phi_res);
3082 else
3084 enum tree_code code
3085 = POINTER_TYPE_P (TREE_TYPE (op))
3086 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3087 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3088 ? sizetype : TREE_TYPE (op);
3089 widest_int cst
3090 = wi::mul (bestn->simdclone->args[i].linear_step,
3091 j * nunits);
3092 tree tcst = wide_int_to_tree (type, cst);
3093 new_temp = make_ssa_name (TREE_TYPE (op));
3094 new_stmt = gimple_build_assign (new_temp, code,
3095 arginfo[i].op, tcst);
3096 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3097 vargs.safe_push (new_temp);
3099 break;
3100 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3101 default:
3102 gcc_unreachable ();
3106 new_stmt = gimple_build_call_vec (fndecl, vargs);
3107 if (vec_dest)
3109 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3110 if (ratype)
3111 new_temp = create_tmp_var (ratype);
3112 else if (TYPE_VECTOR_SUBPARTS (vectype)
3113 == TYPE_VECTOR_SUBPARTS (rtype))
3114 new_temp = make_ssa_name (vec_dest, new_stmt);
3115 else
3116 new_temp = make_ssa_name (rtype, new_stmt);
3117 gimple_call_set_lhs (new_stmt, new_temp);
3119 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3121 if (vec_dest)
3123 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3125 unsigned int k, l;
3126 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3127 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3128 gcc_assert ((k & (k - 1)) == 0);
3129 for (l = 0; l < k; l++)
3131 tree t;
3132 if (ratype)
3134 t = build_fold_addr_expr (new_temp);
3135 t = build2 (MEM_REF, vectype, t,
3136 build_int_cst (TREE_TYPE (t),
3137 l * prec / BITS_PER_UNIT));
3139 else
3140 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3141 size_int (prec), bitsize_int (l * prec));
3142 new_stmt
3143 = gimple_build_assign (make_ssa_name (vectype), t);
3144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3145 if (j == 0 && l == 0)
3146 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3147 else
3148 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3150 prev_stmt_info = vinfo_for_stmt (new_stmt);
3153 if (ratype)
3155 tree clobber = build_constructor (ratype, NULL);
3156 TREE_THIS_VOLATILE (clobber) = 1;
3157 new_stmt = gimple_build_assign (new_temp, clobber);
3158 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3160 continue;
3162 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3164 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3165 / TYPE_VECTOR_SUBPARTS (rtype));
3166 gcc_assert ((k & (k - 1)) == 0);
3167 if ((j & (k - 1)) == 0)
3168 vec_alloc (ret_ctor_elts, k);
3169 if (ratype)
3171 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3172 for (m = 0; m < o; m++)
3174 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3175 size_int (m), NULL_TREE, NULL_TREE);
3176 new_stmt
3177 = gimple_build_assign (make_ssa_name (rtype), tem);
3178 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3179 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3180 gimple_assign_lhs (new_stmt));
3182 tree clobber = build_constructor (ratype, NULL);
3183 TREE_THIS_VOLATILE (clobber) = 1;
3184 new_stmt = gimple_build_assign (new_temp, clobber);
3185 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3187 else
3188 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3189 if ((j & (k - 1)) != k - 1)
3190 continue;
3191 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3192 new_stmt
3193 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3194 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3196 if ((unsigned) j == k - 1)
3197 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3198 else
3199 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3201 prev_stmt_info = vinfo_for_stmt (new_stmt);
3202 continue;
3204 else if (ratype)
3206 tree t = build_fold_addr_expr (new_temp);
3207 t = build2 (MEM_REF, vectype, t,
3208 build_int_cst (TREE_TYPE (t), 0));
3209 new_stmt
3210 = gimple_build_assign (make_ssa_name (vec_dest), t);
3211 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3212 tree clobber = build_constructor (ratype, NULL);
3213 TREE_THIS_VOLATILE (clobber) = 1;
3214 vect_finish_stmt_generation (stmt,
3215 gimple_build_assign (new_temp,
3216 clobber), gsi);
3220 if (j == 0)
3221 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3222 else
3223 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3225 prev_stmt_info = vinfo_for_stmt (new_stmt);
3228 vargs.release ();
3230 /* The call in STMT might prevent it from being removed in dce.
3231 We however cannot remove it here, due to the way the ssa name
3232 it defines is mapped to the new definition. So just replace
3233 the rhs of the statement with something harmless. */
3235 if (slp_node)
3236 return true;
3238 if (scalar_dest)
3240 type = TREE_TYPE (scalar_dest);
3241 if (is_pattern_stmt_p (stmt_info))
3242 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3243 else
3244 lhs = gimple_call_lhs (stmt);
3245 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3247 else
3248 new_stmt = gimple_build_nop ();
3249 set_vinfo_for_stmt (new_stmt, stmt_info);
3250 set_vinfo_for_stmt (stmt, NULL);
3251 STMT_VINFO_STMT (stmt_info) = new_stmt;
3252 gsi_replace (gsi, new_stmt, true);
3253 unlink_stmt_vdef (stmt);
3255 return true;
3259 /* Function vect_gen_widened_results_half
3261 Create a vector stmt whose code, number of arguments, and result
3262 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3263 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3264 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3265 needs to be created (DECL is a function-decl of a target-builtin).
3266 STMT is the original scalar stmt that we are vectorizing. */
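/* As a rough illustration: widening one V8HI operand into V4SI results
   takes two statements, e.g.

     lo_1 = [vec_unpack_lo_expr] v8hi_3;
     hi_2 = [vec_unpack_hi_expr] v8hi_3;

   and each call to this helper emits one such half, either as an
   assignment with CODE or, when CODE is CALL_EXPR, as a call to the
   target builtin DECL.  */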
3268 static gimple
3269 vect_gen_widened_results_half (enum tree_code code,
3270 tree decl,
3271 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3272 tree vec_dest, gimple_stmt_iterator *gsi,
3273 gimple stmt)
3275 gimple new_stmt;
3276 tree new_temp;
3278 /* Generate half of the widened result: */
3279 if (code == CALL_EXPR)
3281 /* Target specific support */
3282 if (op_type == binary_op)
3283 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3284 else
3285 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3286 new_temp = make_ssa_name (vec_dest, new_stmt);
3287 gimple_call_set_lhs (new_stmt, new_temp);
3289 else
3291 /* Generic support */
3292 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3293 if (op_type != binary_op)
3294 vec_oprnd1 = NULL;
3295 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3296 new_temp = make_ssa_name (vec_dest, new_stmt);
3297 gimple_assign_set_lhs (new_stmt, new_temp);
3299 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3301 return new_stmt;
3305 /* Get vectorized definitions for loop-based vectorization. For the first
3306 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3307 scalar operand), and for the rest we get a copy with
3308 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3309 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3310 The vectors are collected into VEC_OPRNDS. */
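/* As a rough illustration: with MULTI_STEP_CVT == 1 this collects four
   vector defs into VEC_OPRNDS (the def for the scalar operand, its stmt
   copy, and two further copies from the recursive call), which a two-step
   narrowing conversion then combines pairwise.  */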
3312 static void
3313 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3314 vec<tree> *vec_oprnds, int multi_step_cvt)
3316 tree vec_oprnd;
3318 /* Get first vector operand. */
3319 /* All the vector operands except the very first one (that is scalar oprnd)
3320 are stmt copies. */
3321 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3322 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3323 else
3324 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3326 vec_oprnds->quick_push (vec_oprnd);
3328 /* Get second vector operand. */
3329 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3330 vec_oprnds->quick_push (vec_oprnd);
3332 *oprnd = vec_oprnd;
3334 /* For conversion in multiple steps, continue to get operands
3335 recursively. */
3336 if (multi_step_cvt)
3337 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3341 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3342 For multi-step conversions store the resulting vectors and call the function
3343 recursively. */
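/* As a rough illustration: demoting two V4SI operands into one V8HI result
   is a single statement, e.g.

     v8hi_3 = VEC_PACK_TRUNC_EXPR <v4si_1, v4si_2>;

   and a multi-step demotion (e.g. int to char) repeats this pairwise
   packing once per intermediate type.  */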
3345 static void
3346 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3347 int multi_step_cvt, gimple stmt,
3348 vec<tree> vec_dsts,
3349 gimple_stmt_iterator *gsi,
3350 slp_tree slp_node, enum tree_code code,
3351 stmt_vec_info *prev_stmt_info)
3353 unsigned int i;
3354 tree vop0, vop1, new_tmp, vec_dest;
3355 gimple new_stmt;
3356 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3358 vec_dest = vec_dsts.pop ();
3360 for (i = 0; i < vec_oprnds->length (); i += 2)
3362 /* Create demotion operation. */
3363 vop0 = (*vec_oprnds)[i];
3364 vop1 = (*vec_oprnds)[i + 1];
3365 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3366 new_tmp = make_ssa_name (vec_dest, new_stmt);
3367 gimple_assign_set_lhs (new_stmt, new_tmp);
3368 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3370 if (multi_step_cvt)
3371 /* Store the resulting vector for next recursive call. */
3372 (*vec_oprnds)[i/2] = new_tmp;
3373 else
3375 /* This is the last step of the conversion sequence. Store the
3376 vectors in SLP_NODE or in vector info of the scalar statement
3377 (or in STMT_VINFO_RELATED_STMT chain). */
3378 if (slp_node)
3379 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3380 else
3382 if (!*prev_stmt_info)
3383 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3384 else
3385 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3387 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3392 /* For multi-step demotion operations we first generate demotion operations
3393 from the source type to the intermediate types, and then combine the
3394 results (stored in VEC_OPRNDS) in demotion operation to the destination
3395 type. */
3396 if (multi_step_cvt)
3398 /* At each level of recursion we have half of the operands we had at the
3399 previous level. */
3400 vec_oprnds->truncate ((i+1)/2);
3401 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3402 stmt, vec_dsts, gsi, slp_node,
3403 VEC_PACK_TRUNC_EXPR,
3404 prev_stmt_info);
3407 vec_dsts.quick_push (vec_dest);
3411 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3412 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3413 the resulting vectors and call the function recursively. */
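/* As a rough illustration: promoting one V8HI operand yields two V4SI
   results (a low and a high half, produced by vect_gen_widened_results_half
   above); both halves are pushed back into VEC_OPRNDS0 so that a further
   promotion step, if any, can widen them again.  */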
3415 static void
3416 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3417 vec<tree> *vec_oprnds1,
3418 gimple stmt, tree vec_dest,
3419 gimple_stmt_iterator *gsi,
3420 enum tree_code code1,
3421 enum tree_code code2, tree decl1,
3422 tree decl2, int op_type)
3424 int i;
3425 tree vop0, vop1, new_tmp1, new_tmp2;
3426 gimple new_stmt1, new_stmt2;
3427 vec<tree> vec_tmp = vNULL;
3429 vec_tmp.create (vec_oprnds0->length () * 2);
3430 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3432 if (op_type == binary_op)
3433 vop1 = (*vec_oprnds1)[i];
3434 else
3435 vop1 = NULL_TREE;
3437 /* Generate the two halves of promotion operation. */
3438 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3439 op_type, vec_dest, gsi, stmt);
3440 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3441 op_type, vec_dest, gsi, stmt);
3442 if (is_gimple_call (new_stmt1))
3444 new_tmp1 = gimple_call_lhs (new_stmt1);
3445 new_tmp2 = gimple_call_lhs (new_stmt2);
3447 else
3449 new_tmp1 = gimple_assign_lhs (new_stmt1);
3450 new_tmp2 = gimple_assign_lhs (new_stmt2);
3453 /* Store the results for the next step. */
3454 vec_tmp.quick_push (new_tmp1);
3455 vec_tmp.quick_push (new_tmp2);
3458 vec_oprnds0->release ();
3459 *vec_oprnds0 = vec_tmp;
3463 /* Check if STMT performs a conversion operation, that can be vectorized.
3464 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3465 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3466 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
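/* As a rough illustration: this covers plain conversions such as

     for (i = 0; i < n; i++)
       a[i] = (float) b[i];

   (modifier NONE), widening ones such as short to int, where one input
   vector produces two output vectors, and narrowing ones such as int to
   short, where two input vectors produce one output vector, possibly
   going through intermediate types in several steps.  */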
3468 static bool
3469 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3470 gimple *vec_stmt, slp_tree slp_node)
3472 tree vec_dest;
3473 tree scalar_dest;
3474 tree op0, op1 = NULL_TREE;
3475 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3476 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3477 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3478 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3479 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3480 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3481 tree new_temp;
3482 tree def;
3483 gimple def_stmt;
3484 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3485 gimple new_stmt = NULL;
3486 stmt_vec_info prev_stmt_info;
3487 int nunits_in;
3488 int nunits_out;
3489 tree vectype_out, vectype_in;
3490 int ncopies, i, j;
3491 tree lhs_type, rhs_type;
3492 enum { NARROW, NONE, WIDEN } modifier;
3493 vec<tree> vec_oprnds0 = vNULL;
3494 vec<tree> vec_oprnds1 = vNULL;
3495 tree vop0;
3496 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3497 int multi_step_cvt = 0;
3498 vec<tree> vec_dsts = vNULL;
3499 vec<tree> interm_types = vNULL;
3500 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3501 int op_type;
3502 machine_mode rhs_mode;
3503 unsigned short fltsz;
3505 /* Is STMT a vectorizable conversion? */
3507 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3508 return false;
3510 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3511 return false;
3513 if (!is_gimple_assign (stmt))
3514 return false;
3516 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3517 return false;
3519 code = gimple_assign_rhs_code (stmt);
3520 if (!CONVERT_EXPR_CODE_P (code)
3521 && code != FIX_TRUNC_EXPR
3522 && code != FLOAT_EXPR
3523 && code != WIDEN_MULT_EXPR
3524 && code != WIDEN_LSHIFT_EXPR)
3525 return false;
3527 op_type = TREE_CODE_LENGTH (code);
3529 /* Check types of lhs and rhs. */
3530 scalar_dest = gimple_assign_lhs (stmt);
3531 lhs_type = TREE_TYPE (scalar_dest);
3532 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3534 op0 = gimple_assign_rhs1 (stmt);
3535 rhs_type = TREE_TYPE (op0);
3537 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3538 && !((INTEGRAL_TYPE_P (lhs_type)
3539 && INTEGRAL_TYPE_P (rhs_type))
3540 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3541 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3542 return false;
3544 if ((INTEGRAL_TYPE_P (lhs_type)
3545 && (TYPE_PRECISION (lhs_type)
3546 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3547 || (INTEGRAL_TYPE_P (rhs_type)
3548 && (TYPE_PRECISION (rhs_type)
3549 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3551 if (dump_enabled_p ())
3552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3553 "type conversion to/from bit-precision unsupported."
3554 "\n");
3555 return false;
3558 /* Check the operands of the operation. */
3559 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3560 &def_stmt, &def, &dt[0], &vectype_in))
3562 if (dump_enabled_p ())
3563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3564 "use not simple.\n");
3565 return false;
3567 if (op_type == binary_op)
3569 bool ok;
3571 op1 = gimple_assign_rhs2 (stmt);
3572 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3573 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3574 OP1. */
3575 if (CONSTANT_CLASS_P (op0))
3576 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3577 &def_stmt, &def, &dt[1], &vectype_in);
3578 else
3579 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3580 &def, &dt[1]);
3582 if (!ok)
3584 if (dump_enabled_p ())
3585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3586 "use not simple.\n");
3587 return false;
3591 /* If op0 is an external or constant defs use a vector type of
3592 the same size as the output vector type. */
3593 if (!vectype_in)
3594 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3595 if (vec_stmt)
3596 gcc_assert (vectype_in);
3597 if (!vectype_in)
3599 if (dump_enabled_p ())
3601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3602 "no vectype for scalar type ");
3603 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3604 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3607 return false;
3610 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3611 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3612 if (nunits_in < nunits_out)
3613 modifier = NARROW;
3614 else if (nunits_out == nunits_in)
3615 modifier = NONE;
3616 else
3617 modifier = WIDEN;
3619 /* Multiple types in SLP are handled by creating the appropriate number of
3620 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3621 case of SLP. */
3622 if (slp_node || PURE_SLP_STMT (stmt_info))
3623 ncopies = 1;
3624 else if (modifier == NARROW)
3625 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3626 else
3627 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3629 /* Sanity check: make sure that at least one copy of the vectorized stmt
3630 needs to be generated. */
3631 gcc_assert (ncopies >= 1);
3633 /* Supportable by target? */
3634 switch (modifier)
3636 case NONE:
3637 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3638 return false;
3639 if (supportable_convert_operation (code, vectype_out, vectype_in,
3640 &decl1, &code1))
3641 break;
3642 /* FALLTHRU */
3643 unsupported:
3644 if (dump_enabled_p ())
3645 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3646 "conversion not supported by target.\n");
3647 return false;
3649 case WIDEN:
3650 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3651 &code1, &code2, &multi_step_cvt,
3652 &interm_types))
3654 /* Binary widening operation can only be supported directly by the
3655 architecture. */
3656 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3657 break;
3660 if (code != FLOAT_EXPR
3661 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3662 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3663 goto unsupported;
3665 rhs_mode = TYPE_MODE (rhs_type);
3666 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3667 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3668 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3669 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3671 cvt_type
3672 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3673 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3674 if (cvt_type == NULL_TREE)
3675 goto unsupported;
3677 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3679 if (!supportable_convert_operation (code, vectype_out,
3680 cvt_type, &decl1, &codecvt1))
3681 goto unsupported;
3683 else if (!supportable_widening_operation (code, stmt, vectype_out,
3684 cvt_type, &codecvt1,
3685 &codecvt2, &multi_step_cvt,
3686 &interm_types))
3687 continue;
3688 else
3689 gcc_assert (multi_step_cvt == 0);
3691 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3692 vectype_in, &code1, &code2,
3693 &multi_step_cvt, &interm_types))
3694 break;
3697 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3698 goto unsupported;
3700 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3701 codecvt2 = ERROR_MARK;
3702 else
3704 multi_step_cvt++;
3705 interm_types.safe_push (cvt_type);
3706 cvt_type = NULL_TREE;
3708 break;
3710 case NARROW:
3711 gcc_assert (op_type == unary_op);
3712 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3713 &code1, &multi_step_cvt,
3714 &interm_types))
3715 break;
3717 if (code != FIX_TRUNC_EXPR
3718 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3719 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3720 goto unsupported;
3722 rhs_mode = TYPE_MODE (rhs_type);
3723 cvt_type
3724 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3725 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3726 if (cvt_type == NULL_TREE)
3727 goto unsupported;
3728 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3729 &decl1, &codecvt1))
3730 goto unsupported;
3731 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3732 &code1, &multi_step_cvt,
3733 &interm_types))
3734 break;
3735 goto unsupported;
3737 default:
3738 gcc_unreachable ();
3741 if (!vec_stmt) /* transformation not required. */
3743 if (dump_enabled_p ())
3744 dump_printf_loc (MSG_NOTE, vect_location,
3745 "=== vectorizable_conversion ===\n");
3746 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3748 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3749 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3751 else if (modifier == NARROW)
3753 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3754 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3756 else
3758 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3759 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3761 interm_types.release ();
3762 return true;
3765 /** Transform. **/
3766 if (dump_enabled_p ())
3767 dump_printf_loc (MSG_NOTE, vect_location,
3768 "transform conversion. ncopies = %d.\n", ncopies);
3770 if (op_type == binary_op)
3772 if (CONSTANT_CLASS_P (op0))
3773 op0 = fold_convert (TREE_TYPE (op1), op0);
3774 else if (CONSTANT_CLASS_P (op1))
3775 op1 = fold_convert (TREE_TYPE (op0), op1);
3778 /* In case of multi-step conversion, we first generate conversion operations
3779 to the intermediate types, and then from those types to the final one.
3780 We create vector destinations for the intermediate types (TYPES) received
3781 from supportable_*_operation, and store them in the correct order
3782 for future use in vect_create_vectorized_*_stmts (). */
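  /* For example (sketch, not taken from the sources): when widening a V16QI
     input to a V4SI result through an intermediate V8HI type, VEC_DSTS ends
     up holding the final V4SI destination first and the intermediate V8HI
     destination after it; the transform code below then walks the vector
     from the innermost conversion step outwards.  */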
3783 vec_dsts.create (multi_step_cvt + 1);
3784 vec_dest = vect_create_destination_var (scalar_dest,
3785 (cvt_type && modifier == WIDEN)
3786 ? cvt_type : vectype_out);
3787 vec_dsts.quick_push (vec_dest);
3789 if (multi_step_cvt)
3791 for (i = interm_types.length () - 1;
3792 interm_types.iterate (i, &intermediate_type); i--)
3794 vec_dest = vect_create_destination_var (scalar_dest,
3795 intermediate_type);
3796 vec_dsts.quick_push (vec_dest);
3800 if (cvt_type)
3801 vec_dest = vect_create_destination_var (scalar_dest,
3802 modifier == WIDEN
3803 ? vectype_out : cvt_type);
3805 if (!slp_node)
3807 if (modifier == WIDEN)
3809 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3810 if (op_type == binary_op)
3811 vec_oprnds1.create (1);
3813 else if (modifier == NARROW)
3814 vec_oprnds0.create (
3815 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3817 else if (code == WIDEN_LSHIFT_EXPR)
3818 vec_oprnds1.create (slp_node->vec_stmts_size);
3820 last_oprnd = op0;
3821 prev_stmt_info = NULL;
3822 switch (modifier)
3824 case NONE:
3825 for (j = 0; j < ncopies; j++)
3827 if (j == 0)
3828 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3829 -1);
3830 else
3831 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3833 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3835 /* Arguments are ready, create the new vector stmt. */
3836 if (code1 == CALL_EXPR)
3838 new_stmt = gimple_build_call (decl1, 1, vop0);
3839 new_temp = make_ssa_name (vec_dest, new_stmt);
3840 gimple_call_set_lhs (new_stmt, new_temp);
3842 else
3844 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3845 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3846 new_temp = make_ssa_name (vec_dest, new_stmt);
3847 gimple_assign_set_lhs (new_stmt, new_temp);
3850 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3851 if (slp_node)
3852 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3855 if (j == 0)
3856 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3857 else
3858 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3859 prev_stmt_info = vinfo_for_stmt (new_stmt);
3861 break;
3863 case WIDEN:
3864 /* In case the vectorization factor (VF) is bigger than the number
3865 of elements that we can fit in a vectype (nunits), we have to
3866 generate more than one vector stmt - i.e - we need to "unroll"
3867 the vector stmt by a factor VF/nunits. */
3868 for (j = 0; j < ncopies; j++)
3870 /* Handle uses. */
3871 if (j == 0)
3873 if (slp_node)
3875 if (code == WIDEN_LSHIFT_EXPR)
3877 unsigned int k;
3879 vec_oprnd1 = op1;
3880 /* Store vec_oprnd1 for every vector stmt to be created
3881 for SLP_NODE. We check during the analysis that all
3882 the shift arguments are the same. */
3883 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3884 vec_oprnds1.quick_push (vec_oprnd1);
3886 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3887 slp_node, -1);
3889 else
3890 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3891 &vec_oprnds1, slp_node, -1);
3893 else
3895 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3896 vec_oprnds0.quick_push (vec_oprnd0);
3897 if (op_type == binary_op)
3899 if (code == WIDEN_LSHIFT_EXPR)
3900 vec_oprnd1 = op1;
3901 else
3902 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3903 NULL);
3904 vec_oprnds1.quick_push (vec_oprnd1);
3908 else
3910 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3911 vec_oprnds0.truncate (0);
3912 vec_oprnds0.quick_push (vec_oprnd0);
3913 if (op_type == binary_op)
3915 if (code == WIDEN_LSHIFT_EXPR)
3916 vec_oprnd1 = op1;
3917 else
3918 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3919 vec_oprnd1);
3920 vec_oprnds1.truncate (0);
3921 vec_oprnds1.quick_push (vec_oprnd1);
3925 /* Arguments are ready. Create the new vector stmts. */
3926 for (i = multi_step_cvt; i >= 0; i--)
3928 tree this_dest = vec_dsts[i];
3929 enum tree_code c1 = code1, c2 = code2;
3930 if (i == 0 && codecvt2 != ERROR_MARK)
3932 c1 = codecvt1;
3933 c2 = codecvt2;
3935 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3936 &vec_oprnds1,
3937 stmt, this_dest, gsi,
3938 c1, c2, decl1, decl2,
3939 op_type);
3942 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3944 if (cvt_type)
3946 if (codecvt1 == CALL_EXPR)
3948 new_stmt = gimple_build_call (decl1, 1, vop0);
3949 new_temp = make_ssa_name (vec_dest, new_stmt);
3950 gimple_call_set_lhs (new_stmt, new_temp);
3952 else
3954 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3955 new_temp = make_ssa_name (vec_dest);
3956 new_stmt = gimple_build_assign (new_temp, codecvt1,
3957 vop0);
3960 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3962 else
3963 new_stmt = SSA_NAME_DEF_STMT (vop0);
3965 if (slp_node)
3966 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3967 else
3969 if (!prev_stmt_info)
3970 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3971 else
3972 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3973 prev_stmt_info = vinfo_for_stmt (new_stmt);
3978 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3979 break;
3981 case NARROW:
3982 /* In case the vectorization factor (VF) is bigger than the number
3983 of elements that we can fit in a vectype (nunits), we have to
3984 generate more than one vector stmt - i.e - we need to "unroll"
3985 the vector stmt by a factor VF/nunits. */
3986 for (j = 0; j < ncopies; j++)
3988 /* Handle uses. */
3989 if (slp_node)
3990 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3991 slp_node, -1);
3992 else
3994 vec_oprnds0.truncate (0);
3995 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3996 vect_pow2 (multi_step_cvt) - 1);
3999 /* Arguments are ready. Create the new vector stmts. */
4000 if (cvt_type)
4001 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4003 if (codecvt1 == CALL_EXPR)
4005 new_stmt = gimple_build_call (decl1, 1, vop0);
4006 new_temp = make_ssa_name (vec_dest, new_stmt);
4007 gimple_call_set_lhs (new_stmt, new_temp);
4009 else
4011 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4012 new_temp = make_ssa_name (vec_dest);
4013 new_stmt = gimple_build_assign (new_temp, codecvt1,
4014 vop0);
4017 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4018 vec_oprnds0[i] = new_temp;
4021 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4022 stmt, vec_dsts, gsi,
4023 slp_node, code1,
4024 &prev_stmt_info);
4027 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4028 break;
4031 vec_oprnds0.release ();
4032 vec_oprnds1.release ();
4033 vec_dsts.release ();
4034 interm_types.release ();
4036 return true;
4040 /* Function vectorizable_assignment.
4042 Check if STMT performs an assignment (copy) that can be vectorized.
4043 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4044 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4045 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4047 static bool
4048 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4049 gimple *vec_stmt, slp_tree slp_node)
4051 tree vec_dest;
4052 tree scalar_dest;
4053 tree op;
4054 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4055 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4056 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4057 tree new_temp;
4058 tree def;
4059 gimple def_stmt;
4060 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4061 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4062 int ncopies;
4063 int i, j;
4064 vec<tree> vec_oprnds = vNULL;
4065 tree vop;
4066 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4067 gimple new_stmt = NULL;
4068 stmt_vec_info prev_stmt_info = NULL;
4069 enum tree_code code;
4070 tree vectype_in;
4072 /* Multiple types in SLP are handled by creating the appropriate number of
4073 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4074 case of SLP. */
4075 if (slp_node || PURE_SLP_STMT (stmt_info))
4076 ncopies = 1;
4077 else
4078 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4080 gcc_assert (ncopies >= 1);
4082 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4083 return false;
4085 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4086 return false;
4088 /* Is vectorizable assignment? */
4089 if (!is_gimple_assign (stmt))
4090 return false;
4092 scalar_dest = gimple_assign_lhs (stmt);
4093 if (TREE_CODE (scalar_dest) != SSA_NAME)
4094 return false;
4096 code = gimple_assign_rhs_code (stmt);
4097 if (gimple_assign_single_p (stmt)
4098 || code == PAREN_EXPR
4099 || CONVERT_EXPR_CODE_P (code))
4100 op = gimple_assign_rhs1 (stmt);
4101 else
4102 return false;
4104 if (code == VIEW_CONVERT_EXPR)
4105 op = TREE_OPERAND (op, 0);
4107 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4108 &def_stmt, &def, &dt[0], &vectype_in))
4110 if (dump_enabled_p ())
4111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4112 "use not simple.\n");
4113 return false;
4116 /* We can handle NOP_EXPR conversions that do not change the number
4117 of elements or the vector size. */
4118 if ((CONVERT_EXPR_CODE_P (code)
4119 || code == VIEW_CONVERT_EXPR)
4120 && (!vectype_in
4121 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4122 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4123 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4124 return false;
4126 /* We do not handle bit-precision changes. */
4127 if ((CONVERT_EXPR_CODE_P (code)
4128 || code == VIEW_CONVERT_EXPR)
4129 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4130 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4131 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4132 || ((TYPE_PRECISION (TREE_TYPE (op))
4133 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4134 /* But a conversion that does not change the bit-pattern is ok. */
4135 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4136 > TYPE_PRECISION (TREE_TYPE (op)))
4137 && TYPE_UNSIGNED (TREE_TYPE (op))))
4139 if (dump_enabled_p ())
4140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4141 "type conversion to/from bit-precision "
4142 "unsupported.\n");
4143 return false;
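  /* Example of the exception above (illustrative, not from the sources):
     widening an unsigned bit-field value to a wider integer type is a
     zero-extension, so the bit-pattern of the value is preserved and the
     copy can still be vectorized even though the scalar precisions do not
     match their mode precisions.  */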
4146 if (!vec_stmt) /* transformation not required. */
4148 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4149 if (dump_enabled_p ())
4150 dump_printf_loc (MSG_NOTE, vect_location,
4151 "=== vectorizable_assignment ===\n");
4152 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4153 return true;
4156 /** Transform. **/
4157 if (dump_enabled_p ())
4158 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4160 /* Handle def. */
4161 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4163 /* Handle use. */
4164 for (j = 0; j < ncopies; j++)
4166 /* Handle uses. */
4167 if (j == 0)
4168 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4169 else
4170 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4172 /* Arguments are ready. create the new vector stmt. */
4173 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4175 if (CONVERT_EXPR_CODE_P (code)
4176 || code == VIEW_CONVERT_EXPR)
4177 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4178 new_stmt = gimple_build_assign (vec_dest, vop);
4179 new_temp = make_ssa_name (vec_dest, new_stmt);
4180 gimple_assign_set_lhs (new_stmt, new_temp);
4181 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4182 if (slp_node)
4183 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4186 if (slp_node)
4187 continue;
4189 if (j == 0)
4190 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4191 else
4192 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4194 prev_stmt_info = vinfo_for_stmt (new_stmt);
4197 vec_oprnds.release ();
4198 return true;
4202 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4203 either as shift by a scalar or by a vector. */
4205 bool
4206 vect_supportable_shift (enum tree_code code, tree scalar_type)
4209 machine_mode vec_mode;
4210 optab optab;
4211 int icode;
4212 tree vectype;
4214 vectype = get_vectype_for_scalar_type (scalar_type);
4215 if (!vectype)
4216 return false;
4218 optab = optab_for_tree_code (code, vectype, optab_scalar);
4219 if (!optab
4220 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4222 optab = optab_for_tree_code (code, vectype, optab_vector);
4223 if (!optab
4224 || (optab_handler (optab, TYPE_MODE (vectype))
4225 == CODE_FOR_nothing))
4226 return false;
4229 vec_mode = TYPE_MODE (vectype);
4230 icode = (int) optab_handler (optab, vec_mode);
4231 if (icode == CODE_FOR_nothing)
4232 return false;
4234 return true;
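/* Illustrative note (an assumption about typical targets, not stated in this
   file): a loop like

     for (i = 0; i < n; i++)
       a[i] = b[i] << 3;

   only needs the optab_scalar form, where one shift amount is shared by all
   lanes, whereas

     for (i = 0; i < n; i++)
       a[i] = b[i] << c[i];

   needs the optab_vector form with a per-lane shift amount; this is why
   vect_supportable_shift above accepts a handler from either optab.  */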
4238 /* Function vectorizable_shift.
4240 Check if STMT performs a shift operation that can be vectorized.
4241 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4242 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4243 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4245 static bool
4246 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4247 gimple *vec_stmt, slp_tree slp_node)
4249 tree vec_dest;
4250 tree scalar_dest;
4251 tree op0, op1 = NULL;
4252 tree vec_oprnd1 = NULL_TREE;
4253 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4254 tree vectype;
4255 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4256 enum tree_code code;
4257 machine_mode vec_mode;
4258 tree new_temp;
4259 optab optab;
4260 int icode;
4261 machine_mode optab_op2_mode;
4262 tree def;
4263 gimple def_stmt;
4264 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4265 gimple new_stmt = NULL;
4266 stmt_vec_info prev_stmt_info;
4267 int nunits_in;
4268 int nunits_out;
4269 tree vectype_out;
4270 tree op1_vectype;
4271 int ncopies;
4272 int j, i;
4273 vec<tree> vec_oprnds0 = vNULL;
4274 vec<tree> vec_oprnds1 = vNULL;
4275 tree vop0, vop1;
4276 unsigned int k;
4277 bool scalar_shift_arg = true;
4278 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4279 int vf;
4281 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4282 return false;
4284 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4285 return false;
4287 /* Is STMT a vectorizable binary/unary operation? */
4288 if (!is_gimple_assign (stmt))
4289 return false;
4291 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4292 return false;
4294 code = gimple_assign_rhs_code (stmt);
4296 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4297 || code == RROTATE_EXPR))
4298 return false;
4300 scalar_dest = gimple_assign_lhs (stmt);
4301 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4302 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4303 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4305 if (dump_enabled_p ())
4306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4307 "bit-precision shifts not supported.\n");
4308 return false;
4311 op0 = gimple_assign_rhs1 (stmt);
4312 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4313 &def_stmt, &def, &dt[0], &vectype))
4315 if (dump_enabled_p ())
4316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4317 "use not simple.\n");
4318 return false;
4320 /* If op0 is an external or constant def use a vector type with
4321 the same size as the output vector type. */
4322 if (!vectype)
4323 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4324 if (vec_stmt)
4325 gcc_assert (vectype);
4326 if (!vectype)
4328 if (dump_enabled_p ())
4329 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4330 "no vectype for scalar type\n");
4331 return false;
4334 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4335 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4336 if (nunits_out != nunits_in)
4337 return false;
4339 op1 = gimple_assign_rhs2 (stmt);
4340 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4341 &def, &dt[1], &op1_vectype))
4343 if (dump_enabled_p ())
4344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4345 "use not simple.\n");
4346 return false;
4349 if (loop_vinfo)
4350 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4351 else
4352 vf = 1;
4354 /* Multiple types in SLP are handled by creating the appropriate number of
4355 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4356 case of SLP. */
4357 if (slp_node || PURE_SLP_STMT (stmt_info))
4358 ncopies = 1;
4359 else
4360 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4362 gcc_assert (ncopies >= 1);
4364 /* Determine whether the shift amount is a vector or a scalar. If the
4365 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4367 if (dt[1] == vect_internal_def && !slp_node)
4368 scalar_shift_arg = false;
4369 else if (dt[1] == vect_constant_def
4370 || dt[1] == vect_external_def
4371 || dt[1] == vect_internal_def)
4373 /* In SLP, we need to check whether the shift count is the same for
4374 all statements; in loops, if it is a constant or invariant, it is
4375 always a scalar shift. */
4376 if (slp_node)
4378 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4379 gimple slpstmt;
4381 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4382 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4383 scalar_shift_arg = false;
4386 else
4388 if (dump_enabled_p ())
4389 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4390 "operand mode requires invariant argument.\n");
4391 return false;
4394 /* Vector shifted by vector. */
4395 if (!scalar_shift_arg)
4397 optab = optab_for_tree_code (code, vectype, optab_vector);
4398 if (dump_enabled_p ())
4399 dump_printf_loc (MSG_NOTE, vect_location,
4400 "vector/vector shift/rotate found.\n");
4402 if (!op1_vectype)
4403 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4404 if (op1_vectype == NULL_TREE
4405 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4407 if (dump_enabled_p ())
4408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4409 "unusable type for last operand in"
4410 " vector/vector shift/rotate.\n");
4411 return false;
4414 /* See if the machine has a vector shifted by a scalar insn and if not
4415 then see if it has a vector shifted by a vector insn. */
4416 else
4418 optab = optab_for_tree_code (code, vectype, optab_scalar);
4419 if (optab
4420 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4422 if (dump_enabled_p ())
4423 dump_printf_loc (MSG_NOTE, vect_location,
4424 "vector/scalar shift/rotate found.\n");
4426 else
4428 optab = optab_for_tree_code (code, vectype, optab_vector);
4429 if (optab
4430 && (optab_handler (optab, TYPE_MODE (vectype))
4431 != CODE_FOR_nothing))
4433 scalar_shift_arg = false;
4435 if (dump_enabled_p ())
4436 dump_printf_loc (MSG_NOTE, vect_location,
4437 "vector/vector shift/rotate found.\n");
4439 /* Unlike the other binary operators, shifts/rotates have an rhs
4440 of type int rather than of the same type as the lhs, so make
4441 sure the scalar is of the right type if we are dealing with
4442 vectors of long long/long/short/char. */
4443 if (dt[1] == vect_constant_def)
4444 op1 = fold_convert (TREE_TYPE (vectype), op1);
4445 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4446 TREE_TYPE (op1)))
4448 if (slp_node
4449 && TYPE_MODE (TREE_TYPE (vectype))
4450 != TYPE_MODE (TREE_TYPE (op1)))
4452 if (dump_enabled_p ())
4453 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4454 "unusable type for last operand in"
4455 " vector/vector shift/rotate.\n");
4456 return false;
4458 if (vec_stmt && !slp_node)
4460 op1 = fold_convert (TREE_TYPE (vectype), op1);
4461 op1 = vect_init_vector (stmt, op1,
4462 TREE_TYPE (vectype), NULL);
4469 /* Supportable by target? */
4470 if (!optab)
4472 if (dump_enabled_p ())
4473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4474 "no optab.\n");
4475 return false;
4477 vec_mode = TYPE_MODE (vectype);
4478 icode = (int) optab_handler (optab, vec_mode);
4479 if (icode == CODE_FOR_nothing)
4481 if (dump_enabled_p ())
4482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4483 "op not supported by target.\n");
4484 /* Check only during analysis. */
4485 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4486 || (vf < vect_min_worthwhile_factor (code)
4487 && !vec_stmt))
4488 return false;
4489 if (dump_enabled_p ())
4490 dump_printf_loc (MSG_NOTE, vect_location,
4491 "proceeding using word mode.\n");
4494 /* Worthwhile without SIMD support? Check only during analysis. */
4495 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4496 && vf < vect_min_worthwhile_factor (code)
4497 && !vec_stmt)
4499 if (dump_enabled_p ())
4500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4501 "not worthwhile without SIMD support.\n");
4502 return false;
4505 if (!vec_stmt) /* transformation not required. */
4507 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4508 if (dump_enabled_p ())
4509 dump_printf_loc (MSG_NOTE, vect_location,
4510 "=== vectorizable_shift ===\n");
4511 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4512 return true;
4515 /** Transform. **/
4517 if (dump_enabled_p ())
4518 dump_printf_loc (MSG_NOTE, vect_location,
4519 "transform binary/unary operation.\n");
4521 /* Handle def. */
4522 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4524 prev_stmt_info = NULL;
4525 for (j = 0; j < ncopies; j++)
4527 /* Handle uses. */
4528 if (j == 0)
4530 if (scalar_shift_arg)
4532 /* Vector shl and shr insn patterns can be defined with scalar
4533 operand 2 (shift operand). In this case, use constant or loop
4534 invariant op1 directly, without extending it to vector mode
4535 first. */
4536 optab_op2_mode = insn_data[icode].operand[2].mode;
4537 if (!VECTOR_MODE_P (optab_op2_mode))
4539 if (dump_enabled_p ())
4540 dump_printf_loc (MSG_NOTE, vect_location,
4541 "operand 1 using scalar mode.\n");
4542 vec_oprnd1 = op1;
4543 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4544 vec_oprnds1.quick_push (vec_oprnd1);
4545 if (slp_node)
4547 /* Store vec_oprnd1 for every vector stmt to be created
4548 for SLP_NODE. We check during the analysis that all
4549 the shift arguments are the same.
4550 TODO: Allow different constants for different vector
4551 stmts generated for an SLP instance. */
4552 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4553 vec_oprnds1.quick_push (vec_oprnd1);
4558 /* vec_oprnd1 is available if operand 1 should be of a scalar type
4559 (a special case for certain kinds of vector shifts); otherwise,
4560 operand 1 should be of a vector type (the usual case). */
4561 if (vec_oprnd1)
4562 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4563 slp_node, -1);
4564 else
4565 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4566 slp_node, -1);
4568 else
4569 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4571 /* Arguments are ready. Create the new vector stmt. */
4572 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4574 vop1 = vec_oprnds1[i];
4575 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4576 new_temp = make_ssa_name (vec_dest, new_stmt);
4577 gimple_assign_set_lhs (new_stmt, new_temp);
4578 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4579 if (slp_node)
4580 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4583 if (slp_node)
4584 continue;
4586 if (j == 0)
4587 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4588 else
4589 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4590 prev_stmt_info = vinfo_for_stmt (new_stmt);
4593 vec_oprnds0.release ();
4594 vec_oprnds1.release ();
4596 return true;
4600 /* Function vectorizable_operation.
4602 Check if STMT performs a binary, unary or ternary operation that can
4603 be vectorized.
4604 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4605 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4606 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4608 static bool
4609 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4610 gimple *vec_stmt, slp_tree slp_node)
4612 tree vec_dest;
4613 tree scalar_dest;
4614 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4615 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4616 tree vectype;
4617 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4618 enum tree_code code;
4619 machine_mode vec_mode;
4620 tree new_temp;
4621 int op_type;
4622 optab optab;
4623 int icode;
4624 tree def;
4625 gimple def_stmt;
4626 enum vect_def_type dt[3]
4627 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4628 gimple new_stmt = NULL;
4629 stmt_vec_info prev_stmt_info;
4630 int nunits_in;
4631 int nunits_out;
4632 tree vectype_out;
4633 int ncopies;
4634 int j, i;
4635 vec<tree> vec_oprnds0 = vNULL;
4636 vec<tree> vec_oprnds1 = vNULL;
4637 vec<tree> vec_oprnds2 = vNULL;
4638 tree vop0, vop1, vop2;
4639 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4640 int vf;
4642 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4643 return false;
4645 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4646 return false;
4648 /* Is STMT a vectorizable binary/unary operation? */
4649 if (!is_gimple_assign (stmt))
4650 return false;
4652 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4653 return false;
4655 code = gimple_assign_rhs_code (stmt);
4657 /* For pointer addition, we should use the normal plus for
4658 the vector addition. */
4659 if (code == POINTER_PLUS_EXPR)
4660 code = PLUS_EXPR;
4662 /* Support only unary, binary, or ternary operations. */
4663 op_type = TREE_CODE_LENGTH (code);
4664 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4666 if (dump_enabled_p ())
4667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4668 "num. args = %d (not unary/binary/ternary op).\n",
4669 op_type);
4670 return false;
4673 scalar_dest = gimple_assign_lhs (stmt);
4674 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4676 /* Most operations cannot handle bit-precision types without extra
4677 truncations. */
4678 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4679 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4680 /* Exception are bitwise binary operations. */
4681 && code != BIT_IOR_EXPR
4682 && code != BIT_XOR_EXPR
4683 && code != BIT_AND_EXPR)
4685 if (dump_enabled_p ())
4686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4687 "bit-precision arithmetic not supported.\n");
4688 return false;
4691 op0 = gimple_assign_rhs1 (stmt);
4692 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4693 &def_stmt, &def, &dt[0], &vectype))
4695 if (dump_enabled_p ())
4696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4697 "use not simple.\n");
4698 return false;
4700 /* If op0 is an external or constant def use a vector type with
4701 the same size as the output vector type. */
4702 if (!vectype)
4703 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4704 if (vec_stmt)
4705 gcc_assert (vectype);
4706 if (!vectype)
4708 if (dump_enabled_p ())
4710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4711 "no vectype for scalar type ");
4712 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4713 TREE_TYPE (op0));
4714 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4717 return false;
4720 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4721 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4722 if (nunits_out != nunits_in)
4723 return false;
4725 if (op_type == binary_op || op_type == ternary_op)
4727 op1 = gimple_assign_rhs2 (stmt);
4728 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4729 &def, &dt[1]))
4731 if (dump_enabled_p ())
4732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4733 "use not simple.\n");
4734 return false;
4737 if (op_type == ternary_op)
4739 op2 = gimple_assign_rhs3 (stmt);
4740 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4741 &def, &dt[2]))
4743 if (dump_enabled_p ())
4744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4745 "use not simple.\n");
4746 return false;
4750 if (loop_vinfo)
4751 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4752 else
4753 vf = 1;
4755 /* Multiple types in SLP are handled by creating the appropriate number of
4756 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4757 case of SLP. */
4758 if (slp_node || PURE_SLP_STMT (stmt_info))
4759 ncopies = 1;
4760 else
4761 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4763 gcc_assert (ncopies >= 1);
4765 /* Shifts are handled in vectorizable_shift (). */
4766 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4767 || code == RROTATE_EXPR)
4768 return false;
4770 /* Supportable by target? */
4772 vec_mode = TYPE_MODE (vectype);
4773 if (code == MULT_HIGHPART_EXPR)
4775 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4776 icode = LAST_INSN_CODE;
4777 else
4778 icode = CODE_FOR_nothing;
4780 else
4782 optab = optab_for_tree_code (code, vectype, optab_default);
4783 if (!optab)
4785 if (dump_enabled_p ())
4786 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4787 "no optab.\n");
4788 return false;
4790 icode = (int) optab_handler (optab, vec_mode);
4793 if (icode == CODE_FOR_nothing)
4795 if (dump_enabled_p ())
4796 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4797 "op not supported by target.\n");
4798 /* Check only during analysis. */
4799 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4800 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4801 return false;
4802 if (dump_enabled_p ())
4803 dump_printf_loc (MSG_NOTE, vect_location,
4804 "proceeding using word mode.\n");
4807 /* Worthwhile without SIMD support? Check only during analysis. */
4808 if (!VECTOR_MODE_P (vec_mode)
4809 && !vec_stmt
4810 && vf < vect_min_worthwhile_factor (code))
4812 if (dump_enabled_p ())
4813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4814 "not worthwhile without SIMD support.\n");
4815 return false;
4818 if (!vec_stmt) /* transformation not required. */
4820 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4821 if (dump_enabled_p ())
4822 dump_printf_loc (MSG_NOTE, vect_location,
4823 "=== vectorizable_operation ===\n");
4824 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4825 return true;
4828 /** Transform. **/
4830 if (dump_enabled_p ())
4831 dump_printf_loc (MSG_NOTE, vect_location,
4832 "transform binary/unary operation.\n");
4834 /* Handle def. */
4835 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4837 /* In case the vectorization factor (VF) is bigger than the number
4838 of elements that we can fit in a vectype (nunits), we have to generate
4839 more than one vector stmt - i.e - we need to "unroll" the
4840 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4841 from one copy of the vector stmt to the next, in the field
4842 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4843 stages to find the correct vector defs to be used when vectorizing
4844 stmts that use the defs of the current stmt. The example below
4845 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4846 we need to create 4 vectorized stmts):
4848 before vectorization:
4849 RELATED_STMT VEC_STMT
4850 S1: x = memref - -
4851 S2: z = x + 1 - -
4853 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4854 there):
4855 RELATED_STMT VEC_STMT
4856 VS1_0: vx0 = memref0 VS1_1 -
4857 VS1_1: vx1 = memref1 VS1_2 -
4858 VS1_2: vx2 = memref2 VS1_3 -
4859 VS1_3: vx3 = memref3 - -
4860 S1: x = load - VS1_0
4861 S2: z = x + 1 - -
4863 step2: vectorize stmt S2 (done here):
4864 To vectorize stmt S2 we first need to find the relevant vector
4865 def for the first operand 'x'. This is, as usual, obtained from
4866 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4867 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4868 relevant vector def 'vx0'. Having found 'vx0' we can generate
4869 the vector stmt VS2_0, and as usual, record it in the
4870 STMT_VINFO_VEC_STMT of stmt S2.
4871 When creating the second copy (VS2_1), we obtain the relevant vector
4872 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4873 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4874 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4875 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4876 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4877 chain of stmts and pointers:
4878 RELATED_STMT VEC_STMT
4879 VS1_0: vx0 = memref0 VS1_1 -
4880 VS1_1: vx1 = memref1 VS1_2 -
4881 VS1_2: vx2 = memref2 VS1_3 -
4882 VS1_3: vx3 = memref3 - -
4883 S1: x = load - VS1_0
4884 VS2_0: vz0 = vx0 + v1 VS2_1 -
4885 VS2_1: vz1 = vx1 + v1 VS2_2 -
4886 VS2_2: vz2 = vx2 + v1 VS2_3 -
4887 VS2_3: vz3 = vx3 + v1 - -
4888 S2: z = x + 1 - VS2_0 */
4890 prev_stmt_info = NULL;
4891 for (j = 0; j < ncopies; j++)
4893 /* Handle uses. */
4894 if (j == 0)
4896 if (op_type == binary_op || op_type == ternary_op)
4897 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4898 slp_node, -1);
4899 else
4900 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4901 slp_node, -1);
4902 if (op_type == ternary_op)
4904 vec_oprnds2.create (1);
4905 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4906 stmt,
4907 NULL));
4910 else
4912 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4913 if (op_type == ternary_op)
4915 tree vec_oprnd = vec_oprnds2.pop ();
4916 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4917 vec_oprnd));
4921 /* Arguments are ready. Create the new vector stmt. */
4922 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4924 vop1 = ((op_type == binary_op || op_type == ternary_op)
4925 ? vec_oprnds1[i] : NULL_TREE);
4926 vop2 = ((op_type == ternary_op)
4927 ? vec_oprnds2[i] : NULL_TREE);
4928 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4929 new_temp = make_ssa_name (vec_dest, new_stmt);
4930 gimple_assign_set_lhs (new_stmt, new_temp);
4931 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4932 if (slp_node)
4933 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4936 if (slp_node)
4937 continue;
4939 if (j == 0)
4940 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4941 else
4942 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4943 prev_stmt_info = vinfo_for_stmt (new_stmt);
4946 vec_oprnds0.release ();
4947 vec_oprnds1.release ();
4948 vec_oprnds2.release ();
4950 return true;
4953 /* A helper function to ensure data reference DR's base alignment
4954 for STMT_INFO. */
4956 static void
4957 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4959 if (!dr->aux)
4960 return;
4962 if (((dataref_aux *)dr->aux)->base_misaligned)
4964 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4965 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4967 if (decl_in_symtab_p (base_decl))
4968 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
4969 else
4971 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4972 DECL_USER_ALIGN (base_decl) = 1;
4974 ((dataref_aux *)dr->aux)->base_misaligned = false;
4979 /* Given a vector type VECTYPE, return the VECTOR_CST mask that implements
4980 reversal of the vector elements. If that is impossible to do,
4981 return NULL_TREE. */
4983 static tree
4984 perm_mask_for_reverse (tree vectype)
4986 int i, nunits;
4987 unsigned char *sel;
4989 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4990 sel = XALLOCAVEC (unsigned char, nunits);
4992 for (i = 0; i < nunits; ++i)
4993 sel[i] = nunits - 1 - i;
4995 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4996 return NULL_TREE;
4997 return vect_gen_perm_mask_checked (vectype, sel);
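/* For instance (illustrative only): for a four-element VECTYPE the selector
   built above is { 3, 2, 1, 0 }, so the resulting VEC_PERM_EXPR mask swaps
   the vector end-for-end; the store and load code below use this mask to
   handle negative-step accesses.  */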
5000 /* Function vectorizable_store.
5002 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5003 can be vectorized.
5004 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5005 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5006 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5008 static bool
5009 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5010 slp_tree slp_node)
5012 tree scalar_dest;
5013 tree data_ref;
5014 tree op;
5015 tree vec_oprnd = NULL_TREE;
5016 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5017 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5018 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5019 tree elem_type;
5020 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5021 struct loop *loop = NULL;
5022 machine_mode vec_mode;
5023 tree dummy;
5024 enum dr_alignment_support alignment_support_scheme;
5025 tree def;
5026 gimple def_stmt;
5027 enum vect_def_type dt;
5028 stmt_vec_info prev_stmt_info = NULL;
5029 tree dataref_ptr = NULL_TREE;
5030 tree dataref_offset = NULL_TREE;
5031 gimple ptr_incr = NULL;
5032 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5033 int ncopies;
5034 int j;
5035 gimple next_stmt, first_stmt = NULL;
5036 bool grouped_store = false;
5037 bool store_lanes_p = false;
5038 unsigned int group_size, i;
5039 vec<tree> dr_chain = vNULL;
5040 vec<tree> oprnds = vNULL;
5041 vec<tree> result_chain = vNULL;
5042 bool inv_p;
5043 bool negative = false;
5044 tree offset = NULL_TREE;
5045 vec<tree> vec_oprnds = vNULL;
5046 bool slp = (slp_node != NULL);
5047 unsigned int vec_num;
5048 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5049 tree aggr_type;
5051 if (loop_vinfo)
5052 loop = LOOP_VINFO_LOOP (loop_vinfo);
5054 /* Multiple types in SLP are handled by creating the appropriate number of
5055 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5056 case of SLP. */
5057 if (slp || PURE_SLP_STMT (stmt_info))
5058 ncopies = 1;
5059 else
5060 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5062 gcc_assert (ncopies >= 1);
5064 /* FORNOW. This restriction should be relaxed. */
5065 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5067 if (dump_enabled_p ())
5068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5069 "multiple types in nested loop.\n");
5070 return false;
5073 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5074 return false;
5076 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5077 return false;
5079 /* Is vectorizable store? */
5081 if (!is_gimple_assign (stmt))
5082 return false;
5084 scalar_dest = gimple_assign_lhs (stmt);
5085 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5086 && is_pattern_stmt_p (stmt_info))
5087 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5088 if (TREE_CODE (scalar_dest) != ARRAY_REF
5089 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5090 && TREE_CODE (scalar_dest) != INDIRECT_REF
5091 && TREE_CODE (scalar_dest) != COMPONENT_REF
5092 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5093 && TREE_CODE (scalar_dest) != REALPART_EXPR
5094 && TREE_CODE (scalar_dest) != MEM_REF)
5095 return false;
5097 gcc_assert (gimple_assign_single_p (stmt));
5098 op = gimple_assign_rhs1 (stmt);
5099 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5100 &def, &dt))
5102 if (dump_enabled_p ())
5103 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5104 "use not simple.\n");
5105 return false;
5108 elem_type = TREE_TYPE (vectype);
5109 vec_mode = TYPE_MODE (vectype);
5111 /* FORNOW. In some cases can vectorize even if data-type not supported
5112 (e.g. - array initialization with 0). */
5113 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5114 return false;
5116 if (!STMT_VINFO_DATA_REF (stmt_info))
5117 return false;
5119 if (!STMT_VINFO_STRIDED_P (stmt_info))
5121 negative =
5122 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5123 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5124 size_zero_node) < 0;
5125 if (negative && ncopies > 1)
5127 if (dump_enabled_p ())
5128 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5129 "multiple types with negative step.\n");
5130 return false;
5132 if (negative)
5134 gcc_assert (!grouped_store);
5135 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5136 if (alignment_support_scheme != dr_aligned
5137 && alignment_support_scheme != dr_unaligned_supported)
5139 if (dump_enabled_p ())
5140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5141 "negative step but alignment required.\n");
5142 return false;
5144 if (dt != vect_constant_def
5145 && dt != vect_external_def
5146 && !perm_mask_for_reverse (vectype))
5148 if (dump_enabled_p ())
5149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5150 "negative step and reversing not supported.\n");
5151 return false;
5156 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5158 grouped_store = true;
5159 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5160 if (!slp && !PURE_SLP_STMT (stmt_info))
5162 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5163 if (vect_store_lanes_supported (vectype, group_size))
5164 store_lanes_p = true;
5165 else if (!vect_grouped_store_supported (vectype, group_size))
5166 return false;
5169 if (first_stmt == stmt)
5171 /* STMT is the leader of the group. Check the operands of all the
5172 stmts of the group. */
5173 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5174 while (next_stmt)
5176 gcc_assert (gimple_assign_single_p (next_stmt));
5177 op = gimple_assign_rhs1 (next_stmt);
5178 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5179 &def_stmt, &def, &dt))
5181 if (dump_enabled_p ())
5182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5183 "use not simple.\n");
5184 return false;
5186 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5191 if (!vec_stmt) /* transformation not required. */
5193 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5194 /* The SLP costs are calculated during SLP analysis. */
5195 if (!PURE_SLP_STMT (stmt_info))
5196 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5197 NULL, NULL, NULL);
5198 return true;
5201 /** Transform. **/
5203 ensure_base_align (stmt_info, dr);
5205 if (grouped_store)
5207 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5208 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5210 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5212 /* FORNOW */
5213 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5215 /* We vectorize all the stmts of the interleaving group when we
5216 reach the last stmt in the group. */
5217 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5218 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5219 && !slp)
5221 *vec_stmt = NULL;
5222 return true;
5225 if (slp)
5227 grouped_store = false;
5228 /* VEC_NUM is the number of vect stmts to be created for this
5229 group. */
5230 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5231 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5232 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5233 op = gimple_assign_rhs1 (first_stmt);
5235 else
5236 /* VEC_NUM is the number of vect stmts to be created for this
5237 group. */
5238 vec_num = group_size;
5240 else
5242 first_stmt = stmt;
5243 first_dr = dr;
5244 group_size = vec_num = 1;
5247 if (dump_enabled_p ())
5248 dump_printf_loc (MSG_NOTE, vect_location,
5249 "transform store. ncopies = %d\n", ncopies);
5251 if (STMT_VINFO_STRIDED_P (stmt_info))
5253 gimple_stmt_iterator incr_gsi;
5254 bool insert_after;
5255 gimple incr;
5256 tree offvar;
5257 tree ivstep;
5258 tree running_off;
5259 gimple_seq stmts = NULL;
5260 tree stride_base, stride_step, alias_off;
5261 tree vec_oprnd;
5263 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5265 stride_base
5266 = fold_build_pointer_plus
5267 (unshare_expr (DR_BASE_ADDRESS (dr)),
5268 size_binop (PLUS_EXPR,
5269 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
5270 convert_to_ptrofftype (DR_INIT (dr))));
5271 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
5273 /* For a store with a loop-invariant (but non-power-of-2) stride
5274 (i.e. not a grouped access) like so:
5276 for (i = 0; i < n; i += stride)
5277 array[i] = ...;
5279 we generate a new induction variable and new stores from
5280 the components of the (vectorized) rhs:
5282 for (j = 0; ; j += VF*stride)
5283 vectemp = ...;
5284 tmp1 = vectemp[0];
5285 array[j] = tmp1;
5286 tmp2 = vectemp[1];
5287 array[j + stride] = tmp2;
5291 ivstep = stride_step;
5292 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5293 build_int_cst (TREE_TYPE (ivstep),
5294 ncopies * nunits));
5296 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5298 create_iv (stride_base, ivstep, NULL,
5299 loop, &incr_gsi, insert_after,
5300 &offvar, NULL);
5301 incr = gsi_stmt (incr_gsi);
5302 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
5304 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5305 if (stmts)
5306 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5308 prev_stmt_info = NULL;
5309 running_off = offvar;
5310 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
5311 for (j = 0; j < ncopies; j++)
5313 /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
5314 and first_stmt == stmt. */
5315 if (j == 0)
5316 vec_oprnd = vect_get_vec_def_for_operand (op, first_stmt, NULL);
5317 else
5318 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5320 for (i = 0; i < nunits; i++)
5322 tree newref, newoff;
5323 gimple incr, assign;
5324 tree size = TYPE_SIZE (elem_type);
5325 /* Extract the i'th component. */
5326 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (i),
5327 size);
5328 tree elem = fold_build3 (BIT_FIELD_REF, elem_type, vec_oprnd,
5329 size, pos);
5331 elem = force_gimple_operand_gsi (gsi, elem, true,
5332 NULL_TREE, true,
5333 GSI_SAME_STMT);
5335 newref = build2 (MEM_REF, TREE_TYPE (vectype),
5336 running_off, alias_off);
5338 /* And store it to *running_off. */
5339 assign = gimple_build_assign (newref, elem);
5340 vect_finish_stmt_generation (stmt, assign, gsi);
5342 newoff = copy_ssa_name (running_off, NULL);
5343 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5344 running_off, stride_step);
5345 vect_finish_stmt_generation (stmt, incr, gsi);
5347 running_off = newoff;
5348 if (j == 0 && i == 0)
5349 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = assign;
5350 else
5351 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5352 prev_stmt_info = vinfo_for_stmt (assign);
5355 return true;
5358 dr_chain.create (group_size);
5359 oprnds.create (group_size);
5361 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5362 gcc_assert (alignment_support_scheme);
5363 /* Targets with store-lane instructions must not require explicit
5364 realignment. */
5365 gcc_assert (!store_lanes_p
5366 || alignment_support_scheme == dr_aligned
5367 || alignment_support_scheme == dr_unaligned_supported);
5369 if (negative)
5370 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5372 if (store_lanes_p)
5373 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5374 else
5375 aggr_type = vectype;
5377 /* In case the vectorization factor (VF) is bigger than the number
5378 of elements that we can fit in a vectype (nunits), we have to generate
5379 more than one vector stmt - i.e - we need to "unroll" the
5380 vector stmt by a factor VF/nunits. For more details see documentation in
5381 vect_get_vec_def_for_copy_stmt. */
5383 /* In case of interleaving (non-unit grouped access):
5385 S1: &base + 2 = x2
5386 S2: &base = x0
5387 S3: &base + 1 = x1
5388 S4: &base + 3 = x3
5390 We create vectorized stores starting from base address (the access of the
5391 first stmt in the chain (S2 in the above example), when the last store stmt
5392 of the chain (S4) is reached:
5394 VS1: &base = vx2
5395 VS2: &base + vec_size*1 = vx0
5396 VS3: &base + vec_size*2 = vx1
5397 VS4: &base + vec_size*3 = vx3
5399 Then permutation statements are generated:
5401 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5402 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5405 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5406 (the order of the data-refs in the output of vect_permute_store_chain
5407 corresponds to the order of scalar stmts in the interleaving chain - see
5408 the documentation of vect_permute_store_chain()).
5410 In case of both multiple types and interleaving, above vector stores and
5411 permutation stmts are created for every copy. The result vector stmts are
5412 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5413 STMT_VINFO_RELATED_STMT for the next copies.
5416 prev_stmt_info = NULL;
5417 for (j = 0; j < ncopies; j++)
5419 gimple new_stmt;
5421 if (j == 0)
5423 if (slp)
5425 /* Get vectorized arguments for SLP_NODE. */
5426 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5427 NULL, slp_node, -1);
5429 vec_oprnd = vec_oprnds[0];
5431 else
5433 /* For interleaved stores we collect vectorized defs for all the
5434 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5435 used as an input to vect_permute_store_chain(), and OPRNDS as
5436 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5438 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5439 OPRNDS are of size 1. */
5440 next_stmt = first_stmt;
5441 for (i = 0; i < group_size; i++)
5443 /* Since gaps are not supported for interleaved stores,
5444 GROUP_SIZE is the exact number of stmts in the chain.
5445 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5446 there is no interleaving, GROUP_SIZE is 1, and only one
5447 iteration of the loop will be executed. */
5448 gcc_assert (next_stmt
5449 && gimple_assign_single_p (next_stmt));
5450 op = gimple_assign_rhs1 (next_stmt);
5452 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5453 NULL);
5454 dr_chain.quick_push (vec_oprnd);
5455 oprnds.quick_push (vec_oprnd);
5456 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5460 /* We should have caught mismatched types earlier. */
5461 gcc_assert (useless_type_conversion_p (vectype,
5462 TREE_TYPE (vec_oprnd)));
5463 bool simd_lane_access_p
5464 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5465 if (simd_lane_access_p
5466 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5467 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5468 && integer_zerop (DR_OFFSET (first_dr))
5469 && integer_zerop (DR_INIT (first_dr))
5470 && alias_sets_conflict_p (get_alias_set (aggr_type),
5471 get_alias_set (DR_REF (first_dr))))
5473 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5474 dataref_offset = build_int_cst (reference_alias_ptr_type
5475 (DR_REF (first_dr)), 0);
5476 inv_p = false;
5478 else
5479 dataref_ptr
5480 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5481 simd_lane_access_p ? loop : NULL,
5482 offset, &dummy, gsi, &ptr_incr,
5483 simd_lane_access_p, &inv_p);
5484 gcc_assert (bb_vinfo || !inv_p);
5486 else
5488 /* For interleaved stores we created vectorized defs for all the
5489 defs stored in OPRNDS in the previous iteration (previous copy).
5490 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5491 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5492 next copy.
5493 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5494 OPRNDS are of size 1. */
5495 for (i = 0; i < group_size; i++)
5497 op = oprnds[i];
5498 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5499 &def, &dt);
5500 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5501 dr_chain[i] = vec_oprnd;
5502 oprnds[i] = vec_oprnd;
5504 if (dataref_offset)
5505 dataref_offset
5506 = int_const_binop (PLUS_EXPR, dataref_offset,
5507 TYPE_SIZE_UNIT (aggr_type));
5508 else
5509 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5510 TYPE_SIZE_UNIT (aggr_type));
5513 if (store_lanes_p)
5515 tree vec_array;
5517 /* Combine all the vectors into an array. */
5518 vec_array = create_vector_array (vectype, vec_num);
5519 for (i = 0; i < vec_num; i++)
5521 vec_oprnd = dr_chain[i];
5522 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5525 /* Emit:
5526 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5527 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5528 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5529 gimple_call_set_lhs (new_stmt, data_ref);
5530 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5532 else
5534 new_stmt = NULL;
5535 if (grouped_store)
5537 if (j == 0)
5538 result_chain.create (group_size);
5539 /* Permute. */
5540 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5541 &result_chain);
5544 next_stmt = first_stmt;
5545 for (i = 0; i < vec_num; i++)
5547 unsigned align, misalign;
5549 if (i > 0)
5550 /* Bump the vector pointer. */
5551 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5552 stmt, NULL_TREE);
5554 if (slp)
5555 vec_oprnd = vec_oprnds[i];
5556 else if (grouped_store)
5557 /* For grouped stores vectorized defs are interleaved in
5558 vect_permute_store_chain(). */
5559 vec_oprnd = result_chain[i];
5561 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5562 dataref_offset
5563 ? dataref_offset
5564 : build_int_cst (reference_alias_ptr_type
5565 (DR_REF (first_dr)), 0));
5566 align = TYPE_ALIGN_UNIT (vectype);
5567 if (aligned_access_p (first_dr))
5568 misalign = 0;
5569 else if (DR_MISALIGNMENT (first_dr) == -1)
5571 TREE_TYPE (data_ref)
5572 = build_aligned_type (TREE_TYPE (data_ref),
5573 TYPE_ALIGN (elem_type));
5574 align = TYPE_ALIGN_UNIT (elem_type);
5575 misalign = 0;
5577 else
5579 TREE_TYPE (data_ref)
5580 = build_aligned_type (TREE_TYPE (data_ref),
5581 TYPE_ALIGN (elem_type));
5582 misalign = DR_MISALIGNMENT (first_dr);
5584 if (dataref_offset == NULL_TREE)
5585 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5586 misalign);
5588 if (negative
5589 && dt != vect_constant_def
5590 && dt != vect_external_def)
5592 tree perm_mask = perm_mask_for_reverse (vectype);
5593 tree perm_dest
5594 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5595 vectype);
5596 tree new_temp = make_ssa_name (perm_dest);
5598 /* Generate the permute statement. */
5599 gimple perm_stmt
5600 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5601 vec_oprnd, perm_mask);
5602 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5604 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5605 vec_oprnd = new_temp;
5608 /* Arguments are ready. Create the new vector stmt. */
5609 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5610 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5612 if (slp)
5613 continue;
5615 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5616 if (!next_stmt)
5617 break;
5620 if (!slp)
5622 if (j == 0)
5623 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5624 else
5625 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5626 prev_stmt_info = vinfo_for_stmt (new_stmt);
5630 dr_chain.release ();
5631 oprnds.release ();
5632 result_chain.release ();
5633 vec_oprnds.release ();
5635 return true;
5638 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5639 VECTOR_CST mask. No checks are made that the target platform supports the
5640 mask, so callers may wish to test can_vec_perm_p separately, or use
5641 vect_gen_perm_mask_checked. */
5643 tree
5644 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5646 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5647 int i, nunits;
5649 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5651 mask_elt_type = lang_hooks.types.type_for_mode
5652 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5653 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5655 mask_elts = XALLOCAVEC (tree, nunits);
5656 for (i = nunits - 1; i >= 0; i--)
5657 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5658 mask_vec = build_vector (mask_type, mask_elts);
5660 return mask_vec;
5663 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5664 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5666 tree
5667 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5669 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5670 return vect_gen_perm_mask_any (vectype, sel);
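/* Example (a sketch under the assumption of a V4SI input, not from the
   sources): vect_gen_perm_mask_any (V4SI, { 1, 0, 3, 2 }) builds the integer
   VECTOR_CST { 1, 0, 3, 2 } whose element type has the same width as the
   V4SI elements; vect_gen_perm_mask_checked additionally asserts that the
   target can actually perform that permutation.  */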
5673 /* Given vector variables X and Y that were generated for the scalar
5674 STMT, generate instructions to permute the vector elements of X and Y
5675 using permutation mask MASK_VEC, insert them at *GSI and return the
5676 permuted vector variable. */
5678 static tree
5679 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5680 gimple_stmt_iterator *gsi)
5682 tree vectype = TREE_TYPE (x);
5683 tree perm_dest, data_ref;
5684 gimple perm_stmt;
5686 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5687 data_ref = make_ssa_name (perm_dest);
5689 /* Generate the permute statement. */
5690 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
5691 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5693 return data_ref;
5696 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5697 inserting them on the loop's preheader edge. Returns true if we
5698 were successful in doing so (and thus STMT can then be moved),
5699 otherwise returns false. */
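/* For example (illustrative SSA names only): if STMT is the load
     b_3 = MEM[a_1];
   and a_1 is defined inside LOOP by
     a_1 = x_2 + 4;
   with x_2 defined outside the loop, the definition of a_1 is moved to the
   preheader so the caller can hoist the load itself.  The function gives up
   if a use is defined by a PHI or if the defining statements have their own
   in-loop dependencies.  */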
5701 static bool
5702 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5704 ssa_op_iter i;
5705 tree op;
5706 bool any = false;
5708 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5710 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5711 if (!gimple_nop_p (def_stmt)
5712 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5714 /* Make sure we don't need to recurse. While we could do
5715 so in simple cases when there are more complex use webs
5716 we don't have an easy way to preserve stmt order to fulfil
5717 dependencies within them. */
5718 tree op2;
5719 ssa_op_iter i2;
5720 if (gimple_code (def_stmt) == GIMPLE_PHI)
5721 return false;
5722 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5724 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5725 if (!gimple_nop_p (def_stmt2)
5726 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5727 return false;
5729 any = true;
5733 if (!any)
5734 return true;
5736 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5738 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5739 if (!gimple_nop_p (def_stmt)
5740 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5742 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5743 gsi_remove (&gsi, false);
5744 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5748 return true;
5751 /* vectorizable_load.
5753 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5754 can be vectorized.
5755 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5756 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5757 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5759 static bool
5760 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5761 slp_tree slp_node, slp_instance slp_node_instance)
5763 tree scalar_dest;
5764 tree vec_dest = NULL;
5765 tree data_ref = NULL;
5766 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5767 stmt_vec_info prev_stmt_info;
5768 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5769 struct loop *loop = NULL;
5770 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5771 bool nested_in_vect_loop = false;
5772 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5773 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5774 tree elem_type;
5775 tree new_temp;
5776 machine_mode mode;
5777 gimple new_stmt = NULL;
5778 tree dummy;
5779 enum dr_alignment_support alignment_support_scheme;
5780 tree dataref_ptr = NULL_TREE;
5781 tree dataref_offset = NULL_TREE;
5782 gimple ptr_incr = NULL;
5783 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5784 int ncopies;
5785 int i, j, group_size = -1, group_gap;
5786 tree msq = NULL_TREE, lsq;
5787 tree offset = NULL_TREE;
5788 tree byte_offset = NULL_TREE;
5789 tree realignment_token = NULL_TREE;
5790 gphi *phi = NULL;
5791 vec<tree> dr_chain = vNULL;
5792 bool grouped_load = false;
5793 bool load_lanes_p = false;
5794 gimple first_stmt;
5795 bool inv_p;
5796 bool negative = false;
5797 bool compute_in_loop = false;
5798 struct loop *at_loop;
5799 int vec_num;
5800 bool slp = (slp_node != NULL);
5801 bool slp_perm = false;
5802 enum tree_code code;
5803 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5804 int vf;
5805 tree aggr_type;
5806 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5807 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5808 int gather_scale = 1;
5809 enum vect_def_type gather_dt = vect_unknown_def_type;
5811 if (loop_vinfo)
5813 loop = LOOP_VINFO_LOOP (loop_vinfo);
5814 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5815 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5817 else
5818 vf = 1;
5820 /* Multiple types in SLP are handled by creating the appropriate number of
5821 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5822 case of SLP. */
5823 if (slp || PURE_SLP_STMT (stmt_info))
5824 ncopies = 1;
5825 else
5826 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5828 gcc_assert (ncopies >= 1);
5830 /* FORNOW. This restriction should be relaxed. */
5831 if (nested_in_vect_loop && ncopies > 1)
5833 if (dump_enabled_p ())
5834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5835 "multiple types in nested loop.\n");
5836 return false;
5839 /* Invalidate assumptions made by dependence analysis when vectorization
5840 on the unrolled body effectively re-orders stmts. */
5841 if (ncopies > 1
5842 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5843 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5844 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5846 if (dump_enabled_p ())
5847 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5848 "cannot perform implicit CSE when unrolling "
5849 "with negative dependence distance\n");
5850 return false;
5853 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5854 return false;
5856 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5857 return false;
5859 /* Is vectorizable load? */
5860 if (!is_gimple_assign (stmt))
5861 return false;
5863 scalar_dest = gimple_assign_lhs (stmt);
5864 if (TREE_CODE (scalar_dest) != SSA_NAME)
5865 return false;
5867 code = gimple_assign_rhs_code (stmt);
5868 if (code != ARRAY_REF
5869 && code != BIT_FIELD_REF
5870 && code != INDIRECT_REF
5871 && code != COMPONENT_REF
5872 && code != IMAGPART_EXPR
5873 && code != REALPART_EXPR
5874 && code != MEM_REF
5875 && TREE_CODE_CLASS (code) != tcc_declaration)
5876 return false;
5878 if (!STMT_VINFO_DATA_REF (stmt_info))
5879 return false;
5881 elem_type = TREE_TYPE (vectype);
5882 mode = TYPE_MODE (vectype);
5884 /* FORNOW. In some cases can vectorize even if data-type not supported
5885 (e.g. - data copies). */
5886 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5888 if (dump_enabled_p ())
5889 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5890 "Aligned load, but unsupported type.\n");
5891 return false;
5894 /* Check if the load is a part of an interleaving chain. */
5895 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5897 grouped_load = true;
5898 /* FORNOW */
5899 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5901 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5903 /* If this is single-element interleaving with an element distance
5904 that leaves unused vector loads around, punt: we would at least create
5905 very sub-optimal code in that case (and blow up memory,
5906 see PR65518). */
5907 if (first_stmt == stmt
5908 && !GROUP_NEXT_ELEMENT (stmt_info)
5909 && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
5911 if (dump_enabled_p ())
5912 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5913 "single-element interleaving not supported "
5914 "for not adjacent vector loads\n");
5915 return false;
5918 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5919 if (!slp
5920 && !PURE_SLP_STMT (stmt_info)
5921 && !STMT_VINFO_STRIDED_P (stmt_info))
5923 if (vect_load_lanes_supported (vectype, group_size))
5924 load_lanes_p = true;
5925 else if (!vect_grouped_load_supported (vectype, group_size))
5926 return false;
5929 /* Invalidate assumptions made by dependence analysis when vectorization
5930 on the unrolled body effectively re-orders stmts. */
5931 if (!PURE_SLP_STMT (stmt_info)
5932 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5933 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5934 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5936 if (dump_enabled_p ())
5937 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5938 "cannot perform implicit CSE when performing "
5939 "group loads with negative dependence distance\n");
5940 return false;
5943 /* Similarly when the stmt is a load that is both part of a SLP
5944 instance and a loop vectorized stmt via the same-dr mechanism
5945 we have to give up. */
5946 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
5947 && (STMT_SLP_TYPE (stmt_info)
5948 != STMT_SLP_TYPE (vinfo_for_stmt
5949 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
5951 if (dump_enabled_p ())
5952 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5953 "conflicting SLP types for CSEd load\n");
5954 return false;
5959 if (STMT_VINFO_GATHER_P (stmt_info))
5961 gimple def_stmt;
5962 tree def;
5963 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5964 &gather_off, &gather_scale);
5965 gcc_assert (gather_decl);
5966 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5967 &def_stmt, &def, &gather_dt,
5968 &gather_off_vectype))
5970 if (dump_enabled_p ())
5971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5972 "gather index use not simple.\n");
5973 return false;
5976 else if (STMT_VINFO_STRIDED_P (stmt_info))
5978 if ((grouped_load
5979 && (slp || PURE_SLP_STMT (stmt_info)))
5980 && (group_size > nunits
5981 || nunits % group_size != 0
5982 /* ??? During analysis phase we are not called with the
5983 slp node/instance we are in so whether we'll end up
5984 with a permutation we don't know. Still we don't
5985 support load permutations. */
5986 || slp_perm))
5988 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5989 "unhandled strided group load\n");
5990 return false;
5993 else
5995 negative = tree_int_cst_compare (nested_in_vect_loop
5996 ? STMT_VINFO_DR_STEP (stmt_info)
5997 : DR_STEP (dr),
5998 size_zero_node) < 0;
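/* A negative DR step means the scalar accesses walk backwards through
   memory.  Such loads are handled further below by biasing the data-ref
   pointer and reversing each loaded vector with a VEC_PERM_EXPR (see the
   perm_mask_for_reverse checks).  */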
5999 if (negative && ncopies > 1)
6001 if (dump_enabled_p ())
6002 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6003 "multiple types with negative step.\n");
6004 return false;
6007 if (negative)
6009 if (grouped_load)
6011 if (dump_enabled_p ())
6012 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6013 "negative step for group load not supported"
6014 "\n");
6015 return false;
6017 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6018 if (alignment_support_scheme != dr_aligned
6019 && alignment_support_scheme != dr_unaligned_supported)
6021 if (dump_enabled_p ())
6022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6023 "negative step but alignment required.\n");
6024 return false;
6026 if (!perm_mask_for_reverse (vectype))
6028 if (dump_enabled_p ())
6029 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6030 "negative step and reversing not supported."
6031 "\n");
6032 return false;
6037 if (!vec_stmt) /* transformation not required. */
6039 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6040 /* The SLP costs are calculated during SLP analysis. */
6041 if (!PURE_SLP_STMT (stmt_info))
6042 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6043 NULL, NULL, NULL);
6044 return true;
6047 if (dump_enabled_p ())
6048 dump_printf_loc (MSG_NOTE, vect_location,
6049 "transform load. ncopies = %d\n", ncopies);
6051 /** Transform. **/
6053 ensure_base_align (stmt_info, dr);
6055 if (STMT_VINFO_GATHER_P (stmt_info))
6057 tree vec_oprnd0 = NULL_TREE, op;
6058 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6059 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6060 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6061 edge pe = loop_preheader_edge (loop);
6062 gimple_seq seq;
6063 basic_block new_bb;
6064 enum { NARROW, NONE, WIDEN } modifier;
6065 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6067 if (nunits == gather_off_nunits)
6068 modifier = NONE;
6069 else if (nunits == gather_off_nunits / 2)
6071 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6072 modifier = WIDEN;
6074 for (i = 0; i < gather_off_nunits; ++i)
6075 sel[i] = i | nunits;
6077 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6079 else if (nunits == gather_off_nunits * 2)
6081 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6082 modifier = NARROW;
6084 for (i = 0; i < nunits; ++i)
6085 sel[i] = i < gather_off_nunits
6086 ? i : i + nunits - gather_off_nunits;
6088 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6089 ncopies *= 2;
6091 else
6092 gcc_unreachable ();
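/* Example masks (illustrative element counts only): with nunits == 4 and
   gather_off_nunits == 8 the WIDEN mask built above is
     { 4, 5, 6, 7, 4, 5, 6, 7 }
   which moves the upper half of the offset vector into place for the
   second gather; with nunits == 8 and gather_off_nunits == 4 the NARROW
   mask is
     { 0, 1, 2, 3, 8, 9, 10, 11 }
   which concatenates the used halves of two gather results into one
   vector of VECTYPE.  */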
6094 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6095 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6096 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6097 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6098 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6099 scaletype = TREE_VALUE (arglist);
6100 gcc_checking_assert (types_compatible_p (srctype, rettype));
6102 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6104 ptr = fold_convert (ptrtype, gather_base);
6105 if (!is_gimple_min_invariant (ptr))
6107 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6108 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6109 gcc_assert (!new_bb);
6112 /* Currently we support only unconditional gather loads,
6113 so mask should be all ones. */
6114 if (TREE_CODE (masktype) == INTEGER_TYPE)
6115 mask = build_int_cst (masktype, -1);
6116 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6118 mask = build_int_cst (TREE_TYPE (masktype), -1);
6119 mask = build_vector_from_val (masktype, mask);
6120 mask = vect_init_vector (stmt, mask, masktype, NULL);
6122 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6124 REAL_VALUE_TYPE r;
6125 long tmp[6];
6126 for (j = 0; j < 6; ++j)
6127 tmp[j] = -1;
6128 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6129 mask = build_real (TREE_TYPE (masktype), r);
6130 mask = build_vector_from_val (masktype, mask);
6131 mask = vect_init_vector (stmt, mask, masktype, NULL);
6133 else
6134 gcc_unreachable ();
6136 scale = build_int_cst (scaletype, gather_scale);
6138 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6139 merge = build_int_cst (TREE_TYPE (rettype), 0);
6140 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6142 REAL_VALUE_TYPE r;
6143 long tmp[6];
6144 for (j = 0; j < 6; ++j)
6145 tmp[j] = 0;
6146 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6147 merge = build_real (TREE_TYPE (rettype), r);
6149 else
6150 gcc_unreachable ();
6151 merge = build_vector_from_val (rettype, merge);
6152 merge = vect_init_vector (stmt, merge, rettype, NULL);
6154 prev_stmt_info = NULL;
6155 for (j = 0; j < ncopies; ++j)
6157 if (modifier == WIDEN && (j & 1))
6158 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6159 perm_mask, stmt, gsi);
6160 else if (j == 0)
6161 op = vec_oprnd0
6162 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
6163 else
6164 op = vec_oprnd0
6165 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6167 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6169 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6170 == TYPE_VECTOR_SUBPARTS (idxtype));
6171 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
6172 var = make_ssa_name (var);
6173 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6174 new_stmt
6175 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6176 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6177 op = var;
6180 new_stmt
6181 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6183 if (!useless_type_conversion_p (vectype, rettype))
6185 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6186 == TYPE_VECTOR_SUBPARTS (rettype));
6187 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
6188 op = make_ssa_name (var, new_stmt);
6189 gimple_call_set_lhs (new_stmt, op);
6190 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6191 var = make_ssa_name (vec_dest);
6192 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6193 new_stmt
6194 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6196 else
6198 var = make_ssa_name (vec_dest, new_stmt);
6199 gimple_call_set_lhs (new_stmt, var);
6202 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6204 if (modifier == NARROW)
6206 if ((j & 1) == 0)
6208 prev_res = var;
6209 continue;
6211 var = permute_vec_elements (prev_res, var,
6212 perm_mask, stmt, gsi);
6213 new_stmt = SSA_NAME_DEF_STMT (var);
6216 if (prev_stmt_info == NULL)
6217 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6218 else
6219 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6220 prev_stmt_info = vinfo_for_stmt (new_stmt);
6222 return true;
6224 else if (STMT_VINFO_STRIDED_P (stmt_info))
6226 gimple_stmt_iterator incr_gsi;
6227 bool insert_after;
6228 gimple incr;
6229 tree offvar;
6230 tree ivstep;
6231 tree running_off;
6232 vec<constructor_elt, va_gc> *v = NULL;
6233 gimple_seq stmts = NULL;
6234 tree stride_base, stride_step, alias_off;
6236 gcc_assert (!nested_in_vect_loop);
6238 stride_base
6239 = fold_build_pointer_plus
6240 (unshare_expr (DR_BASE_ADDRESS (dr)),
6241 size_binop (PLUS_EXPR,
6242 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
6243 convert_to_ptrofftype (DR_INIT (dr))));
6244 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
6246 /* For a load with loop-invariant (but other than power-of-2)
6247 stride (i.e. not a grouped access) like so:
6249 for (i = 0; i < n; i += stride)
6250 ... = array[i];
6252 we generate a new induction variable and new accesses to
6253 form a new vector (or vectors, depending on ncopies):
6255 for (j = 0; ; j += VF*stride)
6256 tmp1 = array[j];
6257 tmp2 = array[j + stride];
6259 vectemp = {tmp1, tmp2, ...}
6262 ivstep = stride_step;
6263 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6264 build_int_cst (TREE_TYPE (ivstep), vf));
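/* For example (numbers are illustrative): with a scalar byte stride of 8
   and vf == 4, the IV created below advances by 32 bytes per iteration of
   the vectorized loop, while the individual scalar loads generated further
   down step by stride_step (8 bytes) from one element to the next.  */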
6266 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6268 create_iv (stride_base, ivstep, NULL,
6269 loop, &incr_gsi, insert_after,
6270 &offvar, NULL);
6271 incr = gsi_stmt (incr_gsi);
6272 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6274 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6275 if (stmts)
6276 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6278 prev_stmt_info = NULL;
6279 running_off = offvar;
6280 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
6281 int nloads = nunits;
6282 tree ltype = TREE_TYPE (vectype);
6283 if (slp)
6285 nloads = nunits / group_size;
6286 if (group_size < nunits)
6287 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6288 else
6289 ltype = vectype;
6290 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6291 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6292 gcc_assert (!slp_perm);
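/* For example (illustrative sizes): with nunits == 4 and an SLP group of
   size 2, nloads == 2 and ltype is a 2-element vector, so each vector stmt
   is assembled from two narrower loads; when group_size equals nunits a
   single full-width load per vector stmt suffices.  */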
6294 for (j = 0; j < ncopies; j++)
6296 tree vec_inv;
6298 if (nloads > 1)
6300 vec_alloc (v, nloads);
6301 for (i = 0; i < nloads; i++)
6303 tree newref, newoff;
6304 gimple incr;
6305 newref = build2 (MEM_REF, ltype, running_off, alias_off);
6307 newref = force_gimple_operand_gsi (gsi, newref, true,
6308 NULL_TREE, true,
6309 GSI_SAME_STMT);
6310 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6311 newoff = copy_ssa_name (running_off);
6312 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6313 running_off, stride_step);
6314 vect_finish_stmt_generation (stmt, incr, gsi);
6316 running_off = newoff;
6319 vec_inv = build_constructor (vectype, v);
6320 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6321 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6323 else
6325 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6326 build2 (MEM_REF, ltype,
6327 running_off, alias_off));
6328 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6330 tree newoff = copy_ssa_name (running_off);
6331 gimple incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6332 running_off, stride_step);
6333 vect_finish_stmt_generation (stmt, incr, gsi);
6335 running_off = newoff;
6338 if (slp)
6339 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6340 if (j == 0)
6341 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6342 else
6343 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6344 prev_stmt_info = vinfo_for_stmt (new_stmt);
6346 return true;
6349 if (grouped_load)
6351 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6352 if (slp
6353 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6354 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6355 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6357 /* Check if the chain of loads is already vectorized. */
6358 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6359 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6360 ??? But we can only do so if there is exactly one
6361 as we have no way to get at the rest. Leave the CSE
6362 opportunity alone.
6363 ??? With the group load eventually participating
6364 in multiple different permutations (having multiple
6365 slp nodes which refer to the same group) the CSE
6366 is even wrong code. See PR56270. */
6367 && !slp)
6369 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6370 return true;
6372 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6373 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6375 /* VEC_NUM is the number of vect stmts to be created for this group. */
6376 if (slp)
6378 grouped_load = false;
6379 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6380 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6381 slp_perm = true;
6382 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6384 else
6386 vec_num = group_size;
6387 group_gap = 0;
6390 else
6392 first_stmt = stmt;
6393 first_dr = dr;
6394 group_size = vec_num = 1;
6395 group_gap = 0;
6398 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6399 gcc_assert (alignment_support_scheme);
6400 /* Targets with load-lane instructions must not require explicit
6401 realignment. */
6402 gcc_assert (!load_lanes_p
6403 || alignment_support_scheme == dr_aligned
6404 || alignment_support_scheme == dr_unaligned_supported);
6406 /* In case the vectorization factor (VF) is bigger than the number
6407 of elements that we can fit in a vectype (nunits), we have to generate
6408 more than one vector stmt - i.e - we need to "unroll" the
6409 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6410 from one copy of the vector stmt to the next, in the field
6411 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6412 stages to find the correct vector defs to be used when vectorizing
6413 stmts that use the defs of the current stmt. The example below
6414 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6415 need to create 4 vectorized stmts):
6417 before vectorization:
6418 RELATED_STMT VEC_STMT
6419 S1: x = memref - -
6420 S2: z = x + 1 - -
6422 step 1: vectorize stmt S1:
6423 We first create the vector stmt VS1_0, and, as usual, record a
6424 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6425 Next, we create the vector stmt VS1_1, and record a pointer to
6426 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6427 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6428 stmts and pointers:
6429 RELATED_STMT VEC_STMT
6430 VS1_0: vx0 = memref0 VS1_1 -
6431 VS1_1: vx1 = memref1 VS1_2 -
6432 VS1_2: vx2 = memref2 VS1_3 -
6433 VS1_3: vx3 = memref3 - -
6434 S1: x = load - VS1_0
6435 S2: z = x + 1 - -
6437 See in documentation in vect_get_vec_def_for_stmt_copy for how the
6438 information we recorded in RELATED_STMT field is used to vectorize
6439 stmt S2. */
6441 /* In case of interleaving (non-unit grouped access):
6443 S1: x2 = &base + 2
6444 S2: x0 = &base
6445 S3: x1 = &base + 1
6446 S4: x3 = &base + 3
6448 Vectorized loads are created in the order of memory accesses
6449 starting from the access of the first stmt of the chain:
6451 VS1: vx0 = &base
6452 VS2: vx1 = &base + vec_size*1
6453 VS3: vx3 = &base + vec_size*2
6454 VS4: vx4 = &base + vec_size*3
6456 Then permutation statements are generated:
6458 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6459 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6462 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6463 (the order of the data-refs in the output of vect_permute_load_chain
6464 corresponds to the order of scalar stmts in the interleaving chain - see
6465 the documentation of vect_permute_load_chain()).
6466 The generation of permutation stmts and recording them in
6467 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6469 In case of both multiple types and interleaving, the vector loads and
6470 permutation stmts above are created for every copy. The result vector
6471 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6472 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6474 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6475 on a target that supports unaligned accesses (dr_unaligned_supported)
6476 we generate the following code:
6477 p = initial_addr;
6478 indx = 0;
6479 loop {
6480 p = p + indx * vectype_size;
6481 vec_dest = *(p);
6482 indx = indx + 1;
6485 Otherwise, the data reference is potentially unaligned on a target that
6486 does not support unaligned accesses (dr_explicit_realign_optimized) -
6487 then generate the following code, in which the data in each iteration is
6488 obtained by two vector loads, one from the previous iteration, and one
6489 from the current iteration:
6490 p1 = initial_addr;
6491 msq_init = *(floor(p1))
6492 p2 = initial_addr + VS - 1;
6493 realignment_token = call target_builtin;
6494 indx = 0;
6495 loop {
6496 p2 = p2 + indx * vectype_size
6497 lsq = *(floor(p2))
6498 vec_dest = realign_load (msq, lsq, realignment_token)
6499 indx = indx + 1;
6500 msq = lsq;
6501 } */
6503 /* If the misalignment remains the same throughout the execution of the
6504 loop, we can create the init_addr and permutation mask at the loop
6505 preheader. Otherwise, it needs to be created inside the loop.
6506 This can only occur when vectorizing memory accesses in the inner-loop
6507 nested within an outer-loop that is being vectorized. */
6509 if (nested_in_vect_loop
6510 && (TREE_INT_CST_LOW (DR_STEP (dr))
6511 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6513 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6514 compute_in_loop = true;
6517 if ((alignment_support_scheme == dr_explicit_realign_optimized
6518 || alignment_support_scheme == dr_explicit_realign)
6519 && !compute_in_loop)
6521 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6522 alignment_support_scheme, NULL_TREE,
6523 &at_loop);
6524 if (alignment_support_scheme == dr_explicit_realign_optimized)
6526 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
6527 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6528 size_one_node);
6531 else
6532 at_loop = loop;
6534 if (negative)
6535 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
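/* With a negative step the current scalar element is the highest-addressed
   element of the vector to be loaded, so the access starts nunits - 1
   elements below it; the loaded vector is then reversed by the 'negative'
   permutation handling further below.  */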
6537 if (load_lanes_p)
6538 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6539 else
6540 aggr_type = vectype;
6542 prev_stmt_info = NULL;
6543 for (j = 0; j < ncopies; j++)
6545 /* 1. Create the vector or array pointer update chain. */
6546 if (j == 0)
6548 bool simd_lane_access_p
6549 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6550 if (simd_lane_access_p
6551 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6552 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6553 && integer_zerop (DR_OFFSET (first_dr))
6554 && integer_zerop (DR_INIT (first_dr))
6555 && alias_sets_conflict_p (get_alias_set (aggr_type),
6556 get_alias_set (DR_REF (first_dr)))
6557 && (alignment_support_scheme == dr_aligned
6558 || alignment_support_scheme == dr_unaligned_supported))
6560 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6561 dataref_offset = build_int_cst (reference_alias_ptr_type
6562 (DR_REF (first_dr)), 0);
6563 inv_p = false;
6565 else
6566 dataref_ptr
6567 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6568 offset, &dummy, gsi, &ptr_incr,
6569 simd_lane_access_p, &inv_p,
6570 byte_offset);
6572 else if (dataref_offset)
6573 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6574 TYPE_SIZE_UNIT (aggr_type));
6575 else
6576 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6577 TYPE_SIZE_UNIT (aggr_type));
6579 if (grouped_load || slp_perm)
6580 dr_chain.create (vec_num);
6582 if (load_lanes_p)
6584 tree vec_array;
6586 vec_array = create_vector_array (vectype, vec_num);
6588 /* Emit:
6589 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6590 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6591 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6592 gimple_call_set_lhs (new_stmt, vec_array);
6593 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6595 /* Extract each vector into an SSA_NAME. */
6596 for (i = 0; i < vec_num; i++)
6598 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6599 vec_array, i);
6600 dr_chain.quick_push (new_temp);
6603 /* Record the mapping between SSA_NAMEs and statements. */
6604 vect_record_grouped_load_vectors (stmt, dr_chain);
6606 else
6608 for (i = 0; i < vec_num; i++)
6610 if (i > 0)
6611 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6612 stmt, NULL_TREE);
6614 /* 2. Create the vector-load in the loop. */
6615 switch (alignment_support_scheme)
6617 case dr_aligned:
6618 case dr_unaligned_supported:
6620 unsigned int align, misalign;
6622 data_ref
6623 = build2 (MEM_REF, vectype, dataref_ptr,
6624 dataref_offset
6625 ? dataref_offset
6626 : build_int_cst (reference_alias_ptr_type
6627 (DR_REF (first_dr)), 0));
6628 align = TYPE_ALIGN_UNIT (vectype);
6629 if (alignment_support_scheme == dr_aligned)
6631 gcc_assert (aligned_access_p (first_dr));
6632 misalign = 0;
6634 else if (DR_MISALIGNMENT (first_dr) == -1)
6636 TREE_TYPE (data_ref)
6637 = build_aligned_type (TREE_TYPE (data_ref),
6638 TYPE_ALIGN (elem_type));
6639 align = TYPE_ALIGN_UNIT (elem_type);
6640 misalign = 0;
6642 else
6644 TREE_TYPE (data_ref)
6645 = build_aligned_type (TREE_TYPE (data_ref),
6646 TYPE_ALIGN (elem_type));
6647 misalign = DR_MISALIGNMENT (first_dr);
6649 if (dataref_offset == NULL_TREE)
6650 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6651 align, misalign);
6652 break;
6654 case dr_explicit_realign:
6656 tree ptr, bump;
6658 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
6660 if (compute_in_loop)
6661 msq = vect_setup_realignment (first_stmt, gsi,
6662 &realignment_token,
6663 dr_explicit_realign,
6664 dataref_ptr, NULL);
6666 ptr = copy_ssa_name (dataref_ptr);
6667 new_stmt = gimple_build_assign
6668 (ptr, BIT_AND_EXPR, dataref_ptr,
6669 build_int_cst
6670 (TREE_TYPE (dataref_ptr),
6671 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6672 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6673 data_ref
6674 = build2 (MEM_REF, vectype, ptr,
6675 build_int_cst (reference_alias_ptr_type
6676 (DR_REF (first_dr)), 0));
6677 vec_dest = vect_create_destination_var (scalar_dest,
6678 vectype);
6679 new_stmt = gimple_build_assign (vec_dest, data_ref);
6680 new_temp = make_ssa_name (vec_dest, new_stmt);
6681 gimple_assign_set_lhs (new_stmt, new_temp);
6682 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6683 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6684 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6685 msq = new_temp;
6687 bump = size_binop (MULT_EXPR, vs,
6688 TYPE_SIZE_UNIT (elem_type));
6689 bump = size_binop (MINUS_EXPR, bump, size_one_node);
6690 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6691 new_stmt = gimple_build_assign
6692 (NULL_TREE, BIT_AND_EXPR, ptr,
6693 build_int_cst
6694 (TREE_TYPE (ptr),
6695 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6696 ptr = copy_ssa_name (dataref_ptr, new_stmt);
6697 gimple_assign_set_lhs (new_stmt, ptr);
6698 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6699 data_ref
6700 = build2 (MEM_REF, vectype, ptr,
6701 build_int_cst (reference_alias_ptr_type
6702 (DR_REF (first_dr)), 0));
6703 break;
6705 case dr_explicit_realign_optimized:
6706 new_temp = copy_ssa_name (dataref_ptr);
6707 new_stmt = gimple_build_assign
6708 (new_temp, BIT_AND_EXPR, dataref_ptr,
6709 build_int_cst
6710 (TREE_TYPE (dataref_ptr),
6711 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6712 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6713 data_ref
6714 = build2 (MEM_REF, vectype, new_temp,
6715 build_int_cst (reference_alias_ptr_type
6716 (DR_REF (first_dr)), 0));
6717 break;
6718 default:
6719 gcc_unreachable ();
6721 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6722 new_stmt = gimple_build_assign (vec_dest, data_ref);
6723 new_temp = make_ssa_name (vec_dest, new_stmt);
6724 gimple_assign_set_lhs (new_stmt, new_temp);
6725 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6727 /* 3. Handle explicit realignment if necessary/supported.
6728 Create in loop:
6729 vec_dest = realign_load (msq, lsq, realignment_token) */
6730 if (alignment_support_scheme == dr_explicit_realign_optimized
6731 || alignment_support_scheme == dr_explicit_realign)
6733 lsq = gimple_assign_lhs (new_stmt);
6734 if (!realignment_token)
6735 realignment_token = dataref_ptr;
6736 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6737 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
6738 msq, lsq, realignment_token);
6739 new_temp = make_ssa_name (vec_dest, new_stmt);
6740 gimple_assign_set_lhs (new_stmt, new_temp);
6741 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6743 if (alignment_support_scheme == dr_explicit_realign_optimized)
6745 gcc_assert (phi);
6746 if (i == vec_num - 1 && j == ncopies - 1)
6747 add_phi_arg (phi, lsq,
6748 loop_latch_edge (containing_loop),
6749 UNKNOWN_LOCATION);
6750 msq = lsq;
6754 /* 4. Handle invariant-load. */
6755 if (inv_p && !bb_vinfo)
6757 gcc_assert (!grouped_load);
6758 /* If we have versioned for aliasing or the loop doesn't
6759 have any data dependencies that would preclude this,
6760 then we are sure this is a loop invariant load and
6761 thus we can insert it on the preheader edge. */
6762 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6763 && !nested_in_vect_loop
6764 && hoist_defs_of_uses (stmt, loop))
6766 if (dump_enabled_p ())
6768 dump_printf_loc (MSG_NOTE, vect_location,
6769 "hoisting out of the vectorized "
6770 "loop: ");
6771 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6773 tree tem = copy_ssa_name (scalar_dest);
6774 gsi_insert_on_edge_immediate
6775 (loop_preheader_edge (loop),
6776 gimple_build_assign (tem,
6777 unshare_expr
6778 (gimple_assign_rhs1 (stmt))));
6779 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6781 else
6783 gimple_stmt_iterator gsi2 = *gsi;
6784 gsi_next (&gsi2);
6785 new_temp = vect_init_vector (stmt, scalar_dest,
6786 vectype, &gsi2);
6788 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6789 set_vinfo_for_stmt (new_stmt,
6790 new_stmt_vec_info (new_stmt, loop_vinfo,
6791 bb_vinfo));
6794 if (negative)
6796 tree perm_mask = perm_mask_for_reverse (vectype);
6797 new_temp = permute_vec_elements (new_temp, new_temp,
6798 perm_mask, stmt, gsi);
6799 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6802 /* Collect vector loads and later create their permutation in
6803 vect_transform_grouped_load (). */
6804 if (grouped_load || slp_perm)
6805 dr_chain.quick_push (new_temp);
6807 /* Store vector loads in the corresponding SLP_NODE. */
6808 if (slp && !slp_perm)
6809 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6811 /* Bump the vector pointer to account for a gap. */
6812 if (slp && group_gap != 0)
6814 tree bump = size_binop (MULT_EXPR,
6815 TYPE_SIZE_UNIT (elem_type),
6816 size_int (group_gap));
6817 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6818 stmt, bump);
6822 if (slp && !slp_perm)
6823 continue;
6825 if (slp_perm)
6827 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6828 slp_node_instance, false))
6830 dr_chain.release ();
6831 return false;
6834 else
6836 if (grouped_load)
6838 if (!load_lanes_p)
6839 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6840 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6842 else
6844 if (j == 0)
6845 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6846 else
6847 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6848 prev_stmt_info = vinfo_for_stmt (new_stmt);
6851 dr_chain.release ();
6854 return true;
6857 /* Function vect_is_simple_cond.
6859 Input:
6860 LOOP - the loop that is being vectorized.
6861 COND - Condition that is checked for simple use.
6863 Output:
6864 *COMP_VECTYPE - the vector type for the comparison.
6866 Returns whether a COND can be vectorized. Checks whether
6867 condition operands are supportable using vect_is_simple_use_1. */
6869 static bool
6870 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6871 bb_vec_info bb_vinfo, tree *comp_vectype)
6873 tree lhs, rhs;
6874 tree def;
6875 enum vect_def_type dt;
6876 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6878 if (!COMPARISON_CLASS_P (cond))
6879 return false;
6881 lhs = TREE_OPERAND (cond, 0);
6882 rhs = TREE_OPERAND (cond, 1);
6884 if (TREE_CODE (lhs) == SSA_NAME)
6886 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6887 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6888 &lhs_def_stmt, &def, &dt, &vectype1))
6889 return false;
6891 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6892 && TREE_CODE (lhs) != FIXED_CST)
6893 return false;
6895 if (TREE_CODE (rhs) == SSA_NAME)
6897 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6898 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6899 &rhs_def_stmt, &def, &dt, &vectype2))
6900 return false;
6902 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6903 && TREE_CODE (rhs) != FIXED_CST)
6904 return false;
6906 *comp_vectype = vectype1 ? vectype1 : vectype2;
6907 return true;
6910 /* vectorizable_condition.
6912 Check if STMT is conditional modify expression that can be vectorized.
6913 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6914 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6915 at GSI.
6917 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
6918 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
6919 the else clause if it is 2).
6921 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6923 bool
6924 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6925 gimple *vec_stmt, tree reduc_def, int reduc_index,
6926 slp_tree slp_node)
6928 tree scalar_dest = NULL_TREE;
6929 tree vec_dest = NULL_TREE;
6930 tree cond_expr, then_clause, else_clause;
6931 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6932 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6933 tree comp_vectype = NULL_TREE;
6934 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6935 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6936 tree vec_compare, vec_cond_expr;
6937 tree new_temp;
6938 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6939 tree def;
6940 enum vect_def_type dt, dts[4];
6941 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6942 int ncopies;
6943 enum tree_code code;
6944 stmt_vec_info prev_stmt_info = NULL;
6945 int i, j;
6946 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6947 vec<tree> vec_oprnds0 = vNULL;
6948 vec<tree> vec_oprnds1 = vNULL;
6949 vec<tree> vec_oprnds2 = vNULL;
6950 vec<tree> vec_oprnds3 = vNULL;
6951 tree vec_cmp_type;
6953 if (slp_node || PURE_SLP_STMT (stmt_info))
6954 ncopies = 1;
6955 else
6956 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6958 gcc_assert (ncopies >= 1);
6959 if (reduc_index && ncopies > 1)
6960 return false; /* FORNOW */
6962 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6963 return false;
6965 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6966 return false;
6968 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6969 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6970 && reduc_def))
6971 return false;
6973 /* FORNOW: not yet supported. */
6974 if (STMT_VINFO_LIVE_P (stmt_info))
6976 if (dump_enabled_p ())
6977 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6978 "value used after loop.\n");
6979 return false;
6982 /* Is vectorizable conditional operation? */
6983 if (!is_gimple_assign (stmt))
6984 return false;
6986 code = gimple_assign_rhs_code (stmt);
6988 if (code != COND_EXPR)
6989 return false;
6991 cond_expr = gimple_assign_rhs1 (stmt);
6992 then_clause = gimple_assign_rhs2 (stmt);
6993 else_clause = gimple_assign_rhs3 (stmt);
6995 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6996 &comp_vectype)
6997 || !comp_vectype)
6998 return false;
7000 if (TREE_CODE (then_clause) == SSA_NAME)
7002 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
7003 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
7004 &then_def_stmt, &def, &dt))
7005 return false;
7007 else if (TREE_CODE (then_clause) != INTEGER_CST
7008 && TREE_CODE (then_clause) != REAL_CST
7009 && TREE_CODE (then_clause) != FIXED_CST)
7010 return false;
7012 if (TREE_CODE (else_clause) == SSA_NAME)
7014 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
7015 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
7016 &else_def_stmt, &def, &dt))
7017 return false;
7019 else if (TREE_CODE (else_clause) != INTEGER_CST
7020 && TREE_CODE (else_clause) != REAL_CST
7021 && TREE_CODE (else_clause) != FIXED_CST)
7022 return false;
7024 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
7025 /* The result of a vector comparison should be of signed integer type. */
7026 tree cmp_type = build_nonstandard_integer_type (prec, 0);
7027 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
7028 if (vec_cmp_type == NULL_TREE)
7029 return false;
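/* For example (illustrative types): for a 4 x float VECTYPE, PREC is 32,
   CMP_TYPE is a signed 32-bit integer type and VEC_CMP_TYPE a 4 x signed
   integer vector; the comparison below is built in that integer vector
   type while the VEC_COND_EXPR itself selects between the THEN/ELSE
   vectors of VECTYPE.  */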
7031 if (!vec_stmt)
7033 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7034 return expand_vec_cond_expr_p (vectype, comp_vectype);
7037 /* Transform. */
7039 if (!slp_node)
7041 vec_oprnds0.create (1);
7042 vec_oprnds1.create (1);
7043 vec_oprnds2.create (1);
7044 vec_oprnds3.create (1);
7047 /* Handle def. */
7048 scalar_dest = gimple_assign_lhs (stmt);
7049 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7051 /* Handle cond expr. */
7052 for (j = 0; j < ncopies; j++)
7054 gassign *new_stmt = NULL;
7055 if (j == 0)
7057 if (slp_node)
7059 auto_vec<tree, 4> ops;
7060 auto_vec<vec<tree>, 4> vec_defs;
7062 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7063 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7064 ops.safe_push (then_clause);
7065 ops.safe_push (else_clause);
7066 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7067 vec_oprnds3 = vec_defs.pop ();
7068 vec_oprnds2 = vec_defs.pop ();
7069 vec_oprnds1 = vec_defs.pop ();
7070 vec_oprnds0 = vec_defs.pop ();
7072 ops.release ();
7073 vec_defs.release ();
7075 else
7077 gimple gtemp;
7078 vec_cond_lhs =
7079 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7080 stmt, NULL);
7081 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
7082 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
7084 vec_cond_rhs =
7085 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7086 stmt, NULL);
7087 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
7088 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
7089 if (reduc_index == 1)
7090 vec_then_clause = reduc_def;
7091 else
7093 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7094 stmt, NULL);
7095 vect_is_simple_use (then_clause, stmt, loop_vinfo,
7096 NULL, &gtemp, &def, &dts[2]);
7098 if (reduc_index == 2)
7099 vec_else_clause = reduc_def;
7100 else
7102 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7103 stmt, NULL);
7104 vect_is_simple_use (else_clause, stmt, loop_vinfo,
7105 NULL, &gtemp, &def, &dts[3]);
7109 else
7111 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
7112 vec_oprnds0.pop ());
7113 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
7114 vec_oprnds1.pop ());
7115 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7116 vec_oprnds2.pop ());
7117 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7118 vec_oprnds3.pop ());
7121 if (!slp_node)
7123 vec_oprnds0.quick_push (vec_cond_lhs);
7124 vec_oprnds1.quick_push (vec_cond_rhs);
7125 vec_oprnds2.quick_push (vec_then_clause);
7126 vec_oprnds3.quick_push (vec_else_clause);
7129 /* Arguments are ready. Create the new vector stmt. */
7130 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7132 vec_cond_rhs = vec_oprnds1[i];
7133 vec_then_clause = vec_oprnds2[i];
7134 vec_else_clause = vec_oprnds3[i];
7136 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7137 vec_cond_lhs, vec_cond_rhs);
7138 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
7139 vec_compare, vec_then_clause, vec_else_clause);
7141 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
7142 new_temp = make_ssa_name (vec_dest, new_stmt);
7143 gimple_assign_set_lhs (new_stmt, new_temp);
7144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7145 if (slp_node)
7146 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7149 if (slp_node)
7150 continue;
7152 if (j == 0)
7153 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7154 else
7155 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7157 prev_stmt_info = vinfo_for_stmt (new_stmt);
7160 vec_oprnds0.release ();
7161 vec_oprnds1.release ();
7162 vec_oprnds2.release ();
7163 vec_oprnds3.release ();
7165 return true;
7169 /* Make sure the statement is vectorizable. */
7171 bool
7172 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
7174 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7175 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7176 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7177 bool ok;
7178 tree scalar_type, vectype;
7179 gimple pattern_stmt;
7180 gimple_seq pattern_def_seq;
7182 if (dump_enabled_p ())
7184 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7185 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7188 if (gimple_has_volatile_ops (stmt))
7190 if (dump_enabled_p ())
7191 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7192 "not vectorized: stmt has volatile operands\n");
7194 return false;
7197 /* Skip stmts that do not need to be vectorized. In loops this is expected
7198 to include:
7199 - the COND_EXPR which is the loop exit condition
7200 - any LABEL_EXPRs in the loop
7201 - computations that are used only for array indexing or loop control.
7202 In basic blocks we only analyze statements that are a part of some SLP
7203 instance; therefore, all the statements are relevant.
7205 Pattern statement needs to be analyzed instead of the original statement
7206 if the original statement is not relevant. Otherwise, we analyze both
7207 statements. In basic blocks we are called from some SLP instance
7208 traversal, so we don't analyze pattern stmts here; the pattern stmts
7209 will already be part of the SLP instance. */
7211 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7212 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7213 && !STMT_VINFO_LIVE_P (stmt_info))
7215 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7216 && pattern_stmt
7217 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7218 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7220 /* Analyze PATTERN_STMT instead of the original stmt. */
7221 stmt = pattern_stmt;
7222 stmt_info = vinfo_for_stmt (pattern_stmt);
7223 if (dump_enabled_p ())
7225 dump_printf_loc (MSG_NOTE, vect_location,
7226 "==> examining pattern statement: ");
7227 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7230 else
7232 if (dump_enabled_p ())
7233 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7235 return true;
7238 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7239 && node == NULL
7240 && pattern_stmt
7241 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7242 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7244 /* Analyze PATTERN_STMT too. */
7245 if (dump_enabled_p ())
7247 dump_printf_loc (MSG_NOTE, vect_location,
7248 "==> examining pattern statement: ");
7249 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7252 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7253 return false;
7256 if (is_pattern_stmt_p (stmt_info)
7257 && node == NULL
7258 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7260 gimple_stmt_iterator si;
7262 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7264 gimple pattern_def_stmt = gsi_stmt (si);
7265 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7266 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7268 /* Analyze def stmt of STMT if it's a pattern stmt. */
7269 if (dump_enabled_p ())
7271 dump_printf_loc (MSG_NOTE, vect_location,
7272 "==> examining pattern def statement: ");
7273 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7276 if (!vect_analyze_stmt (pattern_def_stmt,
7277 need_to_vectorize, node))
7278 return false;
7283 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7285 case vect_internal_def:
7286 break;
7288 case vect_reduction_def:
7289 case vect_nested_cycle:
7290 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
7291 || relevance == vect_used_in_outer_by_reduction
7292 || relevance == vect_unused_in_scope));
7293 break;
7295 case vect_induction_def:
7296 case vect_constant_def:
7297 case vect_external_def:
7298 case vect_unknown_def_type:
7299 default:
7300 gcc_unreachable ();
7303 if (bb_vinfo)
7305 gcc_assert (PURE_SLP_STMT (stmt_info));
7307 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7308 if (dump_enabled_p ())
7310 dump_printf_loc (MSG_NOTE, vect_location,
7311 "get vectype for scalar type: ");
7312 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7313 dump_printf (MSG_NOTE, "\n");
7316 vectype = get_vectype_for_scalar_type (scalar_type);
7317 if (!vectype)
7319 if (dump_enabled_p ())
7321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7322 "not SLPed: unsupported data-type ");
7323 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7324 scalar_type);
7325 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7327 return false;
7330 if (dump_enabled_p ())
7332 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7333 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7334 dump_printf (MSG_NOTE, "\n");
7337 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7340 if (STMT_VINFO_RELEVANT_P (stmt_info))
7342 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7343 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7344 || (is_gimple_call (stmt)
7345 && gimple_call_lhs (stmt) == NULL_TREE));
7346 *need_to_vectorize = true;
7349 ok = true;
7350 if (!bb_vinfo
7351 && (STMT_VINFO_RELEVANT_P (stmt_info)
7352 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7353 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7354 || vectorizable_conversion (stmt, NULL, NULL, NULL)
7355 || vectorizable_shift (stmt, NULL, NULL, NULL)
7356 || vectorizable_operation (stmt, NULL, NULL, NULL)
7357 || vectorizable_assignment (stmt, NULL, NULL, NULL)
7358 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
7359 || vectorizable_call (stmt, NULL, NULL, NULL)
7360 || vectorizable_store (stmt, NULL, NULL, NULL)
7361 || vectorizable_reduction (stmt, NULL, NULL, NULL)
7362 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
7363 else
7365 if (bb_vinfo)
7366 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7367 || vectorizable_conversion (stmt, NULL, NULL, node)
7368 || vectorizable_shift (stmt, NULL, NULL, node)
7369 || vectorizable_operation (stmt, NULL, NULL, node)
7370 || vectorizable_assignment (stmt, NULL, NULL, node)
7371 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7372 || vectorizable_call (stmt, NULL, NULL, node)
7373 || vectorizable_store (stmt, NULL, NULL, node)
7374 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7377 if (!ok)
7379 if (dump_enabled_p ())
7381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7382 "not vectorized: relevant stmt not ");
7383 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7384 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7387 return false;
7390 if (bb_vinfo)
7391 return true;
7393 /* Stmts that are (also) "live" (i.e., that are used out of the loop)
7394 need extra handling, except for vectorizable reductions. */
7395 if (STMT_VINFO_LIVE_P (stmt_info)
7396 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7397 ok = vectorizable_live_operation (stmt, NULL, NULL);
7399 if (!ok)
7401 if (dump_enabled_p ())
7403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7404 "not vectorized: live stmt not ");
7405 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7406 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7409 return false;
7412 return true;
7416 /* Function vect_transform_stmt.
7418 Create a vectorized stmt to replace STMT, and insert it at GSI. */
7420 bool
7421 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7422 bool *grouped_store, slp_tree slp_node,
7423 slp_instance slp_node_instance)
7425 bool is_store = false;
7426 gimple vec_stmt = NULL;
7427 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7428 bool done;
7430 switch (STMT_VINFO_TYPE (stmt_info))
7432 case type_demotion_vec_info_type:
7433 case type_promotion_vec_info_type:
7434 case type_conversion_vec_info_type:
7435 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7436 gcc_assert (done);
7437 break;
7439 case induc_vec_info_type:
7440 gcc_assert (!slp_node);
7441 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7442 gcc_assert (done);
7443 break;
7445 case shift_vec_info_type:
7446 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7447 gcc_assert (done);
7448 break;
7450 case op_vec_info_type:
7451 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7452 gcc_assert (done);
7453 break;
7455 case assignment_vec_info_type:
7456 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7457 gcc_assert (done);
7458 break;
7460 case load_vec_info_type:
7461 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7462 slp_node_instance);
7463 gcc_assert (done);
7464 break;
7466 case store_vec_info_type:
7467 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7468 gcc_assert (done);
7469 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7471 /* In case of interleaving, the whole chain is vectorized when the
7472 last store in the chain is reached. Store stmts before the last
7473 one are skipped, and their stmt_vec_info shouldn't be freed
7474 meanwhile. */
7475 *grouped_store = true;
7476 if (STMT_VINFO_VEC_STMT (stmt_info))
7477 is_store = true;
7479 else
7480 is_store = true;
7481 break;
7483 case condition_vec_info_type:
7484 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7485 gcc_assert (done);
7486 break;
7488 case call_vec_info_type:
7489 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7490 stmt = gsi_stmt (*gsi);
7491 if (is_gimple_call (stmt)
7492 && gimple_call_internal_p (stmt)
7493 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7494 is_store = true;
7495 break;
7497 case call_simd_clone_vec_info_type:
7498 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7499 stmt = gsi_stmt (*gsi);
7500 break;
7502 case reduc_vec_info_type:
7503 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7504 gcc_assert (done);
7505 break;
7507 default:
7508 if (!STMT_VINFO_LIVE_P (stmt_info))
7510 if (dump_enabled_p ())
7511 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7512 "stmt not supported.\n");
7513 gcc_unreachable ();
7517 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7518 is being vectorized, but outside the immediately enclosing loop. */
7519 if (vec_stmt
7520 && STMT_VINFO_LOOP_VINFO (stmt_info)
7521 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7522 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7523 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7524 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7525 || STMT_VINFO_RELEVANT (stmt_info) ==
7526 vect_used_in_outer_by_reduction))
7528 struct loop *innerloop = LOOP_VINFO_LOOP (
7529 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7530 imm_use_iterator imm_iter;
7531 use_operand_p use_p;
7532 tree scalar_dest;
7533 gimple exit_phi;
7535 if (dump_enabled_p ())
7536 dump_printf_loc (MSG_NOTE, vect_location,
7537 "Record the vdef for outer-loop vectorization.\n");
7539 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7540 (to be used when vectorizing outer-loop stmts that use the DEF of
7541 STMT). */
7542 if (gimple_code (stmt) == GIMPLE_PHI)
7543 scalar_dest = PHI_RESULT (stmt);
7544 else
7545 scalar_dest = gimple_assign_lhs (stmt);
7547 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7549 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7551 exit_phi = USE_STMT (use_p);
7552 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7557 /* Handle stmts whose DEF is used outside the loop-nest that is
7558 being vectorized. */
7559 if (STMT_VINFO_LIVE_P (stmt_info)
7560 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7562 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7563 gcc_assert (done);
7566 if (vec_stmt)
7567 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7569 return is_store;
7573 /* Remove a group of stores (for SLP or interleaving), free their
7574 stmt_vec_info. */
7576 void
7577 vect_remove_stores (gimple first_stmt)
7579 gimple next = first_stmt;
7580 gimple tmp;
7581 gimple_stmt_iterator next_si;
7583 while (next)
7585 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7587 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7588 if (is_pattern_stmt_p (stmt_info))
7589 next = STMT_VINFO_RELATED_STMT (stmt_info);
7590 /* Free the attached stmt_vec_info and remove the stmt. */
7591 next_si = gsi_for_stmt (next);
7592 unlink_stmt_vdef (next);
7593 gsi_remove (&next_si, true);
7594 release_defs (next);
7595 free_stmt_vec_info (next);
7596 next = tmp;
7601 /* Function new_stmt_vec_info.
7603 Create and initialize a new stmt_vec_info struct for STMT. */
7605 stmt_vec_info
7606 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7607 bb_vec_info bb_vinfo)
7609 stmt_vec_info res;
7610 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7612 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7613 STMT_VINFO_STMT (res) = stmt;
7614 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7615 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7616 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7617 STMT_VINFO_LIVE_P (res) = false;
7618 STMT_VINFO_VECTYPE (res) = NULL;
7619 STMT_VINFO_VEC_STMT (res) = NULL;
7620 STMT_VINFO_VECTORIZABLE (res) = true;
7621 STMT_VINFO_IN_PATTERN_P (res) = false;
7622 STMT_VINFO_RELATED_STMT (res) = NULL;
7623 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7624 STMT_VINFO_DATA_REF (res) = NULL;
7626 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7627 STMT_VINFO_DR_OFFSET (res) = NULL;
7628 STMT_VINFO_DR_INIT (res) = NULL;
7629 STMT_VINFO_DR_STEP (res) = NULL;
7630 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7632 if (gimple_code (stmt) == GIMPLE_PHI
7633 && is_loop_header_bb_p (gimple_bb (stmt)))
7634 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7635 else
7636 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7638 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7639 STMT_SLP_TYPE (res) = loop_vect;
7640 GROUP_FIRST_ELEMENT (res) = NULL;
7641 GROUP_NEXT_ELEMENT (res) = NULL;
7642 GROUP_SIZE (res) = 0;
7643 GROUP_STORE_COUNT (res) = 0;
7644 GROUP_GAP (res) = 0;
7645 GROUP_SAME_DR_STMT (res) = NULL;
7647 return res;
7651 /* Create the vector that holds the stmt_vec_info structs. */
7653 void
7654 init_stmt_vec_info_vec (void)
7656 gcc_assert (!stmt_vec_info_vec.exists ());
7657 stmt_vec_info_vec.create (50);
7661 /* Free the vector that holds the stmt_vec_info structs. */
7663 void
7664 free_stmt_vec_info_vec (void)
7666 unsigned int i;
7667 vec_void_p info;
7668 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7669 if (info != NULL)
7670 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7671 gcc_assert (stmt_vec_info_vec.exists ());
7672 stmt_vec_info_vec.release ();
7676 /* Free stmt vectorization related info. */
7678 void
7679 free_stmt_vec_info (gimple stmt)
7681 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7683 if (!stmt_info)
7684 return;
7686 /* Check if this statement has a related "pattern stmt"
7687 (introduced by the vectorizer during the pattern recognition
7688 pass). Free the pattern's stmt_vec_info and the def stmts' stmt_vec_infos
7689 too. */
7690 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7692 stmt_vec_info patt_info
7693 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7694 if (patt_info)
7696 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7697 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7698 gimple_set_bb (patt_stmt, NULL);
7699 tree lhs = gimple_get_lhs (patt_stmt);
7700 if (TREE_CODE (lhs) == SSA_NAME)
7701 release_ssa_name (lhs);
7702 if (seq)
7704 gimple_stmt_iterator si;
7705 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7707 gimple seq_stmt = gsi_stmt (si);
7708 gimple_set_bb (seq_stmt, NULL);
7709 lhs = gimple_get_lhs (seq_stmt);
7710 if (TREE_CODE (lhs) == SSA_NAME)
7711 release_ssa_name (lhs);
7712 free_stmt_vec_info (seq_stmt);
7715 free_stmt_vec_info (patt_stmt);
7719 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7720 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
7721 set_vinfo_for_stmt (stmt, NULL);
7722 free (stmt_info);
7726 /* Function get_vectype_for_scalar_type_and_size.
7728 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7729 by the target. */
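/* For example (illustrative values only): on a target with 16-byte
vectors, SCALAR_TYPE == int (4 bytes) and SIZE == 16 would yield a
vector(4) int (V4SImode) type, while SIZE == 0 lets the target's
preferred_simd_mode hook pick the vector width.  */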
7731 static tree
7732 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7734 machine_mode inner_mode = TYPE_MODE (scalar_type);
7735 machine_mode simd_mode;
7736 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7737 int nunits;
7738 tree vectype;
7740 if (nbytes == 0)
7741 return NULL_TREE;
7743 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7744 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7745 return NULL_TREE;
7747 /* For vector types of elements whose mode precision doesn't
7748 match their type's precision we use an element type of mode
7749 precision. The vectorization routines will have to make sure
7750 they support the proper result truncation/extension.
7751 We also make sure to build vector types with INTEGER_TYPE
7752 component type only. */
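/* For example, a BOOLEAN_TYPE or ENUMERAL_TYPE whose mode is QImode is
replaced here by an 8-bit INTEGER_TYPE of the same signedness, so the
vector elements always have full mode precision.  */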
7753 if (INTEGRAL_TYPE_P (scalar_type)
7754 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7755 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7756 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7757 TYPE_UNSIGNED (scalar_type));
7759 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7760 When the component mode passes the above test simply use a type
7761 corresponding to that mode. The theory is that any use that
7762 would cause problems with this will disable vectorization anyway. */
7763 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7764 && !INTEGRAL_TYPE_P (scalar_type))
7765 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7767 /* We can't build a vector type of elements with alignment bigger than
7768 their size. */
7769 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7770 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7771 TYPE_UNSIGNED (scalar_type));
7773 /* If we fell back to using the mode, fail if there was
7774 no scalar type for it. */
7775 if (scalar_type == NULL_TREE)
7776 return NULL_TREE;
7778 /* If no size was supplied use the mode the target prefers. Otherwise
7779 look up a vector mode of the specified size. */
7780 if (size == 0)
7781 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7782 else
7783 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7784 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7785 if (nunits <= 1)
7786 return NULL_TREE;
7788 vectype = build_vector_type (scalar_type, nunits);
7790 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7791 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7792 return NULL_TREE;
7794 return vectype;
7797 unsigned int current_vector_size;
7799 /* Function get_vectype_for_scalar_type.
7801 Returns the vector type corresponding to SCALAR_TYPE as supported
7802 by the target. */
7804 tree
7805 get_vectype_for_scalar_type (tree scalar_type)
7807 tree vectype;
7808 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7809 current_vector_size);
7810 if (vectype
7811 && current_vector_size == 0)
7812 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7813 return vectype;
7816 /* Function get_same_sized_vectype
7818 Returns a vector type corresponding to SCALAR_TYPE of size
7819 VECTOR_TYPE if supported by the target. */
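/* E.g. (illustrative only), given SCALAR_TYPE == int and a 16-byte
VECTOR_TYPE such as vector(8) short, this returns vector(4) int if the
target supports such a type, or NULL_TREE otherwise.  */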
7821 tree
7822 get_same_sized_vectype (tree scalar_type, tree vector_type)
7824 return get_vectype_for_scalar_type_and_size
7825 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7828 /* Function vect_is_simple_use.
7830 Input:
7831 LOOP_VINFO - the vect info of the loop that is being vectorized.
7832 BB_VINFO - the vect info of the basic block that is being vectorized.
7833 OPERAND - operand of STMT in the loop or bb.
7834 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
7836 Returns whether a stmt with OPERAND can be vectorized.
7837 For loops, supportable operands are constants, loop invariants, and operands
7838 that are defined by the current iteration of the loop. Unsupportable
7839 operands are those that are defined by a previous iteration of the loop (as
7840 is the case in reduction/induction computations).
7841 For basic blocks, supportable operands are constants and bb invariants.
7842 For now, operands defined outside the basic block are not supported. */
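/* As an illustration only, in
for (i = 0; i < n; i++)
a[i] = b[i] * c + 1;
the operand 1 is classified as vect_constant_def, c (defined before the
loop) as vect_external_def, and the loaded value b[i] as
vect_internal_def.  */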
7844 bool
7845 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7846 bb_vec_info bb_vinfo, gimple *def_stmt,
7847 tree *def, enum vect_def_type *dt)
7849 basic_block bb;
7850 stmt_vec_info stmt_vinfo;
7851 struct loop *loop = NULL;
7853 if (loop_vinfo)
7854 loop = LOOP_VINFO_LOOP (loop_vinfo);
7856 *def_stmt = NULL;
7857 *def = NULL_TREE;
7859 if (dump_enabled_p ())
7861 dump_printf_loc (MSG_NOTE, vect_location,
7862 "vect_is_simple_use: operand ");
7863 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7864 dump_printf (MSG_NOTE, "\n");
7867 if (CONSTANT_CLASS_P (operand))
7869 *dt = vect_constant_def;
7870 return true;
7873 if (is_gimple_min_invariant (operand))
7875 *def = operand;
7876 *dt = vect_external_def;
7877 return true;
7880 if (TREE_CODE (operand) == PAREN_EXPR)
7882 if (dump_enabled_p ())
7883 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7884 operand = TREE_OPERAND (operand, 0);
7887 if (TREE_CODE (operand) != SSA_NAME)
7889 if (dump_enabled_p ())
7890 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7891 "not ssa-name.\n");
7892 return false;
7895 *def_stmt = SSA_NAME_DEF_STMT (operand);
7896 if (*def_stmt == NULL)
7898 if (dump_enabled_p ())
7899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7900 "no def_stmt.\n");
7901 return false;
7904 if (dump_enabled_p ())
7906 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7907 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7910 /* An empty stmt is expected only in the case of a function argument
7911 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
7912 if (gimple_nop_p (*def_stmt))
7914 *def = operand;
7915 *dt = vect_external_def;
7916 return true;
7919 bb = gimple_bb (*def_stmt);
7921 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7922 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7923 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7924 *dt = vect_external_def;
7925 else
7927 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7928 if (!loop && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
7929 *dt = vect_external_def;
7930 else
7931 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7934 if (dump_enabled_p ())
7936 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
7937 switch (*dt)
7939 case vect_uninitialized_def:
7940 dump_printf (MSG_NOTE, "uninitialized\n");
7941 break;
7942 case vect_constant_def:
7943 dump_printf (MSG_NOTE, "constant\n");
7944 break;
7945 case vect_external_def:
7946 dump_printf (MSG_NOTE, "external\n");
7947 break;
7948 case vect_internal_def:
7949 dump_printf (MSG_NOTE, "internal\n");
7950 break;
7951 case vect_induction_def:
7952 dump_printf (MSG_NOTE, "induction\n");
7953 break;
7954 case vect_reduction_def:
7955 dump_printf (MSG_NOTE, "reduction\n");
7956 break;
7957 case vect_double_reduction_def:
7958 dump_printf (MSG_NOTE, "double reduction\n");
7959 break;
7960 case vect_nested_cycle:
7961 dump_printf (MSG_NOTE, "nested cycle\n");
7962 break;
7963 case vect_unknown_def_type:
7964 dump_printf (MSG_NOTE, "unknown\n");
7965 break;
7969 if (*dt == vect_unknown_def_type
7970 || (stmt
7971 && *dt == vect_double_reduction_def
7972 && gimple_code (stmt) != GIMPLE_PHI))
7974 if (dump_enabled_p ())
7975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7976 "Unsupported pattern.\n");
7977 return false;
7980 switch (gimple_code (*def_stmt))
7982 case GIMPLE_PHI:
7983 *def = gimple_phi_result (*def_stmt);
7984 break;
7986 case GIMPLE_ASSIGN:
7987 *def = gimple_assign_lhs (*def_stmt);
7988 break;
7990 case GIMPLE_CALL:
7991 *def = gimple_call_lhs (*def_stmt);
7992 if (*def != NULL)
7993 break;
7994 /* FALLTHRU */
7995 default:
7996 if (dump_enabled_p ())
7997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7998 "unsupported defining stmt:\n");
7999 return false;
8002 return true;
8005 /* Function vect_is_simple_use_1.
8007 Same as vect_is_simple_use, but also determines the vector operand
8008 type of OPERAND and stores it to *VECTYPE. If the definition of
8009 OPERAND is vect_uninitialized_def, vect_constant_def or
8010 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8011 is responsible for computing the best suited vector type for the
8012 scalar operand. */
8014 bool
8015 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
8016 bb_vec_info bb_vinfo, gimple *def_stmt,
8017 tree *def, enum vect_def_type *dt, tree *vectype)
8019 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
8020 def, dt))
8021 return false;
8023 /* Now get a vector type if the def is internal, otherwise supply
8024 NULL_TREE and leave it up to the caller to figure out a proper
8025 type for the use stmt. */
8026 if (*dt == vect_internal_def
8027 || *dt == vect_induction_def
8028 || *dt == vect_reduction_def
8029 || *dt == vect_double_reduction_def
8030 || *dt == vect_nested_cycle)
8032 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8034 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8035 && !STMT_VINFO_RELEVANT (stmt_info)
8036 && !STMT_VINFO_LIVE_P (stmt_info))
8037 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8039 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8040 gcc_assert (*vectype != NULL_TREE);
8042 else if (*dt == vect_uninitialized_def
8043 || *dt == vect_constant_def
8044 || *dt == vect_external_def)
8045 *vectype = NULL_TREE;
8046 else
8047 gcc_unreachable ();
8049 return true;
8053 /* Function supportable_widening_operation
8055 Check whether an operation represented by the code CODE is a
8056 widening operation that is supported by the target platform in
8057 vector form (i.e., when operating on arguments of type VECTYPE_IN
8058 producing a result of type VECTYPE_OUT).
8060 Widening operations we currently support are NOP (CONVERT), FLOAT,
8061 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these
8062 operations are supported by the target platform directly via
8063 vector tree-codes.
8065 Output:
8066 - CODE1 and CODE2 are codes of vector operations to be used when
8067 vectorizing the operation, if available.
8068 - MULTI_STEP_CVT determines the number of required intermediate steps in
8069 case of multi-step conversion (like char->short->int - in that case
8070 MULTI_STEP_CVT will be 1).
8071 - INTERM_TYPES contains the intermediate type required to perform the
8072 widening operation (short in the above example). */
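/* As a concrete illustration (hypothetical 128-bit target): widening a
V16QI input to a V4SI result takes one intermediate step through V8HI,
so MULTI_STEP_CVT is 1, INTERM_TYPES contains the V8HI vector type, and
CODE1/CODE2 are VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR.  */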
8074 bool
8075 supportable_widening_operation (enum tree_code code, gimple stmt,
8076 tree vectype_out, tree vectype_in,
8077 enum tree_code *code1, enum tree_code *code2,
8078 int *multi_step_cvt,
8079 vec<tree> *interm_types)
8081 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8082 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8083 struct loop *vect_loop = NULL;
8084 machine_mode vec_mode;
8085 enum insn_code icode1, icode2;
8086 optab optab1, optab2;
8087 tree vectype = vectype_in;
8088 tree wide_vectype = vectype_out;
8089 enum tree_code c1, c2;
8090 int i;
8091 tree prev_type, intermediate_type;
8092 machine_mode intermediate_mode, prev_mode;
8093 optab optab3, optab4;
8095 *multi_step_cvt = 0;
8096 if (loop_info)
8097 vect_loop = LOOP_VINFO_LOOP (loop_info);
8099 switch (code)
8101 case WIDEN_MULT_EXPR:
8102 /* The result of a vectorized widening operation usually requires
8103 two vectors (because the widened results do not fit into one vector).
8104 The generated vector results would normally be expected to be
8105 generated in the same order as in the original scalar computation,
8106 i.e. if 8 results are generated in each vector iteration, they are
8107 to be organized as follows:
8108 vect1: [res1,res2,res3,res4],
8109 vect2: [res5,res6,res7,res8].
8111 However, in the special case that the result of the widening
8112 operation is used in a reduction computation only, the order doesn't
8113 matter (because when vectorizing a reduction we change the order of
8114 the computation). Some targets can take advantage of this and
8115 generate more efficient code. For example, targets like Altivec,
8116 that support widen_mult using a sequence of {mult_even,mult_odd}
8117 generate the following vectors:
8118 vect1: [res1,res3,res5,res7],
8119 vect2: [res2,res4,res6,res8].
8121 When vectorizing outer-loops, we execute the inner-loop sequentially
8122 (each vectorized inner-loop iteration contributes to VF outer-loop
8123 iterations in parallel). We therefore don't allow changing the
8124 order of the computation in the inner-loop during outer-loop
8125 vectorization. */
8126 /* TODO: Another case in which order doesn't *really* matter is when we
8127 widen and then contract again, e.g. (short)((int)x * y >> 8).
8128 Normally, pack_trunc performs an even/odd permute, whereas the
8129 repack from an even/odd expansion would be an interleave, which
8130 would be significantly simpler for e.g. AVX2. */
8131 /* In any case, in order to avoid duplicating the code below, recurse
8132 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8133 are properly set up for the caller. If we fail, we'll continue with
8134 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8135 if (vect_loop
8136 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8137 && !nested_in_vect_loop_p (vect_loop, stmt)
8138 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8139 stmt, vectype_out, vectype_in,
8140 code1, code2, multi_step_cvt,
8141 interm_types))
8143 /* Elements in a vector with vect_used_by_reduction property cannot
8144 be reordered if the use chain with this property does not have the
8145 same operation. One such example is s += a * b, where elements
8146 in a and b cannot be reordered. Here we check if the vector defined
8147 by STMT is only directly used in the reduction statement. */
8148 tree lhs = gimple_assign_lhs (stmt);
8149 use_operand_p dummy;
8150 gimple use_stmt;
8151 stmt_vec_info use_stmt_info = NULL;
8152 if (single_imm_use (lhs, &dummy, &use_stmt)
8153 && (use_stmt_info = vinfo_for_stmt (use_stmt))
8154 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
8155 return true;
8157 c1 = VEC_WIDEN_MULT_LO_EXPR;
8158 c2 = VEC_WIDEN_MULT_HI_EXPR;
8159 break;
8161 case VEC_WIDEN_MULT_EVEN_EXPR:
8162 /* Support the recursion induced just above. */
8163 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
8164 c2 = VEC_WIDEN_MULT_ODD_EXPR;
8165 break;
8167 case WIDEN_LSHIFT_EXPR:
8168 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
8169 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
8170 break;
8172 CASE_CONVERT:
8173 c1 = VEC_UNPACK_LO_EXPR;
8174 c2 = VEC_UNPACK_HI_EXPR;
8175 break;
8177 case FLOAT_EXPR:
8178 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
8179 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
8180 break;
8182 case FIX_TRUNC_EXPR:
8183 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8184 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8185 computing the operation. */
8186 return false;
8188 default:
8189 gcc_unreachable ();
8192 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
8194 enum tree_code ctmp = c1;
8195 c1 = c2;
8196 c2 = ctmp;
8199 if (code == FIX_TRUNC_EXPR)
8202 /* The signedness is determined from the output operand. */
8202 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8203 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8205 else
8207 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8208 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8211 if (!optab1 || !optab2)
8212 return false;
8214 vec_mode = TYPE_MODE (vectype);
8215 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8216 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8217 return false;
8219 *code1 = c1;
8220 *code2 = c2;
8222 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8223 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8224 return true;
8226 /* Check if it's a multi-step conversion that can be done using intermediate
8227 types. */
8229 prev_type = vectype;
8230 prev_mode = vec_mode;
8232 if (!CONVERT_EXPR_CODE_P (code))
8233 return false;
8235 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8236 intermediate steps in the promotion sequence. We try
8237 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
8238 not. */
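/* E.g. a char -> long conversion (assuming MAX_INTERM_CVT_STEPS >= 2)
would record two intermediate vector types (short and int) and leave
MULTI_STEP_CVT == 2 once the wide type is reached.  */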
8239 interm_types->create (MAX_INTERM_CVT_STEPS);
8240 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8242 intermediate_mode = insn_data[icode1].operand[0].mode;
8243 intermediate_type
8244 = lang_hooks.types.type_for_mode (intermediate_mode,
8245 TYPE_UNSIGNED (prev_type));
8246 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8247 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8249 if (!optab3 || !optab4
8250 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8251 || insn_data[icode1].operand[0].mode != intermediate_mode
8252 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8253 || insn_data[icode2].operand[0].mode != intermediate_mode
8254 || ((icode1 = optab_handler (optab3, intermediate_mode))
8255 == CODE_FOR_nothing)
8256 || ((icode2 = optab_handler (optab4, intermediate_mode))
8257 == CODE_FOR_nothing))
8258 break;
8260 interm_types->quick_push (intermediate_type);
8261 (*multi_step_cvt)++;
8263 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8264 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8265 return true;
8267 prev_type = intermediate_type;
8268 prev_mode = intermediate_mode;
8271 interm_types->release ();
8272 return false;
8276 /* Function supportable_narrowing_operation
8278 Check whether an operation represented by the code CODE is a
8279 narrowing operation that is supported by the target platform in
8280 vector form (i.e., when operating on arguments of type VECTYPE_IN
8281 and producing a result of type VECTYPE_OUT).
8283 Narrowing operations we currently support are NOP (CONVERT) and
8284 FIX_TRUNC. This function checks if these operations are supported by
8285 the target platform directly via vector tree-codes.
8287 Output:
8288 - CODE1 is the code of a vector operation to be used when
8289 vectorizing the operation, if available.
8290 - MULTI_STEP_CVT determines the number of required intermediate steps in
8291 case of multi-step conversion (like int->short->char - in that case
8292 MULTI_STEP_CVT will be 1).
8293 - INTERM_TYPES contains the intermediate type required to perform the
8294 narrowing operation (short in the above example). */
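/* As a concrete illustration (hypothetical 128-bit target): narrowing
V4SI inputs to a V16QI result goes through V8HI, so MULTI_STEP_CVT is 1,
INTERM_TYPES contains the V8HI vector type, and CODE1 is
VEC_PACK_TRUNC_EXPR.  */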
8296 bool
8297 supportable_narrowing_operation (enum tree_code code,
8298 tree vectype_out, tree vectype_in,
8299 enum tree_code *code1, int *multi_step_cvt,
8300 vec<tree> *interm_types)
8302 machine_mode vec_mode;
8303 enum insn_code icode1;
8304 optab optab1, interm_optab;
8305 tree vectype = vectype_in;
8306 tree narrow_vectype = vectype_out;
8307 enum tree_code c1;
8308 tree intermediate_type;
8309 machine_mode intermediate_mode, prev_mode;
8310 int i;
8311 bool uns;
8313 *multi_step_cvt = 0;
8314 switch (code)
8316 CASE_CONVERT:
8317 c1 = VEC_PACK_TRUNC_EXPR;
8318 break;
8320 case FIX_TRUNC_EXPR:
8321 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8322 break;
8324 case FLOAT_EXPR:
8325 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8326 tree code and optabs used for computing the operation. */
8327 return false;
8329 default:
8330 gcc_unreachable ();
8333 if (code == FIX_TRUNC_EXPR)
8334 /* The signedness is determined from the output operand. */
8335 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8336 else
8337 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8339 if (!optab1)
8340 return false;
8342 vec_mode = TYPE_MODE (vectype);
8343 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8344 return false;
8346 *code1 = c1;
8348 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8349 return true;
8351 /* Check if it's a multi-step conversion that can be done using intermediate
8352 types. */
8353 prev_mode = vec_mode;
8354 if (code == FIX_TRUNC_EXPR)
8355 uns = TYPE_UNSIGNED (vectype_out);
8356 else
8357 uns = TYPE_UNSIGNED (vectype);
8359 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8360 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8361 costly than signed. */
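/* For instance, a double -> unsigned short conversion can perform the
float-to-integer step as a signed conversion and rely on the following
pack-truncations to keep only the low bits; on many targets the signed
conversion pattern is cheaper than the unsigned one.  */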
8362 if (code == FIX_TRUNC_EXPR && uns)
8364 enum insn_code icode2;
8366 intermediate_type
8367 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8368 interm_optab
8369 = optab_for_tree_code (c1, intermediate_type, optab_default);
8370 if (interm_optab != unknown_optab
8371 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8372 && insn_data[icode1].operand[0].mode
8373 == insn_data[icode2].operand[0].mode)
8375 uns = false;
8376 optab1 = interm_optab;
8377 icode1 = icode2;
8381 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8382 intermediate steps in the demotion sequence. We try
8383 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8384 interm_types->create (MAX_INTERM_CVT_STEPS);
8385 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8387 intermediate_mode = insn_data[icode1].operand[0].mode;
8388 intermediate_type
8389 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8390 interm_optab
8391 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8392 optab_default);
8393 if (!interm_optab
8394 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8395 || insn_data[icode1].operand[0].mode != intermediate_mode
8396 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8397 == CODE_FOR_nothing))
8398 break;
8400 interm_types->quick_push (intermediate_type);
8401 (*multi_step_cvt)++;
8403 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8404 return true;
8406 prev_mode = intermediate_mode;
8407 optab1 = interm_optab;
8410 interm_types->release ();
8411 return false;