gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "alias.h"
28 #include "symtab.h"
29 #include "tree.h"
30 #include "fold-const.h"
31 #include "stor-layout.h"
32 #include "target.h"
33 #include "predict.h"
34 #include "hard-reg-set.h"
35 #include "function.h"
36 #include "dominance.h"
37 #include "cfg.h"
38 #include "basic-block.h"
39 #include "gimple-pretty-print.h"
40 #include "tree-ssa-alias.h"
41 #include "internal-fn.h"
42 #include "tree-eh.h"
43 #include "gimple-expr.h"
44 #include "gimple.h"
45 #include "gimplify.h"
46 #include "gimple-iterator.h"
47 #include "gimplify-me.h"
48 #include "gimple-ssa.h"
49 #include "tree-cfg.h"
50 #include "tree-phinodes.h"
51 #include "ssa-iterators.h"
52 #include "stringpool.h"
53 #include "tree-ssanames.h"
54 #include "tree-ssa-loop-manip.h"
55 #include "cfgloop.h"
56 #include "tree-ssa-loop.h"
57 #include "tree-scalar-evolution.h"
58 #include "rtl.h"
59 #include "flags.h"
60 #include "insn-config.h"
61 #include "expmed.h"
62 #include "dojump.h"
63 #include "explow.h"
64 #include "calls.h"
65 #include "emit-rtl.h"
66 #include "varasm.h"
67 #include "stmt.h"
68 #include "expr.h"
69 #include "recog.h" /* FIXME: for insn_data */
70 #include "insn-codes.h"
71 #include "optabs.h"
72 #include "diagnostic-core.h"
73 #include "tree-vectorizer.h"
74 #include "cgraph.h"
75 #include "builtins.h"
77 /* For lang_hooks.types.type_for_mode. */
78 #include "langhooks.h"
80 /* Return the vectorized type for the given statement. */
82 tree
83 stmt_vectype (struct _stmt_vec_info *stmt_info)
85 return STMT_VINFO_VECTYPE (stmt_info);
88 /* Return TRUE iff the given statement is in an inner loop relative to
89 the loop being vectorized. */
90 bool
91 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
93 gimple stmt = STMT_VINFO_STMT (stmt_info);
94 basic_block bb = gimple_bb (stmt);
95 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
96 struct loop* loop;
98 if (!loop_vinfo)
99 return false;
101 loop = LOOP_VINFO_LOOP (loop_vinfo);
103 return (bb->loop_father == loop->inner);
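/* Illustration (assumed loop nest, not taken from a particular testcase):
   when the outer loop of

     for (i ...)        <-- loop being vectorized
       for (j ...)      <-- loop->inner
         S1: ... = a[i][j];

   is vectorized, stmt_in_inner_loop_p (S1) is true because S1's block has
   loop->inner as its loop_father; stmts in the "for (i)" body itself
   yield false.  */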
106 /* Record the cost of a statement, either by directly informing the
107 target model or by saving it in a vector for later processing.
108 Return a preliminary estimate of the statement's cost. */
110 unsigned
111 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
112 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
113 int misalign, enum vect_cost_model_location where)
115 if (body_cost_vec)
117 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
118 add_stmt_info_to_vec (body_cost_vec, count, kind,
119 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
120 misalign);
121 return (unsigned)
122 (builtin_vectorization_cost (kind, vectype, misalign) * count);
125 else
127 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
128 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
129 void *target_cost_data;
131 if (loop_vinfo)
132 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
133 else
134 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
136 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
137 misalign, where);
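/* Typical use, as in vect_model_simple_cost further down: the per-copy
   cost of a plain vector statement in the loop body is recorded with

     inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                     stmt_info, 0, vect_body);

   and invariant operands add one-off entries under vect_prologue.  */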
141 /* Return a variable of type ELEM_TYPE[NELEMS]. */
143 static tree
144 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
146 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
147 "vect_array");
150 /* ARRAY is an array of vectors created by create_vector_array.
151 Return an SSA_NAME for the vector in index N. The reference
152 is part of the vectorization of STMT and the vector is associated
153 with scalar destination SCALAR_DEST. */
155 static tree
156 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
157 tree array, unsigned HOST_WIDE_INT n)
159 tree vect_type, vect, vect_name, array_ref;
160 gimple new_stmt;
162 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
163 vect_type = TREE_TYPE (TREE_TYPE (array));
164 vect = vect_create_destination_var (scalar_dest, vect_type);
165 array_ref = build4 (ARRAY_REF, vect_type, array,
166 build_int_cst (size_type_node, n),
167 NULL_TREE, NULL_TREE);
169 new_stmt = gimple_build_assign (vect, array_ref);
170 vect_name = make_ssa_name (vect, new_stmt);
171 gimple_assign_set_lhs (new_stmt, vect_name);
172 vect_finish_stmt_generation (stmt, new_stmt, gsi);
174 return vect_name;
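/* The statements generated above amount to (illustrative SSA names):

     vect_x_1 = vect_array[N];

   i.e. a single ARRAY_REF read whose new LHS SSA_NAME is returned.  */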
177 /* ARRAY is an array of vectors created by create_vector_array.
178 Emit code to store SSA_NAME VECT in index N of the array.
179 The store is part of the vectorization of STMT. */
181 static void
182 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
183 tree array, unsigned HOST_WIDE_INT n)
185 tree array_ref;
186 gimple new_stmt;
188 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
189 build_int_cst (size_type_node, n),
190 NULL_TREE, NULL_TREE);
192 new_stmt = gimple_build_assign (array_ref, vect);
193 vect_finish_stmt_generation (stmt, new_stmt, gsi);
196 /* PTR is a pointer to an array of type TYPE. Return a representation
197 of *PTR. The memory reference replaces those in FIRST_DR
198 (and its group). */
200 static tree
201 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
203 tree mem_ref, alias_ptr_type;
205 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
206 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
207 /* Arrays have the same alignment as their type. */
208 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
209 return mem_ref;
212 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
214 /* Function vect_mark_relevant.
216 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
218 static void
219 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
220 enum vect_relevant relevant, bool live_p,
221 bool used_in_pattern)
223 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
224 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
225 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
226 gimple pattern_stmt;
228 if (dump_enabled_p ())
229 dump_printf_loc (MSG_NOTE, vect_location,
230 "mark relevant %d, live %d.\n", relevant, live_p);
232 /* If this stmt is an original stmt in a pattern, we might need to mark its
233 related pattern stmt instead of the original stmt. However, such stmts
234 may have their own uses that are not in any pattern, in such cases the
235 stmt itself should be marked. */
236 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
238 bool found = false;
239 if (!used_in_pattern)
241 imm_use_iterator imm_iter;
242 use_operand_p use_p;
243 gimple use_stmt;
244 tree lhs;
245 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
246 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
248 if (is_gimple_assign (stmt))
249 lhs = gimple_assign_lhs (stmt);
250 else
251 lhs = gimple_call_lhs (stmt);
253 /* This use is outside the pattern. If LHS has other uses that are
254 pattern uses, we should mark the stmt itself, and not the pattern
255 stmt. */
256 if (lhs && TREE_CODE (lhs) == SSA_NAME)
257 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
259 if (is_gimple_debug (USE_STMT (use_p)))
260 continue;
261 use_stmt = USE_STMT (use_p);
263 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
264 continue;
266 if (vinfo_for_stmt (use_stmt)
267 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
269 found = true;
270 break;
275 if (!found)
277 /* This is the last stmt in a sequence that was detected as a
278 pattern that can potentially be vectorized. Don't mark the stmt
279 as relevant/live because it's not going to be vectorized.
280 Instead mark the pattern-stmt that replaces it. */
282 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
284 if (dump_enabled_p ())
285 dump_printf_loc (MSG_NOTE, vect_location,
286 "last stmt in pattern. don't mark"
287 " relevant/live.\n");
288 stmt_info = vinfo_for_stmt (pattern_stmt);
289 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
290 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
291 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
292 stmt = pattern_stmt;
296 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
297 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
298 STMT_VINFO_RELEVANT (stmt_info) = relevant;
300 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
301 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
303 if (dump_enabled_p ())
304 dump_printf_loc (MSG_NOTE, vect_location,
305 "already marked relevant/live.\n");
306 return;
309 worklist->safe_push (stmt);
313 /* Function vect_stmt_relevant_p.
315 Return true if STMT in loop that is represented by LOOP_VINFO is
316 "relevant for vectorization".
318 A stmt is considered "relevant for vectorization" if:
319 - it has uses outside the loop.
320 - it has vdefs (it alters memory).
321 - it is a control stmt in the loop (except for the exit condition).
323 CHECKME: what other side effects would the vectorizer allow? */
325 static bool
326 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
327 enum vect_relevant *relevant, bool *live_p)
329 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
330 ssa_op_iter op_iter;
331 imm_use_iterator imm_iter;
332 use_operand_p use_p;
333 def_operand_p def_p;
335 *relevant = vect_unused_in_scope;
336 *live_p = false;
338 /* cond stmt other than loop exit cond. */
339 if (is_ctrl_stmt (stmt)
340 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
341 != loop_exit_ctrl_vec_info_type)
342 *relevant = vect_used_in_scope;
344 /* changing memory. */
345 if (gimple_code (stmt) != GIMPLE_PHI)
346 if (gimple_vdef (stmt)
347 && !gimple_clobber_p (stmt))
349 if (dump_enabled_p ())
350 dump_printf_loc (MSG_NOTE, vect_location,
351 "vec_stmt_relevant_p: stmt has vdefs.\n");
352 *relevant = vect_used_in_scope;
355 /* uses outside the loop. */
356 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
358 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
360 basic_block bb = gimple_bb (USE_STMT (use_p));
361 if (!flow_bb_inside_loop_p (loop, bb))
363 if (dump_enabled_p ())
364 dump_printf_loc (MSG_NOTE, vect_location,
365 "vec_stmt_relevant_p: used out of loop.\n");
367 if (is_gimple_debug (USE_STMT (use_p)))
368 continue;
370 /* We expect all such uses to be in the loop exit phis
371 (because of loop-closed SSA form). */
372 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
373 gcc_assert (bb == single_exit (loop)->dest);
375 *live_p = true;
380 return (*live_p || *relevant);
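/* Illustration (assumed loop, not from this file): in

     for (i ...) { sum_2 = sum_1 + a[i]; b[i] = x_3; }
     ... = sum_2;              <-- use after the loop

   the store to b[i] has a vdef, so *relevant is set to
   vect_used_in_scope, and sum_2 is used outside the loop (via the
   loop-closed exit phi), so *live_p becomes true.  */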
384 /* Function exist_non_indexing_operands_for_use_p
386 USE is one of the uses attached to STMT. Check if USE is
387 used in STMT for anything other than indexing an array. */
389 static bool
390 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
392 tree operand;
393 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
395 /* USE corresponds to some operand in STMT. If there is no data
396 reference in STMT, then any operand that corresponds to USE
397 is not indexing an array. */
398 if (!STMT_VINFO_DATA_REF (stmt_info))
399 return true;
401 /* STMT has a data_ref. FORNOW this means that it is of one of
402 the following forms:
403 -1- ARRAY_REF = var
404 -2- var = ARRAY_REF
405 (This should have been verified in analyze_data_refs).
407 'var' in the second case corresponds to a def, not a use,
408 so USE cannot correspond to any operands that are not used
409 for array indexing.
411 Therefore, all we need to check is if STMT falls into the
412 first case, and whether var corresponds to USE. */
414 if (!gimple_assign_copy_p (stmt))
416 if (is_gimple_call (stmt)
417 && gimple_call_internal_p (stmt))
418 switch (gimple_call_internal_fn (stmt))
420 case IFN_MASK_STORE:
421 operand = gimple_call_arg (stmt, 3);
422 if (operand == use)
423 return true;
424 /* FALLTHRU */
425 case IFN_MASK_LOAD:
426 operand = gimple_call_arg (stmt, 2);
427 if (operand == use)
428 return true;
429 break;
430 default:
431 break;
433 return false;
436 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
437 return false;
438 operand = gimple_assign_rhs1 (stmt);
439 if (TREE_CODE (operand) != SSA_NAME)
440 return false;
442 if (operand == use)
443 return true;
445 return false;
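/* Example (hypothetical stmt): for the store "a[i_4] = x_1" the use
   x_1 is the stored value, so the function returns true for it; a use
   such as i_4 that only appears in the ARRAY_REF index returns false,
   since it is needed only for address computation.  */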
450 /* Function process_use.
452 Inputs:
453 - a USE in STMT in a loop represented by LOOP_VINFO
454 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
455 that defined USE. This is done by calling mark_relevant and passing it
456 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
457 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
458 be performed.
460 Outputs:
461 Generally, LIVE_P and RELEVANT are used to define the liveness and
462 relevance info of the DEF_STMT of this USE:
463 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
464 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
465 Exceptions:
466 - case 1: If USE is used only for address computations (e.g. array indexing),
467 which does not need to be directly vectorized, then the liveness/relevance
468 of the respective DEF_STMT is left unchanged.
469 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
470 skip DEF_STMT because it has already been processed.
471 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
472 be modified accordingly.
474 Return true if everything is as expected. Return false otherwise. */
476 static bool
477 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
478 enum vect_relevant relevant, vec<gimple> *worklist,
479 bool force)
481 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
482 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
483 stmt_vec_info dstmt_vinfo;
484 basic_block bb, def_bb;
485 tree def;
486 gimple def_stmt;
487 enum vect_def_type dt;
489 /* case 1: we are only interested in uses that need to be vectorized. Uses
490 that are used for address computation are not considered relevant. */
491 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
492 return true;
494 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
496 if (dump_enabled_p ())
497 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
498 "not vectorized: unsupported use in stmt.\n");
499 return false;
502 if (!def_stmt || gimple_nop_p (def_stmt))
503 return true;
505 def_bb = gimple_bb (def_stmt);
506 if (!flow_bb_inside_loop_p (loop, def_bb))
508 if (dump_enabled_p ())
509 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
510 return true;
513 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
514 DEF_STMT must have already been processed, because this should be the
515 only way that STMT, which is a reduction-phi, was put in the worklist,
516 as there should be no other uses for DEF_STMT in the loop. So we just
517 check that everything is as expected, and we are done. */
518 dstmt_vinfo = vinfo_for_stmt (def_stmt);
519 bb = gimple_bb (stmt);
520 if (gimple_code (stmt) == GIMPLE_PHI
521 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
522 && gimple_code (def_stmt) != GIMPLE_PHI
523 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
524 && bb->loop_father == def_bb->loop_father)
526 if (dump_enabled_p ())
527 dump_printf_loc (MSG_NOTE, vect_location,
528 "reduc-stmt defining reduc-phi in the same nest.\n");
529 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
530 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
531 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
532 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
533 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
534 return true;
537 /* case 3a: outer-loop stmt defining an inner-loop stmt:
538 outer-loop-header-bb:
539 d = def_stmt
540 inner-loop:
541 stmt # use (d)
542 outer-loop-tail-bb:
543 ... */
544 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
546 if (dump_enabled_p ())
547 dump_printf_loc (MSG_NOTE, vect_location,
548 "outer-loop def-stmt defining inner-loop stmt.\n");
550 switch (relevant)
552 case vect_unused_in_scope:
553 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
554 vect_used_in_scope : vect_unused_in_scope;
555 break;
557 case vect_used_in_outer_by_reduction:
558 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
559 relevant = vect_used_by_reduction;
560 break;
562 case vect_used_in_outer:
563 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
564 relevant = vect_used_in_scope;
565 break;
567 case vect_used_in_scope:
568 break;
570 default:
571 gcc_unreachable ();
575 /* case 3b: inner-loop stmt defining an outer-loop stmt:
576 outer-loop-header-bb:
578 inner-loop:
579 d = def_stmt
580 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
581 stmt # use (d) */
582 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
584 if (dump_enabled_p ())
585 dump_printf_loc (MSG_NOTE, vect_location,
586 "inner-loop def-stmt defining outer-loop stmt.\n");
588 switch (relevant)
590 case vect_unused_in_scope:
591 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
592 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
593 vect_used_in_outer_by_reduction : vect_unused_in_scope;
594 break;
596 case vect_used_by_reduction:
597 relevant = vect_used_in_outer_by_reduction;
598 break;
600 case vect_used_in_scope:
601 relevant = vect_used_in_outer;
602 break;
604 default:
605 gcc_unreachable ();
609 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
610 is_pattern_stmt_p (stmt_vinfo));
611 return true;
615 /* Function vect_mark_stmts_to_be_vectorized.
617 Not all stmts in the loop need to be vectorized. For example:
619 for i...
620 for j...
621 1. T0 = i + j
622 2. T1 = a[T0]
624 3. j = j + 1
626 Stmts 1 and 3 do not need to be vectorized, because loop control and
627 addressing of vectorized data-refs are handled differently.
629 This pass detects such stmts. */
631 bool
632 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
634 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
635 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
636 unsigned int nbbs = loop->num_nodes;
637 gimple_stmt_iterator si;
638 gimple stmt;
639 unsigned int i;
640 stmt_vec_info stmt_vinfo;
641 basic_block bb;
642 gimple phi;
643 bool live_p;
644 enum vect_relevant relevant, tmp_relevant;
645 enum vect_def_type def_type;
647 if (dump_enabled_p ())
648 dump_printf_loc (MSG_NOTE, vect_location,
649 "=== vect_mark_stmts_to_be_vectorized ===\n");
651 auto_vec<gimple, 64> worklist;
653 /* 1. Init worklist. */
654 for (i = 0; i < nbbs; i++)
656 bb = bbs[i];
657 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
659 phi = gsi_stmt (si);
660 if (dump_enabled_p ())
662 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
663 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
666 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
667 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
669 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
671 stmt = gsi_stmt (si);
672 if (dump_enabled_p ())
674 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
675 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
678 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
679 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
683 /* 2. Process_worklist */
684 while (worklist.length () > 0)
686 use_operand_p use_p;
687 ssa_op_iter iter;
689 stmt = worklist.pop ();
690 if (dump_enabled_p ())
692 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
693 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
696 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
697 (DEF_STMT) as relevant/irrelevant and live/dead according to the
698 liveness and relevance properties of STMT. */
699 stmt_vinfo = vinfo_for_stmt (stmt);
700 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
701 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
703 /* Generally, the liveness and relevance properties of STMT are
704 propagated as is to the DEF_STMTs of its USEs:
705 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
706 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
708 One exception is when STMT has been identified as defining a reduction
709 variable; in this case we set the liveness/relevance as follows:
710 live_p = false
711 relevant = vect_used_by_reduction
712 This is because we distinguish between two kinds of relevant stmts -
713 those that are used by a reduction computation, and those that are
714 (also) used by a regular computation. This allows us later on to
715 identify stmts that are used solely by a reduction, and therefore the
716 order of the results that they produce does not have to be kept. */
718 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
719 tmp_relevant = relevant;
720 switch (def_type)
722 case vect_reduction_def:
723 switch (tmp_relevant)
725 case vect_unused_in_scope:
726 relevant = vect_used_by_reduction;
727 break;
729 case vect_used_by_reduction:
730 if (gimple_code (stmt) == GIMPLE_PHI)
731 break;
732 /* fall through */
734 default:
735 if (dump_enabled_p ())
736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
737 "unsupported use of reduction.\n");
738 return false;
741 live_p = false;
742 break;
744 case vect_nested_cycle:
745 if (tmp_relevant != vect_unused_in_scope
746 && tmp_relevant != vect_used_in_outer_by_reduction
747 && tmp_relevant != vect_used_in_outer)
749 if (dump_enabled_p ())
750 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
751 "unsupported use of nested cycle.\n");
753 return false;
756 live_p = false;
757 break;
759 case vect_double_reduction_def:
760 if (tmp_relevant != vect_unused_in_scope
761 && tmp_relevant != vect_used_by_reduction)
763 if (dump_enabled_p ())
764 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
765 "unsupported use of double reduction.\n");
767 return false;
770 live_p = false;
771 break;
773 default:
774 break;
777 if (is_pattern_stmt_p (stmt_vinfo))
779 /* Pattern statements are not inserted into the code, so
780 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
781 have to scan the RHS or function arguments instead. */
782 if (is_gimple_assign (stmt))
784 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
785 tree op = gimple_assign_rhs1 (stmt);
787 i = 1;
788 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
790 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
791 live_p, relevant, &worklist, false)
792 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
793 live_p, relevant, &worklist, false))
794 return false;
795 i = 2;
797 for (; i < gimple_num_ops (stmt); i++)
799 op = gimple_op (stmt, i);
800 if (TREE_CODE (op) == SSA_NAME
801 && !process_use (stmt, op, loop_vinfo, live_p, relevant,
802 &worklist, false))
803 return false;
806 else if (is_gimple_call (stmt))
808 for (i = 0; i < gimple_call_num_args (stmt); i++)
810 tree arg = gimple_call_arg (stmt, i);
811 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
812 &worklist, false))
813 return false;
817 else
818 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
820 tree op = USE_FROM_PTR (use_p);
821 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
822 &worklist, false))
823 return false;
826 if (STMT_VINFO_GATHER_P (stmt_vinfo))
828 tree off;
829 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
830 gcc_assert (decl);
831 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
832 &worklist, true))
833 return false;
835 } /* while worklist */
837 return true;
841 /* Function vect_model_simple_cost.
843 Models cost for simple operations, i.e. those that only emit ncopies of a
844 single op. Right now, this does not account for multiple insns that could
845 be generated for the single vector op. We will handle that shortly. */
847 void
848 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
849 enum vect_def_type *dt,
850 stmt_vector_for_cost *prologue_cost_vec,
851 stmt_vector_for_cost *body_cost_vec)
853 int i;
854 int inside_cost = 0, prologue_cost = 0;
856 /* The SLP costs were already calculated during SLP tree build. */
857 if (PURE_SLP_STMT (stmt_info))
858 return;
860 /* FORNOW: Assuming maximum 2 args per stmt. */
861 for (i = 0; i < 2; i++)
862 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
863 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
864 stmt_info, 0, vect_prologue);
866 /* Pass the inside-of-loop statements to the target-specific cost model. */
867 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
868 stmt_info, 0, vect_body);
870 if (dump_enabled_p ())
871 dump_printf_loc (MSG_NOTE, vect_location,
872 "vect_model_simple_cost: inside_cost = %d, "
873 "prologue_cost = %d .\n", inside_cost, prologue_cost);
877 /* Model cost for type demotion and promotion operations. PWR is normally
878 zero for single-step promotions and demotions. It will be one if
879 two-step promotion/demotion is required, and so on. Each additional
880 step doubles the number of instructions required. */
882 static void
883 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
884 enum vect_def_type *dt, int pwr)
886 int i, tmp;
887 int inside_cost = 0, prologue_cost = 0;
888 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
889 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
890 void *target_cost_data;
892 /* The SLP costs were already calculated during SLP tree build. */
893 if (PURE_SLP_STMT (stmt_info))
894 return;
896 if (loop_vinfo)
897 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
898 else
899 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
901 for (i = 0; i < pwr + 1; i++)
903 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
904 (i + 1) : i;
905 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
906 vec_promote_demote, stmt_info, 0,
907 vect_body);
910 /* FORNOW: Assuming maximum 2 args per stmt. */
911 for (i = 0; i < 2; i++)
912 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
913 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
914 stmt_info, 0, vect_prologue);
916 if (dump_enabled_p ())
917 dump_printf_loc (MSG_NOTE, vect_location,
918 "vect_model_promotion_demotion_cost: inside_cost = %d, "
919 "prologue_cost = %d .\n", inside_cost, prologue_cost);
922 /* Function vect_cost_group_size
924 For grouped load or store, return the group_size only if it is the first
925 load or store of a group, else return 1. This ensures that group size is
926 only returned once per group. */
928 static int
929 vect_cost_group_size (stmt_vec_info stmt_info)
931 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
933 if (first_stmt == STMT_VINFO_STMT (stmt_info))
934 return GROUP_SIZE (stmt_info);
936 return 1;
940 /* Function vect_model_store_cost
942 Models cost for stores. In the case of grouped accesses, one access
943 has the overhead of the grouped access attributed to it. */
945 void
946 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
947 bool store_lanes_p, enum vect_def_type dt,
948 slp_tree slp_node,
949 stmt_vector_for_cost *prologue_cost_vec,
950 stmt_vector_for_cost *body_cost_vec)
952 int group_size;
953 unsigned int inside_cost = 0, prologue_cost = 0;
954 struct data_reference *first_dr;
955 gimple first_stmt;
957 if (dt == vect_constant_def || dt == vect_external_def)
958 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
959 stmt_info, 0, vect_prologue);
961 /* Grouped access? */
962 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
964 if (slp_node)
966 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
967 group_size = 1;
969 else
971 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
972 group_size = vect_cost_group_size (stmt_info);
975 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
977 /* Not a grouped access. */
978 else
980 group_size = 1;
981 first_dr = STMT_VINFO_DATA_REF (stmt_info);
984 /* We assume that the cost of a single store-lanes instruction is
985 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
986 access is instead being provided by a permute-and-store operation,
987 include the cost of the permutes. */
988 if (!store_lanes_p && group_size > 1
989 && !STMT_VINFO_STRIDED_P (stmt_info))
991 /* Uses high and low interleave operations or shuffle operations
992 for each needed permute. */
993 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
994 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
995 stmt_info, 0, vect_body);
997 if (dump_enabled_p ())
998 dump_printf_loc (MSG_NOTE, vect_location,
999 "vect_model_store_cost: strided group_size = %d .\n",
1000 group_size);
1003 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1004 /* Costs of the stores. */
1005 if (STMT_VINFO_STRIDED_P (stmt_info)
1006 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1008 /* N scalar stores plus extracting the elements. */
1009 inside_cost += record_stmt_cost (body_cost_vec,
1010 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1011 scalar_store, stmt_info, 0, vect_body);
1013 else
1014 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1016 if (STMT_VINFO_STRIDED_P (stmt_info))
1017 inside_cost += record_stmt_cost (body_cost_vec,
1018 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1019 vec_to_scalar, stmt_info, 0, vect_body);
1021 if (dump_enabled_p ())
1022 dump_printf_loc (MSG_NOTE, vect_location,
1023 "vect_model_store_cost: inside_cost = %d, "
1024 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1028 /* Calculate cost of DR's memory access. */
1029 void
1030 vect_get_store_cost (struct data_reference *dr, int ncopies,
1031 unsigned int *inside_cost,
1032 stmt_vector_for_cost *body_cost_vec)
1034 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1035 gimple stmt = DR_STMT (dr);
1036 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1038 switch (alignment_support_scheme)
1040 case dr_aligned:
1042 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1043 vector_store, stmt_info, 0,
1044 vect_body);
1046 if (dump_enabled_p ())
1047 dump_printf_loc (MSG_NOTE, vect_location,
1048 "vect_model_store_cost: aligned.\n");
1049 break;
1052 case dr_unaligned_supported:
1054 /* Here, we assign an additional cost for the unaligned store. */
1055 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1056 unaligned_store, stmt_info,
1057 DR_MISALIGNMENT (dr), vect_body);
1058 if (dump_enabled_p ())
1059 dump_printf_loc (MSG_NOTE, vect_location,
1060 "vect_model_store_cost: unaligned supported by "
1061 "hardware.\n");
1062 break;
1065 case dr_unaligned_unsupported:
1067 *inside_cost = VECT_MAX_COST;
1069 if (dump_enabled_p ())
1070 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1071 "vect_model_store_cost: unsupported access.\n");
1072 break;
1075 default:
1076 gcc_unreachable ();
1081 /* Function vect_model_load_cost
1083 Models cost for loads. In the case of grouped accesses, the last access
1084 has the overhead of the grouped access attributed to it. Since unaligned
1085 accesses are supported for loads, we also account for the costs of the
1086 access scheme chosen. */
1088 void
1089 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1090 bool load_lanes_p, slp_tree slp_node,
1091 stmt_vector_for_cost *prologue_cost_vec,
1092 stmt_vector_for_cost *body_cost_vec)
1094 int group_size;
1095 gimple first_stmt;
1096 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1097 unsigned int inside_cost = 0, prologue_cost = 0;
1099 /* Grouped accesses? */
1100 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1101 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1103 group_size = vect_cost_group_size (stmt_info);
1104 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1106 /* Not a grouped access. */
1107 else
1109 group_size = 1;
1110 first_dr = dr;
1113 /* We assume that the cost of a single load-lanes instruction is
1114 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1115 access is instead being provided by a load-and-permute operation,
1116 include the cost of the permutes. */
1117 if (!load_lanes_p && group_size > 1
1118 && !STMT_VINFO_STRIDED_P (stmt_info))
1120 /* Uses even and odd extract operations or shuffle operations
1121 for each needed permute. */
1122 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1123 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1124 stmt_info, 0, vect_body);
1126 if (dump_enabled_p ())
1127 dump_printf_loc (MSG_NOTE, vect_location,
1128 "vect_model_load_cost: strided group_size = %d .\n",
1129 group_size);
1132 /* The loads themselves. */
1133 if (STMT_VINFO_STRIDED_P (stmt_info)
1134 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1136 /* N scalar loads plus gathering them into a vector. */
1137 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1138 inside_cost += record_stmt_cost (body_cost_vec,
1139 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1140 scalar_load, stmt_info, 0, vect_body);
1142 else
1143 vect_get_load_cost (first_dr, ncopies,
1144 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1145 || group_size > 1 || slp_node),
1146 &inside_cost, &prologue_cost,
1147 prologue_cost_vec, body_cost_vec, true);
1148 if (STMT_VINFO_STRIDED_P (stmt_info))
1149 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1150 stmt_info, 0, vect_body);
1152 if (dump_enabled_p ())
1153 dump_printf_loc (MSG_NOTE, vect_location,
1154 "vect_model_load_cost: inside_cost = %d, "
1155 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1159 /* Calculate cost of DR's memory access. */
1160 void
1161 vect_get_load_cost (struct data_reference *dr, int ncopies,
1162 bool add_realign_cost, unsigned int *inside_cost,
1163 unsigned int *prologue_cost,
1164 stmt_vector_for_cost *prologue_cost_vec,
1165 stmt_vector_for_cost *body_cost_vec,
1166 bool record_prologue_costs)
1168 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1169 gimple stmt = DR_STMT (dr);
1170 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1172 switch (alignment_support_scheme)
1174 case dr_aligned:
1176 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1177 stmt_info, 0, vect_body);
1179 if (dump_enabled_p ())
1180 dump_printf_loc (MSG_NOTE, vect_location,
1181 "vect_model_load_cost: aligned.\n");
1183 break;
1185 case dr_unaligned_supported:
1187 /* Here, we assign an additional cost for the unaligned load. */
1188 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1189 unaligned_load, stmt_info,
1190 DR_MISALIGNMENT (dr), vect_body);
1192 if (dump_enabled_p ())
1193 dump_printf_loc (MSG_NOTE, vect_location,
1194 "vect_model_load_cost: unaligned supported by "
1195 "hardware.\n");
1197 break;
1199 case dr_explicit_realign:
1201 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1202 vector_load, stmt_info, 0, vect_body);
1203 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1204 vec_perm, stmt_info, 0, vect_body);
1206 /* FIXME: If the misalignment remains fixed across the iterations of
1207 the containing loop, the following cost should be added to the
1208 prologue costs. */
1209 if (targetm.vectorize.builtin_mask_for_load)
1210 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1211 stmt_info, 0, vect_body);
1213 if (dump_enabled_p ())
1214 dump_printf_loc (MSG_NOTE, vect_location,
1215 "vect_model_load_cost: explicit realign\n");
1217 break;
1219 case dr_explicit_realign_optimized:
1221 if (dump_enabled_p ())
1222 dump_printf_loc (MSG_NOTE, vect_location,
1223 "vect_model_load_cost: unaligned software "
1224 "pipelined.\n");
1226 /* Unaligned software pipeline has a load of an address, an initial
1227 load, and possibly a mask operation to "prime" the loop. However,
1228 if this is an access in a group of loads, which provide grouped
1229 access, then the above cost should only be considered for one
1230 access in the group. Inside the loop, there is a load op
1231 and a realignment op. */
1233 if (add_realign_cost && record_prologue_costs)
1235 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1236 vector_stmt, stmt_info,
1237 0, vect_prologue);
1238 if (targetm.vectorize.builtin_mask_for_load)
1239 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1240 vector_stmt, stmt_info,
1241 0, vect_prologue);
1244 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1245 stmt_info, 0, vect_body);
1246 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1247 stmt_info, 0, vect_body);
1249 if (dump_enabled_p ())
1250 dump_printf_loc (MSG_NOTE, vect_location,
1251 "vect_model_load_cost: explicit realign optimized"
1252 "\n");
1254 break;
1257 case dr_unaligned_unsupported:
1259 *inside_cost = VECT_MAX_COST;
1261 if (dump_enabled_p ())
1262 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1263 "vect_model_load_cost: unsupported access.\n");
1264 break;
1267 default:
1268 gcc_unreachable ();
1272 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1273 the loop preheader for the vectorized stmt STMT. */
1275 static void
1276 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1278 if (gsi)
1279 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1280 else
1282 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1283 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1285 if (loop_vinfo)
1287 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1288 basic_block new_bb;
1289 edge pe;
1291 if (nested_in_vect_loop_p (loop, stmt))
1292 loop = loop->inner;
1294 pe = loop_preheader_edge (loop);
1295 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1296 gcc_assert (!new_bb);
1298 else
1300 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1301 basic_block bb;
1302 gimple_stmt_iterator gsi_bb_start;
1304 gcc_assert (bb_vinfo);
1305 bb = BB_VINFO_BB (bb_vinfo);
1306 gsi_bb_start = gsi_after_labels (bb);
1307 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1311 if (dump_enabled_p ())
1313 dump_printf_loc (MSG_NOTE, vect_location,
1314 "created new init_stmt: ");
1315 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1319 /* Function vect_init_vector.
1321 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1322 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1323 vector type a vector with all elements equal to VAL is created first.
1324 Place the initialization at BSI if it is not NULL. Otherwise, place the
1325 initialization at the loop preheader.
1326 Return the DEF of INIT_STMT.
1327 It will be used in the vectorization of STMT. */
1329 tree
1330 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1332 tree new_var;
1333 gimple init_stmt;
1334 tree vec_oprnd;
1335 tree new_temp;
1337 if (TREE_CODE (type) == VECTOR_TYPE
1338 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1340 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1342 if (CONSTANT_CLASS_P (val))
1343 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1344 else
1346 new_temp = make_ssa_name (TREE_TYPE (type));
1347 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1348 vect_init_vector_1 (stmt, init_stmt, gsi);
1349 val = new_temp;
1352 val = build_vector_from_val (type, val);
1355 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1356 init_stmt = gimple_build_assign (new_var, val);
1357 new_temp = make_ssa_name (new_var, init_stmt);
1358 gimple_assign_set_lhs (init_stmt, new_temp);
1359 vect_init_vector_1 (stmt, init_stmt, gsi);
1360 vec_oprnd = gimple_assign_lhs (init_stmt);
1361 return vec_oprnd;
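/* For example (illustrative), initializing a V4SI operand from the
   scalar constant 5 emits, at GSI or in the loop preheader,

     cst_1 = { 5, 5, 5, 5 };

   and returns the SSA name cst_1 for use in the vectorized stmt.  */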
1365 /* Function vect_get_vec_def_for_operand.
1367 OP is an operand in STMT. This function returns a (vector) def that will be
1368 used in the vectorized stmt for STMT.
1370 In the case that OP is an SSA_NAME which is defined in the loop, then
1371 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1373 In case OP is an invariant or constant, a new stmt that creates a vector def
1374 needs to be introduced. */
1376 tree
1377 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1379 tree vec_oprnd;
1380 gimple vec_stmt;
1381 gimple def_stmt;
1382 stmt_vec_info def_stmt_info = NULL;
1383 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1384 unsigned int nunits;
1385 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1386 tree def;
1387 enum vect_def_type dt;
1388 bool is_simple_use;
1389 tree vector_type;
1391 if (dump_enabled_p ())
1393 dump_printf_loc (MSG_NOTE, vect_location,
1394 "vect_get_vec_def_for_operand: ");
1395 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1396 dump_printf (MSG_NOTE, "\n");
1399 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1400 &def_stmt, &def, &dt);
1401 gcc_assert (is_simple_use);
1402 if (dump_enabled_p ())
1404 int loc_printed = 0;
1405 if (def)
1407 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1408 loc_printed = 1;
1409 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1410 dump_printf (MSG_NOTE, "\n");
1412 if (def_stmt)
1414 if (loc_printed)
1415 dump_printf (MSG_NOTE, " def_stmt = ");
1416 else
1417 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1418 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1422 switch (dt)
1424 /* Case 1: operand is a constant. */
1425 case vect_constant_def:
1427 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1428 gcc_assert (vector_type);
1429 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1431 if (scalar_def)
1432 *scalar_def = op;
1434 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1435 if (dump_enabled_p ())
1436 dump_printf_loc (MSG_NOTE, vect_location,
1437 "Create vector_cst. nunits = %d\n", nunits);
1439 return vect_init_vector (stmt, op, vector_type, NULL);
1442 /* Case 2: operand is defined outside the loop - loop invariant. */
1443 case vect_external_def:
1445 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1446 gcc_assert (vector_type);
1448 if (scalar_def)
1449 *scalar_def = def;
1451 /* Create 'vec_inv = {inv,inv,..,inv}' */
1452 if (dump_enabled_p ())
1453 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1455 return vect_init_vector (stmt, def, vector_type, NULL);
1458 /* Case 3: operand is defined inside the loop. */
1459 case vect_internal_def:
1461 if (scalar_def)
1462 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1464 /* Get the def from the vectorized stmt. */
1465 def_stmt_info = vinfo_for_stmt (def_stmt);
1467 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1468 /* Get vectorized pattern statement. */
1469 if (!vec_stmt
1470 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1471 && !STMT_VINFO_RELEVANT (def_stmt_info))
1472 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1473 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1474 gcc_assert (vec_stmt);
1475 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1476 vec_oprnd = PHI_RESULT (vec_stmt);
1477 else if (is_gimple_call (vec_stmt))
1478 vec_oprnd = gimple_call_lhs (vec_stmt);
1479 else
1480 vec_oprnd = gimple_assign_lhs (vec_stmt);
1481 return vec_oprnd;
1484 /* Case 4: operand is defined by a loop header phi - reduction */
1485 case vect_reduction_def:
1486 case vect_double_reduction_def:
1487 case vect_nested_cycle:
1489 struct loop *loop;
1491 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1492 loop = (gimple_bb (def_stmt))->loop_father;
1494 /* Get the def before the loop */
1495 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1496 return get_initial_def_for_reduction (stmt, op, scalar_def);
1499 /* Case 5: operand is defined by loop-header phi - induction. */
1500 case vect_induction_def:
1502 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1504 /* Get the def from the vectorized stmt. */
1505 def_stmt_info = vinfo_for_stmt (def_stmt);
1506 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1507 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1508 vec_oprnd = PHI_RESULT (vec_stmt);
1509 else
1510 vec_oprnd = gimple_get_lhs (vec_stmt);
1511 return vec_oprnd;
1514 default:
1515 gcc_unreachable ();
1520 /* Function vect_get_vec_def_for_stmt_copy
1522 Return a vector-def for an operand. This function is used when the
1523 vectorized stmt to be created (by the caller to this function) is a "copy"
1524 created in case the vectorized result cannot fit in one vector, and several
1525 copies of the vector-stmt are required. In this case the vector-def is
1526 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1527 of the stmt that defines VEC_OPRND.
1528 DT is the type of the vector def VEC_OPRND.
1530 Context:
1531 In case the vectorization factor (VF) is bigger than the number
1532 of elements that can fit in a vectype (nunits), we have to generate
1533 more than one vector stmt to vectorize the scalar stmt. This situation
1534 arises when there are multiple data-types operated upon in the loop; the
1535 smallest data-type determines the VF, and as a result, when vectorizing
1536 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1537 vector stmt (each computing a vector of 'nunits' results, and together
1538 computing 'VF' results in each iteration). This function is called when
1539 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1540 which VF=16 and nunits=4, so the number of copies required is 4):
1542 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1544 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1545 VS1.1: vx.1 = memref1 VS1.2
1546 VS1.2: vx.2 = memref2 VS1.3
1547 VS1.3: vx.3 = memref3
1549 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1550 VSnew.1: vz1 = vx.1 + ... VSnew.2
1551 VSnew.2: vz2 = vx.2 + ... VSnew.3
1552 VSnew.3: vz3 = vx.3 + ...
1554 The vectorization of S1 is explained in vectorizable_load.
1555 The vectorization of S2:
1556 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1557 the function 'vect_get_vec_def_for_operand' is called to
1558 get the relevant vector-def for each operand of S2. For operand x it
1559 returns the vector-def 'vx.0'.
1561 To create the remaining copies of the vector-stmt (VSnew.j), this
1562 function is called to get the relevant vector-def for each operand. It is
1563 obtained from the respective VS1.j stmt, which is recorded in the
1564 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1566 For example, to obtain the vector-def 'vx.1' in order to create the
1567 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1568 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1569 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1570 and return its def ('vx.1').
1571 Overall, to create the above sequence this function will be called 3 times:
1572 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1573 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1574 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1576 tree
1577 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1579 gimple vec_stmt_for_operand;
1580 stmt_vec_info def_stmt_info;
1582 /* Do nothing; can reuse same def. */
1583 if (dt == vect_external_def || dt == vect_constant_def )
1584 return vec_oprnd;
1586 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1587 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1588 gcc_assert (def_stmt_info);
1589 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1590 gcc_assert (vec_stmt_for_operand);
1591 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1592 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1593 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1594 else
1595 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1596 return vec_oprnd;
1600 /* Get vectorized definitions for the operands to create a copy of an original
1601 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1603 static void
1604 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1605 vec<tree> *vec_oprnds0,
1606 vec<tree> *vec_oprnds1)
1608 tree vec_oprnd = vec_oprnds0->pop ();
1610 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1611 vec_oprnds0->quick_push (vec_oprnd);
1613 if (vec_oprnds1 && vec_oprnds1->length ())
1615 vec_oprnd = vec_oprnds1->pop ();
1616 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1617 vec_oprnds1->quick_push (vec_oprnd);
1622 /* Get vectorized definitions for OP0 and OP1.
1623 REDUC_INDEX is the index of reduction operand in case of reduction,
1624 and -1 otherwise. */
1626 void
1627 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1628 vec<tree> *vec_oprnds0,
1629 vec<tree> *vec_oprnds1,
1630 slp_tree slp_node, int reduc_index)
1632 if (slp_node)
1634 int nops = (op1 == NULL_TREE) ? 1 : 2;
1635 auto_vec<tree> ops (nops);
1636 auto_vec<vec<tree> > vec_defs (nops);
1638 ops.quick_push (op0);
1639 if (op1)
1640 ops.quick_push (op1);
1642 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1644 *vec_oprnds0 = vec_defs[0];
1645 if (op1)
1646 *vec_oprnds1 = vec_defs[1];
1648 else
1650 tree vec_oprnd;
1652 vec_oprnds0->create (1);
1653 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1654 vec_oprnds0->quick_push (vec_oprnd);
1656 if (op1)
1658 vec_oprnds1->create (1);
1659 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1660 vec_oprnds1->quick_push (vec_oprnd);
1666 /* Function vect_finish_stmt_generation.
1668 Insert a new stmt. */
1670 void
1671 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1672 gimple_stmt_iterator *gsi)
1674 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1675 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1676 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1678 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1680 if (!gsi_end_p (*gsi)
1681 && gimple_has_mem_ops (vec_stmt))
1683 gimple at_stmt = gsi_stmt (*gsi);
1684 tree vuse = gimple_vuse (at_stmt);
1685 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1687 tree vdef = gimple_vdef (at_stmt);
1688 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1689 /* If we have an SSA vuse and insert a store, update virtual
1690 SSA form to avoid triggering the renamer. Do so only
1691 if we can easily see all uses - which is what almost always
1692 happens with the way vectorized stmts are inserted. */
1693 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1694 && ((is_gimple_assign (vec_stmt)
1695 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1696 || (is_gimple_call (vec_stmt)
1697 && !(gimple_call_flags (vec_stmt)
1698 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1700 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1701 gimple_set_vdef (vec_stmt, new_vdef);
1702 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1706 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1708 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1709 bb_vinfo));
1711 if (dump_enabled_p ())
1713 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1714 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1717 gimple_set_location (vec_stmt, gimple_location (stmt));
1719 /* While EH edges will generally prevent vectorization, stmt might
1720 e.g. be in a must-not-throw region. Ensure newly created stmts
1721 that could throw are part of the same region. */
1722 int lp_nr = lookup_stmt_eh_lp (stmt);
1723 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1724 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1727 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1728 a function declaration if the target has a vectorized version
1729 of the function, or NULL_TREE if the function cannot be vectorized. */
1731 tree
1732 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1734 tree fndecl = gimple_call_fndecl (call);
1736 /* We only handle functions that do not read or clobber memory -- i.e.
1737 const or novops ones. */
1738 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1739 return NULL_TREE;
1741 if (!fndecl
1742 || TREE_CODE (fndecl) != FUNCTION_DECL
1743 || !DECL_BUILT_IN (fndecl))
1744 return NULL_TREE;
1746 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1747 vectype_in);
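/* For instance, a call to a const math builtin such as sqrt may be
   mapped by the target hook to a vector counterpart; sqrt is only an
   illustration here, since the supported set is entirely
   target-defined, and calls that read or clobber memory are rejected
   above.  */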
1751 static tree permute_vec_elements (tree, tree, tree, gimple,
1752 gimple_stmt_iterator *);
1755 /* Function vectorizable_mask_load_store.
1757 Check if STMT performs a conditional load or store that can be vectorized.
1758 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1759 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1760 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1762 static bool
1763 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1764 gimple *vec_stmt, slp_tree slp_node)
1766 tree vec_dest = NULL;
1767 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1768 stmt_vec_info prev_stmt_info;
1769 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1770 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1771 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1772 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1773 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1774 tree elem_type;
1775 gimple new_stmt;
1776 tree dummy;
1777 tree dataref_ptr = NULL_TREE;
1778 gimple ptr_incr;
1779 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1780 int ncopies;
1781 int i, j;
1782 bool inv_p;
1783 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1784 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1785 int gather_scale = 1;
1786 enum vect_def_type gather_dt = vect_unknown_def_type;
1787 bool is_store;
1788 tree mask;
1789 gimple def_stmt;
1790 tree def;
1791 enum vect_def_type dt;
1793 if (slp_node != NULL)
1794 return false;
1796 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1797 gcc_assert (ncopies >= 1);
1799 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1800 mask = gimple_call_arg (stmt, 2);
1801 if (TYPE_PRECISION (TREE_TYPE (mask))
1802 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1803 return false;
1805 /* FORNOW. This restriction should be relaxed. */
1806 if (nested_in_vect_loop && ncopies > 1)
1808 if (dump_enabled_p ())
1809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1810 "multiple types in nested loop.");
1811 return false;
1814 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1815 return false;
1817 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1818 return false;
1820 if (!STMT_VINFO_DATA_REF (stmt_info))
1821 return false;
1823 elem_type = TREE_TYPE (vectype);
1825 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1826 return false;
1828 if (STMT_VINFO_STRIDED_P (stmt_info))
1829 return false;
1831 if (STMT_VINFO_GATHER_P (stmt_info))
1833 gimple def_stmt;
1834 tree def;
1835 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1836 &gather_off, &gather_scale);
1837 gcc_assert (gather_decl);
1838 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1839 &def_stmt, &def, &gather_dt,
1840 &gather_off_vectype))
1842 if (dump_enabled_p ())
1843 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1844 "gather index use not simple.");
1845 return false;
1848 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1849 tree masktype
1850 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1851 if (TREE_CODE (masktype) == INTEGER_TYPE)
1853 if (dump_enabled_p ())
1854 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1855 "masked gather with integer mask not supported.");
1856 return false;
1859 else if (tree_int_cst_compare (nested_in_vect_loop
1860 ? STMT_VINFO_DR_STEP (stmt_info)
1861 : DR_STEP (dr), size_zero_node) <= 0)
1862 return false;
1863 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1864 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1865 return false;
1867 if (TREE_CODE (mask) != SSA_NAME)
1868 return false;
1870 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1871 &def_stmt, &def, &dt))
1872 return false;
1874 if (is_store)
1876 tree rhs = gimple_call_arg (stmt, 3);
1877 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1878 &def_stmt, &def, &dt))
1879 return false;
1882 if (!vec_stmt) /* transformation not required. */
1884 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1885 if (is_store)
1886 vect_model_store_cost (stmt_info, ncopies, false, dt,
1887 NULL, NULL, NULL);
1888 else
1889 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1890 return true;
1893 /** Transform. **/
1895 if (STMT_VINFO_GATHER_P (stmt_info))
1897 tree vec_oprnd0 = NULL_TREE, op;
1898 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1899 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1900 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1901 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1902 tree mask_perm_mask = NULL_TREE;
1903 edge pe = loop_preheader_edge (loop);
1904 gimple_seq seq;
1905 basic_block new_bb;
1906 enum { NARROW, NONE, WIDEN } modifier;
1907 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1909 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1910 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1911 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1912 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1913 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1914 scaletype = TREE_VALUE (arglist);
1915 gcc_checking_assert (types_compatible_p (srctype, rettype)
1916 && types_compatible_p (srctype, masktype));
1918 if (nunits == gather_off_nunits)
1919 modifier = NONE;
1920 else if (nunits == gather_off_nunits / 2)
1922 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1923 modifier = WIDEN;
1925 for (i = 0; i < gather_off_nunits; ++i)
1926 sel[i] = i | nunits;
1928 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1930 else if (nunits == gather_off_nunits * 2)
1932 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1933 modifier = NARROW;
1935 for (i = 0; i < nunits; ++i)
1936 sel[i] = i < gather_off_nunits
1937 ? i : i + nunits - gather_off_nunits;
1939 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1940 ncopies *= 2;
1941 for (i = 0; i < nunits; ++i)
1942 sel[i] = i | gather_off_nunits;
1943 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1945 else
1946 gcc_unreachable ();
1948 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1950 ptr = fold_convert (ptrtype, gather_base);
1951 if (!is_gimple_min_invariant (ptr))
1953 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1954 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1955 gcc_assert (!new_bb);
1958 scale = build_int_cst (scaletype, gather_scale);
1960 prev_stmt_info = NULL;
1961 for (j = 0; j < ncopies; ++j)
1963 if (modifier == WIDEN && (j & 1))
1964 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1965 perm_mask, stmt, gsi);
1966 else if (j == 0)
1967 op = vec_oprnd0
1968 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1969 else
1970 op = vec_oprnd0
1971 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1973 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1975 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1976 == TYPE_VECTOR_SUBPARTS (idxtype));
1977 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1978 var = make_ssa_name (var);
1979 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1980 new_stmt
1981 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1982 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1983 op = var;
1986 if (mask_perm_mask && (j & 1))
1987 mask_op = permute_vec_elements (mask_op, mask_op,
1988 mask_perm_mask, stmt, gsi);
1989 else
1991 if (j == 0)
1992 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1993 else
1995 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1996 &def_stmt, &def, &dt);
1997 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2000 mask_op = vec_mask;
2001 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2003 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2004 == TYPE_VECTOR_SUBPARTS (masktype));
2005 var = vect_get_new_vect_var (masktype, vect_simple_var,
2006 NULL);
2007 var = make_ssa_name (var);
2008 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2009 new_stmt
2010 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2011 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2012 mask_op = var;
2016 new_stmt
2017 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2018 scale);
2020 if (!useless_type_conversion_p (vectype, rettype))
2022 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2023 == TYPE_VECTOR_SUBPARTS (rettype));
2024 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2025 op = make_ssa_name (var, new_stmt);
2026 gimple_call_set_lhs (new_stmt, op);
2027 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2028 var = make_ssa_name (vec_dest);
2029 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2030 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2032 else
2034 var = make_ssa_name (vec_dest, new_stmt);
2035 gimple_call_set_lhs (new_stmt, var);
2038 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2040 if (modifier == NARROW)
2042 if ((j & 1) == 0)
2044 prev_res = var;
2045 continue;
2047 var = permute_vec_elements (prev_res, var,
2048 perm_mask, stmt, gsi);
2049 new_stmt = SSA_NAME_DEF_STMT (var);
2052 if (prev_stmt_info == NULL)
2053 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2054 else
2055 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2056 prev_stmt_info = vinfo_for_stmt (new_stmt);
2059 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2060 from the IL. */
2061 tree lhs = gimple_call_lhs (stmt);
2062 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2063 set_vinfo_for_stmt (new_stmt, stmt_info);
2064 set_vinfo_for_stmt (stmt, NULL);
2065 STMT_VINFO_STMT (stmt_info) = new_stmt;
2066 gsi_replace (gsi, new_stmt, true);
2067 return true;
2069 else if (is_store)
2071 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2072 prev_stmt_info = NULL;
2073 for (i = 0; i < ncopies; i++)
2075 unsigned align, misalign;
2077 if (i == 0)
2079 tree rhs = gimple_call_arg (stmt, 3);
2080 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2081 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2082 /* We should have caught mismatched types earlier. */
2083 gcc_assert (useless_type_conversion_p (vectype,
2084 TREE_TYPE (vec_rhs)));
2085 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2086 NULL_TREE, &dummy, gsi,
2087 &ptr_incr, false, &inv_p);
2088 gcc_assert (!inv_p);
2090 else
2092 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2093 &def, &dt);
2094 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2095 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2096 &def, &dt);
2097 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2098 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2099 TYPE_SIZE_UNIT (vectype));
2102 align = TYPE_ALIGN_UNIT (vectype);
2103 if (aligned_access_p (dr))
2104 misalign = 0;
2105 else if (DR_MISALIGNMENT (dr) == -1)
2107 align = TYPE_ALIGN_UNIT (elem_type);
2108 misalign = 0;
2110 else
2111 misalign = DR_MISALIGNMENT (dr);
2112 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2113 misalign);
2114 new_stmt
2115 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2116 gimple_call_arg (stmt, 1),
2117 vec_mask, vec_rhs);
2118 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2119 if (i == 0)
2120 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2121 else
2122 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2123 prev_stmt_info = vinfo_for_stmt (new_stmt);
2126 else
2128 tree vec_mask = NULL_TREE;
2129 prev_stmt_info = NULL;
2130 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2131 for (i = 0; i < ncopies; i++)
2133 unsigned align, misalign;
2135 if (i == 0)
2137 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2138 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2139 NULL_TREE, &dummy, gsi,
2140 &ptr_incr, false, &inv_p);
2141 gcc_assert (!inv_p);
2143 else
2145 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2146 &def, &dt);
2147 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2148 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2149 TYPE_SIZE_UNIT (vectype));
2152 align = TYPE_ALIGN_UNIT (vectype);
2153 if (aligned_access_p (dr))
2154 misalign = 0;
2155 else if (DR_MISALIGNMENT (dr) == -1)
2157 align = TYPE_ALIGN_UNIT (elem_type);
2158 misalign = 0;
2160 else
2161 misalign = DR_MISALIGNMENT (dr);
2162 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2163 misalign);
2164 new_stmt
2165 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2166 gimple_call_arg (stmt, 1),
2167 vec_mask);
2168 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2169 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2170 if (i == 0)
2171 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2172 else
2173 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2174 prev_stmt_info = vinfo_for_stmt (new_stmt);
2178 if (!is_store)
2180 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2181 from the IL. */
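/* For illustration only (hypothetical SSA names):  once the vector
   MASK_LOADs above have been emitted, the original scalar call

     lhs_7 = MASK_LOAD (ptr_3, align, mask_5);

   is rewritten below into an assignment of a zero constant of the same
   type, e.g.

     lhs_7 = 0;

   so the masked scalar load disappears from the IL even when dead code
   elimination is disabled.  */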
2182 tree lhs = gimple_call_lhs (stmt);
2183 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2184 set_vinfo_for_stmt (new_stmt, stmt_info);
2185 set_vinfo_for_stmt (stmt, NULL);
2186 STMT_VINFO_STMT (stmt_info) = new_stmt;
2187 gsi_replace (gsi, new_stmt, true);
2190 return true;
2194 /* Function vectorizable_call.
2196 Check if GS performs a function call that can be vectorized.
2197 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2198 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2199 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
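/* A minimal illustrative example (assumed target support, not taken from
   this file):  on a target that provides a vector sqrt builtin, a loop body

     y[i] = sqrtf (x[i]);

   is vectorized by replacing the scalar call with a call to the builtin
   returned by vectorizable_function (), e.g. a V4SF sqrt, once per copy.  */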
2201 static bool
2202 vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2203 slp_tree slp_node)
2205 gcall *stmt;
2206 tree vec_dest;
2207 tree scalar_dest;
2208 tree op, type;
2209 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2210 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2211 tree vectype_out, vectype_in;
2212 int nunits_in;
2213 int nunits_out;
2214 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2215 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2216 tree fndecl, new_temp, def, rhs_type;
2217 gimple def_stmt;
2218 enum vect_def_type dt[3]
2219 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2220 gimple new_stmt = NULL;
2221 int ncopies, j;
2222 vec<tree> vargs = vNULL;
2223 enum { NARROW, NONE, WIDEN } modifier;
2224 size_t i, nargs;
2225 tree lhs;
2227 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2228 return false;
2230 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2231 return false;
2233 /* Is GS a vectorizable call? */
2234 stmt = dyn_cast <gcall *> (gs);
2235 if (!stmt)
2236 return false;
2238 if (gimple_call_internal_p (stmt)
2239 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2240 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2241 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2242 slp_node);
2244 if (gimple_call_lhs (stmt) == NULL_TREE
2245 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2246 return false;
2248 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2250 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2252 /* Process function arguments. */
2253 rhs_type = NULL_TREE;
2254 vectype_in = NULL_TREE;
2255 nargs = gimple_call_num_args (stmt);
2257 /* Bail out if the function has more than three arguments; we do not have
2258 interesting builtin functions to vectorize with more than two arguments,
2259 except for fma. Having no arguments is also not good. */
2260 if (nargs == 0 || nargs > 3)
2261 return false;
2263 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2264 if (gimple_call_internal_p (stmt)
2265 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2267 nargs = 0;
2268 rhs_type = unsigned_type_node;
2271 for (i = 0; i < nargs; i++)
2273 tree opvectype;
2275 op = gimple_call_arg (stmt, i);
2277 /* We can only handle calls with arguments of the same type. */
2278 if (rhs_type
2279 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2281 if (dump_enabled_p ())
2282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2283 "argument types differ.\n");
2284 return false;
2286 if (!rhs_type)
2287 rhs_type = TREE_TYPE (op);
2289 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2290 &def_stmt, &def, &dt[i], &opvectype))
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2294 "use not simple.\n");
2295 return false;
2298 if (!vectype_in)
2299 vectype_in = opvectype;
2300 else if (opvectype
2301 && opvectype != vectype_in)
2303 if (dump_enabled_p ())
2304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2305 "argument vector types differ.\n");
2306 return false;
2309 /* If all arguments are external or constant defs, use a vector type with
2310 the same size as the output vector type. */
2311 if (!vectype_in)
2312 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2313 if (vec_stmt)
2314 gcc_assert (vectype_in);
2315 if (!vectype_in)
2317 if (dump_enabled_p ())
2319 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2320 "no vectype for scalar type ");
2321 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2322 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2325 return false;
2328 /* FORNOW */
2329 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2330 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2331 if (nunits_in == nunits_out / 2)
2332 modifier = NARROW;
2333 else if (nunits_out == nunits_in)
2334 modifier = NONE;
2335 else if (nunits_out == nunits_in / 2)
2336 modifier = WIDEN;
2337 else
2338 return false;
2340 /* For now, we only vectorize functions if a target-specific builtin
2341 is available. TODO -- in some cases, it might be profitable to
2342 insert the calls for pieces of the vector, in order to be able
2343 to vectorize other operations in the loop. */
2344 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2345 if (fndecl == NULL_TREE)
2347 if (gimple_call_internal_p (stmt)
2348 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2349 && !slp_node
2350 && loop_vinfo
2351 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2352 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2353 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2354 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2356 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2357 { 0, 1, 2, ... vf - 1 } vector. */
2358 gcc_assert (nargs == 0);
2360 else
2362 if (dump_enabled_p ())
2363 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2364 "function is not vectorizable.\n");
2365 return false;
2369 gcc_assert (!gimple_vuse (stmt));
2371 if (slp_node || PURE_SLP_STMT (stmt_info))
2372 ncopies = 1;
2373 else if (modifier == NARROW)
2374 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2375 else
2376 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2378 /* Sanity check: make sure that at least one copy of the vectorized stmt
2379 needs to be generated. */
2380 gcc_assert (ncopies >= 1);
2382 if (!vec_stmt) /* transformation not required. */
2384 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2387 "\n");
2388 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2389 return true;
2392 /** Transform. **/
2394 if (dump_enabled_p ())
2395 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2397 /* Handle def. */
2398 scalar_dest = gimple_call_lhs (stmt);
2399 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2401 prev_stmt_info = NULL;
2402 switch (modifier)
2404 case NONE:
2405 for (j = 0; j < ncopies; ++j)
2407 /* Build argument list for the vectorized call. */
2408 if (j == 0)
2409 vargs.create (nargs);
2410 else
2411 vargs.truncate (0);
2413 if (slp_node)
2415 auto_vec<vec<tree> > vec_defs (nargs);
2416 vec<tree> vec_oprnds0;
2418 for (i = 0; i < nargs; i++)
2419 vargs.quick_push (gimple_call_arg (stmt, i));
2420 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2421 vec_oprnds0 = vec_defs[0];
2423 /* Arguments are ready. Create the new vector stmt. */
2424 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2426 size_t k;
2427 for (k = 0; k < nargs; k++)
2429 vec<tree> vec_oprndsk = vec_defs[k];
2430 vargs[k] = vec_oprndsk[i];
2432 new_stmt = gimple_build_call_vec (fndecl, vargs);
2433 new_temp = make_ssa_name (vec_dest, new_stmt);
2434 gimple_call_set_lhs (new_stmt, new_temp);
2435 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2436 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2439 for (i = 0; i < nargs; i++)
2441 vec<tree> vec_oprndsi = vec_defs[i];
2442 vec_oprndsi.release ();
2444 continue;
2447 for (i = 0; i < nargs; i++)
2449 op = gimple_call_arg (stmt, i);
2450 if (j == 0)
2451 vec_oprnd0
2452 = vect_get_vec_def_for_operand (op, stmt, NULL);
2453 else
2455 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2456 vec_oprnd0
2457 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2460 vargs.quick_push (vec_oprnd0);
2463 if (gimple_call_internal_p (stmt)
2464 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2466 tree *v = XALLOCAVEC (tree, nunits_out);
2467 int k;
2468 for (k = 0; k < nunits_out; ++k)
2469 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2470 tree cst = build_vector (vectype_out, v);
2471 tree new_var
2472 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2473 gimple init_stmt = gimple_build_assign (new_var, cst);
2474 new_temp = make_ssa_name (new_var, init_stmt);
2475 gimple_assign_set_lhs (init_stmt, new_temp);
2476 vect_init_vector_1 (stmt, init_stmt, NULL);
2477 new_temp = make_ssa_name (vec_dest);
2478 new_stmt = gimple_build_assign (new_temp,
2479 gimple_assign_lhs (init_stmt));
2481 else
2483 new_stmt = gimple_build_call_vec (fndecl, vargs);
2484 new_temp = make_ssa_name (vec_dest, new_stmt);
2485 gimple_call_set_lhs (new_stmt, new_temp);
2487 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2489 if (j == 0)
2490 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2491 else
2492 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2494 prev_stmt_info = vinfo_for_stmt (new_stmt);
2497 break;
2499 case NARROW:
2500 for (j = 0; j < ncopies; ++j)
2502 /* Build argument list for the vectorized call. */
2503 if (j == 0)
2504 vargs.create (nargs * 2);
2505 else
2506 vargs.truncate (0);
2508 if (slp_node)
2510 auto_vec<vec<tree> > vec_defs (nargs);
2511 vec<tree> vec_oprnds0;
2513 for (i = 0; i < nargs; i++)
2514 vargs.quick_push (gimple_call_arg (stmt, i));
2515 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2516 vec_oprnds0 = vec_defs[0];
2518 /* Arguments are ready. Create the new vector stmt. */
2519 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2521 size_t k;
2522 vargs.truncate (0);
2523 for (k = 0; k < nargs; k++)
2525 vec<tree> vec_oprndsk = vec_defs[k];
2526 vargs.quick_push (vec_oprndsk[i]);
2527 vargs.quick_push (vec_oprndsk[i + 1]);
2529 new_stmt = gimple_build_call_vec (fndecl, vargs);
2530 new_temp = make_ssa_name (vec_dest, new_stmt);
2531 gimple_call_set_lhs (new_stmt, new_temp);
2532 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2533 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2536 for (i = 0; i < nargs; i++)
2538 vec<tree> vec_oprndsi = vec_defs[i];
2539 vec_oprndsi.release ();
2541 continue;
2544 for (i = 0; i < nargs; i++)
2546 op = gimple_call_arg (stmt, i);
2547 if (j == 0)
2549 vec_oprnd0
2550 = vect_get_vec_def_for_operand (op, stmt, NULL);
2551 vec_oprnd1
2552 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2554 else
2556 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2557 vec_oprnd0
2558 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2559 vec_oprnd1
2560 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2563 vargs.quick_push (vec_oprnd0);
2564 vargs.quick_push (vec_oprnd1);
2567 new_stmt = gimple_build_call_vec (fndecl, vargs);
2568 new_temp = make_ssa_name (vec_dest, new_stmt);
2569 gimple_call_set_lhs (new_stmt, new_temp);
2570 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2572 if (j == 0)
2573 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2574 else
2575 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2577 prev_stmt_info = vinfo_for_stmt (new_stmt);
2580 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2582 break;
2584 case WIDEN:
2585 /* No current target implements this case. */
2586 return false;
2589 vargs.release ();
2591 /* The call in STMT might prevent it from being removed in dce.
2592 However, we cannot remove it here, because of the way the ssa name
2593 it defines is mapped to the new definition. So just replace the
2594 rhs of the statement with something harmless. */
2596 if (slp_node)
2597 return true;
2599 type = TREE_TYPE (scalar_dest);
2600 if (is_pattern_stmt_p (stmt_info))
2601 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2602 else
2603 lhs = gimple_call_lhs (stmt);
2604 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2605 set_vinfo_for_stmt (new_stmt, stmt_info);
2606 set_vinfo_for_stmt (stmt, NULL);
2607 STMT_VINFO_STMT (stmt_info) = new_stmt;
2608 gsi_replace (gsi, new_stmt, false);
2610 return true;
2614 struct simd_call_arg_info
2616 tree vectype;		/* Vector type of the argument, if any.  */
2617 tree op;			/* Scalar operand, or the base of a linear argument.  */
2618 enum vect_def_type dt;	/* How the argument is defined.  */
2619 HOST_WIDE_INT linear_step;	/* Step of a linear argument, 0 otherwise.  */
2620 unsigned int align;		/* Known pointer alignment in bytes, 0 if unknown.  */
2623 /* Function vectorizable_simd_clone_call.
2625 Check if STMT performs a function call that can be vectorized
2626 by calling a simd clone of the function.
2627 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2628 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2629 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
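/* Illustrative sketch (the clone name follows the x86 vector ABI mangling
   and is hypothetical here):  given

     #pragma omp declare simd notinbranch
     float foo (float x);

   a call foo (a[i]) inside a vectorizable loop can be replaced by a call to
   one of the simd clones attached to foo's cgraph node, e.g.
   _ZGVbN4v_foo (vect_a), taking and returning V4SF values.  */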
2631 static bool
2632 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2633 gimple *vec_stmt, slp_tree slp_node)
2635 tree vec_dest;
2636 tree scalar_dest;
2637 tree op, type;
2638 tree vec_oprnd0 = NULL_TREE;
2639 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2640 tree vectype;
2641 unsigned int nunits;
2642 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2643 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2644 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2645 tree fndecl, new_temp, def;
2646 gimple def_stmt;
2647 gimple new_stmt = NULL;
2648 int ncopies, j;
2649 vec<simd_call_arg_info> arginfo = vNULL;
2650 vec<tree> vargs = vNULL;
2651 size_t i, nargs;
2652 tree lhs, rtype, ratype;
2653 vec<constructor_elt, va_gc> *ret_ctor_elts;
2655 /* Is STMT a vectorizable call? */
2656 if (!is_gimple_call (stmt))
2657 return false;
2659 fndecl = gimple_call_fndecl (stmt);
2660 if (fndecl == NULL_TREE)
2661 return false;
2663 struct cgraph_node *node = cgraph_node::get (fndecl);
2664 if (node == NULL || node->simd_clones == NULL)
2665 return false;
2667 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2668 return false;
2670 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2671 return false;
2673 if (gimple_call_lhs (stmt)
2674 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2675 return false;
2677 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2679 vectype = STMT_VINFO_VECTYPE (stmt_info);
2681 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2682 return false;
2684 /* FORNOW */
2685 if (slp_node || PURE_SLP_STMT (stmt_info))
2686 return false;
2688 /* Process function arguments. */
2689 nargs = gimple_call_num_args (stmt);
2691 /* Bail out if the function has zero arguments. */
2692 if (nargs == 0)
2693 return false;
2695 arginfo.create (nargs);
2697 for (i = 0; i < nargs; i++)
2699 simd_call_arg_info thisarginfo;
2700 affine_iv iv;
2702 thisarginfo.linear_step = 0;
2703 thisarginfo.align = 0;
2704 thisarginfo.op = NULL_TREE;
2706 op = gimple_call_arg (stmt, i);
2707 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2708 &def_stmt, &def, &thisarginfo.dt,
2709 &thisarginfo.vectype)
2710 || thisarginfo.dt == vect_uninitialized_def)
2712 if (dump_enabled_p ())
2713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2714 "use not simple.\n");
2715 arginfo.release ();
2716 return false;
2719 if (thisarginfo.dt == vect_constant_def
2720 || thisarginfo.dt == vect_external_def)
2721 gcc_assert (thisarginfo.vectype == NULL_TREE);
2722 else
2723 gcc_assert (thisarginfo.vectype != NULL_TREE);
2725 /* For linear arguments, the analyze phase should have saved
2726 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2727 if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2728 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2730 gcc_assert (vec_stmt);
2731 thisarginfo.linear_step
2732 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2733 thisarginfo.op
2734 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2735 /* If the loop has been peeled for alignment, we need to adjust it. */
2736 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2737 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2738 if (n1 != n2)
2740 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2741 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
2742 tree opt = TREE_TYPE (thisarginfo.op);
2743 bias = fold_convert (TREE_TYPE (step), bias);
2744 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2745 thisarginfo.op
2746 = fold_build2 (POINTER_TYPE_P (opt)
2747 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2748 thisarginfo.op, bias);
2751 else if (!vec_stmt
2752 && thisarginfo.dt != vect_constant_def
2753 && thisarginfo.dt != vect_external_def
2754 && loop_vinfo
2755 && TREE_CODE (op) == SSA_NAME
2756 && simple_iv (loop, loop_containing_stmt (stmt), op,
2757 &iv, false)
2758 && tree_fits_shwi_p (iv.step))
2760 thisarginfo.linear_step = tree_to_shwi (iv.step);
2761 thisarginfo.op = iv.base;
2763 else if ((thisarginfo.dt == vect_constant_def
2764 || thisarginfo.dt == vect_external_def)
2765 && POINTER_TYPE_P (TREE_TYPE (op)))
2766 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2768 arginfo.quick_push (thisarginfo);
2771 unsigned int badness = 0;
2772 struct cgraph_node *bestn = NULL;
2773 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2774 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2775 else
2776 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2777 n = n->simdclone->next_clone)
2779 unsigned int this_badness = 0;
2780 if (n->simdclone->simdlen
2781 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2782 || n->simdclone->nargs != nargs)
2783 continue;
2784 if (n->simdclone->simdlen
2785 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2786 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2787 - exact_log2 (n->simdclone->simdlen)) * 1024;
2788 if (n->simdclone->inbranch)
2789 this_badness += 2048;
2790 int target_badness = targetm.simd_clone.usable (n);
2791 if (target_badness < 0)
2792 continue;
2793 this_badness += target_badness * 512;
2794 /* FORNOW: Have to add code to add the mask argument. */
2795 if (n->simdclone->inbranch)
2796 continue;
2797 for (i = 0; i < nargs; i++)
2799 switch (n->simdclone->args[i].arg_type)
2801 case SIMD_CLONE_ARG_TYPE_VECTOR:
2802 if (!useless_type_conversion_p
2803 (n->simdclone->args[i].orig_type,
2804 TREE_TYPE (gimple_call_arg (stmt, i))))
2805 i = -1;
2806 else if (arginfo[i].dt == vect_constant_def
2807 || arginfo[i].dt == vect_external_def
2808 || arginfo[i].linear_step)
2809 this_badness += 64;
2810 break;
2811 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2812 if (arginfo[i].dt != vect_constant_def
2813 && arginfo[i].dt != vect_external_def)
2814 i = -1;
2815 break;
2816 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2817 if (arginfo[i].dt == vect_constant_def
2818 || arginfo[i].dt == vect_external_def
2819 || (arginfo[i].linear_step
2820 != n->simdclone->args[i].linear_step))
2821 i = -1;
2822 break;
2823 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2824 /* FORNOW */
2825 i = -1;
2826 break;
2827 case SIMD_CLONE_ARG_TYPE_MASK:
2828 gcc_unreachable ();
2830 if (i == (size_t) -1)
2831 break;
2832 if (n->simdclone->args[i].alignment > arginfo[i].align)
2834 i = -1;
2835 break;
2837 if (arginfo[i].align)
2838 this_badness += (exact_log2 (arginfo[i].align)
2839 - exact_log2 (n->simdclone->args[i].alignment));
2841 if (i == (size_t) -1)
2842 continue;
2843 if (bestn == NULL || this_badness < badness)
2845 bestn = n;
2846 badness = this_badness;
2850 if (bestn == NULL)
2852 arginfo.release ();
2853 return false;
2856 for (i = 0; i < nargs; i++)
2857 if ((arginfo[i].dt == vect_constant_def
2858 || arginfo[i].dt == vect_external_def)
2859 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2861 arginfo[i].vectype
2862 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2863 i)));
2864 if (arginfo[i].vectype == NULL
2865 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2866 > bestn->simdclone->simdlen))
2868 arginfo.release ();
2869 return false;
2873 fndecl = bestn->decl;
2874 nunits = bestn->simdclone->simdlen;
2875 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2877 /* If the function isn't const, only allow it in simd loops where the user
2878 has asserted that at least nunits consecutive iterations can be
2879 performed using SIMD instructions. */
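/* For example (illustrative):

     #pragma omp simd safelen(8)
     for (i = 0; i < n; i++)
       a[i] = foo (b[i]);

   sets loop->safelen to 8, so a non-const clone of foo with simdlen up to 8
   remains acceptable here despite the call's side effects.  */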
2880 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2881 && gimple_vuse (stmt))
2883 arginfo.release ();
2884 return false;
2887 /* Sanity check: make sure that at least one copy of the vectorized stmt
2888 needs to be generated. */
2889 gcc_assert (ncopies >= 1);
2891 if (!vec_stmt) /* transformation not required. */
2893 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2894 for (i = 0; i < nargs; i++)
2895 if (bestn->simdclone->args[i].arg_type
2896 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2898 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
2899 + 1);
2900 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2901 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2902 ? size_type_node : TREE_TYPE (arginfo[i].op);
2903 tree ls = build_int_cst (lst, arginfo[i].linear_step);
2904 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2906 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2907 if (dump_enabled_p ())
2908 dump_printf_loc (MSG_NOTE, vect_location,
2909 "=== vectorizable_simd_clone_call ===\n");
2910 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2911 arginfo.release ();
2912 return true;
2915 /** Transform. **/
2917 if (dump_enabled_p ())
2918 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2920 /* Handle def. */
2921 scalar_dest = gimple_call_lhs (stmt);
2922 vec_dest = NULL_TREE;
2923 rtype = NULL_TREE;
2924 ratype = NULL_TREE;
2925 if (scalar_dest)
2927 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2928 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2929 if (TREE_CODE (rtype) == ARRAY_TYPE)
2931 ratype = rtype;
2932 rtype = TREE_TYPE (ratype);
2936 prev_stmt_info = NULL;
2937 for (j = 0; j < ncopies; ++j)
2939 /* Build argument list for the vectorized call. */
2940 if (j == 0)
2941 vargs.create (nargs);
2942 else
2943 vargs.truncate (0);
2945 for (i = 0; i < nargs; i++)
2947 unsigned int k, l, m, o;
2948 tree atype;
2949 op = gimple_call_arg (stmt, i);
2950 switch (bestn->simdclone->args[i].arg_type)
2952 case SIMD_CLONE_ARG_TYPE_VECTOR:
2953 atype = bestn->simdclone->args[i].vector_type;
2954 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2955 for (m = j * o; m < (j + 1) * o; m++)
2957 if (TYPE_VECTOR_SUBPARTS (atype)
2958 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2960 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2961 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2962 / TYPE_VECTOR_SUBPARTS (atype));
2963 gcc_assert ((k & (k - 1)) == 0);
2964 if (m == 0)
2965 vec_oprnd0
2966 = vect_get_vec_def_for_operand (op, stmt, NULL);
2967 else
2969 vec_oprnd0 = arginfo[i].op;
2970 if ((m & (k - 1)) == 0)
2971 vec_oprnd0
2972 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2973 vec_oprnd0);
2975 arginfo[i].op = vec_oprnd0;
2976 vec_oprnd0
2977 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2978 size_int (prec),
2979 bitsize_int ((m & (k - 1)) * prec));
2980 new_stmt
2981 = gimple_build_assign (make_ssa_name (atype),
2982 vec_oprnd0);
2983 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2984 vargs.safe_push (gimple_assign_lhs (new_stmt));
2986 else
2988 k = (TYPE_VECTOR_SUBPARTS (atype)
2989 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2990 gcc_assert ((k & (k - 1)) == 0);
2991 vec<constructor_elt, va_gc> *ctor_elts;
2992 if (k != 1)
2993 vec_alloc (ctor_elts, k);
2994 else
2995 ctor_elts = NULL;
2996 for (l = 0; l < k; l++)
2998 if (m == 0 && l == 0)
2999 vec_oprnd0
3000 = vect_get_vec_def_for_operand (op, stmt, NULL);
3001 else
3002 vec_oprnd0
3003 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3004 arginfo[i].op);
3005 arginfo[i].op = vec_oprnd0;
3006 if (k == 1)
3007 break;
3008 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3009 vec_oprnd0);
3011 if (k == 1)
3012 vargs.safe_push (vec_oprnd0);
3013 else
3015 vec_oprnd0 = build_constructor (atype, ctor_elts);
3016 new_stmt
3017 = gimple_build_assign (make_ssa_name (atype),
3018 vec_oprnd0);
3019 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3020 vargs.safe_push (gimple_assign_lhs (new_stmt));
3024 break;
3025 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3026 vargs.safe_push (op);
3027 break;
3028 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3029 if (j == 0)
3031 gimple_seq stmts;
3032 arginfo[i].op
3033 = force_gimple_operand (arginfo[i].op, &stmts, true,
3034 NULL_TREE);
3035 if (stmts != NULL)
3037 basic_block new_bb;
3038 edge pe = loop_preheader_edge (loop);
3039 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3040 gcc_assert (!new_bb);
3042 tree phi_res = copy_ssa_name (op);
3043 gphi *new_phi = create_phi_node (phi_res, loop->header);
3044 set_vinfo_for_stmt (new_phi,
3045 new_stmt_vec_info (new_phi, loop_vinfo,
3046 NULL));
3047 add_phi_arg (new_phi, arginfo[i].op,
3048 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3049 enum tree_code code
3050 = POINTER_TYPE_P (TREE_TYPE (op))
3051 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3052 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3053 ? sizetype : TREE_TYPE (op);
3054 widest_int cst
3055 = wi::mul (bestn->simdclone->args[i].linear_step,
3056 ncopies * nunits);
3057 tree tcst = wide_int_to_tree (type, cst);
3058 tree phi_arg = copy_ssa_name (op);
3059 new_stmt
3060 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3061 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3062 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3063 set_vinfo_for_stmt (new_stmt,
3064 new_stmt_vec_info (new_stmt, loop_vinfo,
3065 NULL));
3066 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3067 UNKNOWN_LOCATION);
3068 arginfo[i].op = phi_res;
3069 vargs.safe_push (phi_res);
3071 else
3073 enum tree_code code
3074 = POINTER_TYPE_P (TREE_TYPE (op))
3075 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3076 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3077 ? sizetype : TREE_TYPE (op);
3078 widest_int cst
3079 = wi::mul (bestn->simdclone->args[i].linear_step,
3080 j * nunits);
3081 tree tcst = wide_int_to_tree (type, cst);
3082 new_temp = make_ssa_name (TREE_TYPE (op));
3083 new_stmt = gimple_build_assign (new_temp, code,
3084 arginfo[i].op, tcst);
3085 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3086 vargs.safe_push (new_temp);
3088 break;
3089 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3090 default:
3091 gcc_unreachable ();
3095 new_stmt = gimple_build_call_vec (fndecl, vargs);
3096 if (vec_dest)
3098 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3099 if (ratype)
3100 new_temp = create_tmp_var (ratype);
3101 else if (TYPE_VECTOR_SUBPARTS (vectype)
3102 == TYPE_VECTOR_SUBPARTS (rtype))
3103 new_temp = make_ssa_name (vec_dest, new_stmt);
3104 else
3105 new_temp = make_ssa_name (rtype, new_stmt);
3106 gimple_call_set_lhs (new_stmt, new_temp);
3108 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3110 if (vec_dest)
3112 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3114 unsigned int k, l;
3115 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3116 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3117 gcc_assert ((k & (k - 1)) == 0);
3118 for (l = 0; l < k; l++)
3120 tree t;
3121 if (ratype)
3123 t = build_fold_addr_expr (new_temp);
3124 t = build2 (MEM_REF, vectype, t,
3125 build_int_cst (TREE_TYPE (t),
3126 l * prec / BITS_PER_UNIT));
3128 else
3129 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3130 size_int (prec), bitsize_int (l * prec));
3131 new_stmt
3132 = gimple_build_assign (make_ssa_name (vectype), t);
3133 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3134 if (j == 0 && l == 0)
3135 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3136 else
3137 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3139 prev_stmt_info = vinfo_for_stmt (new_stmt);
3142 if (ratype)
3144 tree clobber = build_constructor (ratype, NULL);
3145 TREE_THIS_VOLATILE (clobber) = 1;
3146 new_stmt = gimple_build_assign (new_temp, clobber);
3147 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3149 continue;
3151 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3153 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3154 / TYPE_VECTOR_SUBPARTS (rtype));
3155 gcc_assert ((k & (k - 1)) == 0);
3156 if ((j & (k - 1)) == 0)
3157 vec_alloc (ret_ctor_elts, k);
3158 if (ratype)
3160 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3161 for (m = 0; m < o; m++)
3163 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3164 size_int (m), NULL_TREE, NULL_TREE);
3165 new_stmt
3166 = gimple_build_assign (make_ssa_name (rtype), tem);
3167 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3168 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3169 gimple_assign_lhs (new_stmt));
3171 tree clobber = build_constructor (ratype, NULL);
3172 TREE_THIS_VOLATILE (clobber) = 1;
3173 new_stmt = gimple_build_assign (new_temp, clobber);
3174 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3176 else
3177 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3178 if ((j & (k - 1)) != k - 1)
3179 continue;
3180 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3181 new_stmt
3182 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3183 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3185 if ((unsigned) j == k - 1)
3186 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3187 else
3188 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3190 prev_stmt_info = vinfo_for_stmt (new_stmt);
3191 continue;
3193 else if (ratype)
3195 tree t = build_fold_addr_expr (new_temp);
3196 t = build2 (MEM_REF, vectype, t,
3197 build_int_cst (TREE_TYPE (t), 0));
3198 new_stmt
3199 = gimple_build_assign (make_ssa_name (vec_dest), t);
3200 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3201 tree clobber = build_constructor (ratype, NULL);
3202 TREE_THIS_VOLATILE (clobber) = 1;
3203 vect_finish_stmt_generation (stmt,
3204 gimple_build_assign (new_temp,
3205 clobber), gsi);
3209 if (j == 0)
3210 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3211 else
3212 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3214 prev_stmt_info = vinfo_for_stmt (new_stmt);
3217 vargs.release ();
3219 /* The call in STMT might prevent it from being removed in dce.
3220 However, we cannot remove it here, because of the way the ssa name
3221 it defines is mapped to the new definition. So just replace the
3222 rhs of the statement with something harmless. */
3224 if (slp_node)
3225 return true;
3227 if (scalar_dest)
3229 type = TREE_TYPE (scalar_dest);
3230 if (is_pattern_stmt_p (stmt_info))
3231 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3232 else
3233 lhs = gimple_call_lhs (stmt);
3234 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3236 else
3237 new_stmt = gimple_build_nop ();
3238 set_vinfo_for_stmt (new_stmt, stmt_info);
3239 set_vinfo_for_stmt (stmt, NULL);
3240 STMT_VINFO_STMT (stmt_info) = new_stmt;
3241 gsi_replace (gsi, new_stmt, true);
3242 unlink_stmt_vdef (stmt);
3244 return true;
3248 /* Function vect_gen_widened_results_half
3250 Create a vector stmt whose code, number of arguments, and result
3251 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3252 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3253 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3254 needs to be created (DECL is a function-decl of a target-builtin).
3255 STMT is the original scalar stmt that we are vectorizing. */
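/* For example (illustrative, target permitting):  widening a V8HI input to
   V4SI results is done in two halves; this helper emits one half, e.g. a
   VEC_UNPACK_LO_EXPR or VEC_UNPACK_HI_EXPR assignment, or the corresponding
   target builtin call when CODE is CALL_EXPR.  */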
3257 static gimple
3258 vect_gen_widened_results_half (enum tree_code code,
3259 tree decl,
3260 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3261 tree vec_dest, gimple_stmt_iterator *gsi,
3262 gimple stmt)
3264 gimple new_stmt;
3265 tree new_temp;
3267 /* Generate half of the widened result: */
3268 if (code == CALL_EXPR)
3270 /* Target specific support */
3271 if (op_type == binary_op)
3272 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3273 else
3274 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3275 new_temp = make_ssa_name (vec_dest, new_stmt);
3276 gimple_call_set_lhs (new_stmt, new_temp);
3278 else
3280 /* Generic support */
3281 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3282 if (op_type != binary_op)
3283 vec_oprnd1 = NULL;
3284 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3285 new_temp = make_ssa_name (vec_dest, new_stmt);
3286 gimple_assign_set_lhs (new_stmt, new_temp);
3288 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3290 return new_stmt;
3294 /* Get vectorized definitions for loop-based vectorization. For the first
3295 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3296 scalar operand), and for the rest we get a copy with
3297 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3298 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3299 The vectors are collected into VEC_OPRNDS. */
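/* For instance, with MULTI_STEP_CVT == 1 this routine pushes four vector
   defs into VEC_OPRNDS (two per recursion level), which is the number of
   source vectors a two-step narrowing conversion consumes per copy.  */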
3301 static void
3302 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3303 vec<tree> *vec_oprnds, int multi_step_cvt)
3305 tree vec_oprnd;
3307 /* Get first vector operand. */
3308 /* All the vector operands except the very first one (that is, the scalar oprnd)
3309 are stmt copies. */
3310 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3311 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3312 else
3313 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3315 vec_oprnds->quick_push (vec_oprnd);
3317 /* Get second vector operand. */
3318 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3319 vec_oprnds->quick_push (vec_oprnd);
3321 *oprnd = vec_oprnd;
3323 /* For conversion in multiple steps, continue to get operands
3324 recursively. */
3325 if (multi_step_cvt)
3326 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3330 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3331 For multi-step conversions store the resulting vectors and call the function
3332 recursively. */
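/* Illustrative example (element types assumed):  demoting int elements to
   char can go V4SI -> V8HI -> V16QI; each level combines pairs of input
   vectors (e.g. with VEC_PACK_TRUNC_EXPR), so every recursive step halves
   the number of vectors held in VEC_OPRNDS.  */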
3334 static void
3335 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3336 int multi_step_cvt, gimple stmt,
3337 vec<tree> vec_dsts,
3338 gimple_stmt_iterator *gsi,
3339 slp_tree slp_node, enum tree_code code,
3340 stmt_vec_info *prev_stmt_info)
3342 unsigned int i;
3343 tree vop0, vop1, new_tmp, vec_dest;
3344 gimple new_stmt;
3345 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3347 vec_dest = vec_dsts.pop ();
3349 for (i = 0; i < vec_oprnds->length (); i += 2)
3351 /* Create demotion operation. */
3352 vop0 = (*vec_oprnds)[i];
3353 vop1 = (*vec_oprnds)[i + 1];
3354 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3355 new_tmp = make_ssa_name (vec_dest, new_stmt);
3356 gimple_assign_set_lhs (new_stmt, new_tmp);
3357 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3359 if (multi_step_cvt)
3360 /* Store the resulting vector for next recursive call. */
3361 (*vec_oprnds)[i/2] = new_tmp;
3362 else
3364 /* This is the last step of the conversion sequence. Store the
3365 vectors in SLP_NODE or in the vector info of the scalar statement
3366 (or in the STMT_VINFO_RELATED_STMT chain). */
3367 if (slp_node)
3368 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3369 else
3371 if (!*prev_stmt_info)
3372 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3373 else
3374 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3376 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3381 /* For multi-step demotion operations we first generate demotion operations
3382 from the source type to the intermediate types, and then combine the
3383 results (stored in VEC_OPRNDS) with a demotion operation to the
3384 destination type. */
3385 if (multi_step_cvt)
3387 /* At each level of recursion we have half of the operands we had at the
3388 previous level. */
3389 vec_oprnds->truncate ((i+1)/2);
3390 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3391 stmt, vec_dsts, gsi, slp_node,
3392 VEC_PACK_TRUNC_EXPR,
3393 prev_stmt_info);
3396 vec_dsts.quick_push (vec_dest);
3400 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3401 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3402 the resulting vectors and call the function recursively. */
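/* Illustrative example (element types assumed):  promoting a V8HI operand
   to V4SI results produces two vectors per input, typically via the LO/HI
   pair of codes (e.g. VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR, or the
   widening-multiply variants for binary_op), so VEC_OPRNDS0 doubles in
   length on each step.  */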
3404 static void
3405 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3406 vec<tree> *vec_oprnds1,
3407 gimple stmt, tree vec_dest,
3408 gimple_stmt_iterator *gsi,
3409 enum tree_code code1,
3410 enum tree_code code2, tree decl1,
3411 tree decl2, int op_type)
3413 int i;
3414 tree vop0, vop1, new_tmp1, new_tmp2;
3415 gimple new_stmt1, new_stmt2;
3416 vec<tree> vec_tmp = vNULL;
3418 vec_tmp.create (vec_oprnds0->length () * 2);
3419 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3421 if (op_type == binary_op)
3422 vop1 = (*vec_oprnds1)[i];
3423 else
3424 vop1 = NULL_TREE;
3426 /* Generate the two halves of the promotion operation. */
3427 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3428 op_type, vec_dest, gsi, stmt);
3429 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3430 op_type, vec_dest, gsi, stmt);
3431 if (is_gimple_call (new_stmt1))
3433 new_tmp1 = gimple_call_lhs (new_stmt1);
3434 new_tmp2 = gimple_call_lhs (new_stmt2);
3436 else
3438 new_tmp1 = gimple_assign_lhs (new_stmt1);
3439 new_tmp2 = gimple_assign_lhs (new_stmt2);
3442 /* Store the results for the next step. */
3443 vec_tmp.quick_push (new_tmp1);
3444 vec_tmp.quick_push (new_tmp2);
3447 vec_oprnds0->release ();
3448 *vec_oprnds0 = vec_tmp;
3452 /* Check if STMT performs a conversion operation that can be vectorized.
3453 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3454 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3455 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
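/* Illustrative sketch (types assumed):  a conversion such as

     double d = (double) short_var;

   usually has no single-step vector form, so the WIDEN path below may go
   through an intermediate integer type recorded in CVT_TYPE / INTERM_TYPES:
   first widen the short vector elements to int (CODE1/CODE2), then convert
   the int vectors to double (CODECVT1/CODECVT2).  */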
3457 static bool
3458 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3459 gimple *vec_stmt, slp_tree slp_node)
3461 tree vec_dest;
3462 tree scalar_dest;
3463 tree op0, op1 = NULL_TREE;
3464 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3465 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3466 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3467 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3468 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3469 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3470 tree new_temp;
3471 tree def;
3472 gimple def_stmt;
3473 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3474 gimple new_stmt = NULL;
3475 stmt_vec_info prev_stmt_info;
3476 int nunits_in;
3477 int nunits_out;
3478 tree vectype_out, vectype_in;
3479 int ncopies, i, j;
3480 tree lhs_type, rhs_type;
3481 enum { NARROW, NONE, WIDEN } modifier;
3482 vec<tree> vec_oprnds0 = vNULL;
3483 vec<tree> vec_oprnds1 = vNULL;
3484 tree vop0;
3485 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3486 int multi_step_cvt = 0;
3487 vec<tree> vec_dsts = vNULL;
3488 vec<tree> interm_types = vNULL;
3489 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3490 int op_type;
3491 machine_mode rhs_mode;
3492 unsigned short fltsz;
3494 /* Is STMT a vectorizable conversion? */
3496 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3497 return false;
3499 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3500 return false;
3502 if (!is_gimple_assign (stmt))
3503 return false;
3505 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3506 return false;
3508 code = gimple_assign_rhs_code (stmt);
3509 if (!CONVERT_EXPR_CODE_P (code)
3510 && code != FIX_TRUNC_EXPR
3511 && code != FLOAT_EXPR
3512 && code != WIDEN_MULT_EXPR
3513 && code != WIDEN_LSHIFT_EXPR)
3514 return false;
3516 op_type = TREE_CODE_LENGTH (code);
3518 /* Check types of lhs and rhs. */
3519 scalar_dest = gimple_assign_lhs (stmt);
3520 lhs_type = TREE_TYPE (scalar_dest);
3521 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3523 op0 = gimple_assign_rhs1 (stmt);
3524 rhs_type = TREE_TYPE (op0);
3526 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3527 && !((INTEGRAL_TYPE_P (lhs_type)
3528 && INTEGRAL_TYPE_P (rhs_type))
3529 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3530 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3531 return false;
3533 if ((INTEGRAL_TYPE_P (lhs_type)
3534 && (TYPE_PRECISION (lhs_type)
3535 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3536 || (INTEGRAL_TYPE_P (rhs_type)
3537 && (TYPE_PRECISION (rhs_type)
3538 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3540 if (dump_enabled_p ())
3541 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3542 "type conversion to/from bit-precision unsupported."
3543 "\n");
3544 return false;
3547 /* Check the operands of the operation. */
3548 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3549 &def_stmt, &def, &dt[0], &vectype_in))
3551 if (dump_enabled_p ())
3552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3553 "use not simple.\n");
3554 return false;
3556 if (op_type == binary_op)
3558 bool ok;
3560 op1 = gimple_assign_rhs2 (stmt);
3561 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3562 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3563 OP1. */
3564 if (CONSTANT_CLASS_P (op0))
3565 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3566 &def_stmt, &def, &dt[1], &vectype_in);
3567 else
3568 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3569 &def, &dt[1]);
3571 if (!ok)
3573 if (dump_enabled_p ())
3574 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3575 "use not simple.\n");
3576 return false;
3580 /* If op0 is an external or constant def, use a vector type of
3581 the same size as the output vector type. */
3582 if (!vectype_in)
3583 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3584 if (vec_stmt)
3585 gcc_assert (vectype_in);
3586 if (!vectype_in)
3588 if (dump_enabled_p ())
3590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3591 "no vectype for scalar type ");
3592 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3593 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3596 return false;
3599 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3600 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3601 if (nunits_in < nunits_out)
3602 modifier = NARROW;
3603 else if (nunits_out == nunits_in)
3604 modifier = NONE;
3605 else
3606 modifier = WIDEN;
3608 /* Multiple types in SLP are handled by creating the appropriate number of
3609 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3610 case of SLP. */
3611 if (slp_node || PURE_SLP_STMT (stmt_info))
3612 ncopies = 1;
3613 else if (modifier == NARROW)
3614 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3615 else
3616 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3618 /* Sanity check: make sure that at least one copy of the vectorized stmt
3619 needs to be generated. */
3620 gcc_assert (ncopies >= 1);
3622 /* Supportable by target? */
3623 switch (modifier)
3625 case NONE:
3626 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3627 return false;
3628 if (supportable_convert_operation (code, vectype_out, vectype_in,
3629 &decl1, &code1))
3630 break;
3631 /* FALLTHRU */
3632 unsupported:
3633 if (dump_enabled_p ())
3634 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3635 "conversion not supported by target.\n");
3636 return false;
3638 case WIDEN:
3639 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3640 &code1, &code2, &multi_step_cvt,
3641 &interm_types))
3643 /* Binary widening operation can only be supported directly by the
3644 architecture. */
3645 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3646 break;
3649 if (code != FLOAT_EXPR
3650 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3651 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3652 goto unsupported;
3654 rhs_mode = TYPE_MODE (rhs_type);
3655 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3656 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3657 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3658 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3660 cvt_type
3661 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3662 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3663 if (cvt_type == NULL_TREE)
3664 goto unsupported;
3666 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3668 if (!supportable_convert_operation (code, vectype_out,
3669 cvt_type, &decl1, &codecvt1))
3670 goto unsupported;
3672 else if (!supportable_widening_operation (code, stmt, vectype_out,
3673 cvt_type, &codecvt1,
3674 &codecvt2, &multi_step_cvt,
3675 &interm_types))
3676 continue;
3677 else
3678 gcc_assert (multi_step_cvt == 0);
3680 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3681 vectype_in, &code1, &code2,
3682 &multi_step_cvt, &interm_types))
3683 break;
3686 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3687 goto unsupported;
3689 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3690 codecvt2 = ERROR_MARK;
3691 else
3693 multi_step_cvt++;
3694 interm_types.safe_push (cvt_type);
3695 cvt_type = NULL_TREE;
3697 break;
3699 case NARROW:
3700 gcc_assert (op_type == unary_op);
3701 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3702 &code1, &multi_step_cvt,
3703 &interm_types))
3704 break;
3706 if (code != FIX_TRUNC_EXPR
3707 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3708 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3709 goto unsupported;
3711 rhs_mode = TYPE_MODE (rhs_type);
3712 cvt_type
3713 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3714 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3715 if (cvt_type == NULL_TREE)
3716 goto unsupported;
3717 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3718 &decl1, &codecvt1))
3719 goto unsupported;
3720 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3721 &code1, &multi_step_cvt,
3722 &interm_types))
3723 break;
3724 goto unsupported;
3726 default:
3727 gcc_unreachable ();
3730 if (!vec_stmt) /* transformation not required. */
3732 if (dump_enabled_p ())
3733 dump_printf_loc (MSG_NOTE, vect_location,
3734 "=== vectorizable_conversion ===\n");
3735 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3737 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3738 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3740 else if (modifier == NARROW)
3742 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3743 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3745 else
3747 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3748 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3750 interm_types.release ();
3751 return true;
3754 /** Transform. **/
3755 if (dump_enabled_p ())
3756 dump_printf_loc (MSG_NOTE, vect_location,
3757 "transform conversion. ncopies = %d.\n", ncopies);
3759 if (op_type == binary_op)
3761 if (CONSTANT_CLASS_P (op0))
3762 op0 = fold_convert (TREE_TYPE (op1), op0);
3763 else if (CONSTANT_CLASS_P (op1))
3764 op1 = fold_convert (TREE_TYPE (op0), op1);
3767 /* In case of multi-step conversion, we first generate conversion operations
3768 to the intermediate types, and then from those types to the final one.
3769 We create vector destinations for the intermediate type (TYPES) received
3770 from supportable_*_operation, and store them in the correct order
3771 for future use in vect_create_vectorized_*_stmts (). */
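/* For example (illustration): widening a vector of chars to a vector of
   ints is typically done as char -> short -> int, so INTERM_TYPES holds
   the short vector type and VEC_DSTS ends up as { int-vector dest,
   short-vector dest }, letting the loops below emit the intermediate
   step first (highest index) and the final conversion last (index 0). */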
3772 vec_dsts.create (multi_step_cvt + 1);
3773 vec_dest = vect_create_destination_var (scalar_dest,
3774 (cvt_type && modifier == WIDEN)
3775 ? cvt_type : vectype_out);
3776 vec_dsts.quick_push (vec_dest);
3778 if (multi_step_cvt)
3780 for (i = interm_types.length () - 1;
3781 interm_types.iterate (i, &intermediate_type); i--)
3783 vec_dest = vect_create_destination_var (scalar_dest,
3784 intermediate_type);
3785 vec_dsts.quick_push (vec_dest);
3789 if (cvt_type)
3790 vec_dest = vect_create_destination_var (scalar_dest,
3791 modifier == WIDEN
3792 ? vectype_out : cvt_type);
3794 if (!slp_node)
3796 if (modifier == WIDEN)
3798 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3799 if (op_type == binary_op)
3800 vec_oprnds1.create (1);
3802 else if (modifier == NARROW)
3803 vec_oprnds0.create (
3804 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3806 else if (code == WIDEN_LSHIFT_EXPR)
3807 vec_oprnds1.create (slp_node->vec_stmts_size);
3809 last_oprnd = op0;
3810 prev_stmt_info = NULL;
3811 switch (modifier)
3813 case NONE:
3814 for (j = 0; j < ncopies; j++)
3816 if (j == 0)
3817 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3818 -1);
3819 else
3820 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3822 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3824 /* Arguments are ready. Create the new vector stmt. */
3825 if (code1 == CALL_EXPR)
3827 new_stmt = gimple_build_call (decl1, 1, vop0);
3828 new_temp = make_ssa_name (vec_dest, new_stmt);
3829 gimple_call_set_lhs (new_stmt, new_temp);
3831 else
3833 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3834 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3835 new_temp = make_ssa_name (vec_dest, new_stmt);
3836 gimple_assign_set_lhs (new_stmt, new_temp);
3839 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3840 if (slp_node)
3841 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3842 else
3844 if (!prev_stmt_info)
3845 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3846 else
3847 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3848 prev_stmt_info = vinfo_for_stmt (new_stmt);
3852 break;
3854 case WIDEN:
3855 /* In case the vectorization factor (VF) is bigger than the number
3856 of elements that we can fit in a vectype (nunits), we have to
3857 generate more than one vector stmt, i.e., we need to "unroll"
3858 the vector stmt by a factor VF/nunits. */
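/* E.g. with VF == 8 and a 4-element vectype, ncopies == 2, so the loop
   below runs twice and chains the copies via STMT_VINFO_RELATED_STMT. */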
3859 for (j = 0; j < ncopies; j++)
3861 /* Handle uses. */
3862 if (j == 0)
3864 if (slp_node)
3866 if (code == WIDEN_LSHIFT_EXPR)
3868 unsigned int k;
3870 vec_oprnd1 = op1;
3871 /* Store vec_oprnd1 for every vector stmt to be created
3872 for SLP_NODE. We check during the analysis that all
3873 the shift arguments are the same. */
3874 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3875 vec_oprnds1.quick_push (vec_oprnd1);
3877 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3878 slp_node, -1);
3880 else
3881 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3882 &vec_oprnds1, slp_node, -1);
3884 else
3886 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3887 vec_oprnds0.quick_push (vec_oprnd0);
3888 if (op_type == binary_op)
3890 if (code == WIDEN_LSHIFT_EXPR)
3891 vec_oprnd1 = op1;
3892 else
3893 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3894 NULL);
3895 vec_oprnds1.quick_push (vec_oprnd1);
3899 else
3901 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3902 vec_oprnds0.truncate (0);
3903 vec_oprnds0.quick_push (vec_oprnd0);
3904 if (op_type == binary_op)
3906 if (code == WIDEN_LSHIFT_EXPR)
3907 vec_oprnd1 = op1;
3908 else
3909 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3910 vec_oprnd1);
3911 vec_oprnds1.truncate (0);
3912 vec_oprnds1.quick_push (vec_oprnd1);
3916 /* Arguments are ready. Create the new vector stmts. */
3917 for (i = multi_step_cvt; i >= 0; i--)
3919 tree this_dest = vec_dsts[i];
3920 enum tree_code c1 = code1, c2 = code2;
3921 if (i == 0 && codecvt2 != ERROR_MARK)
3923 c1 = codecvt1;
3924 c2 = codecvt2;
3926 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3927 &vec_oprnds1,
3928 stmt, this_dest, gsi,
3929 c1, c2, decl1, decl2,
3930 op_type);
3933 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3935 if (cvt_type)
3937 if (codecvt1 == CALL_EXPR)
3939 new_stmt = gimple_build_call (decl1, 1, vop0);
3940 new_temp = make_ssa_name (vec_dest, new_stmt);
3941 gimple_call_set_lhs (new_stmt, new_temp);
3943 else
3945 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3946 new_temp = make_ssa_name (vec_dest);
3947 new_stmt = gimple_build_assign (new_temp, codecvt1,
3948 vop0);
3951 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3953 else
3954 new_stmt = SSA_NAME_DEF_STMT (vop0);
3956 if (slp_node)
3957 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3958 else
3960 if (!prev_stmt_info)
3961 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3962 else
3963 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3964 prev_stmt_info = vinfo_for_stmt (new_stmt);
3969 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3970 break;
3972 case NARROW:
3973 /* In case the vectorization factor (VF) is bigger than the number
3974 of elements that we can fit in a vectype (nunits), we have to
3975 generate more than one vector stmt, i.e., we need to "unroll"
3976 the vector stmt by a factor VF/nunits. */
3977 for (j = 0; j < ncopies; j++)
3979 /* Handle uses. */
3980 if (slp_node)
3981 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3982 slp_node, -1);
3983 else
3985 vec_oprnds0.truncate (0);
3986 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3987 vect_pow2 (multi_step_cvt) - 1);
3990 /* Arguments are ready. Create the new vector stmts. */
3991 if (cvt_type)
3992 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3994 if (codecvt1 == CALL_EXPR)
3996 new_stmt = gimple_build_call (decl1, 1, vop0);
3997 new_temp = make_ssa_name (vec_dest, new_stmt);
3998 gimple_call_set_lhs (new_stmt, new_temp);
4000 else
4002 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4003 new_temp = make_ssa_name (vec_dest);
4004 new_stmt = gimple_build_assign (new_temp, codecvt1,
4005 vop0);
4008 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4009 vec_oprnds0[i] = new_temp;
4012 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4013 stmt, vec_dsts, gsi,
4014 slp_node, code1,
4015 &prev_stmt_info);
4018 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4019 break;
4022 vec_oprnds0.release ();
4023 vec_oprnds1.release ();
4024 vec_dsts.release ();
4025 interm_types.release ();
4027 return true;
4031 /* Function vectorizable_assignment.
4033 Check if STMT performs an assignment (copy) that can be vectorized.
4034 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4035 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4036 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4038 static bool
4039 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4040 gimple *vec_stmt, slp_tree slp_node)
4042 tree vec_dest;
4043 tree scalar_dest;
4044 tree op;
4045 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4046 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4047 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4048 tree new_temp;
4049 tree def;
4050 gimple def_stmt;
4051 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4052 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4053 int ncopies;
4054 int i, j;
4055 vec<tree> vec_oprnds = vNULL;
4056 tree vop;
4057 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4058 gimple new_stmt = NULL;
4059 stmt_vec_info prev_stmt_info = NULL;
4060 enum tree_code code;
4061 tree vectype_in;
4063 /* Multiple types in SLP are handled by creating the appropriate number of
4064 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4065 case of SLP. */
4066 if (slp_node || PURE_SLP_STMT (stmt_info))
4067 ncopies = 1;
4068 else
4069 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4071 gcc_assert (ncopies >= 1);
4073 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4074 return false;
4076 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4077 return false;
4079 /* Is vectorizable assignment? */
4080 if (!is_gimple_assign (stmt))
4081 return false;
4083 scalar_dest = gimple_assign_lhs (stmt);
4084 if (TREE_CODE (scalar_dest) != SSA_NAME)
4085 return false;
4087 code = gimple_assign_rhs_code (stmt);
4088 if (gimple_assign_single_p (stmt)
4089 || code == PAREN_EXPR
4090 || CONVERT_EXPR_CODE_P (code))
4091 op = gimple_assign_rhs1 (stmt);
4092 else
4093 return false;
4095 if (code == VIEW_CONVERT_EXPR)
4096 op = TREE_OPERAND (op, 0);
4098 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4099 &def_stmt, &def, &dt[0], &vectype_in))
4101 if (dump_enabled_p ())
4102 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4103 "use not simple.\n");
4104 return false;
4107 /* We can handle NOP_EXPR conversions that do not change the number
4108 of elements or the vector size. */
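/* E.g. a conversion from a vector of 'int' to a vector of 'unsigned int'
   keeps both the element count and the vector size, so it can simply be
   emitted as a VIEW_CONVERT_EXPR in the transform phase below. */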
4109 if ((CONVERT_EXPR_CODE_P (code)
4110 || code == VIEW_CONVERT_EXPR)
4111 && (!vectype_in
4112 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4113 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4114 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4115 return false;
4117 /* We do not handle bit-precision changes. */
4118 if ((CONVERT_EXPR_CODE_P (code)
4119 || code == VIEW_CONVERT_EXPR)
4120 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4121 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4122 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4123 || ((TYPE_PRECISION (TREE_TYPE (op))
4124 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4125 /* But a conversion that does not change the bit-pattern is ok. */
4126 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4127 > TYPE_PRECISION (TREE_TYPE (op)))
4128 && TYPE_UNSIGNED (TREE_TYPE (op))))
4130 if (dump_enabled_p ())
4131 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4132 "type conversion to/from bit-precision "
4133 "unsupported.\n");
4134 return false;
4137 if (!vec_stmt) /* transformation not required. */
4139 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4140 if (dump_enabled_p ())
4141 dump_printf_loc (MSG_NOTE, vect_location,
4142 "=== vectorizable_assignment ===\n");
4143 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4144 return true;
4147 /** Transform. **/
4148 if (dump_enabled_p ())
4149 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4151 /* Handle def. */
4152 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4154 /* Handle use. */
4155 for (j = 0; j < ncopies; j++)
4157 /* Handle uses. */
4158 if (j == 0)
4159 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4160 else
4161 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4163 /* Arguments are ready. Create the new vector stmt. */
4164 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4166 if (CONVERT_EXPR_CODE_P (code)
4167 || code == VIEW_CONVERT_EXPR)
4168 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4169 new_stmt = gimple_build_assign (vec_dest, vop);
4170 new_temp = make_ssa_name (vec_dest, new_stmt);
4171 gimple_assign_set_lhs (new_stmt, new_temp);
4172 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4173 if (slp_node)
4174 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4177 if (slp_node)
4178 continue;
4180 if (j == 0)
4181 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4182 else
4183 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4185 prev_stmt_info = vinfo_for_stmt (new_stmt);
4188 vec_oprnds.release ();
4189 return true;
4193 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4194 either as shift by a scalar or by a vector. */
4196 bool
4197 vect_supportable_shift (enum tree_code code, tree scalar_type)
4200 machine_mode vec_mode;
4201 optab optab;
4202 int icode;
4203 tree vectype;
4205 vectype = get_vectype_for_scalar_type (scalar_type);
4206 if (!vectype)
4207 return false;
4209 optab = optab_for_tree_code (code, vectype, optab_scalar);
4210 if (!optab
4211 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4213 optab = optab_for_tree_code (code, vectype, optab_vector);
4214 if (!optab
4215 || (optab_handler (optab, TYPE_MODE (vectype))
4216 == CODE_FOR_nothing))
4217 return false;
4220 vec_mode = TYPE_MODE (vectype);
4221 icode = (int) optab_handler (optab, vec_mode);
4222 if (icode == CODE_FOR_nothing)
4223 return false;
4225 return true;
4229 /* Function vectorizable_shift.
4231 Check if STMT performs a shift operation that can be vectorized.
4232 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4233 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4234 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4236 static bool
4237 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4238 gimple *vec_stmt, slp_tree slp_node)
4240 tree vec_dest;
4241 tree scalar_dest;
4242 tree op0, op1 = NULL;
4243 tree vec_oprnd1 = NULL_TREE;
4244 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4245 tree vectype;
4246 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4247 enum tree_code code;
4248 machine_mode vec_mode;
4249 tree new_temp;
4250 optab optab;
4251 int icode;
4252 machine_mode optab_op2_mode;
4253 tree def;
4254 gimple def_stmt;
4255 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4256 gimple new_stmt = NULL;
4257 stmt_vec_info prev_stmt_info;
4258 int nunits_in;
4259 int nunits_out;
4260 tree vectype_out;
4261 tree op1_vectype;
4262 int ncopies;
4263 int j, i;
4264 vec<tree> vec_oprnds0 = vNULL;
4265 vec<tree> vec_oprnds1 = vNULL;
4266 tree vop0, vop1;
4267 unsigned int k;
4268 bool scalar_shift_arg = true;
4269 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4270 int vf;
4272 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4273 return false;
4275 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4276 return false;
4278 /* Is STMT a vectorizable binary/unary operation? */
4279 if (!is_gimple_assign (stmt))
4280 return false;
4282 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4283 return false;
4285 code = gimple_assign_rhs_code (stmt);
4287 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4288 || code == RROTATE_EXPR))
4289 return false;
4291 scalar_dest = gimple_assign_lhs (stmt);
4292 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4293 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4294 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4296 if (dump_enabled_p ())
4297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4298 "bit-precision shifts not supported.\n");
4299 return false;
4302 op0 = gimple_assign_rhs1 (stmt);
4303 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4304 &def_stmt, &def, &dt[0], &vectype))
4306 if (dump_enabled_p ())
4307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4308 "use not simple.\n");
4309 return false;
4311 /* If op0 is an external or constant def, use a vector type with
4312 the same size as the output vector type. */
4313 if (!vectype)
4314 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4315 if (vec_stmt)
4316 gcc_assert (vectype);
4317 if (!vectype)
4319 if (dump_enabled_p ())
4320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4321 "no vectype for scalar type\n");
4322 return false;
4325 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4326 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4327 if (nunits_out != nunits_in)
4328 return false;
4330 op1 = gimple_assign_rhs2 (stmt);
4331 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4332 &def, &dt[1], &op1_vectype))
4334 if (dump_enabled_p ())
4335 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4336 "use not simple.\n");
4337 return false;
4340 if (loop_vinfo)
4341 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4342 else
4343 vf = 1;
4345 /* Multiple types in SLP are handled by creating the appropriate number of
4346 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4347 case of SLP. */
4348 if (slp_node || PURE_SLP_STMT (stmt_info))
4349 ncopies = 1;
4350 else
4351 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4353 gcc_assert (ncopies >= 1);
4355 /* Determine whether the shift amount is a vector or a scalar. If the
4356 shift/rotate amount is a vector, use the vector/vector shift optabs. */
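/* E.g. 'a[i] << b[i]' has a vect_internal_def shift count and needs the
   vector/vector optab, whereas 'a[i] << 3' or a shift by a loop-invariant
   'n' can normally be treated as a scalar shift argument. */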
4358 if (dt[1] == vect_internal_def && !slp_node)
4359 scalar_shift_arg = false;
4360 else if (dt[1] == vect_constant_def
4361 || dt[1] == vect_external_def
4362 || dt[1] == vect_internal_def)
4364 /* In SLP, we need to check whether the shift count is the same for
4365 all statements; in loops, if it is a constant or invariant, it is
4366 always a scalar shift. */
4367 if (slp_node)
4369 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4370 gimple slpstmt;
4372 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4373 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4374 scalar_shift_arg = false;
4377 else
4379 if (dump_enabled_p ())
4380 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4381 "operand mode requires invariant argument.\n");
4382 return false;
4385 /* Vector shifted by vector. */
4386 if (!scalar_shift_arg)
4388 optab = optab_for_tree_code (code, vectype, optab_vector);
4389 if (dump_enabled_p ())
4390 dump_printf_loc (MSG_NOTE, vect_location,
4391 "vector/vector shift/rotate found.\n");
4393 if (!op1_vectype)
4394 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4395 if (op1_vectype == NULL_TREE
4396 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4398 if (dump_enabled_p ())
4399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4400 "unusable type for last operand in"
4401 " vector/vector shift/rotate.\n");
4402 return false;
4405 /* See if the machine has a vector shifted by scalar insn and, if not,
4406 see if it has a vector shifted by vector insn. */
4407 else
4409 optab = optab_for_tree_code (code, vectype, optab_scalar);
4410 if (optab
4411 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4413 if (dump_enabled_p ())
4414 dump_printf_loc (MSG_NOTE, vect_location,
4415 "vector/scalar shift/rotate found.\n");
4417 else
4419 optab = optab_for_tree_code (code, vectype, optab_vector);
4420 if (optab
4421 && (optab_handler (optab, TYPE_MODE (vectype))
4422 != CODE_FOR_nothing))
4424 scalar_shift_arg = false;
4426 if (dump_enabled_p ())
4427 dump_printf_loc (MSG_NOTE, vect_location,
4428 "vector/vector shift/rotate found.\n");
4430 /* Unlike the other binary operators, shifts/rotates have
4431 an rhs of type int instead of the same type as the lhs,
4432 so make sure the scalar is the right type if we are
4433 dealing with vectors of long long/long/short/char. */
4434 if (dt[1] == vect_constant_def)
4435 op1 = fold_convert (TREE_TYPE (vectype), op1);
4436 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4437 TREE_TYPE (op1)))
4439 if (slp_node
4440 && TYPE_MODE (TREE_TYPE (vectype))
4441 != TYPE_MODE (TREE_TYPE (op1)))
4443 if (dump_enabled_p ())
4444 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4445 "unusable type for last operand in"
4446 " vector/vector shift/rotate.\n");
4447 return false;
4449 if (vec_stmt && !slp_node)
4451 op1 = fold_convert (TREE_TYPE (vectype), op1);
4452 op1 = vect_init_vector (stmt, op1,
4453 TREE_TYPE (vectype), NULL);
4460 /* Supportable by target? */
4461 if (!optab)
4463 if (dump_enabled_p ())
4464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4465 "no optab.\n");
4466 return false;
4468 vec_mode = TYPE_MODE (vectype);
4469 icode = (int) optab_handler (optab, vec_mode);
4470 if (icode == CODE_FOR_nothing)
4472 if (dump_enabled_p ())
4473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4474 "op not supported by target.\n");
4475 /* Check only during analysis. */
4476 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4477 || (vf < vect_min_worthwhile_factor (code)
4478 && !vec_stmt))
4479 return false;
4480 if (dump_enabled_p ())
4481 dump_printf_loc (MSG_NOTE, vect_location,
4482 "proceeding using word mode.\n");
4485 /* Worthwhile without SIMD support? Check only during analysis. */
4486 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4487 && vf < vect_min_worthwhile_factor (code)
4488 && !vec_stmt)
4490 if (dump_enabled_p ())
4491 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4492 "not worthwhile without SIMD support.\n");
4493 return false;
4496 if (!vec_stmt) /* transformation not required. */
4498 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4499 if (dump_enabled_p ())
4500 dump_printf_loc (MSG_NOTE, vect_location,
4501 "=== vectorizable_shift ===\n");
4502 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4503 return true;
4506 /** Transform. **/
4508 if (dump_enabled_p ())
4509 dump_printf_loc (MSG_NOTE, vect_location,
4510 "transform binary/unary operation.\n");
4512 /* Handle def. */
4513 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4515 prev_stmt_info = NULL;
4516 for (j = 0; j < ncopies; j++)
4518 /* Handle uses. */
4519 if (j == 0)
4521 if (scalar_shift_arg)
4523 /* Vector shl and shr insn patterns can be defined with scalar
4524 operand 2 (shift operand). In this case, use constant or loop
4525 invariant op1 directly, without extending it to vector mode
4526 first. */
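/* For example, a target whose V4SI shift-left pattern takes an SImode
   count operand can consume the invariant OP1 directly; only when
   operand 2 has a vector mode do we need vector defs for it. */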
4527 optab_op2_mode = insn_data[icode].operand[2].mode;
4528 if (!VECTOR_MODE_P (optab_op2_mode))
4530 if (dump_enabled_p ())
4531 dump_printf_loc (MSG_NOTE, vect_location,
4532 "operand 1 using scalar mode.\n");
4533 vec_oprnd1 = op1;
4534 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4535 vec_oprnds1.quick_push (vec_oprnd1);
4536 if (slp_node)
4538 /* Store vec_oprnd1 for every vector stmt to be created
4539 for SLP_NODE. We check during the analysis that all
4540 the shift arguments are the same.
4541 TODO: Allow different constants for different vector
4542 stmts generated for an SLP instance. */
4543 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4544 vec_oprnds1.quick_push (vec_oprnd1);
4549 /* vec_oprnd1 is available if operand 1 should be of a scalar type
4550 (a special case for certain kinds of vector shifts); otherwise,
4551 operand 1 should be of a vector type (the usual case). */
4552 if (vec_oprnd1)
4553 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4554 slp_node, -1);
4555 else
4556 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4557 slp_node, -1);
4559 else
4560 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4562 /* Arguments are ready. Create the new vector stmt. */
4563 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4565 vop1 = vec_oprnds1[i];
4566 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4567 new_temp = make_ssa_name (vec_dest, new_stmt);
4568 gimple_assign_set_lhs (new_stmt, new_temp);
4569 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4570 if (slp_node)
4571 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4574 if (slp_node)
4575 continue;
4577 if (j == 0)
4578 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4579 else
4580 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4581 prev_stmt_info = vinfo_for_stmt (new_stmt);
4584 vec_oprnds0.release ();
4585 vec_oprnds1.release ();
4587 return true;
4591 /* Function vectorizable_operation.
4593 Check if STMT performs a binary, unary or ternary operation that can
4594 be vectorized.
4595 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4596 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4597 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4599 static bool
4600 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4601 gimple *vec_stmt, slp_tree slp_node)
4603 tree vec_dest;
4604 tree scalar_dest;
4605 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4606 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4607 tree vectype;
4608 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4609 enum tree_code code;
4610 machine_mode vec_mode;
4611 tree new_temp;
4612 int op_type;
4613 optab optab;
4614 int icode;
4615 tree def;
4616 gimple def_stmt;
4617 enum vect_def_type dt[3]
4618 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4619 gimple new_stmt = NULL;
4620 stmt_vec_info prev_stmt_info;
4621 int nunits_in;
4622 int nunits_out;
4623 tree vectype_out;
4624 int ncopies;
4625 int j, i;
4626 vec<tree> vec_oprnds0 = vNULL;
4627 vec<tree> vec_oprnds1 = vNULL;
4628 vec<tree> vec_oprnds2 = vNULL;
4629 tree vop0, vop1, vop2;
4630 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4631 int vf;
4633 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4634 return false;
4636 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4637 return false;
4639 /* Is STMT a vectorizable binary/unary operation? */
4640 if (!is_gimple_assign (stmt))
4641 return false;
4643 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4644 return false;
4646 code = gimple_assign_rhs_code (stmt);
4648 /* For pointer addition, we should use the normal plus for
4649 the vector addition. */
4650 if (code == POINTER_PLUS_EXPR)
4651 code = PLUS_EXPR;
4653 /* Support only unary, binary, or ternary operations. */
4654 op_type = TREE_CODE_LENGTH (code);
4655 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4657 if (dump_enabled_p ())
4658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4659 "num. args = %d (not unary/binary/ternary op).\n",
4660 op_type);
4661 return false;
4664 scalar_dest = gimple_assign_lhs (stmt);
4665 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4667 /* Most operations cannot handle bit-precision types without extra
4668 truncations. */
4669 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4670 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4671 /* The exceptions are bitwise binary operations, whose result never needs re-truncation. */
4672 && code != BIT_IOR_EXPR
4673 && code != BIT_XOR_EXPR
4674 && code != BIT_AND_EXPR)
4676 if (dump_enabled_p ())
4677 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4678 "bit-precision arithmetic not supported.\n");
4679 return false;
4682 op0 = gimple_assign_rhs1 (stmt);
4683 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4684 &def_stmt, &def, &dt[0], &vectype))
4686 if (dump_enabled_p ())
4687 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4688 "use not simple.\n");
4689 return false;
4691 /* If op0 is an external or constant def, use a vector type with
4692 the same size as the output vector type. */
4693 if (!vectype)
4694 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4695 if (vec_stmt)
4696 gcc_assert (vectype);
4697 if (!vectype)
4699 if (dump_enabled_p ())
4701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4702 "no vectype for scalar type ");
4703 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4704 TREE_TYPE (op0));
4705 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4708 return false;
4711 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4712 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4713 if (nunits_out != nunits_in)
4714 return false;
4716 if (op_type == binary_op || op_type == ternary_op)
4718 op1 = gimple_assign_rhs2 (stmt);
4719 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4720 &def, &dt[1]))
4722 if (dump_enabled_p ())
4723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4724 "use not simple.\n");
4725 return false;
4728 if (op_type == ternary_op)
4730 op2 = gimple_assign_rhs3 (stmt);
4731 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4732 &def, &dt[2]))
4734 if (dump_enabled_p ())
4735 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4736 "use not simple.\n");
4737 return false;
4741 if (loop_vinfo)
4742 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4743 else
4744 vf = 1;
4746 /* Multiple types in SLP are handled by creating the appropriate number of
4747 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4748 case of SLP. */
4749 if (slp_node || PURE_SLP_STMT (stmt_info))
4750 ncopies = 1;
4751 else
4752 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4754 gcc_assert (ncopies >= 1);
4756 /* Shifts are handled in vectorizable_shift (). */
4757 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4758 || code == RROTATE_EXPR)
4759 return false;
4761 /* Supportable by target? */
4763 vec_mode = TYPE_MODE (vectype);
4764 if (code == MULT_HIGHPART_EXPR)
4766 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4767 icode = LAST_INSN_CODE;
4768 else
4769 icode = CODE_FOR_nothing;
4771 else
4773 optab = optab_for_tree_code (code, vectype, optab_default);
4774 if (!optab)
4776 if (dump_enabled_p ())
4777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4778 "no optab.\n");
4779 return false;
4781 icode = (int) optab_handler (optab, vec_mode);
4784 if (icode == CODE_FOR_nothing)
4786 if (dump_enabled_p ())
4787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4788 "op not supported by target.\n");
4789 /* Check only during analysis. */
4790 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4791 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4792 return false;
4793 if (dump_enabled_p ())
4794 dump_printf_loc (MSG_NOTE, vect_location,
4795 "proceeding using word mode.\n");
4798 /* Worthwhile without SIMD support? Check only during analysis. */
4799 if (!VECTOR_MODE_P (vec_mode)
4800 && !vec_stmt
4801 && vf < vect_min_worthwhile_factor (code))
4803 if (dump_enabled_p ())
4804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4805 "not worthwhile without SIMD support.\n");
4806 return false;
4809 if (!vec_stmt) /* transformation not required. */
4811 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4812 if (dump_enabled_p ())
4813 dump_printf_loc (MSG_NOTE, vect_location,
4814 "=== vectorizable_operation ===\n");
4815 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4816 return true;
4819 /** Transform. **/
4821 if (dump_enabled_p ())
4822 dump_printf_loc (MSG_NOTE, vect_location,
4823 "transform binary/unary operation.\n");
4825 /* Handle def. */
4826 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4828 /* In case the vectorization factor (VF) is bigger than the number
4829 of elements that we can fit in a vectype (nunits), we have to generate
4830 more than one vector stmt, i.e., we need to "unroll" the
4831 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4832 from one copy of the vector stmt to the next, in the field
4833 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4834 stages to find the correct vector defs to be used when vectorizing
4835 stmts that use the defs of the current stmt. The example below
4836 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4837 we need to create 4 vectorized stmts):
4839 before vectorization:
4840 RELATED_STMT VEC_STMT
4841 S1: x = memref - -
4842 S2: z = x + 1 - -
4844 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4845 there):
4846 RELATED_STMT VEC_STMT
4847 VS1_0: vx0 = memref0 VS1_1 -
4848 VS1_1: vx1 = memref1 VS1_2 -
4849 VS1_2: vx2 = memref2 VS1_3 -
4850 VS1_3: vx3 = memref3 - -
4851 S1: x = load - VS1_0
4852 S2: z = x + 1 - -
4854 step2: vectorize stmt S2 (done here):
4855 To vectorize stmt S2 we first need to find the relevant vector
4856 def for the first operand 'x'. This is, as usual, obtained from
4857 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4858 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4859 relevant vector def 'vx0'. Having found 'vx0' we can generate
4860 the vector stmt VS2_0, and as usual, record it in the
4861 STMT_VINFO_VEC_STMT of stmt S2.
4862 When creating the second copy (VS2_1), we obtain the relevant vector
4863 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4864 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4865 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4866 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4867 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4868 chain of stmts and pointers:
4869 RELATED_STMT VEC_STMT
4870 VS1_0: vx0 = memref0 VS1_1 -
4871 VS1_1: vx1 = memref1 VS1_2 -
4872 VS1_2: vx2 = memref2 VS1_3 -
4873 VS1_3: vx3 = memref3 - -
4874 S1: x = load - VS1_0
4875 VS2_0: vz0 = vx0 + v1 VS2_1 -
4876 VS2_1: vz1 = vx1 + v1 VS2_2 -
4877 VS2_2: vz2 = vx2 + v1 VS2_3 -
4878 VS2_3: vz3 = vx3 + v1 - -
4879 S2: z = x + 1 - VS2_0 */
4881 prev_stmt_info = NULL;
4882 for (j = 0; j < ncopies; j++)
4884 /* Handle uses. */
4885 if (j == 0)
4887 if (op_type == binary_op || op_type == ternary_op)
4888 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4889 slp_node, -1);
4890 else
4891 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4892 slp_node, -1);
4893 if (op_type == ternary_op)
4895 vec_oprnds2.create (1);
4896 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4897 stmt,
4898 NULL));
4901 else
4903 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4904 if (op_type == ternary_op)
4906 tree vec_oprnd = vec_oprnds2.pop ();
4907 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4908 vec_oprnd));
4912 /* Arguments are ready. Create the new vector stmt. */
4913 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4915 vop1 = ((op_type == binary_op || op_type == ternary_op)
4916 ? vec_oprnds1[i] : NULL_TREE);
4917 vop2 = ((op_type == ternary_op)
4918 ? vec_oprnds2[i] : NULL_TREE);
4919 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4920 new_temp = make_ssa_name (vec_dest, new_stmt);
4921 gimple_assign_set_lhs (new_stmt, new_temp);
4922 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4923 if (slp_node)
4924 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4927 if (slp_node)
4928 continue;
4930 if (j == 0)
4931 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4932 else
4933 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4934 prev_stmt_info = vinfo_for_stmt (new_stmt);
4937 vec_oprnds0.release ();
4938 vec_oprnds1.release ();
4939 vec_oprnds2.release ();
4941 return true;
4944 /* A helper function to ensure data reference DR's base alignment
4945 for STMT_INFO. */
4947 static void
4948 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4950 if (!dr->aux)
4951 return;
4953 if (((dataref_aux *)dr->aux)->base_misaligned)
4955 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4956 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4958 if (decl_in_symtab_p (base_decl))
4959 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
4960 else
4962 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4963 DECL_USER_ALIGN (base_decl) = 1;
4965 ((dataref_aux *)dr->aux)->base_misaligned = false;
4970 /* Given a vector type VECTYPE, returns the VECTOR_CST mask that implements
4971 reversal of the vector elements. If that is impossible to do,
4972 returns NULL_TREE. */
4974 static tree
4975 perm_mask_for_reverse (tree vectype)
4977 int i, nunits;
4978 unsigned char *sel;
4980 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4981 sel = XALLOCAVEC (unsigned char, nunits);
4983 for (i = 0; i < nunits; ++i)
4984 sel[i] = nunits - 1 - i;
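/* E.g. for a 4-element vector this builds the selector {3, 2, 1, 0}. */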
4986 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4987 return NULL_TREE;
4988 return vect_gen_perm_mask_checked (vectype, sel);
4991 /* Function vectorizable_store.
4993 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4994 can be vectorized.
4995 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4996 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4997 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4999 static bool
5000 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5001 slp_tree slp_node)
5003 tree scalar_dest;
5004 tree data_ref;
5005 tree op;
5006 tree vec_oprnd = NULL_TREE;
5007 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5008 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5009 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5010 tree elem_type;
5011 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5012 struct loop *loop = NULL;
5013 machine_mode vec_mode;
5014 tree dummy;
5015 enum dr_alignment_support alignment_support_scheme;
5016 tree def;
5017 gimple def_stmt;
5018 enum vect_def_type dt;
5019 stmt_vec_info prev_stmt_info = NULL;
5020 tree dataref_ptr = NULL_TREE;
5021 tree dataref_offset = NULL_TREE;
5022 gimple ptr_incr = NULL;
5023 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5024 int ncopies;
5025 int j;
5026 gimple next_stmt, first_stmt = NULL;
5027 bool grouped_store = false;
5028 bool store_lanes_p = false;
5029 unsigned int group_size, i;
5030 vec<tree> dr_chain = vNULL;
5031 vec<tree> oprnds = vNULL;
5032 vec<tree> result_chain = vNULL;
5033 bool inv_p;
5034 bool negative = false;
5035 tree offset = NULL_TREE;
5036 vec<tree> vec_oprnds = vNULL;
5037 bool slp = (slp_node != NULL);
5038 unsigned int vec_num;
5039 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5040 tree aggr_type;
5042 if (loop_vinfo)
5043 loop = LOOP_VINFO_LOOP (loop_vinfo);
5045 /* Multiple types in SLP are handled by creating the appropriate number of
5046 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5047 case of SLP. */
5048 if (slp || PURE_SLP_STMT (stmt_info))
5049 ncopies = 1;
5050 else
5051 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5053 gcc_assert (ncopies >= 1);
5055 /* FORNOW. This restriction should be relaxed. */
5056 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5058 if (dump_enabled_p ())
5059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5060 "multiple types in nested loop.\n");
5061 return false;
5064 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5065 return false;
5067 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5068 return false;
5070 /* Is vectorizable store? */
5072 if (!is_gimple_assign (stmt))
5073 return false;
5075 scalar_dest = gimple_assign_lhs (stmt);
5076 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5077 && is_pattern_stmt_p (stmt_info))
5078 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5079 if (TREE_CODE (scalar_dest) != ARRAY_REF
5080 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5081 && TREE_CODE (scalar_dest) != INDIRECT_REF
5082 && TREE_CODE (scalar_dest) != COMPONENT_REF
5083 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5084 && TREE_CODE (scalar_dest) != REALPART_EXPR
5085 && TREE_CODE (scalar_dest) != MEM_REF)
5086 return false;
5088 gcc_assert (gimple_assign_single_p (stmt));
5089 op = gimple_assign_rhs1 (stmt);
5090 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5091 &def, &dt))
5093 if (dump_enabled_p ())
5094 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5095 "use not simple.\n");
5096 return false;
5099 elem_type = TREE_TYPE (vectype);
5100 vec_mode = TYPE_MODE (vectype);
5102 /* FORNOW. In some cases we can vectorize even if the data type is not
5103 supported (e.g. array initialization with 0). */
5104 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5105 return false;
5107 if (!STMT_VINFO_DATA_REF (stmt_info))
5108 return false;
5110 if (!STMT_VINFO_STRIDED_P (stmt_info))
5112 negative =
5113 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5114 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5115 size_zero_node) < 0;
5116 if (negative && ncopies > 1)
5118 if (dump_enabled_p ())
5119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5120 "multiple types with negative step.\n");
5121 return false;
5123 if (negative)
5125 gcc_assert (!grouped_store);
5126 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5127 if (alignment_support_scheme != dr_aligned
5128 && alignment_support_scheme != dr_unaligned_supported)
5130 if (dump_enabled_p ())
5131 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5132 "negative step but alignment required.\n");
5133 return false;
5135 if (dt != vect_constant_def
5136 && dt != vect_external_def
5137 && !perm_mask_for_reverse (vectype))
5139 if (dump_enabled_p ())
5140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5141 "negative step and reversing not supported.\n");
5142 return false;
5147 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5149 grouped_store = true;
5150 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5151 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5152 if (!slp
5153 && !PURE_SLP_STMT (stmt_info)
5154 && !STMT_VINFO_STRIDED_P (stmt_info))
5156 if (vect_store_lanes_supported (vectype, group_size))
5157 store_lanes_p = true;
5158 else if (!vect_grouped_store_supported (vectype, group_size))
5159 return false;
5162 if (STMT_VINFO_STRIDED_P (stmt_info)
5163 && (slp || PURE_SLP_STMT (stmt_info))
5164 && (group_size > nunits
5165 || nunits % group_size != 0))
5167 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5168 "unhandled strided group store\n");
5169 return false;
5172 if (first_stmt == stmt)
5174 /* STMT is the leader of the group. Check the operands of all the
5175 stmts of the group. */
5176 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5177 while (next_stmt)
5179 gcc_assert (gimple_assign_single_p (next_stmt));
5180 op = gimple_assign_rhs1 (next_stmt);
5181 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5182 &def_stmt, &def, &dt))
5184 if (dump_enabled_p ())
5185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5186 "use not simple.\n");
5187 return false;
5189 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5194 if (!vec_stmt) /* transformation not required. */
5196 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5197 /* The SLP costs are calculated during SLP analysis. */
5198 if (!PURE_SLP_STMT (stmt_info))
5199 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5200 NULL, NULL, NULL);
5201 return true;
5204 /** Transform. **/
5206 ensure_base_align (stmt_info, dr);
5208 if (grouped_store)
5210 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5211 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5213 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5215 /* FORNOW */
5216 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5218 /* We vectorize all the stmts of the interleaving group when we
5219 reach the last stmt in the group. */
5220 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5221 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5222 && !slp)
5224 *vec_stmt = NULL;
5225 return true;
5228 if (slp)
5230 grouped_store = false;
5231 /* VEC_NUM is the number of vect stmts to be created for this
5232 group. */
5233 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5234 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5235 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5236 op = gimple_assign_rhs1 (first_stmt);
5238 else
5239 /* VEC_NUM is the number of vect stmts to be created for this
5240 group. */
5241 vec_num = group_size;
5243 else
5245 first_stmt = stmt;
5246 first_dr = dr;
5247 group_size = vec_num = 1;
5250 if (dump_enabled_p ())
5251 dump_printf_loc (MSG_NOTE, vect_location,
5252 "transform store. ncopies = %d\n", ncopies);
5254 if (STMT_VINFO_STRIDED_P (stmt_info))
5256 gimple_stmt_iterator incr_gsi;
5257 bool insert_after;
5258 gimple incr;
5259 tree offvar;
5260 tree ivstep;
5261 tree running_off;
5262 gimple_seq stmts = NULL;
5263 tree stride_base, stride_step, alias_off;
5264 tree vec_oprnd;
5265 unsigned int g;
5267 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5269 stride_base
5270 = fold_build_pointer_plus
5271 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5272 size_binop (PLUS_EXPR,
5273 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5274 convert_to_ptrofftype (DR_INIT (first_dr))));
5275 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5277 /* For a store with loop-invariant (but other than power-of-2)
5278 stride (i.e. not a grouped access) like so:
5280 for (i = 0; i < n; i += stride)
5281 array[i] = ...;
5283 we generate a new induction variable and new stores from
5284 the components of the (vectorized) rhs:
5286 for (j = 0; ; j += VF*stride)
5287 vectemp = ...;
5288 tmp1 = vectemp[0];
5289 array[j] = tmp1;
5290 tmp2 = vectemp[1];
5291 array[j + stride] = tmp2;
5295 unsigned nstores = nunits;
5296 tree ltype = elem_type;
5297 if (slp)
5299 nstores = nunits / group_size;
5300 if (group_size < nunits)
5301 ltype = build_vector_type (elem_type, group_size);
5302 else
5303 ltype = vectype;
5304 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5305 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5306 group_size = 1;
5309 ivstep = stride_step;
5310 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5311 build_int_cst (TREE_TYPE (ivstep),
5312 ncopies * nstores));
5314 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5316 create_iv (stride_base, ivstep, NULL,
5317 loop, &incr_gsi, insert_after,
5318 &offvar, NULL);
5319 incr = gsi_stmt (incr_gsi);
5320 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
5322 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5323 if (stmts)
5324 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5326 prev_stmt_info = NULL;
5327 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
5328 next_stmt = first_stmt;
5329 for (g = 0; g < group_size; g++)
5331 running_off = offvar;
5332 if (g)
5334 tree size = TYPE_SIZE_UNIT (ltype);
5335 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
5336 size);
5337 tree newoff = copy_ssa_name (running_off, NULL);
5338 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5339 running_off, pos);
5340 vect_finish_stmt_generation (stmt, incr, gsi);
5341 running_off = newoff;
5343 for (j = 0; j < ncopies; j++)
5345 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5346 and first_stmt == stmt. */
5347 if (j == 0)
5349 if (slp)
5351 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5352 slp_node, -1);
5353 vec_oprnd = vec_oprnds[0];
5355 else
5357 gcc_assert (gimple_assign_single_p (next_stmt));
5358 op = gimple_assign_rhs1 (next_stmt);
5359 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5360 NULL);
5363 else
5365 if (slp)
5366 vec_oprnd = vec_oprnds[j];
5367 else
5369 vect_is_simple_use (vec_oprnd, NULL, loop_vinfo,
5370 bb_vinfo, &def_stmt, &def, &dt);
5371 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5375 for (i = 0; i < nstores; i++)
5377 tree newref, newoff;
5378 gimple incr, assign;
5379 tree size = TYPE_SIZE (ltype);
5380 /* Extract the i'th component. */
5381 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5382 bitsize_int (i), size);
5383 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5384 size, pos);
5386 elem = force_gimple_operand_gsi (gsi, elem, true,
5387 NULL_TREE, true,
5388 GSI_SAME_STMT);
5390 newref = build2 (MEM_REF, ltype,
5391 running_off, alias_off);
5393 /* And store it to *running_off. */
5394 assign = gimple_build_assign (newref, elem);
5395 vect_finish_stmt_generation (stmt, assign, gsi);
5397 newoff = copy_ssa_name (running_off, NULL);
5398 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5399 running_off, stride_step);
5400 vect_finish_stmt_generation (stmt, incr, gsi);
5402 running_off = newoff;
5403 if (g == group_size - 1
5404 && !slp)
5406 if (j == 0 && i == 0)
5407 STMT_VINFO_VEC_STMT (stmt_info)
5408 = *vec_stmt = assign;
5409 else
5410 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5411 prev_stmt_info = vinfo_for_stmt (assign);
5415 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5417 return true;
5420 dr_chain.create (group_size);
5421 oprnds.create (group_size);
5423 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5424 gcc_assert (alignment_support_scheme);
5425 /* Targets with store-lane instructions must not require explicit
5426 realignment. */
5427 gcc_assert (!store_lanes_p
5428 || alignment_support_scheme == dr_aligned
5429 || alignment_support_scheme == dr_unaligned_supported);
5431 if (negative)
5432 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5434 if (store_lanes_p)
5435 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5436 else
5437 aggr_type = vectype;
5439 /* In case the vectorization factor (VF) is bigger than the number
5440 of elements that we can fit in a vectype (nunits), we have to generate
5441 more than one vector stmt, i.e., we need to "unroll" the
5442 vector stmt by a factor VF/nunits. For more details see documentation in
5443 vect_get_vec_def_for_copy_stmt. */
5445 /* In case of interleaving (non-unit grouped access):
5447 S1: &base + 2 = x2
5448 S2: &base = x0
5449 S3: &base + 1 = x1
5450 S4: &base + 3 = x3
5452 We create vectorized stores starting from the base address (the access of the
5453 first stmt in the chain (S2 in the above example)) when the last store stmt
5454 of the chain (S4) is reached:
5456 VS1: &base = vx2
5457 VS2: &base + vec_size*1 = vx0
5458 VS3: &base + vec_size*2 = vx1
5459 VS4: &base + vec_size*3 = vx3
5461 Then permutation statements are generated:
5463 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5464 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5467 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5468 (the order of the data-refs in the output of vect_permute_store_chain
5469 corresponds to the order of scalar stmts in the interleaving chain - see
5470 the documentation of vect_permute_store_chain()).
5472 In case of both multiple types and interleaving, above vector stores and
5473 permutation stmts are created for every copy. The result vector stmts are
5474 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5475 STMT_VINFO_RELATED_STMT for the next copies.
5478 prev_stmt_info = NULL;
5479 for (j = 0; j < ncopies; j++)
5481 gimple new_stmt;
5483 if (j == 0)
5485 if (slp)
5487 /* Get vectorized arguments for SLP_NODE. */
5488 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5489 NULL, slp_node, -1);
5491 vec_oprnd = vec_oprnds[0];
5493 else
5495 /* For interleaved stores we collect vectorized defs for all the
5496 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5497 used as an input to vect_permute_store_chain(), and OPRNDS as
5498 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5500 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5501 OPRNDS are of size 1. */
5502 next_stmt = first_stmt;
5503 for (i = 0; i < group_size; i++)
5505 /* Since gaps are not supported for interleaved stores,
5506 GROUP_SIZE is the exact number of stmts in the chain.
5507 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5508 there is no interleaving, GROUP_SIZE is 1, and only one
5509 iteration of the loop will be executed. */
5510 gcc_assert (next_stmt
5511 && gimple_assign_single_p (next_stmt));
5512 op = gimple_assign_rhs1 (next_stmt);
5514 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5515 NULL);
5516 dr_chain.quick_push (vec_oprnd);
5517 oprnds.quick_push (vec_oprnd);
5518 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5522 /* We should have caught mismatched types earlier. */
5523 gcc_assert (useless_type_conversion_p (vectype,
5524 TREE_TYPE (vec_oprnd)));
5525 bool simd_lane_access_p
5526 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5527 if (simd_lane_access_p
5528 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5529 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5530 && integer_zerop (DR_OFFSET (first_dr))
5531 && integer_zerop (DR_INIT (first_dr))
5532 && alias_sets_conflict_p (get_alias_set (aggr_type),
5533 get_alias_set (DR_REF (first_dr))))
5535 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5536 dataref_offset = build_int_cst (reference_alias_ptr_type
5537 (DR_REF (first_dr)), 0);
5538 inv_p = false;
5540 else
5541 dataref_ptr
5542 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5543 simd_lane_access_p ? loop : NULL,
5544 offset, &dummy, gsi, &ptr_incr,
5545 simd_lane_access_p, &inv_p);
5546 gcc_assert (bb_vinfo || !inv_p);
5548 else
5550 /* For interleaved stores we created vectorized defs for all the
5551 defs stored in OPRNDS in the previous iteration (previous copy).
5552 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5553 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5554 next copy.
5555 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5556 OPRNDS are of size 1. */
5557 for (i = 0; i < group_size; i++)
5559 op = oprnds[i];
5560 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5561 &def, &dt);
5562 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5563 dr_chain[i] = vec_oprnd;
5564 oprnds[i] = vec_oprnd;
5566 if (dataref_offset)
5567 dataref_offset
5568 = int_const_binop (PLUS_EXPR, dataref_offset,
5569 TYPE_SIZE_UNIT (aggr_type));
5570 else
5571 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5572 TYPE_SIZE_UNIT (aggr_type));
5575 if (store_lanes_p)
5577 tree vec_array;
5579 /* Combine all the vectors into an array. */
5580 vec_array = create_vector_array (vectype, vec_num);
5581 for (i = 0; i < vec_num; i++)
5583 vec_oprnd = dr_chain[i];
5584 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5587 /* Emit:
5588 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5589 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5590 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5591 gimple_call_set_lhs (new_stmt, data_ref);
5592 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5594 else
5596 new_stmt = NULL;
5597 if (grouped_store)
5599 if (j == 0)
5600 result_chain.create (group_size);
5601 /* Permute. */
5602 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5603 &result_chain);
5606 next_stmt = first_stmt;
5607 for (i = 0; i < vec_num; i++)
5609 unsigned align, misalign;
5611 if (i > 0)
5612 /* Bump the vector pointer. */
5613 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5614 stmt, NULL_TREE);
5616 if (slp)
5617 vec_oprnd = vec_oprnds[i];
5618 else if (grouped_store)
5619 /* For grouped stores vectorized defs are interleaved in
5620 vect_permute_store_chain(). */
5621 vec_oprnd = result_chain[i];
5623 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
5624 dataref_ptr,
5625 dataref_offset
5626 ? dataref_offset
5627 : build_int_cst (reference_alias_ptr_type
5628 (DR_REF (first_dr)), 0));
5629 align = TYPE_ALIGN_UNIT (vectype);
5630 if (aligned_access_p (first_dr))
5631 misalign = 0;
5632 else if (DR_MISALIGNMENT (first_dr) == -1)
5634 TREE_TYPE (data_ref)
5635 = build_aligned_type (TREE_TYPE (data_ref),
5636 TYPE_ALIGN (elem_type));
5637 align = TYPE_ALIGN_UNIT (elem_type);
5638 misalign = 0;
5640 else
5642 TREE_TYPE (data_ref)
5643 = build_aligned_type (TREE_TYPE (data_ref),
5644 TYPE_ALIGN (elem_type));
5645 misalign = DR_MISALIGNMENT (first_dr);
5647 if (dataref_offset == NULL_TREE
5648 && TREE_CODE (dataref_ptr) == SSA_NAME)
5649 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5650 misalign);
5652 if (negative
5653 && dt != vect_constant_def
5654 && dt != vect_external_def)
5656 tree perm_mask = perm_mask_for_reverse (vectype);
5657 tree perm_dest
5658 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5659 vectype);
5660 tree new_temp = make_ssa_name (perm_dest);
5662 /* Generate the permute statement. */
5663 gimple perm_stmt
5664 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5665 vec_oprnd, perm_mask);
5666 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5668 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5669 vec_oprnd = new_temp;
5672 /* Arguments are ready. Create the new vector stmt. */
5673 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5674 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5676 if (slp)
5677 continue;
5679 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5680 if (!next_stmt)
5681 break;
5684 if (!slp)
5686 if (j == 0)
5687 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5688 else
5689 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5690 prev_stmt_info = vinfo_for_stmt (new_stmt);
5694 dr_chain.release ();
5695 oprnds.release ();
5696 result_chain.release ();
5697 vec_oprnds.release ();
5699 return true;
5702 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5703 VECTOR_CST mask. No checks are made that the target platform supports the
5704 mask, so callers may wish to test can_vec_perm_p separately, or use
5705 vect_gen_perm_mask_checked. */
5707 tree
5708 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5710 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5711 int i, nunits;
5713 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5715 mask_elt_type = lang_hooks.types.type_for_mode
5716 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5717 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5719 mask_elts = XALLOCAVEC (tree, nunits);
5720 for (i = nunits - 1; i >= 0; i--)
5721 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5722 mask_vec = build_vector (mask_type, mask_elts);
5724 return mask_vec;
5727 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5728 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5730 tree
5731 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5733 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5734 return vect_gen_perm_mask_any (vectype, sel);
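/* Usage sketch (illustrative only, not a call taken from this file): to
   reverse the elements of a four-element vector one could build
     unsigned char sel[4] = { 3, 2, 1, 0 };
     tree mask = vect_gen_perm_mask_checked (vectype, sel);
   assuming the target's can_vec_perm_p accepts the reversal for the
   mode of VECTYPE.  */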
5737 /* Given vector variables X and Y that were generated for the scalar
5738 STMT, generate instructions to permute the vector elements of X and Y
5739 using permutation mask MASK_VEC, insert them at *GSI and return the
5740 permuted vector variable. */
5742 static tree
5743 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5744 gimple_stmt_iterator *gsi)
5746 tree vectype = TREE_TYPE (x);
5747 tree perm_dest, data_ref;
5748 gimple perm_stmt;
5750 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5751 data_ref = make_ssa_name (perm_dest);
5753 /* Generate the permute statement. */
5754 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
5755 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5757 return data_ref;
5760 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5761 inserting them on the loop's preheader edge. Returns true if we
5762 were successful in doing so (and thus STMT can then be moved),
5763 otherwise returns false. */
5765 static bool
5766 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5768 ssa_op_iter i;
5769 tree op;
5770 bool any = false;
5772 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5774 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5775 if (!gimple_nop_p (def_stmt)
5776 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5778 /* Make sure we don't need to recurse. While we could do
5779 so in simple cases, when there are more complex use webs
5780 we don't have an easy way to preserve stmt order to fulfil
5781 dependencies within them. */
5782 tree op2;
5783 ssa_op_iter i2;
5784 if (gimple_code (def_stmt) == GIMPLE_PHI)
5785 return false;
5786 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5788 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5789 if (!gimple_nop_p (def_stmt2)
5790 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5791 return false;
5793 any = true;
5797 if (!any)
5798 return true;
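/* All uses are hoistable: move each in-loop def stmt onto the loop
   preheader edge.  */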
5800 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5802 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5803 if (!gimple_nop_p (def_stmt)
5804 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5806 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5807 gsi_remove (&gsi, false);
5808 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5812 return true;
5815 /* vectorizable_load.
5817 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5818 can be vectorized.
5819 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5820 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5821 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5823 static bool
5824 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5825 slp_tree slp_node, slp_instance slp_node_instance)
5827 tree scalar_dest;
5828 tree vec_dest = NULL;
5829 tree data_ref = NULL;
5830 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5831 stmt_vec_info prev_stmt_info;
5832 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5833 struct loop *loop = NULL;
5834 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5835 bool nested_in_vect_loop = false;
5836 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5837 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5838 tree elem_type;
5839 tree new_temp;
5840 machine_mode mode;
5841 gimple new_stmt = NULL;
5842 tree dummy;
5843 enum dr_alignment_support alignment_support_scheme;
5844 tree dataref_ptr = NULL_TREE;
5845 tree dataref_offset = NULL_TREE;
5846 gimple ptr_incr = NULL;
5847 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5848 int ncopies;
5849 int i, j, group_size = -1, group_gap_adj;
5850 tree msq = NULL_TREE, lsq;
5851 tree offset = NULL_TREE;
5852 tree byte_offset = NULL_TREE;
5853 tree realignment_token = NULL_TREE;
5854 gphi *phi = NULL;
5855 vec<tree> dr_chain = vNULL;
5856 bool grouped_load = false;
5857 bool load_lanes_p = false;
5858 gimple first_stmt;
5859 bool inv_p;
5860 bool negative = false;
5861 bool compute_in_loop = false;
5862 struct loop *at_loop;
5863 int vec_num;
5864 bool slp = (slp_node != NULL);
5865 bool slp_perm = false;
5866 enum tree_code code;
5867 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5868 int vf;
5869 tree aggr_type;
5870 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5871 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5872 int gather_scale = 1;
5873 enum vect_def_type gather_dt = vect_unknown_def_type;
5875 if (loop_vinfo)
5877 loop = LOOP_VINFO_LOOP (loop_vinfo);
5878 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5879 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5881 else
5882 vf = 1;
5884 /* Multiple types in SLP are handled by creating the appropriate number of
5885 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5886 case of SLP. */
5887 if (slp || PURE_SLP_STMT (stmt_info))
5888 ncopies = 1;
5889 else
5890 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5892 gcc_assert (ncopies >= 1);
5894 /* FORNOW. This restriction should be relaxed. */
5895 if (nested_in_vect_loop && ncopies > 1)
5897 if (dump_enabled_p ())
5898 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5899 "multiple types in nested loop.\n");
5900 return false;
5903 /* Invalidate assumptions made by dependence analysis when vectorization
5904 on the unrolled body effectively re-orders stmts. */
5905 if (ncopies > 1
5906 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5907 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5908 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5910 if (dump_enabled_p ())
5911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5912 "cannot perform implicit CSE when unrolling "
5913 "with negative dependence distance\n");
5914 return false;
5917 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5918 return false;
5920 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5921 return false;
5923 /* Is vectorizable load? */
5924 if (!is_gimple_assign (stmt))
5925 return false;
5927 scalar_dest = gimple_assign_lhs (stmt);
5928 if (TREE_CODE (scalar_dest) != SSA_NAME)
5929 return false;
5931 code = gimple_assign_rhs_code (stmt);
5932 if (code != ARRAY_REF
5933 && code != BIT_FIELD_REF
5934 && code != INDIRECT_REF
5935 && code != COMPONENT_REF
5936 && code != IMAGPART_EXPR
5937 && code != REALPART_EXPR
5938 && code != MEM_REF
5939 && TREE_CODE_CLASS (code) != tcc_declaration)
5940 return false;
5942 if (!STMT_VINFO_DATA_REF (stmt_info))
5943 return false;
5945 elem_type = TREE_TYPE (vectype);
5946 mode = TYPE_MODE (vectype);
5948 /* FORNOW. In some cases can vectorize even if data-type not supported
5949 (e.g. - data copies). */
5950 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5952 if (dump_enabled_p ())
5953 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5954 "Aligned load, but unsupported type.\n");
5955 return false;
5958 /* Check if the load is a part of an interleaving chain. */
5959 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5961 grouped_load = true;
5962 /* FORNOW */
5963 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5965 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5967 /* If this is single-element interleaving with an element distance
5968 that leaves unused vector loads around, punt - we at least create
5969 very sub-optimal code in that case (and blow up memory,
5970 see PR65518). */
5971 if (first_stmt == stmt
5972 && !GROUP_NEXT_ELEMENT (stmt_info)
5973 && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
5975 if (dump_enabled_p ())
5976 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5977 "single-element interleaving not supported "
5978 "for not adjacent vector loads\n");
5979 return false;
5982 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
5983 slp_perm = true;
5985 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5986 if (!slp
5987 && !PURE_SLP_STMT (stmt_info)
5988 && !STMT_VINFO_STRIDED_P (stmt_info))
5990 if (vect_load_lanes_supported (vectype, group_size))
5991 load_lanes_p = true;
5992 else if (!vect_grouped_load_supported (vectype, group_size))
5993 return false;
5996 /* Invalidate assumptions made by dependence analysis when vectorization
5997 on the unrolled body effectively re-orders stmts. */
5998 if (!PURE_SLP_STMT (stmt_info)
5999 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6000 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6001 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6003 if (dump_enabled_p ())
6004 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6005 "cannot perform implicit CSE when performing "
6006 "group loads with negative dependence distance\n");
6007 return false;
6010 /* Similarly when the stmt is a load that is both part of a SLP
6011 instance and a loop vectorized stmt via the same-dr mechanism
6012 we have to give up. */
6013 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6014 && (STMT_SLP_TYPE (stmt_info)
6015 != STMT_SLP_TYPE (vinfo_for_stmt
6016 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6018 if (dump_enabled_p ())
6019 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6020 "conflicting SLP types for CSEd load\n");
6021 return false;
6026 if (STMT_VINFO_GATHER_P (stmt_info))
6028 gimple def_stmt;
6029 tree def;
6030 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
6031 &gather_off, &gather_scale);
6032 gcc_assert (gather_decl);
6033 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
6034 &def_stmt, &def, &gather_dt,
6035 &gather_off_vectype))
6037 if (dump_enabled_p ())
6038 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6039 "gather index use not simple.\n");
6040 return false;
6043 else if (STMT_VINFO_STRIDED_P (stmt_info))
6045 if ((grouped_load
6046 && (slp || PURE_SLP_STMT (stmt_info)))
6047 && (group_size > nunits
6048 || nunits % group_size != 0))
6050 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6051 "unhandled strided group load\n");
6052 return false;
6055 else
6057 negative = tree_int_cst_compare (nested_in_vect_loop
6058 ? STMT_VINFO_DR_STEP (stmt_info)
6059 : DR_STEP (dr),
6060 size_zero_node) < 0;
6061 if (negative && ncopies > 1)
6063 if (dump_enabled_p ())
6064 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6065 "multiple types with negative step.\n");
6066 return false;
6069 if (negative)
6071 if (grouped_load)
6073 if (dump_enabled_p ())
6074 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6075 "negative step for group load not supported"
6076 "\n");
6077 return false;
6079 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6080 if (alignment_support_scheme != dr_aligned
6081 && alignment_support_scheme != dr_unaligned_supported)
6083 if (dump_enabled_p ())
6084 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6085 "negative step but alignment required.\n");
6086 return false;
6088 if (!perm_mask_for_reverse (vectype))
6090 if (dump_enabled_p ())
6091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6092 "negative step and reversing not supported."
6093 "\n");
6094 return false;
6099 if (!vec_stmt) /* transformation not required. */
6101 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6102 /* The SLP costs are calculated during SLP analysis. */
6103 if (!PURE_SLP_STMT (stmt_info))
6104 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6105 NULL, NULL, NULL);
6106 return true;
6109 if (dump_enabled_p ())
6110 dump_printf_loc (MSG_NOTE, vect_location,
6111 "transform load. ncopies = %d\n", ncopies);
6113 /** Transform. **/
6115 ensure_base_align (stmt_info, dr);
6117 if (STMT_VINFO_GATHER_P (stmt_info))
6119 tree vec_oprnd0 = NULL_TREE, op;
6120 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6121 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6122 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6123 edge pe = loop_preheader_edge (loop);
6124 gimple_seq seq;
6125 basic_block new_bb;
6126 enum { NARROW, NONE, WIDEN } modifier;
6127 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
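/* The gather index vector need not have the same number of elements as
   the data vectype; MODIFIER records whether the two already match (NONE)
   or whether a widening or narrowing permutation is needed below.  */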
6129 if (nunits == gather_off_nunits)
6130 modifier = NONE;
6131 else if (nunits == gather_off_nunits / 2)
6133 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6134 modifier = WIDEN;
6136 for (i = 0; i < gather_off_nunits; ++i)
6137 sel[i] = i | nunits;
6139 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6141 else if (nunits == gather_off_nunits * 2)
6143 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6144 modifier = NARROW;
6146 for (i = 0; i < nunits; ++i)
6147 sel[i] = i < gather_off_nunits
6148 ? i : i + nunits - gather_off_nunits;
6150 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6151 ncopies *= 2;
6153 else
6154 gcc_unreachable ();
6156 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6157 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6158 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6159 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6160 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6161 scaletype = TREE_VALUE (arglist);
6162 gcc_checking_assert (types_compatible_p (srctype, rettype));
6164 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6166 ptr = fold_convert (ptrtype, gather_base);
6167 if (!is_gimple_min_invariant (ptr))
6169 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6170 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6171 gcc_assert (!new_bb);
6174 /* Currently we support only unconditional gather loads,
6175 so mask should be all ones. */
6176 if (TREE_CODE (masktype) == INTEGER_TYPE)
6177 mask = build_int_cst (masktype, -1);
6178 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6180 mask = build_int_cst (TREE_TYPE (masktype), -1);
6181 mask = build_vector_from_val (masktype, mask);
6182 mask = vect_init_vector (stmt, mask, masktype, NULL);
6184 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6186 REAL_VALUE_TYPE r;
6187 long tmp[6];
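/* Build an all-ones bit pattern and reinterpret it as a real; this serves
   as the all-true value for a floating-point mask type.  */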
6188 for (j = 0; j < 6; ++j)
6189 tmp[j] = -1;
6190 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6191 mask = build_real (TREE_TYPE (masktype), r);
6192 mask = build_vector_from_val (masktype, mask);
6193 mask = vect_init_vector (stmt, mask, masktype, NULL);
6195 else
6196 gcc_unreachable ();
6198 scale = build_int_cst (scaletype, gather_scale);
6200 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6201 merge = build_int_cst (TREE_TYPE (rettype), 0);
6202 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6204 REAL_VALUE_TYPE r;
6205 long tmp[6];
6206 for (j = 0; j < 6; ++j)
6207 tmp[j] = 0;
6208 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6209 merge = build_real (TREE_TYPE (rettype), r);
6211 else
6212 gcc_unreachable ();
6213 merge = build_vector_from_val (rettype, merge);
6214 merge = vect_init_vector (stmt, merge, rettype, NULL);
6216 prev_stmt_info = NULL;
6217 for (j = 0; j < ncopies; ++j)
6219 if (modifier == WIDEN && (j & 1))
6220 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6221 perm_mask, stmt, gsi);
6222 else if (j == 0)
6223 op = vec_oprnd0
6224 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
6225 else
6226 op = vec_oprnd0
6227 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6229 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6231 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6232 == TYPE_VECTOR_SUBPARTS (idxtype));
6233 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
6234 var = make_ssa_name (var);
6235 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6236 new_stmt
6237 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6238 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6239 op = var;
6242 new_stmt
6243 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6245 if (!useless_type_conversion_p (vectype, rettype))
6247 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6248 == TYPE_VECTOR_SUBPARTS (rettype));
6249 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
6250 op = make_ssa_name (var, new_stmt);
6251 gimple_call_set_lhs (new_stmt, op);
6252 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6253 var = make_ssa_name (vec_dest);
6254 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6255 new_stmt
6256 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6258 else
6260 var = make_ssa_name (vec_dest, new_stmt);
6261 gimple_call_set_lhs (new_stmt, var);
6264 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6266 if (modifier == NARROW)
6268 if ((j & 1) == 0)
6270 prev_res = var;
6271 continue;
6273 var = permute_vec_elements (prev_res, var,
6274 perm_mask, stmt, gsi);
6275 new_stmt = SSA_NAME_DEF_STMT (var);
6278 if (prev_stmt_info == NULL)
6279 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6280 else
6281 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6282 prev_stmt_info = vinfo_for_stmt (new_stmt);
6284 return true;
6286 else if (STMT_VINFO_STRIDED_P (stmt_info))
6288 gimple_stmt_iterator incr_gsi;
6289 bool insert_after;
6290 gimple incr;
6291 tree offvar;
6292 tree ivstep;
6293 tree running_off;
6294 vec<constructor_elt, va_gc> *v = NULL;
6295 gimple_seq stmts = NULL;
6296 tree stride_base, stride_step, alias_off;
6298 gcc_assert (!nested_in_vect_loop);
6300 if (slp && grouped_load)
6301 first_dr = STMT_VINFO_DATA_REF
6302 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
6303 else
6304 first_dr = dr;
6306 stride_base
6307 = fold_build_pointer_plus
6308 (DR_BASE_ADDRESS (first_dr),
6309 size_binop (PLUS_EXPR,
6310 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6311 convert_to_ptrofftype (DR_INIT (first_dr))));
6312 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6314 /* For a load with loop-invariant (but other than power-of-2)
6315 stride (i.e. not a grouped access) like so:
6317 for (i = 0; i < n; i += stride)
6318 ... = array[i];
6320 we generate a new induction variable and new accesses to
6321 form a new vector (or vectors, depending on ncopies):
6323 for (j = 0; ; j += VF*stride)
6324 tmp1 = array[j];
6325 tmp2 = array[j + stride];
6327 vectemp = {tmp1, tmp2, ...}
6330 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6331 build_int_cst (TREE_TYPE (stride_step), vf));
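/* I.e. the induction variable advances by VF scalar strides per
   vectorized loop iteration (the "j += VF*stride" of the sketch above).  */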
6333 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6335 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6336 loop, &incr_gsi, insert_after,
6337 &offvar, NULL);
6338 incr = gsi_stmt (incr_gsi);
6339 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6341 stride_step = force_gimple_operand (unshare_expr (stride_step),
6342 &stmts, true, NULL_TREE);
6343 if (stmts)
6344 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6346 prev_stmt_info = NULL;
6347 running_off = offvar;
6348 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
6349 int nloads = nunits;
6350 tree ltype = TREE_TYPE (vectype);
6351 auto_vec<tree> dr_chain;
6352 if (slp)
6354 nloads = nunits / group_size;
6355 if (group_size < nunits)
6356 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6357 else
6358 ltype = vectype;
6359 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
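/* For SLP each generated scalar load reads GROUP_SIZE consecutive
   elements at once (type LTYPE), so NLOADS such loads fill one vector
   stmt.  */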
6360 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6361 if (slp_perm)
6362 dr_chain.create (ncopies);
6364 for (j = 0; j < ncopies; j++)
6366 tree vec_inv;
6368 if (nloads > 1)
6370 vec_alloc (v, nloads);
6371 for (i = 0; i < nloads; i++)
6373 tree newref, newoff;
6374 gimple incr;
6375 newref = build2 (MEM_REF, ltype, running_off, alias_off);
6377 newref = force_gimple_operand_gsi (gsi, newref, true,
6378 NULL_TREE, true,
6379 GSI_SAME_STMT);
6380 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6381 newoff = copy_ssa_name (running_off);
6382 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6383 running_off, stride_step);
6384 vect_finish_stmt_generation (stmt, incr, gsi);
6386 running_off = newoff;
6389 vec_inv = build_constructor (vectype, v);
6390 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6391 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6393 else
6395 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6396 build2 (MEM_REF, ltype,
6397 running_off, alias_off));
6398 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6400 tree newoff = copy_ssa_name (running_off);
6401 gimple incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6402 running_off, stride_step);
6403 vect_finish_stmt_generation (stmt, incr, gsi);
6405 running_off = newoff;
6408 if (slp)
6410 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6411 if (slp_perm)
6412 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6414 else
6416 if (j == 0)
6417 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6418 else
6419 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6420 prev_stmt_info = vinfo_for_stmt (new_stmt);
6423 if (slp_perm)
6424 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6425 slp_node_instance, false);
6426 return true;
6429 if (grouped_load)
6431 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6432 if (slp
6433 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6434 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6435 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6437 /* Check if the chain of loads is already vectorized. */
6438 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6439 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6440 ??? But we can only do so if there is exactly one,
6441 as we have no way to get at the rest. Leave the CSE
6442 opportunity alone.
6443 ??? With the group load eventually participating
6444 in multiple different permutations (having multiple
6445 slp nodes which refer to the same group) the CSE
6446 is even wrong code. See PR56270. */
6447 && !slp)
6449 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6450 return true;
6452 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6453 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6454 group_gap_adj = 0;
6456 /* VEC_NUM is the number of vect stmts to be created for this group. */
6457 if (slp)
6459 grouped_load = false;
6460 /* For SLP permutation support we need to load the whole group,
6461 not only the number of vector stmts the permutation result
6462 fits in. */
6463 if (slp_perm)
6464 vec_num = (group_size * vf + nunits - 1) / nunits;
6465 else
6466 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6467 group_gap_adj = vf * group_size - nunits * vec_num;
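/* GROUP_GAP_ADJ is the difference between the VF * GROUP_SIZE elements
   the group covers and the NUNITS * VEC_NUM elements actually loaded;
   the pointer bump near the end of the loop below accounts for it.  */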
6469 else
6470 vec_num = group_size;
6472 else
6474 first_stmt = stmt;
6475 first_dr = dr;
6476 group_size = vec_num = 1;
6477 group_gap_adj = 0;
6480 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6481 gcc_assert (alignment_support_scheme);
6482 /* Targets with load-lane instructions must not require explicit
6483 realignment. */
6484 gcc_assert (!load_lanes_p
6485 || alignment_support_scheme == dr_aligned
6486 || alignment_support_scheme == dr_unaligned_supported);
6488 /* In case the vectorization factor (VF) is bigger than the number
6489 of elements that we can fit in a vectype (nunits), we have to generate
6490 more than one vector stmt - i.e. we need to "unroll" the
6491 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6492 from one copy of the vector stmt to the next, in the field
6493 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6494 stages to find the correct vector defs to be used when vectorizing
6495 stmts that use the defs of the current stmt. The example below
6496 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6497 need to create 4 vectorized stmts):
6499 before vectorization:
6500 RELATED_STMT VEC_STMT
6501 S1: x = memref - -
6502 S2: z = x + 1 - -
6504 step 1: vectorize stmt S1:
6505 We first create the vector stmt VS1_0, and, as usual, record a
6506 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6507 Next, we create the vector stmt VS1_1, and record a pointer to
6508 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6509 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6510 stmts and pointers:
6511 RELATED_STMT VEC_STMT
6512 VS1_0: vx0 = memref0 VS1_1 -
6513 VS1_1: vx1 = memref1 VS1_2 -
6514 VS1_2: vx2 = memref2 VS1_3 -
6515 VS1_3: vx3 = memref3 - -
6516 S1: x = load - VS1_0
6517 S2: z = x + 1 - -
6519 See the documentation of vect_get_vec_def_for_stmt_copy for how the
6520 information we recorded in the RELATED_STMT field is used to vectorize
6521 stmt S2. */
6523 /* In case of interleaving (non-unit grouped access):
6525 S1: x2 = &base + 2
6526 S2: x0 = &base
6527 S3: x1 = &base + 1
6528 S4: x3 = &base + 3
6530 Vectorized loads are created in the order of memory accesses
6531 starting from the access of the first stmt of the chain:
6533 VS1: vx0 = &base
6534 VS2: vx1 = &base + vec_size*1
6535 VS3: vx3 = &base + vec_size*2
6536 VS4: vx4 = &base + vec_size*3
6538 Then permutation statements are generated:
6540 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6541 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6544 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6545 (the order of the data-refs in the output of vect_permute_load_chain
6546 corresponds to the order of scalar stmts in the interleaving chain - see
6547 the documentation of vect_permute_load_chain()).
6548 The generation of permutation stmts and recording them in
6549 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6551 In case of both multiple types and interleaving, the vector loads and
6552 permutation stmts above are created for every copy. The result vector
6553 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6554 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6556 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6557 on a target that supports unaligned accesses (dr_unaligned_supported)
6558 we generate the following code:
6559 p = initial_addr;
6560 indx = 0;
6561 loop {
6562 p = p + indx * vectype_size;
6563 vec_dest = *(p);
6564 indx = indx + 1;
6567 Otherwise, the data reference is potentially unaligned on a target that
6568 does not support unaligned accesses (dr_explicit_realign_optimized) -
6569 then generate the following code, in which the data in each iteration is
6570 obtained by two vector loads, one from the previous iteration, and one
6571 from the current iteration:
6572 p1 = initial_addr;
6573 msq_init = *(floor(p1))
6574 p2 = initial_addr + VS - 1;
6575 realignment_token = call target_builtin;
6576 indx = 0;
6577 loop {
6578 p2 = p2 + indx * vectype_size
6579 lsq = *(floor(p2))
6580 vec_dest = realign_load (msq, lsq, realignment_token)
6581 indx = indx + 1;
6582 msq = lsq;
6583 } */
6585 /* If the misalignment remains the same throughout the execution of the
6586 loop, we can create the init_addr and permutation mask at the loop
6587 preheader. Otherwise, they need to be created inside the loop.
6588 This can only occur when vectorizing memory accesses in the inner-loop
6589 nested within an outer-loop that is being vectorized. */
6591 if (nested_in_vect_loop
6592 && (TREE_INT_CST_LOW (DR_STEP (dr))
6593 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6595 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6596 compute_in_loop = true;
6599 if ((alignment_support_scheme == dr_explicit_realign_optimized
6600 || alignment_support_scheme == dr_explicit_realign)
6601 && !compute_in_loop)
6603 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6604 alignment_support_scheme, NULL_TREE,
6605 &at_loop);
6606 if (alignment_support_scheme == dr_explicit_realign_optimized)
6608 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
6609 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6610 size_one_node);
6613 else
6614 at_loop = loop;
6616 if (negative)
6617 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
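/* With a negative step the data-ref points at the element with the
   highest address; start the vector access NUNITS - 1 elements lower so
   a whole vector can be loaded and then reversed by the permute below.  */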
6619 if (load_lanes_p)
6620 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6621 else
6622 aggr_type = vectype;
6624 prev_stmt_info = NULL;
6625 for (j = 0; j < ncopies; j++)
6627 /* 1. Create the vector or array pointer update chain. */
6628 if (j == 0)
6630 bool simd_lane_access_p
6631 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6632 if (simd_lane_access_p
6633 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6634 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6635 && integer_zerop (DR_OFFSET (first_dr))
6636 && integer_zerop (DR_INIT (first_dr))
6637 && alias_sets_conflict_p (get_alias_set (aggr_type),
6638 get_alias_set (DR_REF (first_dr)))
6639 && (alignment_support_scheme == dr_aligned
6640 || alignment_support_scheme == dr_unaligned_supported))
6642 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6643 dataref_offset = build_int_cst (reference_alias_ptr_type
6644 (DR_REF (first_dr)), 0);
6645 inv_p = false;
6647 else
6648 dataref_ptr
6649 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6650 offset, &dummy, gsi, &ptr_incr,
6651 simd_lane_access_p, &inv_p,
6652 byte_offset);
6654 else if (dataref_offset)
6655 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6656 TYPE_SIZE_UNIT (aggr_type));
6657 else
6658 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6659 TYPE_SIZE_UNIT (aggr_type));
6661 if (grouped_load || slp_perm)
6662 dr_chain.create (vec_num);
6664 if (load_lanes_p)
6666 tree vec_array;
6668 vec_array = create_vector_array (vectype, vec_num);
6670 /* Emit:
6671 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6672 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6673 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6674 gimple_call_set_lhs (new_stmt, vec_array);
6675 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6677 /* Extract each vector into an SSA_NAME. */
6678 for (i = 0; i < vec_num; i++)
6680 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6681 vec_array, i);
6682 dr_chain.quick_push (new_temp);
6685 /* Record the mapping between SSA_NAMEs and statements. */
6686 vect_record_grouped_load_vectors (stmt, dr_chain);
6688 else
6690 for (i = 0; i < vec_num; i++)
6692 if (i > 0)
6693 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6694 stmt, NULL_TREE);
6696 /* 2. Create the vector-load in the loop. */
6697 switch (alignment_support_scheme)
6699 case dr_aligned:
6700 case dr_unaligned_supported:
6702 unsigned int align, misalign;
6704 data_ref
6705 = fold_build2 (MEM_REF, vectype, dataref_ptr,
6706 dataref_offset
6707 ? dataref_offset
6708 : build_int_cst (reference_alias_ptr_type
6709 (DR_REF (first_dr)), 0));
6710 align = TYPE_ALIGN_UNIT (vectype);
6711 if (alignment_support_scheme == dr_aligned)
6713 gcc_assert (aligned_access_p (first_dr));
6714 misalign = 0;
6716 else if (DR_MISALIGNMENT (first_dr) == -1)
6718 TREE_TYPE (data_ref)
6719 = build_aligned_type (TREE_TYPE (data_ref),
6720 TYPE_ALIGN (elem_type));
6721 align = TYPE_ALIGN_UNIT (elem_type);
6722 misalign = 0;
6724 else
6726 TREE_TYPE (data_ref)
6727 = build_aligned_type (TREE_TYPE (data_ref),
6728 TYPE_ALIGN (elem_type));
6729 misalign = DR_MISALIGNMENT (first_dr);
6731 if (dataref_offset == NULL_TREE
6732 && TREE_CODE (dataref_ptr) == SSA_NAME)
6733 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6734 align, misalign);
6735 break;
6737 case dr_explicit_realign:
6739 tree ptr, bump;
6741 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
6743 if (compute_in_loop)
6744 msq = vect_setup_realignment (first_stmt, gsi,
6745 &realignment_token,
6746 dr_explicit_realign,
6747 dataref_ptr, NULL);
6749 if (TREE_CODE (dataref_ptr) == SSA_NAME)
6750 ptr = copy_ssa_name (dataref_ptr);
6751 else
6752 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
6753 new_stmt = gimple_build_assign
6754 (ptr, BIT_AND_EXPR, dataref_ptr,
6755 build_int_cst
6756 (TREE_TYPE (dataref_ptr),
6757 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6758 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6759 data_ref
6760 = build2 (MEM_REF, vectype, ptr,
6761 build_int_cst (reference_alias_ptr_type
6762 (DR_REF (first_dr)), 0));
6763 vec_dest = vect_create_destination_var (scalar_dest,
6764 vectype);
6765 new_stmt = gimple_build_assign (vec_dest, data_ref);
6766 new_temp = make_ssa_name (vec_dest, new_stmt);
6767 gimple_assign_set_lhs (new_stmt, new_temp);
6768 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6769 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6770 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6771 msq = new_temp;
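/* Compute the second, "later" address: one vector size minus one byte
   further on, so that flooring it to the vector alignment yields the
   aligned vector containing the last needed element (LSQ), as in the
   scheme described above.  */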
6773 bump = size_binop (MULT_EXPR, vs,
6774 TYPE_SIZE_UNIT (elem_type));
6775 bump = size_binop (MINUS_EXPR, bump, size_one_node);
6776 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6777 new_stmt = gimple_build_assign
6778 (NULL_TREE, BIT_AND_EXPR, ptr,
6779 build_int_cst
6780 (TREE_TYPE (ptr),
6781 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6782 ptr = copy_ssa_name (ptr, new_stmt);
6783 gimple_assign_set_lhs (new_stmt, ptr);
6784 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6785 data_ref
6786 = build2 (MEM_REF, vectype, ptr,
6787 build_int_cst (reference_alias_ptr_type
6788 (DR_REF (first_dr)), 0));
6789 break;
6791 case dr_explicit_realign_optimized:
6792 if (TREE_CODE (dataref_ptr) == SSA_NAME)
6793 new_temp = copy_ssa_name (dataref_ptr);
6794 else
6795 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
6796 new_stmt = gimple_build_assign
6797 (new_temp, BIT_AND_EXPR, dataref_ptr,
6798 build_int_cst
6799 (TREE_TYPE (dataref_ptr),
6800 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6801 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6802 data_ref
6803 = build2 (MEM_REF, vectype, new_temp,
6804 build_int_cst (reference_alias_ptr_type
6805 (DR_REF (first_dr)), 0));
6806 break;
6807 default:
6808 gcc_unreachable ();
6810 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6811 new_stmt = gimple_build_assign (vec_dest, data_ref);
6812 new_temp = make_ssa_name (vec_dest, new_stmt);
6813 gimple_assign_set_lhs (new_stmt, new_temp);
6814 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6816 /* 3. Handle explicit realignment if necessary/supported.
6817 Create in loop:
6818 vec_dest = realign_load (msq, lsq, realignment_token) */
6819 if (alignment_support_scheme == dr_explicit_realign_optimized
6820 || alignment_support_scheme == dr_explicit_realign)
6822 lsq = gimple_assign_lhs (new_stmt);
6823 if (!realignment_token)
6824 realignment_token = dataref_ptr;
6825 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6826 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
6827 msq, lsq, realignment_token);
6828 new_temp = make_ssa_name (vec_dest, new_stmt);
6829 gimple_assign_set_lhs (new_stmt, new_temp);
6830 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6832 if (alignment_support_scheme == dr_explicit_realign_optimized)
6834 gcc_assert (phi);
6835 if (i == vec_num - 1 && j == ncopies - 1)
6836 add_phi_arg (phi, lsq,
6837 loop_latch_edge (containing_loop),
6838 UNKNOWN_LOCATION);
6839 msq = lsq;
6843 /* 4. Handle invariant-load. */
6844 if (inv_p && !bb_vinfo)
6846 gcc_assert (!grouped_load);
6847 /* If we have versioned for aliasing or the loop doesn't
6848 have any data dependencies that would preclude this,
6849 then we are sure this is a loop invariant load and
6850 thus we can insert it on the preheader edge. */
6851 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6852 && !nested_in_vect_loop
6853 && hoist_defs_of_uses (stmt, loop))
6855 if (dump_enabled_p ())
6857 dump_printf_loc (MSG_NOTE, vect_location,
6858 "hoisting out of the vectorized "
6859 "loop: ");
6860 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6862 tree tem = copy_ssa_name (scalar_dest);
6863 gsi_insert_on_edge_immediate
6864 (loop_preheader_edge (loop),
6865 gimple_build_assign (tem,
6866 unshare_expr
6867 (gimple_assign_rhs1 (stmt))));
6868 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6870 else
6872 gimple_stmt_iterator gsi2 = *gsi;
6873 gsi_next (&gsi2);
6874 new_temp = vect_init_vector (stmt, scalar_dest,
6875 vectype, &gsi2);
6877 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6878 set_vinfo_for_stmt (new_stmt,
6879 new_stmt_vec_info (new_stmt, loop_vinfo,
6880 bb_vinfo));
6883 if (negative)
6885 tree perm_mask = perm_mask_for_reverse (vectype);
6886 new_temp = permute_vec_elements (new_temp, new_temp,
6887 perm_mask, stmt, gsi);
6888 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6891 /* Collect vector loads and later create their permutation in
6892 vect_transform_grouped_load (). */
6893 if (grouped_load || slp_perm)
6894 dr_chain.quick_push (new_temp);
6896 /* Store vector loads in the corresponding SLP_NODE. */
6897 if (slp && !slp_perm)
6898 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6900 /* Bump the vector pointer to account for a gap or for excess
6901 elements loaded for a permuted SLP load. */
6902 if (group_gap_adj != 0)
6904 bool ovf;
6905 tree bump
6906 = wide_int_to_tree (sizetype,
6907 wi::smul (TYPE_SIZE_UNIT (elem_type),
6908 group_gap_adj, &ovf));
6909 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6910 stmt, bump);
6914 if (slp && !slp_perm)
6915 continue;
6917 if (slp_perm)
6919 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6920 slp_node_instance, false))
6922 dr_chain.release ();
6923 return false;
6926 else
6928 if (grouped_load)
6930 if (!load_lanes_p)
6931 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6932 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6934 else
6936 if (j == 0)
6937 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6938 else
6939 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6940 prev_stmt_info = vinfo_for_stmt (new_stmt);
6943 dr_chain.release ();
6946 return true;
6949 /* Function vect_is_simple_cond.
6951 Input:
6952 LOOP - the loop that is being vectorized.
6953 COND - Condition that is checked for simple use.
6955 Output:
6956 *COMP_VECTYPE - the vector type for the comparison.
6958 Returns whether a COND can be vectorized. Checks whether
6959 condition operands are supportable using vect_is_simple_use. */
6961 static bool
6962 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6963 bb_vec_info bb_vinfo, tree *comp_vectype)
6965 tree lhs, rhs;
6966 tree def;
6967 enum vect_def_type dt;
6968 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6970 if (!COMPARISON_CLASS_P (cond))
6971 return false;
6973 lhs = TREE_OPERAND (cond, 0);
6974 rhs = TREE_OPERAND (cond, 1);
6976 if (TREE_CODE (lhs) == SSA_NAME)
6978 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6979 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6980 &lhs_def_stmt, &def, &dt, &vectype1))
6981 return false;
6983 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6984 && TREE_CODE (lhs) != FIXED_CST)
6985 return false;
6987 if (TREE_CODE (rhs) == SSA_NAME)
6989 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6990 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6991 &rhs_def_stmt, &def, &dt, &vectype2))
6992 return false;
6994 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6995 && TREE_CODE (rhs) != FIXED_CST)
6996 return false;
6998 *comp_vectype = vectype1 ? vectype1 : vectype2;
6999 return true;
7002 /* vectorizable_condition.
7004 Check if STMT is a conditional modify expression that can be vectorized.
7005 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7006 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7007 at GSI.
7009 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7010 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7011 the else clause if it is 2).
7013 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7015 bool
7016 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
7017 gimple *vec_stmt, tree reduc_def, int reduc_index,
7018 slp_tree slp_node)
7020 tree scalar_dest = NULL_TREE;
7021 tree vec_dest = NULL_TREE;
7022 tree cond_expr, then_clause, else_clause;
7023 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7024 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7025 tree comp_vectype = NULL_TREE;
7026 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7027 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7028 tree vec_compare, vec_cond_expr;
7029 tree new_temp;
7030 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7031 tree def;
7032 enum vect_def_type dt, dts[4];
7033 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7034 int ncopies;
7035 enum tree_code code;
7036 stmt_vec_info prev_stmt_info = NULL;
7037 int i, j;
7038 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7039 vec<tree> vec_oprnds0 = vNULL;
7040 vec<tree> vec_oprnds1 = vNULL;
7041 vec<tree> vec_oprnds2 = vNULL;
7042 vec<tree> vec_oprnds3 = vNULL;
7043 tree vec_cmp_type;
7045 if (slp_node || PURE_SLP_STMT (stmt_info))
7046 ncopies = 1;
7047 else
7048 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7050 gcc_assert (ncopies >= 1);
7051 if (reduc_index && ncopies > 1)
7052 return false; /* FORNOW */
7054 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7055 return false;
7057 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7058 return false;
7060 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7061 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7062 && reduc_def))
7063 return false;
7065 /* FORNOW: not yet supported. */
7066 if (STMT_VINFO_LIVE_P (stmt_info))
7068 if (dump_enabled_p ())
7069 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7070 "value used after loop.\n");
7071 return false;
7074 /* Is vectorizable conditional operation? */
7075 if (!is_gimple_assign (stmt))
7076 return false;
7078 code = gimple_assign_rhs_code (stmt);
7080 if (code != COND_EXPR)
7081 return false;
7083 cond_expr = gimple_assign_rhs1 (stmt);
7084 then_clause = gimple_assign_rhs2 (stmt);
7085 else_clause = gimple_assign_rhs3 (stmt);
7087 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
7088 &comp_vectype)
7089 || !comp_vectype)
7090 return false;
7092 if (TREE_CODE (then_clause) == SSA_NAME)
7094 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
7095 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
7096 &then_def_stmt, &def, &dt))
7097 return false;
7099 else if (TREE_CODE (then_clause) != INTEGER_CST
7100 && TREE_CODE (then_clause) != REAL_CST
7101 && TREE_CODE (then_clause) != FIXED_CST)
7102 return false;
7104 if (TREE_CODE (else_clause) == SSA_NAME)
7106 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
7107 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
7108 &else_def_stmt, &def, &dt))
7109 return false;
7111 else if (TREE_CODE (else_clause) != INTEGER_CST
7112 && TREE_CODE (else_clause) != REAL_CST
7113 && TREE_CODE (else_clause) != FIXED_CST)
7114 return false;
7116 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
7117 /* The result of a vector comparison should be of signed integer type. */
7118 tree cmp_type = build_nonstandard_integer_type (prec, 0);
7119 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
7120 if (vec_cmp_type == NULL_TREE)
7121 return false;
7123 if (!vec_stmt)
7125 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7126 return expand_vec_cond_expr_p (vectype, comp_vectype);
7129 /* Transform. */
7131 if (!slp_node)
7133 vec_oprnds0.create (1);
7134 vec_oprnds1.create (1);
7135 vec_oprnds2.create (1);
7136 vec_oprnds3.create (1);
7139 /* Handle def. */
7140 scalar_dest = gimple_assign_lhs (stmt);
7141 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7143 /* Handle cond expr. */
7144 for (j = 0; j < ncopies; j++)
7146 gassign *new_stmt = NULL;
7147 if (j == 0)
7149 if (slp_node)
7151 auto_vec<tree, 4> ops;
7152 auto_vec<vec<tree>, 4> vec_defs;
7154 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7155 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7156 ops.safe_push (then_clause);
7157 ops.safe_push (else_clause);
7158 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
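/* vect_get_slp_defs returns one vector of defs per operand, in the order
   the operands were pushed above; popping from the back thus pairs them
   with the else, then, rhs and lhs operands respectively.  */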
7159 vec_oprnds3 = vec_defs.pop ();
7160 vec_oprnds2 = vec_defs.pop ();
7161 vec_oprnds1 = vec_defs.pop ();
7162 vec_oprnds0 = vec_defs.pop ();
7164 ops.release ();
7165 vec_defs.release ();
7167 else
7169 gimple gtemp;
7170 vec_cond_lhs =
7171 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7172 stmt, NULL);
7173 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
7174 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
7176 vec_cond_rhs =
7177 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7178 stmt, NULL);
7179 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
7180 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
7181 if (reduc_index == 1)
7182 vec_then_clause = reduc_def;
7183 else
7185 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7186 stmt, NULL);
7187 vect_is_simple_use (then_clause, stmt, loop_vinfo,
7188 NULL, &gtemp, &def, &dts[2]);
7190 if (reduc_index == 2)
7191 vec_else_clause = reduc_def;
7192 else
7194 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7195 stmt, NULL);
7196 vect_is_simple_use (else_clause, stmt, loop_vinfo,
7197 NULL, &gtemp, &def, &dts[3]);
7201 else
7203 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
7204 vec_oprnds0.pop ());
7205 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
7206 vec_oprnds1.pop ());
7207 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7208 vec_oprnds2.pop ());
7209 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7210 vec_oprnds3.pop ());
7213 if (!slp_node)
7215 vec_oprnds0.quick_push (vec_cond_lhs);
7216 vec_oprnds1.quick_push (vec_cond_rhs);
7217 vec_oprnds2.quick_push (vec_then_clause);
7218 vec_oprnds3.quick_push (vec_else_clause);
7221 /* Arguments are ready. Create the new vector stmt. */
7222 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7224 vec_cond_rhs = vec_oprnds1[i];
7225 vec_then_clause = vec_oprnds2[i];
7226 vec_else_clause = vec_oprnds3[i];
7228 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7229 vec_cond_lhs, vec_cond_rhs);
7230 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
7231 vec_compare, vec_then_clause, vec_else_clause);
7233 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
7234 new_temp = make_ssa_name (vec_dest, new_stmt);
7235 gimple_assign_set_lhs (new_stmt, new_temp);
7236 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7237 if (slp_node)
7238 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7241 if (slp_node)
7242 continue;
7244 if (j == 0)
7245 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7246 else
7247 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7249 prev_stmt_info = vinfo_for_stmt (new_stmt);
7252 vec_oprnds0.release ();
7253 vec_oprnds1.release ();
7254 vec_oprnds2.release ();
7255 vec_oprnds3.release ();
7257 return true;
7261 /* Make sure the statement is vectorizable. */
7263 bool
7264 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
7266 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7267 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7268 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7269 bool ok;
7270 tree scalar_type, vectype;
7271 gimple pattern_stmt;
7272 gimple_seq pattern_def_seq;
7274 if (dump_enabled_p ())
7276 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7277 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7280 if (gimple_has_volatile_ops (stmt))
7282 if (dump_enabled_p ())
7283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7284 "not vectorized: stmt has volatile operands\n");
7286 return false;
7289 /* Skip stmts that do not need to be vectorized. In loops this is expected
7290 to include:
7291 - the COND_EXPR which is the loop exit condition
7292 - any LABEL_EXPRs in the loop
7293 - computations that are used only for array indexing or loop control.
7294 In basic blocks we only analyze statements that are a part of some SLP
7295 instance, therefore, all the statements are relevant.
7297 Pattern statement needs to be analyzed instead of the original statement
7298 if the original statement is not relevant. Otherwise, we analyze both
7299 statements. In basic blocks we are called from some SLP instance
7300 traversal; in that case don't analyze pattern stmts instead, as the
7301 pattern stmts will already be part of an SLP instance. */
7303 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7304 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7305 && !STMT_VINFO_LIVE_P (stmt_info))
7307 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7308 && pattern_stmt
7309 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7310 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7312 /* Analyze PATTERN_STMT instead of the original stmt. */
7313 stmt = pattern_stmt;
7314 stmt_info = vinfo_for_stmt (pattern_stmt);
7315 if (dump_enabled_p ())
7317 dump_printf_loc (MSG_NOTE, vect_location,
7318 "==> examining pattern statement: ");
7319 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7322 else
7324 if (dump_enabled_p ())
7325 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7327 return true;
7330 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7331 && node == NULL
7332 && pattern_stmt
7333 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7334 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7336 /* Analyze PATTERN_STMT too. */
7337 if (dump_enabled_p ())
7339 dump_printf_loc (MSG_NOTE, vect_location,
7340 "==> examining pattern statement: ");
7341 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7344 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7345 return false;
7348 if (is_pattern_stmt_p (stmt_info)
7349 && node == NULL
7350 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7352 gimple_stmt_iterator si;
7354 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7356 gimple pattern_def_stmt = gsi_stmt (si);
7357 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7358 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7360 /* Analyze def stmt of STMT if it's a pattern stmt. */
7361 if (dump_enabled_p ())
7363 dump_printf_loc (MSG_NOTE, vect_location,
7364 "==> examining pattern def statement: ");
7365 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7368 if (!vect_analyze_stmt (pattern_def_stmt,
7369 need_to_vectorize, node))
7370 return false;
7375 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7377 case vect_internal_def:
7378 break;
7380 case vect_reduction_def:
7381 case vect_nested_cycle:
7382 gcc_assert (!bb_vinfo
7383 && (relevance == vect_used_in_outer
7384 || relevance == vect_used_in_outer_by_reduction
7385 || relevance == vect_used_by_reduction
7386 || relevance == vect_unused_in_scope));
7387 break;
7389 case vect_induction_def:
7390 case vect_constant_def:
7391 case vect_external_def:
7392 case vect_unknown_def_type:
7393 default:
7394 gcc_unreachable ();
7397 if (bb_vinfo)
7399 gcc_assert (PURE_SLP_STMT (stmt_info));
7401 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7402 if (dump_enabled_p ())
7404 dump_printf_loc (MSG_NOTE, vect_location,
7405 "get vectype for scalar type: ");
7406 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7407 dump_printf (MSG_NOTE, "\n");
7410 vectype = get_vectype_for_scalar_type (scalar_type);
7411 if (!vectype)
7413 if (dump_enabled_p ())
7415 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7416 "not SLPed: unsupported data-type ");
7417 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7418 scalar_type);
7419 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7421 return false;
7424 if (dump_enabled_p ())
7426 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7427 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7428 dump_printf (MSG_NOTE, "\n");
7431 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7434 if (STMT_VINFO_RELEVANT_P (stmt_info))
7436 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7437 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7438 || (is_gimple_call (stmt)
7439 && gimple_call_lhs (stmt) == NULL_TREE));
7440 *need_to_vectorize = true;
7443 if (PURE_SLP_STMT (stmt_info) && !node)
7445 dump_printf_loc (MSG_NOTE, vect_location,
7446 "handled only by SLP analysis\n");
7447 return true;
7450 ok = true;
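/* Try each vectorizable_* analysis routine in turn; the routine that
   recognizes the stmt also records the chosen kind in STMT_VINFO_TYPE,
   which vect_transform_stmt dispatches on later.  */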
7451 if (!bb_vinfo
7452 && (STMT_VINFO_RELEVANT_P (stmt_info)
7453 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7454 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7455 || vectorizable_conversion (stmt, NULL, NULL, node)
7456 || vectorizable_shift (stmt, NULL, NULL, node)
7457 || vectorizable_operation (stmt, NULL, NULL, node)
7458 || vectorizable_assignment (stmt, NULL, NULL, node)
7459 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7460 || vectorizable_call (stmt, NULL, NULL, node)
7461 || vectorizable_store (stmt, NULL, NULL, node)
7462 || vectorizable_reduction (stmt, NULL, NULL, node)
7463 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7464 else
7466 if (bb_vinfo)
7467 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7468 || vectorizable_conversion (stmt, NULL, NULL, node)
7469 || vectorizable_shift (stmt, NULL, NULL, node)
7470 || vectorizable_operation (stmt, NULL, NULL, node)
7471 || vectorizable_assignment (stmt, NULL, NULL, node)
7472 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7473 || vectorizable_call (stmt, NULL, NULL, node)
7474 || vectorizable_store (stmt, NULL, NULL, node)
7475 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7478 if (!ok)
7480 if (dump_enabled_p ())
7482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7483 "not vectorized: relevant stmt not ");
7484 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7485 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7488 return false;
7491 if (bb_vinfo)
7492 return true;
7494 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
7495 need extra handling, except for vectorizable reductions. */
7496 if (STMT_VINFO_LIVE_P (stmt_info)
7497 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7498 ok = vectorizable_live_operation (stmt, NULL, NULL);
7500 if (!ok)
7502 if (dump_enabled_p ())
7504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7505 "not vectorized: live stmt not ");
7506 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7507 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7510 return false;
7513 return true;
7517 /* Function vect_transform_stmt.
7519 Create a vectorized stmt to replace STMT, and insert it at GSI. */
7521 bool
7522 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7523 bool *grouped_store, slp_tree slp_node,
7524 slp_instance slp_node_instance)
7526 bool is_store = false;
7527 gimple vec_stmt = NULL;
7528 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7529 bool done;
7531 gimple old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
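/* Dispatch on the kind of vectorization recorded for this stmt during
   analysis (see vect_analyze_stmt).  */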
7533 switch (STMT_VINFO_TYPE (stmt_info))
7535 case type_demotion_vec_info_type:
7536 case type_promotion_vec_info_type:
7537 case type_conversion_vec_info_type:
7538 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7539 gcc_assert (done);
7540 break;
7542 case induc_vec_info_type:
7543 gcc_assert (!slp_node);
7544 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7545 gcc_assert (done);
7546 break;
7548 case shift_vec_info_type:
7549 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7550 gcc_assert (done);
7551 break;
7553 case op_vec_info_type:
7554 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7555 gcc_assert (done);
7556 break;
7558 case assignment_vec_info_type:
7559 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7560 gcc_assert (done);
7561 break;
7563 case load_vec_info_type:
7564 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7565 slp_node_instance);
7566 gcc_assert (done);
7567 break;
7569 case store_vec_info_type:
7570 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7571 gcc_assert (done);
7572 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7574 /* In case of interleaving, the whole chain is vectorized when the
7575 last store in the chain is reached. Store stmts before the last
7576 one are skipped, and their stmt_vec_info shouldn't be freed
7577 meanwhile. */
7578 *grouped_store = true;
7579 if (STMT_VINFO_VEC_STMT (stmt_info))
7580 is_store = true;
7582 else
7583 is_store = true;
7584 break;
7586 case condition_vec_info_type:
7587 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7588 gcc_assert (done);
7589 break;
7591 case call_vec_info_type:
7592 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7593 stmt = gsi_stmt (*gsi);
7594 if (is_gimple_call (stmt)
7595 && gimple_call_internal_p (stmt)
7596 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7597 is_store = true;
7598 break;
7600 case call_simd_clone_vec_info_type:
7601 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7602 stmt = gsi_stmt (*gsi);
7603 break;
7605 case reduc_vec_info_type:
7606 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7607 gcc_assert (done);
7608 break;
7610 default:
7611 if (!STMT_VINFO_LIVE_P (stmt_info))
7613 if (dump_enabled_p ())
7614 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7615 "stmt not supported.\n");
7616 gcc_unreachable ();
7620 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
7621 This would break hybrid SLP vectorization. */
7622 if (slp_node)
7623 gcc_assert (!vec_stmt
7624 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
7626 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7627 is being vectorized, but outside the immediately enclosing loop. */
7628 if (vec_stmt
7629 && STMT_VINFO_LOOP_VINFO (stmt_info)
7630 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7631 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7632 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7633 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7634 || STMT_VINFO_RELEVANT (stmt_info) ==
7635 vect_used_in_outer_by_reduction))
7637 struct loop *innerloop = LOOP_VINFO_LOOP (
7638 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7639 imm_use_iterator imm_iter;
7640 use_operand_p use_p;
7641 tree scalar_dest;
7642 gimple exit_phi;
7644 if (dump_enabled_p ())
7645 dump_printf_loc (MSG_NOTE, vect_location,
7646 "Record the vdef for outer-loop vectorization.\n");
7648 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7649 (to be used when vectorizing outer-loop stmts that use the DEF of
7650 STMT). */
7651 if (gimple_code (stmt) == GIMPLE_PHI)
7652 scalar_dest = PHI_RESULT (stmt);
7653 else
7654 scalar_dest = gimple_assign_lhs (stmt);
7656 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7658 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7660 exit_phi = USE_STMT (use_p);
7661 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7666 /* Handle stmts whose DEF is used outside the loop-nest that is
7667 being vectorized. */
7668 if (STMT_VINFO_LIVE_P (stmt_info)
7669 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7671 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7672 gcc_assert (done);
7675 if (vec_stmt)
7676 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7678 return is_store;
7682 /* Remove a group of stores (for SLP or interleaving), free their
7683 stmt_vec_info. */
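/* FIRST_STMT is the head of the group; the loop below walks the chain
   via GROUP_NEXT_ELEMENT and, when an element is a pattern statement,
   removes the related original scalar statement from the IL instead.  */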
7685 void
7686 vect_remove_stores (gimple first_stmt)
7688 gimple next = first_stmt;
7689 gimple tmp;
7690 gimple_stmt_iterator next_si;
7692 while (next)
7694 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7696 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7697 if (is_pattern_stmt_p (stmt_info))
7698 next = STMT_VINFO_RELATED_STMT (stmt_info);
7699 /* Free the attached stmt_vec_info and remove the stmt. */
7700 next_si = gsi_for_stmt (next);
7701 unlink_stmt_vdef (next);
7702 gsi_remove (&next_si, true);
7703 release_defs (next);
7704 free_stmt_vec_info (next);
7705 next = tmp;
7710 /* Function new_stmt_vec_info.
7712 Create and initialize a new stmt_vec_info struct for STMT. */
7714 stmt_vec_info
7715 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7716 bb_vec_info bb_vinfo)
7718 stmt_vec_info res;
7719 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7721 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7722 STMT_VINFO_STMT (res) = stmt;
7723 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7724 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7725 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7726 STMT_VINFO_LIVE_P (res) = false;
7727 STMT_VINFO_VECTYPE (res) = NULL;
7728 STMT_VINFO_VEC_STMT (res) = NULL;
7729 STMT_VINFO_VECTORIZABLE (res) = true;
7730 STMT_VINFO_IN_PATTERN_P (res) = false;
7731 STMT_VINFO_RELATED_STMT (res) = NULL;
7732 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7733 STMT_VINFO_DATA_REF (res) = NULL;
7735 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7736 STMT_VINFO_DR_OFFSET (res) = NULL;
7737 STMT_VINFO_DR_INIT (res) = NULL;
7738 STMT_VINFO_DR_STEP (res) = NULL;
7739 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7741 if (gimple_code (stmt) == GIMPLE_PHI
7742 && is_loop_header_bb_p (gimple_bb (stmt)))
7743 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7744 else
7745 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7747 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7748 STMT_SLP_TYPE (res) = loop_vect;
7749 GROUP_FIRST_ELEMENT (res) = NULL;
7750 GROUP_NEXT_ELEMENT (res) = NULL;
7751 GROUP_SIZE (res) = 0;
7752 GROUP_STORE_COUNT (res) = 0;
7753 GROUP_GAP (res) = 0;
7754 GROUP_SAME_DR_STMT (res) = NULL;
7756 return res;
7760 /* Create the vector holding stmt_vec_info structs, indexed by statement UID. */
7762 void
7763 init_stmt_vec_info_vec (void)
7765 gcc_assert (!stmt_vec_info_vec.exists ());
7766 stmt_vec_info_vec.create (50);
7770 /* Free the stmt_vec_info vector together with the stmt_vec_infos it holds. */
7772 void
7773 free_stmt_vec_info_vec (void)
7775 unsigned int i;
7776 vec_void_p info;
7777 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7778 if (info != NULL)
7779 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7780 gcc_assert (stmt_vec_info_vec.exists ());
7781 stmt_vec_info_vec.release ();
7785 /* Free stmt vectorization related info. */
7787 void
7788 free_stmt_vec_info (gimple stmt)
7790 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7792 if (!stmt_info)
7793 return;
7795 /* Check if this statement has a related "pattern stmt"
7796 (introduced by the vectorizer during the pattern recognition
7797 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7798 too. */
7799 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7801 stmt_vec_info patt_info
7802 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7803 if (patt_info)
7805 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7806 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7807 gimple_set_bb (patt_stmt, NULL);
7808 tree lhs = gimple_get_lhs (patt_stmt);
7809 if (TREE_CODE (lhs) == SSA_NAME)
7810 release_ssa_name (lhs);
7811 if (seq)
7813 gimple_stmt_iterator si;
7814 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7816 gimple seq_stmt = gsi_stmt (si);
7817 gimple_set_bb (seq_stmt, NULL);
7818 lhs = gimple_get_lhs (seq_stmt);
7819 if (TREE_CODE (lhs) == SSA_NAME)
7820 release_ssa_name (lhs);
7821 free_stmt_vec_info (seq_stmt);
7824 free_stmt_vec_info (patt_stmt);
7828 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7829 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
7830 set_vinfo_for_stmt (stmt, NULL);
7831 free (stmt_info);
7835 /* Function get_vectype_for_scalar_type_and_size.
7837 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7838 by the target. */
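/* For instance, SCALAR_TYPE int (4 bytes) with SIZE 16 would typically
   yield a 4-unit vector type such as V4SImode; with SIZE 0 the number
   of units follows the target's preferred SIMD mode instead.  The
   exact modes are target-dependent.  */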
7840 static tree
7841 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7843 machine_mode inner_mode = TYPE_MODE (scalar_type);
7844 machine_mode simd_mode;
7845 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7846 int nunits;
7847 tree vectype;
7849 if (nbytes == 0)
7850 return NULL_TREE;
7852 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7853 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7854 return NULL_TREE;
7856 /* For vector types of elements whose mode precision doesn't
7857 match their type's precision we use an element type of mode
7858 precision. The vectorization routines will have to make sure
7859 they support the proper result truncation/extension.
7860 We also make sure to build vector types with INTEGER_TYPE
7861 component type only. */
7862 if (INTEGRAL_TYPE_P (scalar_type)
7863 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7864 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7865 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7866 TYPE_UNSIGNED (scalar_type));
7868 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7869 When the component mode passes the above test simply use a type
7870 corresponding to that mode. The theory is that any use that
7871 would cause problems with this will disable vectorization anyway. */
7872 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7873 && !INTEGRAL_TYPE_P (scalar_type))
7874 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7876 /* We can't build a vector type of elements with alignment bigger than
7877 their size. */
7878 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7879 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7880 TYPE_UNSIGNED (scalar_type));
7882 /* If we fell back to using the mode, fail if there was
7883 no scalar type for it. */
7884 if (scalar_type == NULL_TREE)
7885 return NULL_TREE;
7887 /* If no size was supplied use the mode the target prefers. Otherwise
7888 look up a vector mode of the specified size. */
7889 if (size == 0)
7890 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7891 else
7892 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7893 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7894 if (nunits <= 1)
7895 return NULL_TREE;
7897 vectype = build_vector_type (scalar_type, nunits);
7899 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7900 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7901 return NULL_TREE;
7903 return vectype;
7906 unsigned int current_vector_size;
7908 /* Function get_vectype_for_scalar_type.
7910 Returns the vector type corresponding to SCALAR_TYPE as supported
7911 by the target. */
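/* The first successful lookup made while current_vector_size is still 0
   also records the size of the chosen vector type, so that subsequent
   queries during the same vectorization use a consistent vector size.  */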
7913 tree
7914 get_vectype_for_scalar_type (tree scalar_type)
7916 tree vectype;
7917 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7918 current_vector_size);
7919 if (vectype
7920 && current_vector_size == 0)
7921 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7922 return vectype;
7925 /* Function get_same_sized_vectype
7927 Returns a vector type corresponding to SCALAR_TYPE with the same
7928 size as VECTOR_TYPE, if supported by the target. */
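/* E.g. for VECTOR_TYPE V4SImode (16 bytes) and SCALAR_TYPE float this
   would typically return a V4SFmode vector type, or NULL_TREE if the
   target provides no such vector.  */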
7930 tree
7931 get_same_sized_vectype (tree scalar_type, tree vector_type)
7933 return get_vectype_for_scalar_type_and_size
7934 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7937 /* Function vect_is_simple_use.
7939 Input:
7940 LOOP_VINFO - the vect info of the loop that is being vectorized.
7941 BB_VINFO - the vect info of the basic block that is being vectorized.
7942 OPERAND - operand of STMT in the loop or bb.
7943 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7945 Returns whether a stmt with OPERAND can be vectorized.
7946 For loops, supportable operands are constants, loop invariants, and operands
7947 that are defined by the current iteration of the loop. Unsupportable
7948 operands are those that are defined by a previous iteration of the loop (as
7949 is the case in reduction/induction computations).
7950 For basic blocks, supportable operands are constants and bb invariants.
7951 For now, operands defined outside the basic block are not supported. */
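/* For example, in a loop computing a[i] = b[i] + x, the value loaded
   from b[i] is classified as vect_internal_def (defined by a statement
   inside the loop), the loop-invariant x as vect_external_def, and a
   literal constant operand as vect_constant_def.  */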
7953 bool
7954 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7955 bb_vec_info bb_vinfo, gimple *def_stmt,
7956 tree *def, enum vect_def_type *dt)
7958 *def_stmt = NULL;
7959 *def = NULL_TREE;
7960 *dt = vect_unknown_def_type;
7962 if (dump_enabled_p ())
7964 dump_printf_loc (MSG_NOTE, vect_location,
7965 "vect_is_simple_use: operand ");
7966 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7967 dump_printf (MSG_NOTE, "\n");
7970 if (CONSTANT_CLASS_P (operand))
7972 *dt = vect_constant_def;
7973 return true;
7976 if (is_gimple_min_invariant (operand))
7978 *def = operand;
7979 *dt = vect_external_def;
7980 return true;
7983 if (TREE_CODE (operand) != SSA_NAME)
7985 if (dump_enabled_p ())
7986 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7987 "not ssa-name.\n");
7988 return false;
7991 if (SSA_NAME_IS_DEFAULT_DEF (operand))
7993 *def = operand;
7994 *dt = vect_external_def;
7995 return true;
7998 *def_stmt = SSA_NAME_DEF_STMT (operand);
7999 if (dump_enabled_p ())
8001 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
8002 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
8005 basic_block bb = gimple_bb (*def_stmt);
8006 if ((loop_vinfo && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), bb))
8007 || (bb_vinfo
8008 && (bb != BB_VINFO_BB (bb_vinfo)
8009 || gimple_code (*def_stmt) == GIMPLE_PHI)))
8010 *dt = vect_external_def;
8011 else
8013 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
8014 if (bb_vinfo && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
8015 *dt = vect_external_def;
8016 else
8017 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
8020 if (dump_enabled_p ())
8022 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
8023 switch (*dt)
8025 case vect_uninitialized_def:
8026 dump_printf (MSG_NOTE, "uninitialized\n");
8027 break;
8028 case vect_constant_def:
8029 dump_printf (MSG_NOTE, "constant\n");
8030 break;
8031 case vect_external_def:
8032 dump_printf (MSG_NOTE, "external\n");
8033 break;
8034 case vect_internal_def:
8035 dump_printf (MSG_NOTE, "internal\n");
8036 break;
8037 case vect_induction_def:
8038 dump_printf (MSG_NOTE, "induction\n");
8039 break;
8040 case vect_reduction_def:
8041 dump_printf (MSG_NOTE, "reduction\n");
8042 break;
8043 case vect_double_reduction_def:
8044 dump_printf (MSG_NOTE, "double reduction\n");
8045 break;
8046 case vect_nested_cycle:
8047 dump_printf (MSG_NOTE, "nested cycle\n");
8048 break;
8049 case vect_unknown_def_type:
8050 dump_printf (MSG_NOTE, "unknown\n");
8051 break;
8055 if (*dt == vect_unknown_def_type
8056 || (stmt
8057 && *dt == vect_double_reduction_def
8058 && gimple_code (stmt) != GIMPLE_PHI))
8060 if (dump_enabled_p ())
8061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8062 "Unsupported pattern.\n");
8063 return false;
8066 switch (gimple_code (*def_stmt))
8068 case GIMPLE_PHI:
8069 *def = gimple_phi_result (*def_stmt);
8070 break;
8072 case GIMPLE_ASSIGN:
8073 *def = gimple_assign_lhs (*def_stmt);
8074 break;
8076 case GIMPLE_CALL:
8077 *def = gimple_call_lhs (*def_stmt);
8078 if (*def != NULL)
8079 break;
8080 /* FALLTHRU */
8081 default:
8082 if (dump_enabled_p ())
8083 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8084 "unsupported defining stmt:\n");
8085 return false;
8088 return true;
8091 /* Function vect_is_simple_use_1.
8093 Same as vect_is_simple_use but also determines the vector operand
8094 type of OPERAND and stores it to *VECTYPE. If the definition of
8095 OPERAND is vect_uninitialized_def, vect_constant_def or
8096 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8097 is responsible for computing the best suited vector type for the
8098 scalar operand. */
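/* Internal, induction, reduction, double-reduction and nested-cycle
   defs carry a vector type on their stmt_vec_info, which is what is
   returned here; for constant and external defs the caller chooses a
   vector type itself, typically via get_vectype_for_scalar_type.  */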
8100 bool
8101 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
8102 bb_vec_info bb_vinfo, gimple *def_stmt,
8103 tree *def, enum vect_def_type *dt, tree *vectype)
8105 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
8106 def, dt))
8107 return false;
8109 /* Now get a vector type if the def is internal, otherwise supply
8110 NULL_TREE and leave it up to the caller to figure out a proper
8111 type for the use stmt. */
8112 if (*dt == vect_internal_def
8113 || *dt == vect_induction_def
8114 || *dt == vect_reduction_def
8115 || *dt == vect_double_reduction_def
8116 || *dt == vect_nested_cycle)
8118 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8120 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8121 && !STMT_VINFO_RELEVANT (stmt_info)
8122 && !STMT_VINFO_LIVE_P (stmt_info))
8123 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8125 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8126 gcc_assert (*vectype != NULL_TREE);
8128 else if (*dt == vect_uninitialized_def
8129 || *dt == vect_constant_def
8130 || *dt == vect_external_def)
8131 *vectype = NULL_TREE;
8132 else
8133 gcc_unreachable ();
8135 return true;
8139 /* Function supportable_widening_operation
8141 Check whether an operation represented by the code CODE is a
8142 widening operation that is supported by the target platform in
8143 vector form (i.e., when operating on arguments of type VECTYPE_IN
8144 producing a result of type VECTYPE_OUT).
8146 Widening operations we currently support are NOP (CONVERT), FLOAT,
8147 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations are supported
8148 by the target platform either directly (via vector tree-codes), or via
8149 target builtins.
8151 Output:
8152 - CODE1 and CODE2 are codes of vector operations to be used when
8153 vectorizing the operation, if available.
8154 - MULTI_STEP_CVT determines the number of required intermediate steps in
8155 case of multi-step conversion (like char->short->int - in that case
8156 MULTI_STEP_CVT will be 1).
8157 - INTERM_TYPES contains the intermediate type required to perform the
8158 widening operation (short in the above example). */
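/* Illustrative scalar shape of a widening multiply handled here
   (exact vector modes are target-dependent):

     short a, b;
     int c = (int) a * (int) b;    -- typically a WIDEN_MULT_EXPR

   With e.g. V8HImode operands the widened result occupies two V4SImode
   vectors, produced either as a LO/HI pair or, when the result only
   feeds a reduction, as an EVEN/ODD pair (see the WIDEN_MULT_EXPR case
   below).  */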
8160 bool
8161 supportable_widening_operation (enum tree_code code, gimple stmt,
8162 tree vectype_out, tree vectype_in,
8163 enum tree_code *code1, enum tree_code *code2,
8164 int *multi_step_cvt,
8165 vec<tree> *interm_types)
8167 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8168 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8169 struct loop *vect_loop = NULL;
8170 machine_mode vec_mode;
8171 enum insn_code icode1, icode2;
8172 optab optab1, optab2;
8173 tree vectype = vectype_in;
8174 tree wide_vectype = vectype_out;
8175 enum tree_code c1, c2;
8176 int i;
8177 tree prev_type, intermediate_type;
8178 machine_mode intermediate_mode, prev_mode;
8179 optab optab3, optab4;
8181 *multi_step_cvt = 0;
8182 if (loop_info)
8183 vect_loop = LOOP_VINFO_LOOP (loop_info);
8185 switch (code)
8187 case WIDEN_MULT_EXPR:
8188 /* The result of a vectorized widening operation usually requires
8189 two vectors (because the widened results do not fit into one vector).
8190 The generated vector results would normally be expected to be
8191 generated in the same order as in the original scalar computation,
8192 i.e. if 8 results are generated in each vector iteration, they are
8193 to be organized as follows:
8194 vect1: [res1,res2,res3,res4],
8195 vect2: [res5,res6,res7,res8].
8197 However, in the special case that the result of the widening
8198 operation is used in a reduction computation only, the order doesn't
8199 matter (because when vectorizing a reduction we change the order of
8200 the computation). Some targets can take advantage of this and
8201 generate more efficient code. For example, targets like Altivec,
8202 that support widen_mult using a sequence of {mult_even,mult_odd}
8203 generate the following vectors:
8204 vect1: [res1,res3,res5,res7],
8205 vect2: [res2,res4,res6,res8].
8207 When vectorizing outer-loops, we execute the inner-loop sequentially
8208 (each vectorized inner-loop iteration contributes to VF outer-loop
8209 iterations in parallel). We therefore don't allow changing the
8210 order of the computation in the inner-loop during outer-loop
8211 vectorization. */
8212 /* TODO: Another case in which order doesn't *really* matter is when we
8213 widen and then contract again, e.g. (short)((int)x * y >> 8).
8214 Normally, pack_trunc performs an even/odd permute, whereas the
8215 repack from an even/odd expansion would be an interleave, which
8216 would be significantly simpler for e.g. AVX2. */
8217 /* In any case, in order to avoid duplicating the code below, recurse
8218 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8219 are properly set up for the caller. If we fail, we'll continue with
8220 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8221 if (vect_loop
8222 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8223 && !nested_in_vect_loop_p (vect_loop, stmt)
8224 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8225 stmt, vectype_out, vectype_in,
8226 code1, code2, multi_step_cvt,
8227 interm_types))
8229 /* Elements in a vector with vect_used_by_reduction property cannot
8230 be reordered if the use chain with this property does not have the
8231 same operation. One such example is s += a * b, where elements
8232 in a and b cannot be reordered. Here we check if the vector defined
8233 by STMT is only directly used in the reduction statement. */
8234 tree lhs = gimple_assign_lhs (stmt);
8235 use_operand_p dummy;
8236 gimple use_stmt;
8237 stmt_vec_info use_stmt_info = NULL;
8238 if (single_imm_use (lhs, &dummy, &use_stmt)
8239 && (use_stmt_info = vinfo_for_stmt (use_stmt))
8240 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
8241 return true;
8243 c1 = VEC_WIDEN_MULT_LO_EXPR;
8244 c2 = VEC_WIDEN_MULT_HI_EXPR;
8245 break;
8247 case VEC_WIDEN_MULT_EVEN_EXPR:
8248 /* Support the recursion induced just above. */
8249 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
8250 c2 = VEC_WIDEN_MULT_ODD_EXPR;
8251 break;
8253 case WIDEN_LSHIFT_EXPR:
8254 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
8255 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
8256 break;
8258 CASE_CONVERT:
8259 c1 = VEC_UNPACK_LO_EXPR;
8260 c2 = VEC_UNPACK_HI_EXPR;
8261 break;
8263 case FLOAT_EXPR:
8264 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
8265 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
8266 break;
8268 case FIX_TRUNC_EXPR:
8269 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8270 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8271 computing the operation. */
8272 return false;
8274 default:
8275 gcc_unreachable ();
8278 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
8279 std::swap (c1, c2);
8281 if (code == FIX_TRUNC_EXPR)
8283 /* The signedness is determined from the output operand. */
8284 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8285 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8287 else
8289 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8290 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8293 if (!optab1 || !optab2)
8294 return false;
8296 vec_mode = TYPE_MODE (vectype);
8297 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8298 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8299 return false;
8301 *code1 = c1;
8302 *code2 = c2;
8304 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8305 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8306 return true;
8308 /* Check if it's a multi-step conversion that can be done using intermediate
8309 types. */
8311 prev_type = vectype;
8312 prev_mode = vec_mode;
8314 if (!CONVERT_EXPR_CODE_P (code))
8315 return false;
8317 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8318 intermediate steps in the promotion sequence. We try
8319 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
8320 not. */
8321 interm_types->create (MAX_INTERM_CVT_STEPS);
8322 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8324 intermediate_mode = insn_data[icode1].operand[0].mode;
8325 intermediate_type
8326 = lang_hooks.types.type_for_mode (intermediate_mode,
8327 TYPE_UNSIGNED (prev_type));
8328 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8329 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8331 if (!optab3 || !optab4
8332 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8333 || insn_data[icode1].operand[0].mode != intermediate_mode
8334 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8335 || insn_data[icode2].operand[0].mode != intermediate_mode
8336 || ((icode1 = optab_handler (optab3, intermediate_mode))
8337 == CODE_FOR_nothing)
8338 || ((icode2 = optab_handler (optab4, intermediate_mode))
8339 == CODE_FOR_nothing))
8340 break;
8342 interm_types->quick_push (intermediate_type);
8343 (*multi_step_cvt)++;
8345 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8346 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8347 return true;
8349 prev_type = intermediate_type;
8350 prev_mode = intermediate_mode;
8353 interm_types->release ();
8354 return false;
8358 /* Function supportable_narrowing_operation
8360 Check whether an operation represented by the code CODE is a
8361 narrowing operation that is supported by the target platform in
8362 vector form (i.e., when operating on arguments of type VECTYPE_IN
8363 and producing a result of type VECTYPE_OUT).
8365 Narrowing operations we currently support are NOP (CONVERT) and
8366 FIX_TRUNC. This function checks if these operations are supported by
8367 the target platform directly via vector tree-codes.
8369 Output:
8370 - CODE1 is the code of a vector operation to be used when
8371 vectorizing the operation, if available.
8372 - MULTI_STEP_CVT determines the number of required intermediate steps in
8373 case of multi-step conversion (like int->short->char - in that case
8374 MULTI_STEP_CVT will be 1).
8375 - INTERM_TYPES contains the intermediate type required to perform the
8376 narrowing operation (short in the above example). */
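/* E.g. narrowing int elements to char typically takes two
   VEC_PACK_TRUNC_EXPR steps through a short vector type; in that case
   MULTI_STEP_CVT is 1 and INTERM_TYPES holds the intermediate short
   vector type.  Exact vector modes depend on the target.  */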
8378 bool
8379 supportable_narrowing_operation (enum tree_code code,
8380 tree vectype_out, tree vectype_in,
8381 enum tree_code *code1, int *multi_step_cvt,
8382 vec<tree> *interm_types)
8384 machine_mode vec_mode;
8385 enum insn_code icode1;
8386 optab optab1, interm_optab;
8387 tree vectype = vectype_in;
8388 tree narrow_vectype = vectype_out;
8389 enum tree_code c1;
8390 tree intermediate_type;
8391 machine_mode intermediate_mode, prev_mode;
8392 int i;
8393 bool uns;
8395 *multi_step_cvt = 0;
8396 switch (code)
8398 CASE_CONVERT:
8399 c1 = VEC_PACK_TRUNC_EXPR;
8400 break;
8402 case FIX_TRUNC_EXPR:
8403 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8404 break;
8406 case FLOAT_EXPR:
8407 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8408 tree code and optabs used for computing the operation. */
8409 return false;
8411 default:
8412 gcc_unreachable ();
8415 if (code == FIX_TRUNC_EXPR)
8416 /* The signedness is determined from the output operand. */
8417 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8418 else
8419 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8421 if (!optab1)
8422 return false;
8424 vec_mode = TYPE_MODE (vectype);
8425 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8426 return false;
8428 *code1 = c1;
8430 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8431 return true;
8433 /* Check if it's a multi-step conversion that can be done using intermediate
8434 types. */
8435 prev_mode = vec_mode;
8436 if (code == FIX_TRUNC_EXPR)
8437 uns = TYPE_UNSIGNED (vectype_out);
8438 else
8439 uns = TYPE_UNSIGNED (vectype);
8441 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8442 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8443 costly than signed. */
8444 if (code == FIX_TRUNC_EXPR && uns)
8446 enum insn_code icode2;
8448 intermediate_type
8449 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8450 interm_optab
8451 = optab_for_tree_code (c1, intermediate_type, optab_default);
8452 if (interm_optab != unknown_optab
8453 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8454 && insn_data[icode1].operand[0].mode
8455 == insn_data[icode2].operand[0].mode)
8457 uns = false;
8458 optab1 = interm_optab;
8459 icode1 = icode2;
8463 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8464 intermediate steps in the narrowing sequence. We try
8465 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8466 interm_types->create (MAX_INTERM_CVT_STEPS);
8467 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8469 intermediate_mode = insn_data[icode1].operand[0].mode;
8470 intermediate_type
8471 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8472 interm_optab
8473 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8474 optab_default);
8475 if (!interm_optab
8476 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8477 || insn_data[icode1].operand[0].mode != intermediate_mode
8478 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8479 == CODE_FOR_nothing))
8480 break;
8482 interm_types->quick_push (intermediate_type);
8483 (*multi_step_cvt)++;
8485 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8486 return true;
8488 prev_mode = intermediate_mode;
8489 optab1 = interm_optab;
8492 interm_types->release ();
8493 return false;