gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Return the vectorized type for the given statement. */
57 tree
58 stmt_vectype (struct _stmt_vec_info *stmt_info)
60 return STMT_VINFO_VECTYPE (stmt_info);
63 /* Return TRUE iff the given statement is in an inner loop relative to
64 the loop being vectorized. */
65 bool
66 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
68 gimple *stmt = STMT_VINFO_STMT (stmt_info);
69 basic_block bb = gimple_bb (stmt);
70 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
71 struct loop* loop;
73 if (!loop_vinfo)
74 return false;
76 loop = LOOP_VINFO_LOOP (loop_vinfo);
78 return (bb->loop_father == loop->inner);
81 /* Record the cost of a statement, either by directly informing the
82 target model or by saving it in a vector for later processing.
83 Return a preliminary estimate of the statement's cost. */
85 unsigned
86 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
87 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
88 int misalign, enum vect_cost_model_location where)
90 if (body_cost_vec)
92 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
93 stmt_info_for_cost si = { count, kind,
94 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
95 misalign };
96 body_cost_vec->safe_push (si);
97 return (unsigned)
98 (builtin_vectorization_cost (kind, vectype, misalign) * count);
100 else
101 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
102 count, kind, stmt_info, misalign, where);
105 /* Return a variable of type ELEM_TYPE[NELEMS]. */
107 static tree
108 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
110 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
111 "vect_array");
114 /* ARRAY is an array of vectors created by create_vector_array.
115 Return an SSA_NAME for the vector in index N. The reference
116 is part of the vectorization of STMT and the vector is associated
117 with scalar destination SCALAR_DEST. */
119 static tree
120 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
121 tree array, unsigned HOST_WIDE_INT n)
123 tree vect_type, vect, vect_name, array_ref;
124 gimple *new_stmt;
126 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
127 vect_type = TREE_TYPE (TREE_TYPE (array));
128 vect = vect_create_destination_var (scalar_dest, vect_type);
129 array_ref = build4 (ARRAY_REF, vect_type, array,
130 build_int_cst (size_type_node, n),
131 NULL_TREE, NULL_TREE);
133 new_stmt = gimple_build_assign (vect, array_ref);
134 vect_name = make_ssa_name (vect, new_stmt);
135 gimple_assign_set_lhs (new_stmt, vect_name);
136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
138 return vect_name;
141 /* ARRAY is an array of vectors created by create_vector_array.
142 Emit code to store SSA_NAME VECT in index N of the array.
143 The store is part of the vectorization of STMT. */
145 static void
146 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
147 tree array, unsigned HOST_WIDE_INT n)
149 tree array_ref;
150 gimple *new_stmt;
152 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
153 build_int_cst (size_type_node, n),
154 NULL_TREE, NULL_TREE);
156 new_stmt = gimple_build_assign (array_ref, vect);
157 vect_finish_stmt_generation (stmt, new_stmt, gsi);
160 /* PTR is a pointer to an array of type TYPE. Return a representation
161 of *PTR. The memory reference replaces those in FIRST_DR
162 (and its group). */
164 static tree
165 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
167 tree mem_ref, alias_ptr_type;
169 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
170 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
171 /* Arrays have the same alignment as their type. */
172 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
173 return mem_ref;
176 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
178 /* Function vect_mark_relevant.
180 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
182 static void
183 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
184 enum vect_relevant relevant, bool live_p)
186 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
187 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
188 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
189 gimple *pattern_stmt;
191 if (dump_enabled_p ())
193 dump_printf_loc (MSG_NOTE, vect_location,
194 "mark relevant %d, live %d: ", relevant, live_p);
195 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
198 /* If this stmt is an original stmt in a pattern, we might need to mark its
199 related pattern stmt instead of the original stmt. However, such stmts
200 may have their own uses that are not in any pattern; in such cases the
201 stmt itself should be marked. */
202 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
204 /* This is the last stmt in a sequence that was detected as a
205 pattern that can potentially be vectorized. Don't mark the stmt
206 as relevant/live because it's not going to be vectorized.
207 Instead mark the pattern-stmt that replaces it. */
209 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
211 if (dump_enabled_p ())
212 dump_printf_loc (MSG_NOTE, vect_location,
213 "last stmt in pattern. don't mark"
214 " relevant/live.\n");
215 stmt_info = vinfo_for_stmt (pattern_stmt);
216 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
217 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
218 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
219 stmt = pattern_stmt;
222 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
223 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
224 STMT_VINFO_RELEVANT (stmt_info) = relevant;
226 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
227 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
229 if (dump_enabled_p ())
230 dump_printf_loc (MSG_NOTE, vect_location,
231 "already marked relevant/live.\n");
232 return;
235 worklist->safe_push (stmt);
239 /* Function is_simple_and_all_uses_invariant
241 Return true if STMT is simple and all uses of it are invariant. */
243 bool
244 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
246 tree op;
247 gimple *def_stmt;
248 ssa_op_iter iter;
250 if (!is_gimple_assign (stmt))
251 return false;
253 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
255 enum vect_def_type dt = vect_uninitialized_def;
257 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
259 if (dump_enabled_p ())
260 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
261 "use not simple.\n");
262 return false;
265 if (dt != vect_external_def && dt != vect_constant_def)
266 return false;
268 return true;
271 /* Function vect_stmt_relevant_p.
273 Return true if STMT in loop that is represented by LOOP_VINFO is
274 "relevant for vectorization".
276 A stmt is considered "relevant for vectorization" if:
277 - it has uses outside the loop.
278 - it has vdefs (it alters memory).
279 - it is a control stmt in the loop (except for the exit condition).
281 CHECKME: what other side effects would the vectorizer allow? */
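/* For illustration, consider a hypothetical loop

       for (i = 0; i < n; i++) { a[i] = x[i] + 1; sum += x[i]; }

   The store to a[i] is relevant because it has a vdef, and the statement
   updating sum is live because its result is used after the loop (via the
   loop exit phi).  */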
283 static bool
284 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
285 enum vect_relevant *relevant, bool *live_p)
287 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
288 ssa_op_iter op_iter;
289 imm_use_iterator imm_iter;
290 use_operand_p use_p;
291 def_operand_p def_p;
293 *relevant = vect_unused_in_scope;
294 *live_p = false;
296 /* cond stmt other than loop exit cond. */
297 if (is_ctrl_stmt (stmt)
298 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
299 != loop_exit_ctrl_vec_info_type)
300 *relevant = vect_used_in_scope;
302 /* changing memory. */
303 if (gimple_code (stmt) != GIMPLE_PHI)
304 if (gimple_vdef (stmt)
305 && !gimple_clobber_p (stmt))
307 if (dump_enabled_p ())
308 dump_printf_loc (MSG_NOTE, vect_location,
309 "vec_stmt_relevant_p: stmt has vdefs.\n");
310 *relevant = vect_used_in_scope;
313 /* uses outside the loop. */
314 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
316 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
318 basic_block bb = gimple_bb (USE_STMT (use_p));
319 if (!flow_bb_inside_loop_p (loop, bb))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: used out of loop.\n");
325 if (is_gimple_debug (USE_STMT (use_p)))
326 continue;
328 /* We expect all such uses to be in the loop exit phis
329 (because of loop closed form) */
330 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
331 gcc_assert (bb == single_exit (loop)->dest);
333 *live_p = true;
338 if (*live_p && *relevant == vect_unused_in_scope
339 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
341 if (dump_enabled_p ())
342 dump_printf_loc (MSG_NOTE, vect_location,
343 "vec_stmt_relevant_p: stmt live but not relevant.\n");
344 *relevant = vect_used_only_live;
347 return (*live_p || *relevant);
351 /* Function exist_non_indexing_operands_for_use_p
353 USE is one of the uses attached to STMT. Check if USE is
354 used in STMT for anything other than indexing an array. */
356 static bool
357 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
359 tree operand;
360 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
362 /* USE corresponds to some operand in STMT. If there is no data
363 reference in STMT, then any operand that corresponds to USE
364 is not indexing an array. */
365 if (!STMT_VINFO_DATA_REF (stmt_info))
366 return true;
368 /* STMT has a data_ref. FORNOW this means that it is of one of
369 the following forms:
370 -1- ARRAY_REF = var
371 -2- var = ARRAY_REF
372 (This should have been verified in analyze_data_refs).
374 'var' in the second case corresponds to a def, not a use,
375 so USE cannot correspond to any operands that are not used
376 for array indexing.
378 Therefore, all we need to check is if STMT falls into the
379 first case, and whether var corresponds to USE. */
381 if (!gimple_assign_copy_p (stmt))
383 if (is_gimple_call (stmt)
384 && gimple_call_internal_p (stmt))
385 switch (gimple_call_internal_fn (stmt))
387 case IFN_MASK_STORE:
388 operand = gimple_call_arg (stmt, 3);
389 if (operand == use)
390 return true;
391 /* FALLTHRU */
392 case IFN_MASK_LOAD:
393 operand = gimple_call_arg (stmt, 2);
394 if (operand == use)
395 return true;
396 break;
397 default:
398 break;
400 return false;
403 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
404 return false;
405 operand = gimple_assign_rhs1 (stmt);
406 if (TREE_CODE (operand) != SSA_NAME)
407 return false;
409 if (operand == use)
410 return true;
412 return false;
417 Function process_use.
419 Inputs:
420 - a USE in STMT in a loop represented by LOOP_VINFO
421 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
422 that defined USE. This is done by calling mark_relevant and passing it
423 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
424 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
425 be performed.
427 Outputs:
428 Generally, LIVE_P and RELEVANT are used to define the liveness and
429 relevance info of the DEF_STMT of this USE:
430 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
431 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
432 Exceptions:
433 - case 1: If USE is used only for address computations (e.g. array indexing),
434 which does not need to be directly vectorized, then the liveness/relevance
435 of the respective DEF_STMT is left unchanged.
436 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
437 skip DEF_STMT because it has already been processed.
438 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
439 be modified accordingly.
441 Return true if everything is as expected. Return false otherwise. */
443 static bool
444 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
445 enum vect_relevant relevant, vec<gimple *> *worklist,
446 bool force)
448 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
449 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
450 stmt_vec_info dstmt_vinfo;
451 basic_block bb, def_bb;
452 gimple *def_stmt;
453 enum vect_def_type dt;
455 /* case 1: we are only interested in uses that need to be vectorized. Uses
456 that are used for address computation are not considered relevant. */
457 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
458 return true;
460 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
462 if (dump_enabled_p ())
463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
464 "not vectorized: unsupported use in stmt.\n");
465 return false;
468 if (!def_stmt || gimple_nop_p (def_stmt))
469 return true;
471 def_bb = gimple_bb (def_stmt);
472 if (!flow_bb_inside_loop_p (loop, def_bb))
474 if (dump_enabled_p ())
475 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
476 return true;
479 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
480 DEF_STMT must have already been processed, because this should be the
481 only way that STMT, which is a reduction-phi, was put in the worklist,
482 as there should be no other uses for DEF_STMT in the loop. So we just
483 check that everything is as expected, and we are done. */
484 dstmt_vinfo = vinfo_for_stmt (def_stmt);
485 bb = gimple_bb (stmt);
486 if (gimple_code (stmt) == GIMPLE_PHI
487 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
488 && gimple_code (def_stmt) != GIMPLE_PHI
489 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
490 && bb->loop_father == def_bb->loop_father)
492 if (dump_enabled_p ())
493 dump_printf_loc (MSG_NOTE, vect_location,
494 "reduc-stmt defining reduc-phi in the same nest.\n");
495 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
496 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
497 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
498 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
499 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
500 return true;
503 /* case 3a: outer-loop stmt defining an inner-loop stmt:
504 outer-loop-header-bb:
505 d = def_stmt
506 inner-loop:
507 stmt # use (d)
508 outer-loop-tail-bb:
509 ... */
510 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
512 if (dump_enabled_p ())
513 dump_printf_loc (MSG_NOTE, vect_location,
514 "outer-loop def-stmt defining inner-loop stmt.\n");
516 switch (relevant)
518 case vect_unused_in_scope:
519 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
520 vect_used_in_scope : vect_unused_in_scope;
521 break;
523 case vect_used_in_outer_by_reduction:
524 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
525 relevant = vect_used_by_reduction;
526 break;
528 case vect_used_in_outer:
529 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
530 relevant = vect_used_in_scope;
531 break;
533 case vect_used_in_scope:
534 break;
536 default:
537 gcc_unreachable ();
541 /* case 3b: inner-loop stmt defining an outer-loop stmt:
542 outer-loop-header-bb:
544 inner-loop:
545 d = def_stmt
546 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
547 stmt # use (d) */
548 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
550 if (dump_enabled_p ())
551 dump_printf_loc (MSG_NOTE, vect_location,
552 "inner-loop def-stmt defining outer-loop stmt.\n");
554 switch (relevant)
556 case vect_unused_in_scope:
557 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
558 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
559 vect_used_in_outer_by_reduction : vect_unused_in_scope;
560 break;
562 case vect_used_by_reduction:
563 case vect_used_only_live:
564 relevant = vect_used_in_outer_by_reduction;
565 break;
567 case vect_used_in_scope:
568 relevant = vect_used_in_outer;
569 break;
571 default:
572 gcc_unreachable ();
576 vect_mark_relevant (worklist, def_stmt, relevant, false);
577 return true;
581 /* Function vect_mark_stmts_to_be_vectorized.
583 Not all stmts in the loop need to be vectorized. For example:
585 for i...
586 for j...
587 1. T0 = i + j
588 2. T1 = a[T0]
590 3. j = j + 1
592 Stmts 1 and 3 do not need to be vectorized, because loop control and
593 addressing of vectorized data-refs are handled differently.
595 This pass detects such stmts. */
597 bool
598 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
600 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
601 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
602 unsigned int nbbs = loop->num_nodes;
603 gimple_stmt_iterator si;
604 gimple *stmt;
605 unsigned int i;
606 stmt_vec_info stmt_vinfo;
607 basic_block bb;
608 gimple *phi;
609 bool live_p;
610 enum vect_relevant relevant;
612 if (dump_enabled_p ())
613 dump_printf_loc (MSG_NOTE, vect_location,
614 "=== vect_mark_stmts_to_be_vectorized ===\n");
616 auto_vec<gimple *, 64> worklist;
618 /* 1. Init worklist. */
619 for (i = 0; i < nbbs; i++)
621 bb = bbs[i];
622 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
624 phi = gsi_stmt (si);
625 if (dump_enabled_p ())
627 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
628 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
631 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
632 vect_mark_relevant (&worklist, phi, relevant, live_p);
634 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
636 stmt = gsi_stmt (si);
637 if (dump_enabled_p ())
639 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
640 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
643 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
644 vect_mark_relevant (&worklist, stmt, relevant, live_p);
648 /* 2. Process_worklist */
649 while (worklist.length () > 0)
651 use_operand_p use_p;
652 ssa_op_iter iter;
654 stmt = worklist.pop ();
655 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
658 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
661 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
662 (DEF_STMT) as relevant/irrelevant according to the relevance property
663 of STMT. */
664 stmt_vinfo = vinfo_for_stmt (stmt);
665 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
680 case vect_reduction_def:
681 gcc_assert (relevant != vect_unused_in_scope);
682 if (relevant != vect_unused_in_scope
683 && relevant != vect_used_in_scope
684 && relevant != vect_used_by_reduction
685 && relevant != vect_used_only_live)
687 if (dump_enabled_p ())
688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
689 "unsupported use of reduction.\n");
690 return false;
692 break;
694 case vect_nested_cycle:
695 if (relevant != vect_unused_in_scope
696 && relevant != vect_used_in_outer_by_reduction
697 && relevant != vect_used_in_outer)
699 if (dump_enabled_p ())
700 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
701 "unsupported use of nested cycle.\n");
703 return false;
705 break;
707 case vect_double_reduction_def:
708 if (relevant != vect_unused_in_scope
709 && relevant != vect_used_by_reduction
710 && relevant != vect_used_only_live)
712 if (dump_enabled_p ())
713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
714 "unsupported use of double reduction.\n");
716 return false;
718 break;
720 default:
721 break;
724 if (is_pattern_stmt_p (stmt_vinfo))
726 /* Pattern statements are not inserted into the code, so
727 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
728 have to scan the RHS or function arguments instead. */
729 if (is_gimple_assign (stmt))
731 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
732 tree op = gimple_assign_rhs1 (stmt);
734 i = 1;
735 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
737 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
738 relevant, &worklist, false)
739 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
740 relevant, &worklist, false))
741 return false;
742 i = 2;
744 for (; i < gimple_num_ops (stmt); i++)
746 op = gimple_op (stmt, i);
747 if (TREE_CODE (op) == SSA_NAME
748 && !process_use (stmt, op, loop_vinfo, relevant,
749 &worklist, false))
750 return false;
753 else if (is_gimple_call (stmt))
755 for (i = 0; i < gimple_call_num_args (stmt); i++)
757 tree arg = gimple_call_arg (stmt, i);
758 if (!process_use (stmt, arg, loop_vinfo, relevant,
759 &worklist, false))
760 return false;
764 else
765 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
767 tree op = USE_FROM_PTR (use_p);
768 if (!process_use (stmt, op, loop_vinfo, relevant,
769 &worklist, false))
770 return false;
773 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
775 tree off;
776 tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL, &off, NULL);
777 gcc_assert (decl);
778 if (!process_use (stmt, off, loop_vinfo, relevant, &worklist, true))
779 return false;
781 } /* while worklist */
783 return true;
787 /* Function vect_model_simple_cost.
789 Models cost for simple operations, i.e. those that only emit ncopies of a
790 single op. Right now, this does not account for multiple insns that could
791 be generated for the single vector op. We will handle that shortly. */
793 void
794 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
795 enum vect_def_type *dt,
796 stmt_vector_for_cost *prologue_cost_vec,
797 stmt_vector_for_cost *body_cost_vec)
799 int i;
800 int inside_cost = 0, prologue_cost = 0;
802 /* The SLP costs were already calculated during SLP tree build. */
803 if (PURE_SLP_STMT (stmt_info))
804 return;
806 /* FORNOW: Assuming maximum 2 args per stmt. */
807 for (i = 0; i < 2; i++)
808 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
809 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
810 stmt_info, 0, vect_prologue);
812 /* Pass the inside-of-loop statements to the target-specific cost model. */
813 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
814 stmt_info, 0, vect_body);
816 if (dump_enabled_p ())
817 dump_printf_loc (MSG_NOTE, vect_location,
818 "vect_model_simple_cost: inside_cost = %d, "
819 "prologue_cost = %d .\n", inside_cost, prologue_cost);
823 /* Model cost for type demotion and promotion operations. PWR is normally
824 zero for single-step promotions and demotions. It will be one if
825 two-step promotion/demotion is required, and so on. Each additional
826 step doubles the number of instructions required. */
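/* For example, with PWR == 1 (a two-step conversion) the loop below counts
   vect_pow2 (1) + vect_pow2 (2) == 2 + 4 vec_promote_demote stmts for a
   promotion and vect_pow2 (0) + vect_pow2 (1) == 1 + 2 stmts for a
   demotion (vect_pow2 (N) being 2**N).  */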
828 static void
829 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
830 enum vect_def_type *dt, int pwr)
832 int i, tmp;
833 int inside_cost = 0, prologue_cost = 0;
834 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
835 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
836 void *target_cost_data;
838 /* The SLP costs were already calculated during SLP tree build. */
839 if (PURE_SLP_STMT (stmt_info))
840 return;
842 if (loop_vinfo)
843 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
844 else
845 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
847 for (i = 0; i < pwr + 1; i++)
849 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
850 (i + 1) : i;
851 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
852 vec_promote_demote, stmt_info, 0,
853 vect_body);
856 /* FORNOW: Assuming maximum 2 args per stmt. */
857 for (i = 0; i < 2; i++)
858 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
859 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
860 stmt_info, 0, vect_prologue);
862 if (dump_enabled_p ())
863 dump_printf_loc (MSG_NOTE, vect_location,
864 "vect_model_promotion_demotion_cost: inside_cost = %d, "
865 "prologue_cost = %d .\n", inside_cost, prologue_cost);
868 /* Function vect_cost_group_size
870 For grouped load or store, return the group_size only if it is the first
871 load or store of a group, else return 1. This ensures that group size is
872 only returned once per group. */
874 static int
875 vect_cost_group_size (stmt_vec_info stmt_info)
877 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
879 if (first_stmt == STMT_VINFO_STMT (stmt_info))
880 return GROUP_SIZE (stmt_info);
882 return 1;
886 /* Function vect_model_store_cost
888 Models cost for stores. In the case of grouped accesses, one access
889 has the overhead of the grouped access attributed to it. */
891 void
892 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
893 bool store_lanes_p, enum vect_def_type dt,
894 slp_tree slp_node,
895 stmt_vector_for_cost *prologue_cost_vec,
896 stmt_vector_for_cost *body_cost_vec)
898 int group_size;
899 unsigned int inside_cost = 0, prologue_cost = 0;
900 struct data_reference *first_dr;
901 gimple *first_stmt;
903 if (dt == vect_constant_def || dt == vect_external_def)
904 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
905 stmt_info, 0, vect_prologue);
907 /* Grouped access? */
908 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
910 if (slp_node)
912 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
913 group_size = 1;
915 else
917 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
918 group_size = vect_cost_group_size (stmt_info);
921 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
923 /* Not a grouped access. */
924 else
926 group_size = 1;
927 first_dr = STMT_VINFO_DATA_REF (stmt_info);
930 /* We assume that the cost of a single store-lanes instruction is
931 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
932 access is instead being provided by a permute-and-store operation,
933 include the cost of the permutes. */
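/* For example, with NCOPIES == 1 and GROUP_SIZE == 4, the computation of
   NSTMTS below counts ceil_log2 (4) * 4 == 8 vec_perm stmts for the
   interleaving.  */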
934 if (!store_lanes_p && group_size > 1
935 && !STMT_VINFO_STRIDED_P (stmt_info))
937 /* Uses high and low interleave or shuffle operations for each
938 needed permute. */
939 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
940 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
941 stmt_info, 0, vect_body);
943 if (dump_enabled_p ())
944 dump_printf_loc (MSG_NOTE, vect_location,
945 "vect_model_store_cost: strided group_size = %d .\n",
946 group_size);
949 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
950 /* Costs of the stores. */
951 if (STMT_VINFO_STRIDED_P (stmt_info)
952 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
954 /* N scalar stores plus extracting the elements. */
955 inside_cost += record_stmt_cost (body_cost_vec,
956 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
957 scalar_store, stmt_info, 0, vect_body);
959 else
960 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
962 if (STMT_VINFO_STRIDED_P (stmt_info))
963 inside_cost += record_stmt_cost (body_cost_vec,
964 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
965 vec_to_scalar, stmt_info, 0, vect_body);
967 if (dump_enabled_p ())
968 dump_printf_loc (MSG_NOTE, vect_location,
969 "vect_model_store_cost: inside_cost = %d, "
970 "prologue_cost = %d .\n", inside_cost, prologue_cost);
974 /* Calculate cost of DR's memory access. */
975 void
976 vect_get_store_cost (struct data_reference *dr, int ncopies,
977 unsigned int *inside_cost,
978 stmt_vector_for_cost *body_cost_vec)
980 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
981 gimple *stmt = DR_STMT (dr);
982 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
984 switch (alignment_support_scheme)
986 case dr_aligned:
988 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
989 vector_store, stmt_info, 0,
990 vect_body);
992 if (dump_enabled_p ())
993 dump_printf_loc (MSG_NOTE, vect_location,
994 "vect_model_store_cost: aligned.\n");
995 break;
998 case dr_unaligned_supported:
1000 /* Here, we assign an additional cost for the unaligned store. */
1001 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1002 unaligned_store, stmt_info,
1003 DR_MISALIGNMENT (dr), vect_body);
1004 if (dump_enabled_p ())
1005 dump_printf_loc (MSG_NOTE, vect_location,
1006 "vect_model_store_cost: unaligned supported by "
1007 "hardware.\n");
1008 break;
1011 case dr_unaligned_unsupported:
1013 *inside_cost = VECT_MAX_COST;
1015 if (dump_enabled_p ())
1016 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1017 "vect_model_store_cost: unsupported access.\n");
1018 break;
1021 default:
1022 gcc_unreachable ();
1027 /* Function vect_model_load_cost
1029 Models cost for loads. In the case of grouped accesses, the last access
1030 has the overhead of the grouped access attributed to it. Since unaligned
1031 accesses are supported for loads, we also account for the costs of the
1032 access scheme chosen. */
1034 void
1035 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1036 bool load_lanes_p, slp_tree slp_node,
1037 stmt_vector_for_cost *prologue_cost_vec,
1038 stmt_vector_for_cost *body_cost_vec)
1040 int group_size;
1041 gimple *first_stmt;
1042 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1043 unsigned int inside_cost = 0, prologue_cost = 0;
1045 /* Grouped accesses? */
1046 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1047 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1049 group_size = vect_cost_group_size (stmt_info);
1050 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1052 /* Not a grouped access. */
1053 else
1055 group_size = 1;
1056 first_dr = dr;
1059 /* We assume that the cost of a single load-lanes instruction is
1060 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1061 access is instead being provided by a load-and-permute operation,
1062 include the cost of the permutes. */
1063 if (!load_lanes_p && group_size > 1
1064 && !STMT_VINFO_STRIDED_P (stmt_info))
1066 /* Uses even and odd extract operations or shuffle operations
1067 for each needed permute. */
1068 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1069 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1070 stmt_info, 0, vect_body);
1072 if (dump_enabled_p ())
1073 dump_printf_loc (MSG_NOTE, vect_location,
1074 "vect_model_load_cost: strided group_size = %d .\n",
1075 group_size);
1078 /* The loads themselves. */
1079 if (STMT_VINFO_STRIDED_P (stmt_info)
1080 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1082 /* N scalar loads plus gathering them into a vector. */
1083 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1084 inside_cost += record_stmt_cost (body_cost_vec,
1085 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1086 scalar_load, stmt_info, 0, vect_body);
1088 else
1089 vect_get_load_cost (first_dr, ncopies,
1090 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1091 || group_size > 1 || slp_node),
1092 &inside_cost, &prologue_cost,
1093 prologue_cost_vec, body_cost_vec, true);
1094 if (STMT_VINFO_STRIDED_P (stmt_info))
1095 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1096 stmt_info, 0, vect_body);
1098 if (dump_enabled_p ())
1099 dump_printf_loc (MSG_NOTE, vect_location,
1100 "vect_model_load_cost: inside_cost = %d, "
1101 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1105 /* Calculate cost of DR's memory access. */
1106 void
1107 vect_get_load_cost (struct data_reference *dr, int ncopies,
1108 bool add_realign_cost, unsigned int *inside_cost,
1109 unsigned int *prologue_cost,
1110 stmt_vector_for_cost *prologue_cost_vec,
1111 stmt_vector_for_cost *body_cost_vec,
1112 bool record_prologue_costs)
1114 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1115 gimple *stmt = DR_STMT (dr);
1116 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1118 switch (alignment_support_scheme)
1120 case dr_aligned:
1122 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1123 stmt_info, 0, vect_body);
1125 if (dump_enabled_p ())
1126 dump_printf_loc (MSG_NOTE, vect_location,
1127 "vect_model_load_cost: aligned.\n");
1129 break;
1131 case dr_unaligned_supported:
1133 /* Here, we assign an additional cost for the unaligned load. */
1134 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1135 unaligned_load, stmt_info,
1136 DR_MISALIGNMENT (dr), vect_body);
1138 if (dump_enabled_p ())
1139 dump_printf_loc (MSG_NOTE, vect_location,
1140 "vect_model_load_cost: unaligned supported by "
1141 "hardware.\n");
1143 break;
1145 case dr_explicit_realign:
1147 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1148 vector_load, stmt_info, 0, vect_body);
1149 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1150 vec_perm, stmt_info, 0, vect_body);
1152 /* FIXME: If the misalignment remains fixed across the iterations of
1153 the containing loop, the following cost should be added to the
1154 prologue costs. */
1155 if (targetm.vectorize.builtin_mask_for_load)
1156 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1157 stmt_info, 0, vect_body);
1159 if (dump_enabled_p ())
1160 dump_printf_loc (MSG_NOTE, vect_location,
1161 "vect_model_load_cost: explicit realign\n");
1163 break;
1165 case dr_explicit_realign_optimized:
1167 if (dump_enabled_p ())
1168 dump_printf_loc (MSG_NOTE, vect_location,
1169 "vect_model_load_cost: unaligned software "
1170 "pipelined.\n");
1172 /* Unaligned software pipeline has a load of an address, an initial
1173 load, and possibly a mask operation to "prime" the loop. However,
1174 if this is an access in a group of loads, which provide grouped
1175 access, then the above cost should only be considered for one
1176 access in the group. Inside the loop, there is a load op
1177 and a realignment op. */
1179 if (add_realign_cost && record_prologue_costs)
1181 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1182 vector_stmt, stmt_info,
1183 0, vect_prologue);
1184 if (targetm.vectorize.builtin_mask_for_load)
1185 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1186 vector_stmt, stmt_info,
1187 0, vect_prologue);
1190 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1191 stmt_info, 0, vect_body);
1192 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1193 stmt_info, 0, vect_body);
1195 if (dump_enabled_p ())
1196 dump_printf_loc (MSG_NOTE, vect_location,
1197 "vect_model_load_cost: explicit realign optimized"
1198 "\n");
1200 break;
1203 case dr_unaligned_unsupported:
1205 *inside_cost = VECT_MAX_COST;
1207 if (dump_enabled_p ())
1208 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1209 "vect_model_load_cost: unsupported access.\n");
1210 break;
1213 default:
1214 gcc_unreachable ();
1218 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1219 the loop preheader for the vectorized stmt STMT. */
1221 static void
1222 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1224 if (gsi)
1225 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1226 else
1228 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1229 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1231 if (loop_vinfo)
1233 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1234 basic_block new_bb;
1235 edge pe;
1237 if (nested_in_vect_loop_p (loop, stmt))
1238 loop = loop->inner;
1240 pe = loop_preheader_edge (loop);
1241 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1242 gcc_assert (!new_bb);
1244 else
1246 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1247 basic_block bb;
1248 gimple_stmt_iterator gsi_bb_start;
1250 gcc_assert (bb_vinfo);
1251 bb = BB_VINFO_BB (bb_vinfo);
1252 gsi_bb_start = gsi_after_labels (bb);
1253 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1257 if (dump_enabled_p ())
1259 dump_printf_loc (MSG_NOTE, vect_location,
1260 "created new init_stmt: ");
1261 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1265 /* Function vect_init_vector.
1267 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1268 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1269 a vector type, a vector with all elements equal to VAL is created first.
1270 Place the initialization at GSI if it is not NULL. Otherwise, place the
1271 initialization at the loop preheader.
1272 Return the DEF of INIT_STMT.
1273 It will be used in the vectorization of STMT. */
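/* As an illustration (the SSA name below is made up): calling this function
   with a scalar VAL of 7, a V4SI vector TYPE and GSI == NULL emits

       cst_1 = { 7, 7, 7, 7 };

   on the loop preheader edge and returns cst_1.  */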
1275 tree
1276 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1278 gimple *init_stmt;
1279 tree new_temp;
1281 /* We abuse this function to push something into an SSA name with initial value 'val'. */
1282 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1284 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1285 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1287 /* A scalar boolean value should be transformed into
1288 an all-zeros or all-ones value before building a vector. */
1289 if (VECTOR_BOOLEAN_TYPE_P (type))
1291 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1292 tree false_val = build_zero_cst (TREE_TYPE (type));
1294 if (CONSTANT_CLASS_P (val))
1295 val = integer_zerop (val) ? false_val : true_val;
1296 else
1298 new_temp = make_ssa_name (TREE_TYPE (type));
1299 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1300 val, true_val, false_val);
1301 vect_init_vector_1 (stmt, init_stmt, gsi);
1302 val = new_temp;
1305 else if (CONSTANT_CLASS_P (val))
1306 val = fold_convert (TREE_TYPE (type), val);
1307 else
1309 new_temp = make_ssa_name (TREE_TYPE (type));
1310 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1311 init_stmt = gimple_build_assign (new_temp,
1312 fold_build1 (VIEW_CONVERT_EXPR,
1313 TREE_TYPE (type),
1314 val));
1315 else
1316 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1317 vect_init_vector_1 (stmt, init_stmt, gsi);
1318 val = new_temp;
1321 val = build_vector_from_val (type, val);
1324 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1325 init_stmt = gimple_build_assign (new_temp, val);
1326 vect_init_vector_1 (stmt, init_stmt, gsi);
1327 return new_temp;
1330 /* Function vect_get_vec_def_for_operand_1.
1332 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1333 DT that will be used in the vectorized stmt. */
1335 tree
1336 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1338 tree vec_oprnd;
1339 gimple *vec_stmt;
1340 stmt_vec_info def_stmt_info = NULL;
1342 switch (dt)
1344 /* operand is a constant or a loop invariant. */
1345 case vect_constant_def:
1346 case vect_external_def:
1347 /* Code should use vect_get_vec_def_for_operand. */
1348 gcc_unreachable ();
1350 /* operand is defined inside the loop. */
1351 case vect_internal_def:
1353 /* Get the def from the vectorized stmt. */
1354 def_stmt_info = vinfo_for_stmt (def_stmt);
1356 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1357 /* Get vectorized pattern statement. */
1358 if (!vec_stmt
1359 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1360 && !STMT_VINFO_RELEVANT (def_stmt_info))
1361 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1362 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1363 gcc_assert (vec_stmt);
1364 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1365 vec_oprnd = PHI_RESULT (vec_stmt);
1366 else if (is_gimple_call (vec_stmt))
1367 vec_oprnd = gimple_call_lhs (vec_stmt);
1368 else
1369 vec_oprnd = gimple_assign_lhs (vec_stmt);
1370 return vec_oprnd;
1373 /* operand is defined by a loop header phi - reduction */
1374 case vect_reduction_def:
1375 case vect_double_reduction_def:
1376 case vect_nested_cycle:
1377 /* Code should use get_initial_def_for_reduction. */
1378 gcc_unreachable ();
1380 /* operand is defined by loop-header phi - induction. */
1381 case vect_induction_def:
1383 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1385 /* Get the def from the vectorized stmt. */
1386 def_stmt_info = vinfo_for_stmt (def_stmt);
1387 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1388 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1389 vec_oprnd = PHI_RESULT (vec_stmt);
1390 else
1391 vec_oprnd = gimple_get_lhs (vec_stmt);
1392 return vec_oprnd;
1395 default:
1396 gcc_unreachable ();
1401 /* Function vect_get_vec_def_for_operand.
1403 OP is an operand in STMT. This function returns a (vector) def that will be
1404 used in the vectorized stmt for STMT.
1406 In the case that OP is an SSA_NAME which is defined in the loop, then
1407 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1409 In case OP is an invariant or constant, a new stmt that creates a vector def
1410 needs to be introduced. VECTYPE may be used to specify a required type for
1411 the vector invariant. */
1413 tree
1414 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1416 gimple *def_stmt;
1417 enum vect_def_type dt;
1418 bool is_simple_use;
1419 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1420 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1422 if (dump_enabled_p ())
1424 dump_printf_loc (MSG_NOTE, vect_location,
1425 "vect_get_vec_def_for_operand: ");
1426 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1427 dump_printf (MSG_NOTE, "\n");
1430 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1431 gcc_assert (is_simple_use);
1432 if (def_stmt && dump_enabled_p ())
1434 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1435 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1438 if (dt == vect_constant_def || dt == vect_external_def)
1440 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1441 tree vector_type;
1443 if (vectype)
1444 vector_type = vectype;
1445 else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
1446 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1447 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1448 else
1449 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1451 gcc_assert (vector_type);
1452 return vect_init_vector (stmt, op, vector_type, NULL);
1454 else
1455 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1459 /* Function vect_get_vec_def_for_stmt_copy
1461 Return a vector-def for an operand. This function is used when the
1462 vectorized stmt to be created (by the caller to this function) is a "copy"
1463 created in case the vectorized result cannot fit in one vector, and several
1464 copies of the vector-stmt are required. In this case the vector-def is
1465 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1466 of the stmt that defines VEC_OPRND.
1467 DT is the type of the vector def VEC_OPRND.
1469 Context:
1470 In case the vectorization factor (VF) is bigger than the number
1471 of elements that can fit in a vectype (nunits), we have to generate
1472 more than one vector stmt to vectorize the scalar stmt. This situation
1473 arises when there are multiple data-types operated upon in the loop; the
1474 smallest data-type determines the VF, and as a result, when vectorizing
1475 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1476 vector stmt (each computing a vector of 'nunits' results, and together
1477 computing 'VF' results in each iteration). This function is called when
1478 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1479 which VF=16 and nunits=4, so the number of copies required is 4):
1481 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1483 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1484 VS1.1: vx.1 = memref1 VS1.2
1485 VS1.2: vx.2 = memref2 VS1.3
1486 VS1.3: vx.3 = memref3
1488 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1489 VSnew.1: vz1 = vx.1 + ... VSnew.2
1490 VSnew.2: vz2 = vx.2 + ... VSnew.3
1491 VSnew.3: vz3 = vx.3 + ...
1493 The vectorization of S1 is explained in vectorizable_load.
1494 The vectorization of S2:
1495 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1496 the function 'vect_get_vec_def_for_operand' is called to
1497 get the relevant vector-def for each operand of S2. For operand x it
1498 returns the vector-def 'vx.0'.
1500 To create the remaining copies of the vector-stmt (VSnew.j), this
1501 function is called to get the relevant vector-def for each operand. It is
1502 obtained from the respective VS1.j stmt, which is recorded in the
1503 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1505 For example, to obtain the vector-def 'vx.1' in order to create the
1506 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1507 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1508 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1509 and return its def ('vx.1').
1510 Overall, to create the above sequence this function will be called 3 times:
1511 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1512 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1513 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1515 tree
1516 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1518 gimple *vec_stmt_for_operand;
1519 stmt_vec_info def_stmt_info;
1521 /* Do nothing; can reuse same def. */
1522 if (dt == vect_external_def || dt == vect_constant_def )
1523 return vec_oprnd;
1525 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1526 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1527 gcc_assert (def_stmt_info);
1528 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1529 gcc_assert (vec_stmt_for_operand);
1530 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1531 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1532 else
1533 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1534 return vec_oprnd;
1538 /* Get vectorized definitions for the operands to create a copy of an original
1539 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1541 static void
1542 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1543 vec<tree> *vec_oprnds0,
1544 vec<tree> *vec_oprnds1)
1546 tree vec_oprnd = vec_oprnds0->pop ();
1548 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1549 vec_oprnds0->quick_push (vec_oprnd);
1551 if (vec_oprnds1 && vec_oprnds1->length ())
1553 vec_oprnd = vec_oprnds1->pop ();
1554 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1555 vec_oprnds1->quick_push (vec_oprnd);
1560 /* Get vectorized definitions for OP0 and OP1.
1561 REDUC_INDEX is the index of reduction operand in case of reduction,
1562 and -1 otherwise. */
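/* For a non-SLP stmt such as z = x + y (names illustrative), this simply
   fills VEC_OPRNDS0 and VEC_OPRNDS1 with one vector def each, obtained via
   vect_get_vec_def_for_operand; for an SLP node the defs are taken from
   vect_get_slp_defs instead.  */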
1564 void
1565 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1566 vec<tree> *vec_oprnds0,
1567 vec<tree> *vec_oprnds1,
1568 slp_tree slp_node, int reduc_index)
1570 if (slp_node)
1572 int nops = (op1 == NULL_TREE) ? 1 : 2;
1573 auto_vec<tree> ops (nops);
1574 auto_vec<vec<tree> > vec_defs (nops);
1576 ops.quick_push (op0);
1577 if (op1)
1578 ops.quick_push (op1);
1580 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1582 *vec_oprnds0 = vec_defs[0];
1583 if (op1)
1584 *vec_oprnds1 = vec_defs[1];
1586 else
1588 tree vec_oprnd;
1590 vec_oprnds0->create (1);
1591 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1592 vec_oprnds0->quick_push (vec_oprnd);
1594 if (op1)
1596 vec_oprnds1->create (1);
1597 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1598 vec_oprnds1->quick_push (vec_oprnd);
1604 /* Function vect_finish_stmt_generation.
1606 Insert a new stmt. */
1608 void
1609 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1610 gimple_stmt_iterator *gsi)
1612 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1613 vec_info *vinfo = stmt_info->vinfo;
1615 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1617 if (!gsi_end_p (*gsi)
1618 && gimple_has_mem_ops (vec_stmt))
1620 gimple *at_stmt = gsi_stmt (*gsi);
1621 tree vuse = gimple_vuse (at_stmt);
1622 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1624 tree vdef = gimple_vdef (at_stmt);
1625 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1626 /* If we have an SSA vuse and insert a store, update virtual
1627 SSA form to avoid triggering the renamer. Do so only
1628 if we can easily see all uses - which is what almost always
1629 happens with the way vectorized stmts are inserted. */
1630 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1631 && ((is_gimple_assign (vec_stmt)
1632 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1633 || (is_gimple_call (vec_stmt)
1634 && !(gimple_call_flags (vec_stmt)
1635 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1637 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1638 gimple_set_vdef (vec_stmt, new_vdef);
1639 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1643 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1645 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1647 if (dump_enabled_p ())
1649 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1650 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1653 gimple_set_location (vec_stmt, gimple_location (stmt));
1655 /* While EH edges will generally prevent vectorization, stmt might
1656 e.g. be in a must-not-throw region. Ensure newly created stmts
1657 that could throw are part of the same region. */
1658 int lp_nr = lookup_stmt_eh_lp (stmt);
1659 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1660 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1663 /* We want to vectorize a call to combined function CFN with function
1664 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1665 as the types of all inputs. Check whether this is possible using
1666 an internal function, returning its code if so or IFN_LAST if not. */
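/* For instance, a call to the sqrt () built-in has IFN_SQRT as its
   associated internal function; if the target supports IFN_SQRT directly
   for the given vector types, IFN_SQRT is returned and the call can be
   vectorized as that internal function.  */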
1668 static internal_fn
1669 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1670 tree vectype_out, tree vectype_in)
1672 internal_fn ifn;
1673 if (internal_fn_p (cfn))
1674 ifn = as_internal_fn (cfn);
1675 else
1676 ifn = associated_internal_fn (fndecl);
1677 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1679 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1680 if (info.vectorizable)
1682 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1683 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1684 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1685 OPTIMIZE_FOR_SPEED))
1686 return ifn;
1689 return IFN_LAST;
1693 static tree permute_vec_elements (tree, tree, tree, gimple *,
1694 gimple_stmt_iterator *);
1697 /* Function vectorizable_mask_load_store.
1699 Check if STMT performs a conditional load or store that can be vectorized.
1700 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1701 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1702 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1704 static bool
1705 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
1706 gimple **vec_stmt, slp_tree slp_node)
1708 tree vec_dest = NULL;
1709 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1710 stmt_vec_info prev_stmt_info;
1711 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1712 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1713 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1714 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1715 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1716 tree rhs_vectype = NULL_TREE;
1717 tree mask_vectype;
1718 tree elem_type;
1719 gimple *new_stmt;
1720 tree dummy;
1721 tree dataref_ptr = NULL_TREE;
1722 gimple *ptr_incr;
1723 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1724 int ncopies;
1725 int i, j;
1726 bool inv_p;
1727 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1728 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1729 int gather_scale = 1;
1730 enum vect_def_type gather_dt = vect_unknown_def_type;
1731 bool is_store;
1732 tree mask;
1733 gimple *def_stmt;
1734 enum vect_def_type dt;
1736 if (slp_node != NULL)
1737 return false;
1739 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1740 gcc_assert (ncopies >= 1);
1742 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1743 mask = gimple_call_arg (stmt, 2);
1745 if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
1746 return false;
1748 /* FORNOW. This restriction should be relaxed. */
1749 if (nested_in_vect_loop && ncopies > 1)
1751 if (dump_enabled_p ())
1752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1753 "multiple types in nested loop.");
1754 return false;
1757 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1758 return false;
1760 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
1761 && ! vec_stmt)
1762 return false;
1764 if (!STMT_VINFO_DATA_REF (stmt_info))
1765 return false;
1767 elem_type = TREE_TYPE (vectype);
1769 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1770 return false;
1772 if (STMT_VINFO_STRIDED_P (stmt_info))
1773 return false;
1775 if (TREE_CODE (mask) != SSA_NAME)
1776 return false;
1778 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
1779 return false;
1781 if (!mask_vectype)
1782 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
1784 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
1785 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
1786 return false;
1788 if (is_store)
1790 tree rhs = gimple_call_arg (stmt, 3);
1791 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
1792 return false;
1795 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1797 gimple *def_stmt;
1798 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
1799 &gather_off, &gather_scale);
1800 gcc_assert (gather_decl);
1801 if (!vect_is_simple_use (gather_off, loop_vinfo, &def_stmt, &gather_dt,
1802 &gather_off_vectype))
1804 if (dump_enabled_p ())
1805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1806 "gather index use not simple.");
1807 return false;
1810 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1811 tree masktype
1812 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1813 if (TREE_CODE (masktype) == INTEGER_TYPE)
1815 if (dump_enabled_p ())
1816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1817 "masked gather with integer mask not supported.");
1818 return false;
1821 else if (tree_int_cst_compare (nested_in_vect_loop
1822 ? STMT_VINFO_DR_STEP (stmt_info)
1823 : DR_STEP (dr), size_zero_node) <= 0)
1824 return false;
1825 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1826 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
1827 TYPE_MODE (mask_vectype),
1828 !is_store)
1829 || (rhs_vectype
1830 && !useless_type_conversion_p (vectype, rhs_vectype)))
1831 return false;
1833 if (!vec_stmt) /* transformation not required. */
1835 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1836 if (is_store)
1837 vect_model_store_cost (stmt_info, ncopies, false, dt,
1838 NULL, NULL, NULL);
1839 else
1840 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1841 return true;
1844 /** Transform. **/
1846 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1848 tree vec_oprnd0 = NULL_TREE, op;
1849 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1850 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1851 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1852 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1853 tree mask_perm_mask = NULL_TREE;
1854 edge pe = loop_preheader_edge (loop);
1855 gimple_seq seq;
1856 basic_block new_bb;
1857 enum { NARROW, NONE, WIDEN } modifier;
1858 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1860 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1861 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1862 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1863 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1864 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1865 scaletype = TREE_VALUE (arglist);
1866 gcc_checking_assert (types_compatible_p (srctype, rettype)
1867 && types_compatible_p (srctype, masktype));
1869 if (nunits == gather_off_nunits)
1870 modifier = NONE;
1871 else if (nunits == gather_off_nunits / 2)
1873 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1874 modifier = WIDEN;
1876 for (i = 0; i < gather_off_nunits; ++i)
1877 sel[i] = i | nunits;
1879 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1881 else if (nunits == gather_off_nunits * 2)
1883 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1884 modifier = NARROW;
1886 for (i = 0; i < nunits; ++i)
1887 sel[i] = i < gather_off_nunits
1888 ? i : i + nunits - gather_off_nunits;
1890 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1891 ncopies *= 2;
1892 for (i = 0; i < nunits; ++i)
1893 sel[i] = i | gather_off_nunits;
1894 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1896 else
1897 gcc_unreachable ();
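/* A hedged illustration of the cases above (the lane counts are made up
   for exposition, not taken from any particular target): with nunits == 4
   data elements and gather_off_nunits == 8 offsets we take the WIDEN path
   and perm_mask is built from sel == { 4, 5, 6, 7, 4, 5, 6, 7 }, so every
   odd copy reuses the high half of the same offset vector.  In the NARROW
   path each gather produces only half of a data vector, hence ncopies is
   doubled and pairs of results are later combined by
   permute_vec_elements.  */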
1899 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1901 ptr = fold_convert (ptrtype, gather_base);
1902 if (!is_gimple_min_invariant (ptr))
1904 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1905 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1906 gcc_assert (!new_bb);
1909 scale = build_int_cst (scaletype, gather_scale);
1911 prev_stmt_info = NULL;
1912 for (j = 0; j < ncopies; ++j)
1914 if (modifier == WIDEN && (j & 1))
1915 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1916 perm_mask, stmt, gsi);
1917 else if (j == 0)
1918 op = vec_oprnd0
1919 = vect_get_vec_def_for_operand (gather_off, stmt);
1920 else
1921 op = vec_oprnd0
1922 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1924 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1926 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1927 == TYPE_VECTOR_SUBPARTS (idxtype));
1928 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
1929 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1930 new_stmt
1931 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1932 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1933 op = var;
1936 if (mask_perm_mask && (j & 1))
1937 mask_op = permute_vec_elements (mask_op, mask_op,
1938 mask_perm_mask, stmt, gsi);
1939 else
1941 if (j == 0)
1942 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
1943 else
1945 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
1946 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1949 mask_op = vec_mask;
1950 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1952 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1953 == TYPE_VECTOR_SUBPARTS (masktype));
1954 var = vect_get_new_ssa_name (masktype, vect_simple_var);
1955 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
1956 new_stmt
1957 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
1958 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1959 mask_op = var;
1963 new_stmt
1964 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
1965 scale);
1967 if (!useless_type_conversion_p (vectype, rettype))
1969 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
1970 == TYPE_VECTOR_SUBPARTS (rettype));
1971 op = vect_get_new_ssa_name (rettype, vect_simple_var);
1972 gimple_call_set_lhs (new_stmt, op);
1973 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1974 var = make_ssa_name (vec_dest);
1975 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
1976 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1978 else
1980 var = make_ssa_name (vec_dest, new_stmt);
1981 gimple_call_set_lhs (new_stmt, var);
1984 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1986 if (modifier == NARROW)
1988 if ((j & 1) == 0)
1990 prev_res = var;
1991 continue;
1993 var = permute_vec_elements (prev_res, var,
1994 perm_mask, stmt, gsi);
1995 new_stmt = SSA_NAME_DEF_STMT (var);
1998 if (prev_stmt_info == NULL)
1999 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2000 else
2001 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2002 prev_stmt_info = vinfo_for_stmt (new_stmt);
2005 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2006 from the IL. */
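/* Roughly what happens below (an illustration, not a quote from any dump):
   a scalar  lhs_1 = MASK_LOAD (ptr, align, mask)  whose value is now
   produced by the vector statements is rewritten as  lhs_1 = 0, so the
   masked load cannot survive to RTL expansion even when DCE is
   disabled.  */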
2007 if (STMT_VINFO_RELATED_STMT (stmt_info))
2009 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2010 stmt_info = vinfo_for_stmt (stmt);
2012 tree lhs = gimple_call_lhs (stmt);
2013 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2014 set_vinfo_for_stmt (new_stmt, stmt_info);
2015 set_vinfo_for_stmt (stmt, NULL);
2016 STMT_VINFO_STMT (stmt_info) = new_stmt;
2017 gsi_replace (gsi, new_stmt, true);
2018 return true;
2020 else if (is_store)
2022 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2023 prev_stmt_info = NULL;
2024 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2025 for (i = 0; i < ncopies; i++)
2027 unsigned align, misalign;
2029 if (i == 0)
2031 tree rhs = gimple_call_arg (stmt, 3);
2032 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2033 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2034 /* We should have caught mismatched types earlier. */
2035 gcc_assert (useless_type_conversion_p (vectype,
2036 TREE_TYPE (vec_rhs)));
2037 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2038 NULL_TREE, &dummy, gsi,
2039 &ptr_incr, false, &inv_p);
2040 gcc_assert (!inv_p);
2042 else
2044 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2045 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2046 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2047 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2048 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2049 TYPE_SIZE_UNIT (vectype));
2052 align = TYPE_ALIGN_UNIT (vectype);
2053 if (aligned_access_p (dr))
2054 misalign = 0;
2055 else if (DR_MISALIGNMENT (dr) == -1)
2057 align = TYPE_ALIGN_UNIT (elem_type);
2058 misalign = 0;
2060 else
2061 misalign = DR_MISALIGNMENT (dr);
2062 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2063 misalign);
2064 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2065 misalign ? misalign & -misalign : align);
2066 new_stmt
2067 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2068 ptr, vec_mask, vec_rhs);
2069 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2070 if (i == 0)
2071 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2072 else
2073 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2074 prev_stmt_info = vinfo_for_stmt (new_stmt);
2077 else
2079 tree vec_mask = NULL_TREE;
2080 prev_stmt_info = NULL;
2081 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2082 for (i = 0; i < ncopies; i++)
2084 unsigned align, misalign;
2086 if (i == 0)
2088 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2089 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2090 NULL_TREE, &dummy, gsi,
2091 &ptr_incr, false, &inv_p);
2092 gcc_assert (!inv_p);
2094 else
2096 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2097 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2098 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2099 TYPE_SIZE_UNIT (vectype));
2102 align = TYPE_ALIGN_UNIT (vectype);
2103 if (aligned_access_p (dr))
2104 misalign = 0;
2105 else if (DR_MISALIGNMENT (dr) == -1)
2107 align = TYPE_ALIGN_UNIT (elem_type);
2108 misalign = 0;
2110 else
2111 misalign = DR_MISALIGNMENT (dr);
2112 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2113 misalign);
2114 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2115 misalign ? misalign & -misalign : align);
2116 new_stmt
2117 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2118 ptr, vec_mask);
2119 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2120 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2121 if (i == 0)
2122 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2123 else
2124 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2125 prev_stmt_info = vinfo_for_stmt (new_stmt);
2129 if (!is_store)
2131 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2132 from the IL. */
2133 if (STMT_VINFO_RELATED_STMT (stmt_info))
2135 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2136 stmt_info = vinfo_for_stmt (stmt);
2138 tree lhs = gimple_call_lhs (stmt);
2139 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2140 set_vinfo_for_stmt (new_stmt, stmt_info);
2141 set_vinfo_for_stmt (stmt, NULL);
2142 STMT_VINFO_STMT (stmt_info) = new_stmt;
2143 gsi_replace (gsi, new_stmt, true);
2146 return true;
2149 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2150 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2151 in a single step. On success, store the binary pack code in
2152 *CONVERT_CODE. */
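/* For instance (an illustration, not part of the original comment): two
   V4SI inputs can be narrowed to one V8HI output by a single
   VEC_PACK_TRUNC_EXPR, so *CONVERT_CODE would be set to that pack code,
   whereas narrowing SImode elements all the way down to QImode needs an
   intermediate type and is rejected through the MULTI_STEP_CVT check.  */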
2154 static bool
2155 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2156 tree_code *convert_code)
2158 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2159 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2160 return false;
2162 tree_code code;
2163 int multi_step_cvt = 0;
2164 auto_vec <tree, 8> interm_types;
2165 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2166 &code, &multi_step_cvt,
2167 &interm_types)
2168 || multi_step_cvt)
2169 return false;
2171 *convert_code = code;
2172 return true;
2175 /* Function vectorizable_call.
2177 Check if GS performs a function call that can be vectorized.
2178 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2179 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2180 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
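/* A sketch of the common case handled below (illustrative only): a call
   such as  a = sqrt (b)  inside the loop becomes one call per vector
   copy, either an internal-function call (e.g. IFN_SQRT) when the target
   supports it directly, or a call to the target builtin returned by
   targetm.vectorize.builtin_vectorized_function.  */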
2182 static bool
2183 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2184 slp_tree slp_node)
2186 gcall *stmt;
2187 tree vec_dest;
2188 tree scalar_dest;
2189 tree op, type;
2190 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2191 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2192 tree vectype_out, vectype_in;
2193 int nunits_in;
2194 int nunits_out;
2195 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2196 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2197 vec_info *vinfo = stmt_info->vinfo;
2198 tree fndecl, new_temp, rhs_type;
2199 gimple *def_stmt;
2200 enum vect_def_type dt[3]
2201 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2202 gimple *new_stmt = NULL;
2203 int ncopies, j;
2204 vec<tree> vargs = vNULL;
2205 enum { NARROW, NONE, WIDEN } modifier;
2206 size_t i, nargs;
2207 tree lhs;
2209 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2210 return false;
2212 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2213 && ! vec_stmt)
2214 return false;
2216 /* Is GS a vectorizable call? */
2217 stmt = dyn_cast <gcall *> (gs);
2218 if (!stmt)
2219 return false;
2221 if (gimple_call_internal_p (stmt)
2222 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2223 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2224 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2225 slp_node);
2227 if (gimple_call_lhs (stmt) == NULL_TREE
2228 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2229 return false;
2231 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2233 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2235 /* Process function arguments. */
2236 rhs_type = NULL_TREE;
2237 vectype_in = NULL_TREE;
2238 nargs = gimple_call_num_args (stmt);
2240 /* Bail out if the function has more than three arguments; we do not have
2241 interesting builtin functions to vectorize with more than two arguments
2242 except for fma. Zero arguments is not useful either. */
2243 if (nargs == 0 || nargs > 3)
2244 return false;
2246 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2247 if (gimple_call_internal_p (stmt)
2248 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2250 nargs = 0;
2251 rhs_type = unsigned_type_node;
2254 for (i = 0; i < nargs; i++)
2256 tree opvectype;
2258 op = gimple_call_arg (stmt, i);
2260 /* We can only handle calls with arguments of the same type. */
2261 if (rhs_type
2262 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2264 if (dump_enabled_p ())
2265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2266 "argument types differ.\n");
2267 return false;
2269 if (!rhs_type)
2270 rhs_type = TREE_TYPE (op);
2272 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2274 if (dump_enabled_p ())
2275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2276 "use not simple.\n");
2277 return false;
2280 if (!vectype_in)
2281 vectype_in = opvectype;
2282 else if (opvectype
2283 && opvectype != vectype_in)
2285 if (dump_enabled_p ())
2286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2287 "argument vector types differ.\n");
2288 return false;
2291 /* If all arguments are external or constant defs use a vector type with
2292 the same size as the output vector type. */
2293 if (!vectype_in)
2294 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2295 if (vec_stmt)
2296 gcc_assert (vectype_in);
2297 if (!vectype_in)
2299 if (dump_enabled_p ())
2301 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2302 "no vectype for scalar type ");
2303 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2304 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2307 return false;
2310 /* FORNOW */
2311 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2312 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2313 if (nunits_in == nunits_out / 2)
2314 modifier = NARROW;
2315 else if (nunits_out == nunits_in)
2316 modifier = NONE;
2317 else if (nunits_out == nunits_in / 2)
2318 modifier = WIDEN;
2319 else
2320 return false;
2322 /* We only handle functions that do not read or clobber memory. */
2323 if (gimple_vuse (stmt))
2325 if (dump_enabled_p ())
2326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2327 "function reads from or writes to memory.\n");
2328 return false;
2331 /* For now, we only vectorize functions if a target specific builtin
2332 is available. TODO -- in some cases, it might be profitable to
2333 insert the calls for pieces of the vector, in order to be able
2334 to vectorize other operations in the loop. */
2335 fndecl = NULL_TREE;
2336 internal_fn ifn = IFN_LAST;
2337 combined_fn cfn = gimple_call_combined_fn (stmt);
2338 tree callee = gimple_call_fndecl (stmt);
2340 /* First try using an internal function. */
2341 tree_code convert_code = ERROR_MARK;
2342 if (cfn != CFN_LAST
2343 && (modifier == NONE
2344 || (modifier == NARROW
2345 && simple_integer_narrowing (vectype_out, vectype_in,
2346 &convert_code))))
2347 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2348 vectype_in);
2350 /* If that fails, try asking for a target-specific built-in function. */
2351 if (ifn == IFN_LAST)
2353 if (cfn != CFN_LAST)
2354 fndecl = targetm.vectorize.builtin_vectorized_function
2355 (cfn, vectype_out, vectype_in);
2356 else
2357 fndecl = targetm.vectorize.builtin_md_vectorized_function
2358 (callee, vectype_out, vectype_in);
2361 if (ifn == IFN_LAST && !fndecl)
2363 if (cfn == CFN_GOMP_SIMD_LANE
2364 && !slp_node
2365 && loop_vinfo
2366 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2367 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2368 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2369 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2371 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2372 { 0, 1, 2, ... vf - 1 } vector. */
2373 gcc_assert (nargs == 0);
2375 else
2377 if (dump_enabled_p ())
2378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2379 "function is not vectorizable.\n");
2380 return false;
2384 if (slp_node)
2385 ncopies = 1;
2386 else if (modifier == NARROW && ifn == IFN_LAST)
2387 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2388 else
2389 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2391 /* Sanity check: make sure that at least one copy of the vectorized stmt
2392 needs to be generated. */
2393 gcc_assert (ncopies >= 1);
2395 if (!vec_stmt) /* transformation not required. */
2397 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2398 if (dump_enabled_p ())
2399 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2400 "\n");
2401 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2402 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2403 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2404 vec_promote_demote, stmt_info, 0, vect_body);
2406 return true;
2409 /** Transform. **/
2411 if (dump_enabled_p ())
2412 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2414 /* Handle def. */
2415 scalar_dest = gimple_call_lhs (stmt);
2416 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2418 prev_stmt_info = NULL;
2419 if (modifier == NONE || ifn != IFN_LAST)
2421 tree prev_res = NULL_TREE;
2422 for (j = 0; j < ncopies; ++j)
2424 /* Build argument list for the vectorized call. */
2425 if (j == 0)
2426 vargs.create (nargs);
2427 else
2428 vargs.truncate (0);
2430 if (slp_node)
2432 auto_vec<vec<tree> > vec_defs (nargs);
2433 vec<tree> vec_oprnds0;
2435 for (i = 0; i < nargs; i++)
2436 vargs.quick_push (gimple_call_arg (stmt, i));
2437 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2438 vec_oprnds0 = vec_defs[0];
2440 /* Arguments are ready. Create the new vector stmt. */
2441 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2443 size_t k;
2444 for (k = 0; k < nargs; k++)
2446 vec<tree> vec_oprndsk = vec_defs[k];
2447 vargs[k] = vec_oprndsk[i];
2449 if (modifier == NARROW)
2451 tree half_res = make_ssa_name (vectype_in);
2452 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2453 gimple_call_set_lhs (new_stmt, half_res);
2454 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2455 if ((i & 1) == 0)
2457 prev_res = half_res;
2458 continue;
2460 new_temp = make_ssa_name (vec_dest);
2461 new_stmt = gimple_build_assign (new_temp, convert_code,
2462 prev_res, half_res);
2464 else
2466 if (ifn != IFN_LAST)
2467 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2468 else
2469 new_stmt = gimple_build_call_vec (fndecl, vargs);
2470 new_temp = make_ssa_name (vec_dest, new_stmt);
2471 gimple_call_set_lhs (new_stmt, new_temp);
2473 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2474 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2477 for (i = 0; i < nargs; i++)
2479 vec<tree> vec_oprndsi = vec_defs[i];
2480 vec_oprndsi.release ();
2482 continue;
2485 for (i = 0; i < nargs; i++)
2487 op = gimple_call_arg (stmt, i);
2488 if (j == 0)
2489 vec_oprnd0
2490 = vect_get_vec_def_for_operand (op, stmt);
2491 else
2493 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2494 vec_oprnd0
2495 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2498 vargs.quick_push (vec_oprnd0);
2501 if (gimple_call_internal_p (stmt)
2502 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2504 tree *v = XALLOCAVEC (tree, nunits_out);
2505 int k;
2506 for (k = 0; k < nunits_out; ++k)
2507 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2508 tree cst = build_vector (vectype_out, v);
2509 tree new_var
2510 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2511 gimple *init_stmt = gimple_build_assign (new_var, cst);
2512 vect_init_vector_1 (stmt, init_stmt, NULL);
2513 new_temp = make_ssa_name (vec_dest);
2514 new_stmt = gimple_build_assign (new_temp, new_var);
2516 else if (modifier == NARROW)
2518 tree half_res = make_ssa_name (vectype_in);
2519 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2520 gimple_call_set_lhs (new_stmt, half_res);
2521 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2522 if ((j & 1) == 0)
2524 prev_res = half_res;
2525 continue;
2527 new_temp = make_ssa_name (vec_dest);
2528 new_stmt = gimple_build_assign (new_temp, convert_code,
2529 prev_res, half_res);
2531 else
2533 if (ifn != IFN_LAST)
2534 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2535 else
2536 new_stmt = gimple_build_call_vec (fndecl, vargs);
2537 new_temp = make_ssa_name (vec_dest, new_stmt);
2538 gimple_call_set_lhs (new_stmt, new_temp);
2540 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2542 if (j == (modifier == NARROW ? 1 : 0))
2543 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2544 else
2545 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2547 prev_stmt_info = vinfo_for_stmt (new_stmt);
2550 else if (modifier == NARROW)
2552 for (j = 0; j < ncopies; ++j)
2554 /* Build argument list for the vectorized call. */
2555 if (j == 0)
2556 vargs.create (nargs * 2);
2557 else
2558 vargs.truncate (0);
2560 if (slp_node)
2562 auto_vec<vec<tree> > vec_defs (nargs);
2563 vec<tree> vec_oprnds0;
2565 for (i = 0; i < nargs; i++)
2566 vargs.quick_push (gimple_call_arg (stmt, i));
2567 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2568 vec_oprnds0 = vec_defs[0];
2570 /* Arguments are ready. Create the new vector stmt. */
2571 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2573 size_t k;
2574 vargs.truncate (0);
2575 for (k = 0; k < nargs; k++)
2577 vec<tree> vec_oprndsk = vec_defs[k];
2578 vargs.quick_push (vec_oprndsk[i]);
2579 vargs.quick_push (vec_oprndsk[i + 1]);
2581 if (ifn != IFN_LAST)
2582 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2583 else
2584 new_stmt = gimple_build_call_vec (fndecl, vargs);
2585 new_temp = make_ssa_name (vec_dest, new_stmt);
2586 gimple_call_set_lhs (new_stmt, new_temp);
2587 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2588 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2591 for (i = 0; i < nargs; i++)
2593 vec<tree> vec_oprndsi = vec_defs[i];
2594 vec_oprndsi.release ();
2596 continue;
2599 for (i = 0; i < nargs; i++)
2601 op = gimple_call_arg (stmt, i);
2602 if (j == 0)
2604 vec_oprnd0
2605 = vect_get_vec_def_for_operand (op, stmt);
2606 vec_oprnd1
2607 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2609 else
2611 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2612 vec_oprnd0
2613 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2614 vec_oprnd1
2615 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2618 vargs.quick_push (vec_oprnd0);
2619 vargs.quick_push (vec_oprnd1);
2622 new_stmt = gimple_build_call_vec (fndecl, vargs);
2623 new_temp = make_ssa_name (vec_dest, new_stmt);
2624 gimple_call_set_lhs (new_stmt, new_temp);
2625 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2627 if (j == 0)
2628 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2629 else
2630 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2632 prev_stmt_info = vinfo_for_stmt (new_stmt);
2635 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2637 else
2638 /* No current target implements this case. */
2639 return false;
2641 vargs.release ();
2643 /* The call in STMT might prevent it from being removed in DCE.
2644 We cannot remove it here, however, because of the way the SSA name
2645 it defines is mapped to the new definition. So just replace the
2646 rhs of the statement with something harmless. */
2648 if (slp_node)
2649 return true;
2651 type = TREE_TYPE (scalar_dest);
2652 if (is_pattern_stmt_p (stmt_info))
2653 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2654 else
2655 lhs = gimple_call_lhs (stmt);
2657 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2658 set_vinfo_for_stmt (new_stmt, stmt_info);
2659 set_vinfo_for_stmt (stmt, NULL);
2660 STMT_VINFO_STMT (stmt_info) = new_stmt;
2661 gsi_replace (gsi, new_stmt, false);
2663 return true;
2667 struct simd_call_arg_info
2669 tree vectype;
2670 tree op;
2671 enum vect_def_type dt;
2672 HOST_WIDE_INT linear_step;
2673 unsigned int align;
2674 bool simd_lane_linear;
2677 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2678 is linear within simd lane (but not within whole loop), note it in
2679 *ARGINFO. */
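/* Roughly (an illustrative reading of the loop below): for something like
   p_5 = base_ptr + (sizetype) (_lane * 4), where _lane comes from an
   IFN_GOMP_SIMD_LANE call on the loop's simduid, the argument is recorded
   as simd-lane linear with base base_ptr and linear_step 4.  */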
2681 static void
2682 vect_simd_lane_linear (tree op, struct loop *loop,
2683 struct simd_call_arg_info *arginfo)
2685 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
2687 if (!is_gimple_assign (def_stmt)
2688 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
2689 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
2690 return;
2692 tree base = gimple_assign_rhs1 (def_stmt);
2693 HOST_WIDE_INT linear_step = 0;
2694 tree v = gimple_assign_rhs2 (def_stmt);
2695 while (TREE_CODE (v) == SSA_NAME)
2697 tree t;
2698 def_stmt = SSA_NAME_DEF_STMT (v);
2699 if (is_gimple_assign (def_stmt))
2700 switch (gimple_assign_rhs_code (def_stmt))
2702 case PLUS_EXPR:
2703 t = gimple_assign_rhs2 (def_stmt);
2704 if (linear_step || TREE_CODE (t) != INTEGER_CST)
2705 return;
2706 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
2707 v = gimple_assign_rhs1 (def_stmt);
2708 continue;
2709 case MULT_EXPR:
2710 t = gimple_assign_rhs2 (def_stmt);
2711 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
2712 return;
2713 linear_step = tree_to_shwi (t);
2714 v = gimple_assign_rhs1 (def_stmt);
2715 continue;
2716 CASE_CONVERT:
2717 t = gimple_assign_rhs1 (def_stmt);
2718 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
2719 || (TYPE_PRECISION (TREE_TYPE (v))
2720 < TYPE_PRECISION (TREE_TYPE (t))))
2721 return;
2722 if (!linear_step)
2723 linear_step = 1;
2724 v = t;
2725 continue;
2726 default:
2727 return;
2729 else if (is_gimple_call (def_stmt)
2730 && gimple_call_internal_p (def_stmt)
2731 && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
2732 && loop->simduid
2733 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
2734 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
2735 == loop->simduid))
2737 if (!linear_step)
2738 linear_step = 1;
2739 arginfo->linear_step = linear_step;
2740 arginfo->op = base;
2741 arginfo->simd_lane_linear = true;
2742 return;
2747 /* Function vectorizable_simd_clone_call.
2749 Check if STMT performs a function call that can be vectorized
2750 by calling a simd clone of the function.
2751 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2752 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2753 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
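/* Loosely, the idea (not a quote from the sources): a call to a function
   that has simd clones attached, e.g. via '#pragma omp declare simd', is
   replaced by calls to the clone that best matches the vectorization
   factor, each clone invocation processing simdlen lanes; the badness
   heuristic below picks that clone.  */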
2755 static bool
2756 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
2757 gimple **vec_stmt, slp_tree slp_node)
2759 tree vec_dest;
2760 tree scalar_dest;
2761 tree op, type;
2762 tree vec_oprnd0 = NULL_TREE;
2763 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2764 tree vectype;
2765 unsigned int nunits;
2766 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2767 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2768 vec_info *vinfo = stmt_info->vinfo;
2769 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2770 tree fndecl, new_temp;
2771 gimple *def_stmt;
2772 gimple *new_stmt = NULL;
2773 int ncopies, j;
2774 auto_vec<simd_call_arg_info> arginfo;
2775 vec<tree> vargs = vNULL;
2776 size_t i, nargs;
2777 tree lhs, rtype, ratype;
2778 vec<constructor_elt, va_gc> *ret_ctor_elts;
2780 /* Is STMT a vectorizable call? */
2781 if (!is_gimple_call (stmt))
2782 return false;
2784 fndecl = gimple_call_fndecl (stmt);
2785 if (fndecl == NULL_TREE)
2786 return false;
2788 struct cgraph_node *node = cgraph_node::get (fndecl);
2789 if (node == NULL || node->simd_clones == NULL)
2790 return false;
2792 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2793 return false;
2795 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2796 && ! vec_stmt)
2797 return false;
2799 if (gimple_call_lhs (stmt)
2800 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2801 return false;
2803 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2805 vectype = STMT_VINFO_VECTYPE (stmt_info);
2807 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2808 return false;
2810 /* FORNOW */
2811 if (slp_node)
2812 return false;
2814 /* Process function arguments. */
2815 nargs = gimple_call_num_args (stmt);
2817 /* Bail out if the function has zero arguments. */
2818 if (nargs == 0)
2819 return false;
2821 arginfo.reserve (nargs, true);
2823 for (i = 0; i < nargs; i++)
2825 simd_call_arg_info thisarginfo;
2826 affine_iv iv;
2828 thisarginfo.linear_step = 0;
2829 thisarginfo.align = 0;
2830 thisarginfo.op = NULL_TREE;
2831 thisarginfo.simd_lane_linear = false;
2833 op = gimple_call_arg (stmt, i);
2834 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
2835 &thisarginfo.vectype)
2836 || thisarginfo.dt == vect_uninitialized_def)
2838 if (dump_enabled_p ())
2839 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2840 "use not simple.\n");
2841 return false;
2844 if (thisarginfo.dt == vect_constant_def
2845 || thisarginfo.dt == vect_external_def)
2846 gcc_assert (thisarginfo.vectype == NULL_TREE);
2847 else
2848 gcc_assert (thisarginfo.vectype != NULL_TREE);
2850 /* For linear arguments, the analyze phase should have saved
2851 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
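/* A reading of this code, not a documented invariant: judging from the
   pushes in the analysis branch further down, element 0 of
   STMT_VINFO_SIMD_CLONE_INFO holds the chosen clone's decl and, for each
   linear argument I, elements I*3+1 .. I*3+3 hold the saved base, the
   step and a boolean simd-lane-linear flag.  */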
2852 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2853 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
2855 gcc_assert (vec_stmt);
2856 thisarginfo.linear_step
2857 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
2858 thisarginfo.op
2859 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
2860 thisarginfo.simd_lane_linear
2861 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
2862 == boolean_true_node);
2863 /* If loop has been peeled for alignment, we need to adjust it. */
2864 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2865 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2866 if (n1 != n2 && !thisarginfo.simd_lane_linear)
2868 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2869 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
2870 tree opt = TREE_TYPE (thisarginfo.op);
2871 bias = fold_convert (TREE_TYPE (step), bias);
2872 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2873 thisarginfo.op
2874 = fold_build2 (POINTER_TYPE_P (opt)
2875 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2876 thisarginfo.op, bias);
2879 else if (!vec_stmt
2880 && thisarginfo.dt != vect_constant_def
2881 && thisarginfo.dt != vect_external_def
2882 && loop_vinfo
2883 && TREE_CODE (op) == SSA_NAME
2884 && simple_iv (loop, loop_containing_stmt (stmt), op,
2885 &iv, false)
2886 && tree_fits_shwi_p (iv.step))
2888 thisarginfo.linear_step = tree_to_shwi (iv.step);
2889 thisarginfo.op = iv.base;
2891 else if ((thisarginfo.dt == vect_constant_def
2892 || thisarginfo.dt == vect_external_def)
2893 && POINTER_TYPE_P (TREE_TYPE (op)))
2894 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2895 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
2896 linear too. */
2897 if (POINTER_TYPE_P (TREE_TYPE (op))
2898 && !thisarginfo.linear_step
2899 && !vec_stmt
2900 && thisarginfo.dt != vect_constant_def
2901 && thisarginfo.dt != vect_external_def
2902 && loop_vinfo
2903 && !slp_node
2904 && TREE_CODE (op) == SSA_NAME)
2905 vect_simd_lane_linear (op, loop, &thisarginfo);
2907 arginfo.quick_push (thisarginfo);
2910 unsigned int badness = 0;
2911 struct cgraph_node *bestn = NULL;
2912 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2913 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2914 else
2915 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2916 n = n->simdclone->next_clone)
2918 unsigned int this_badness = 0;
2919 if (n->simdclone->simdlen
2920 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2921 || n->simdclone->nargs != nargs)
2922 continue;
2923 if (n->simdclone->simdlen
2924 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2925 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2926 - exact_log2 (n->simdclone->simdlen)) * 1024;
2927 if (n->simdclone->inbranch)
2928 this_badness += 2048;
2929 int target_badness = targetm.simd_clone.usable (n);
2930 if (target_badness < 0)
2931 continue;
2932 this_badness += target_badness * 512;
2933 /* FORNOW: Have to add code to add the mask argument. */
2934 if (n->simdclone->inbranch)
2935 continue;
2936 for (i = 0; i < nargs; i++)
2938 switch (n->simdclone->args[i].arg_type)
2940 case SIMD_CLONE_ARG_TYPE_VECTOR:
2941 if (!useless_type_conversion_p
2942 (n->simdclone->args[i].orig_type,
2943 TREE_TYPE (gimple_call_arg (stmt, i))))
2944 i = -1;
2945 else if (arginfo[i].dt == vect_constant_def
2946 || arginfo[i].dt == vect_external_def
2947 || arginfo[i].linear_step)
2948 this_badness += 64;
2949 break;
2950 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2951 if (arginfo[i].dt != vect_constant_def
2952 && arginfo[i].dt != vect_external_def)
2953 i = -1;
2954 break;
2955 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2956 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
2957 if (arginfo[i].dt == vect_constant_def
2958 || arginfo[i].dt == vect_external_def
2959 || (arginfo[i].linear_step
2960 != n->simdclone->args[i].linear_step))
2961 i = -1;
2962 break;
2963 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2964 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
2965 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
2966 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
2967 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
2968 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
2969 /* FORNOW */
2970 i = -1;
2971 break;
2972 case SIMD_CLONE_ARG_TYPE_MASK:
2973 gcc_unreachable ();
2975 if (i == (size_t) -1)
2976 break;
2977 if (n->simdclone->args[i].alignment > arginfo[i].align)
2979 i = -1;
2980 break;
2982 if (arginfo[i].align)
2983 this_badness += (exact_log2 (arginfo[i].align)
2984 - exact_log2 (n->simdclone->args[i].alignment));
2986 if (i == (size_t) -1)
2987 continue;
2988 if (bestn == NULL || this_badness < badness)
2990 bestn = n;
2991 badness = this_badness;
2995 if (bestn == NULL)
2996 return false;
2998 for (i = 0; i < nargs; i++)
2999 if ((arginfo[i].dt == vect_constant_def
3000 || arginfo[i].dt == vect_external_def)
3001 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3003 arginfo[i].vectype
3004 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3005 i)));
3006 if (arginfo[i].vectype == NULL
3007 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3008 > bestn->simdclone->simdlen))
3009 return false;
3012 fndecl = bestn->decl;
3013 nunits = bestn->simdclone->simdlen;
3014 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3016 /* If the function isn't const, only allow it in simd loops where the
3017 user has asserted that at least nunits consecutive iterations can be
3018 performed using SIMD instructions. */
3019 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3020 && gimple_vuse (stmt))
3021 return false;
3023 /* Sanity check: make sure that at least one copy of the vectorized stmt
3024 needs to be generated. */
3025 gcc_assert (ncopies >= 1);
3027 if (!vec_stmt) /* transformation not required. */
3029 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3030 for (i = 0; i < nargs; i++)
3031 if ((bestn->simdclone->args[i].arg_type
3032 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3033 || (bestn->simdclone->args[i].arg_type
3034 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3036 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3037 + 1);
3038 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3039 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3040 ? size_type_node : TREE_TYPE (arginfo[i].op);
3041 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3042 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3043 tree sll = arginfo[i].simd_lane_linear
3044 ? boolean_true_node : boolean_false_node;
3045 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3047 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3048 if (dump_enabled_p ())
3049 dump_printf_loc (MSG_NOTE, vect_location,
3050 "=== vectorizable_simd_clone_call ===\n");
3051 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3052 return true;
3055 /** Transform. **/
3057 if (dump_enabled_p ())
3058 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3060 /* Handle def. */
3061 scalar_dest = gimple_call_lhs (stmt);
3062 vec_dest = NULL_TREE;
3063 rtype = NULL_TREE;
3064 ratype = NULL_TREE;
3065 if (scalar_dest)
3067 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3068 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3069 if (TREE_CODE (rtype) == ARRAY_TYPE)
3071 ratype = rtype;
3072 rtype = TREE_TYPE (ratype);
3076 prev_stmt_info = NULL;
3077 for (j = 0; j < ncopies; ++j)
3079 /* Build argument list for the vectorized call. */
3080 if (j == 0)
3081 vargs.create (nargs);
3082 else
3083 vargs.truncate (0);
3085 for (i = 0; i < nargs; i++)
3087 unsigned int k, l, m, o;
3088 tree atype;
3089 op = gimple_call_arg (stmt, i);
3090 switch (bestn->simdclone->args[i].arg_type)
3092 case SIMD_CLONE_ARG_TYPE_VECTOR:
3093 atype = bestn->simdclone->args[i].vector_type;
3094 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3095 for (m = j * o; m < (j + 1) * o; m++)
3097 if (TYPE_VECTOR_SUBPARTS (atype)
3098 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3100 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3101 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3102 / TYPE_VECTOR_SUBPARTS (atype));
3103 gcc_assert ((k & (k - 1)) == 0);
3104 if (m == 0)
3105 vec_oprnd0
3106 = vect_get_vec_def_for_operand (op, stmt);
3107 else
3109 vec_oprnd0 = arginfo[i].op;
3110 if ((m & (k - 1)) == 0)
3111 vec_oprnd0
3112 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3113 vec_oprnd0);
3115 arginfo[i].op = vec_oprnd0;
3116 vec_oprnd0
3117 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3118 size_int (prec),
3119 bitsize_int ((m & (k - 1)) * prec));
3120 new_stmt
3121 = gimple_build_assign (make_ssa_name (atype),
3122 vec_oprnd0);
3123 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3124 vargs.safe_push (gimple_assign_lhs (new_stmt));
3126 else
3128 k = (TYPE_VECTOR_SUBPARTS (atype)
3129 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3130 gcc_assert ((k & (k - 1)) == 0);
3131 vec<constructor_elt, va_gc> *ctor_elts;
3132 if (k != 1)
3133 vec_alloc (ctor_elts, k);
3134 else
3135 ctor_elts = NULL;
3136 for (l = 0; l < k; l++)
3138 if (m == 0 && l == 0)
3139 vec_oprnd0
3140 = vect_get_vec_def_for_operand (op, stmt);
3141 else
3142 vec_oprnd0
3143 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3144 arginfo[i].op);
3145 arginfo[i].op = vec_oprnd0;
3146 if (k == 1)
3147 break;
3148 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3149 vec_oprnd0);
3151 if (k == 1)
3152 vargs.safe_push (vec_oprnd0);
3153 else
3155 vec_oprnd0 = build_constructor (atype, ctor_elts);
3156 new_stmt
3157 = gimple_build_assign (make_ssa_name (atype),
3158 vec_oprnd0);
3159 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3160 vargs.safe_push (gimple_assign_lhs (new_stmt));
3164 break;
3165 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3166 vargs.safe_push (op);
3167 break;
3168 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3169 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3170 if (j == 0)
3172 gimple_seq stmts;
3173 arginfo[i].op
3174 = force_gimple_operand (arginfo[i].op, &stmts, true,
3175 NULL_TREE);
3176 if (stmts != NULL)
3178 basic_block new_bb;
3179 edge pe = loop_preheader_edge (loop);
3180 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3181 gcc_assert (!new_bb);
3183 if (arginfo[i].simd_lane_linear)
3185 vargs.safe_push (arginfo[i].op);
3186 break;
3188 tree phi_res = copy_ssa_name (op);
3189 gphi *new_phi = create_phi_node (phi_res, loop->header);
3190 set_vinfo_for_stmt (new_phi,
3191 new_stmt_vec_info (new_phi, loop_vinfo));
3192 add_phi_arg (new_phi, arginfo[i].op,
3193 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3194 enum tree_code code
3195 = POINTER_TYPE_P (TREE_TYPE (op))
3196 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3197 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3198 ? sizetype : TREE_TYPE (op);
3199 widest_int cst
3200 = wi::mul (bestn->simdclone->args[i].linear_step,
3201 ncopies * nunits);
3202 tree tcst = wide_int_to_tree (type, cst);
3203 tree phi_arg = copy_ssa_name (op);
3204 new_stmt
3205 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3206 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3207 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3208 set_vinfo_for_stmt (new_stmt,
3209 new_stmt_vec_info (new_stmt, loop_vinfo));
3210 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3211 UNKNOWN_LOCATION);
3212 arginfo[i].op = phi_res;
3213 vargs.safe_push (phi_res);
3215 else
3217 enum tree_code code
3218 = POINTER_TYPE_P (TREE_TYPE (op))
3219 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3220 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3221 ? sizetype : TREE_TYPE (op);
3222 widest_int cst
3223 = wi::mul (bestn->simdclone->args[i].linear_step,
3224 j * nunits);
3225 tree tcst = wide_int_to_tree (type, cst);
3226 new_temp = make_ssa_name (TREE_TYPE (op));
3227 new_stmt = gimple_build_assign (new_temp, code,
3228 arginfo[i].op, tcst);
3229 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3230 vargs.safe_push (new_temp);
3232 break;
3233 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3234 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3235 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3236 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3237 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3238 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3239 default:
3240 gcc_unreachable ();
3244 new_stmt = gimple_build_call_vec (fndecl, vargs);
3245 if (vec_dest)
3247 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3248 if (ratype)
3249 new_temp = create_tmp_var (ratype);
3250 else if (TYPE_VECTOR_SUBPARTS (vectype)
3251 == TYPE_VECTOR_SUBPARTS (rtype))
3252 new_temp = make_ssa_name (vec_dest, new_stmt);
3253 else
3254 new_temp = make_ssa_name (rtype, new_stmt);
3255 gimple_call_set_lhs (new_stmt, new_temp);
3257 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3259 if (vec_dest)
3261 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3263 unsigned int k, l;
3264 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3265 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3266 gcc_assert ((k & (k - 1)) == 0);
3267 for (l = 0; l < k; l++)
3269 tree t;
3270 if (ratype)
3272 t = build_fold_addr_expr (new_temp);
3273 t = build2 (MEM_REF, vectype, t,
3274 build_int_cst (TREE_TYPE (t),
3275 l * prec / BITS_PER_UNIT));
3277 else
3278 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3279 size_int (prec), bitsize_int (l * prec));
3280 new_stmt
3281 = gimple_build_assign (make_ssa_name (vectype), t);
3282 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3283 if (j == 0 && l == 0)
3284 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3285 else
3286 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3288 prev_stmt_info = vinfo_for_stmt (new_stmt);
3291 if (ratype)
3293 tree clobber = build_constructor (ratype, NULL);
3294 TREE_THIS_VOLATILE (clobber) = 1;
3295 new_stmt = gimple_build_assign (new_temp, clobber);
3296 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3298 continue;
3300 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3302 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3303 / TYPE_VECTOR_SUBPARTS (rtype));
3304 gcc_assert ((k & (k - 1)) == 0);
3305 if ((j & (k - 1)) == 0)
3306 vec_alloc (ret_ctor_elts, k);
3307 if (ratype)
3309 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3310 for (m = 0; m < o; m++)
3312 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3313 size_int (m), NULL_TREE, NULL_TREE);
3314 new_stmt
3315 = gimple_build_assign (make_ssa_name (rtype), tem);
3316 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3317 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3318 gimple_assign_lhs (new_stmt));
3320 tree clobber = build_constructor (ratype, NULL);
3321 TREE_THIS_VOLATILE (clobber) = 1;
3322 new_stmt = gimple_build_assign (new_temp, clobber);
3323 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3325 else
3326 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3327 if ((j & (k - 1)) != k - 1)
3328 continue;
3329 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3330 new_stmt
3331 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3332 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3334 if ((unsigned) j == k - 1)
3335 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3336 else
3337 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3339 prev_stmt_info = vinfo_for_stmt (new_stmt);
3340 continue;
3342 else if (ratype)
3344 tree t = build_fold_addr_expr (new_temp);
3345 t = build2 (MEM_REF, vectype, t,
3346 build_int_cst (TREE_TYPE (t), 0));
3347 new_stmt
3348 = gimple_build_assign (make_ssa_name (vec_dest), t);
3349 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3350 tree clobber = build_constructor (ratype, NULL);
3351 TREE_THIS_VOLATILE (clobber) = 1;
3352 vect_finish_stmt_generation (stmt,
3353 gimple_build_assign (new_temp,
3354 clobber), gsi);
3358 if (j == 0)
3359 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3360 else
3361 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3363 prev_stmt_info = vinfo_for_stmt (new_stmt);
3366 vargs.release ();
3368 /* The call in STMT might prevent it from being removed in DCE.
3369 We cannot remove it here, however, because of the way the SSA name
3370 it defines is mapped to the new definition. So just replace the
3371 rhs of the statement with something harmless. */
3373 if (slp_node)
3374 return true;
3376 if (scalar_dest)
3378 type = TREE_TYPE (scalar_dest);
3379 if (is_pattern_stmt_p (stmt_info))
3380 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3381 else
3382 lhs = gimple_call_lhs (stmt);
3383 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3385 else
3386 new_stmt = gimple_build_nop ();
3387 set_vinfo_for_stmt (new_stmt, stmt_info);
3388 set_vinfo_for_stmt (stmt, NULL);
3389 STMT_VINFO_STMT (stmt_info) = new_stmt;
3390 gsi_replace (gsi, new_stmt, true);
3391 unlink_stmt_vdef (stmt);
3393 return true;
3397 /* Function vect_gen_widened_results_half
3399 Create a vector stmt whose code, type, number of arguments, and result
3400 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3401 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3402 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3403 needs to be created (DECL is a function-decl of a target-builtin).
3404 STMT is the original scalar stmt that we are vectorizing. */
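/* An illustration (assumed, not part of the original comment): for a
   widening multiplication this helper is typically called twice, once
   with VEC_WIDEN_MULT_LO_EXPR and once with VEC_WIDEN_MULT_HI_EXPR, each
   call producing the widened low or high half of the elements; when the
   target only provides a builtin, CODE is CALL_EXPR and DECL is used
   instead.  */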
3406 static gimple *
3407 vect_gen_widened_results_half (enum tree_code code,
3408 tree decl,
3409 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3410 tree vec_dest, gimple_stmt_iterator *gsi,
3411 gimple *stmt)
3413 gimple *new_stmt;
3414 tree new_temp;
3416 /* Generate half of the widened result: */
3417 if (code == CALL_EXPR)
3419 /* Target specific support */
3420 if (op_type == binary_op)
3421 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3422 else
3423 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3424 new_temp = make_ssa_name (vec_dest, new_stmt);
3425 gimple_call_set_lhs (new_stmt, new_temp);
3427 else
3429 /* Generic support */
3430 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3431 if (op_type != binary_op)
3432 vec_oprnd1 = NULL;
3433 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3434 new_temp = make_ssa_name (vec_dest, new_stmt);
3435 gimple_assign_set_lhs (new_stmt, new_temp);
3437 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3439 return new_stmt;
3443 /* Get vectorized definitions for loop-based vectorization. For the first
3444 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3445 scalar operand), and for the rest we get a copy with
3446 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3447 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3448 The vectors are collected into VEC_OPRNDS. */
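/* Roughly (an illustration of the recursion, not a stated guarantee):
   with MULTI_STEP_CVT == 1 this collects four defs for OPRND, namely the
   def of the scalar operand, one stmt copy, and two more copies from the
   recursive call, which is enough input for a two-step narrowing
   sequence.  */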
3450 static void
3451 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3452 vec<tree> *vec_oprnds, int multi_step_cvt)
3454 tree vec_oprnd;
3456 /* Get first vector operand. */
3457 /* All the vector operands except the very first one (that is scalar oprnd)
3458 are stmt copies. */
3459 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3460 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3461 else
3462 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3464 vec_oprnds->quick_push (vec_oprnd);
3466 /* Get second vector operand. */
3467 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3468 vec_oprnds->quick_push (vec_oprnd);
3470 *oprnd = vec_oprnd;
3472 /* For conversion in multiple steps, continue to get operands
3473 recursively. */
3474 if (multi_step_cvt)
3475 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3479 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3480 For multi-step conversions store the resulting vectors and call the function
3481 recursively. */
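/* For example (illustrative): each step below packs pairs of operands,
   say two V8HI vectors into one V16QI result, and with MULTI_STEP_CVT the
   halved set of results is fed back into a recursive call using
   VEC_PACK_TRUNC_EXPR until the destination type is reached.  */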
3483 static void
3484 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3485 int multi_step_cvt, gimple *stmt,
3486 vec<tree> vec_dsts,
3487 gimple_stmt_iterator *gsi,
3488 slp_tree slp_node, enum tree_code code,
3489 stmt_vec_info *prev_stmt_info)
3491 unsigned int i;
3492 tree vop0, vop1, new_tmp, vec_dest;
3493 gimple *new_stmt;
3494 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3496 vec_dest = vec_dsts.pop ();
3498 for (i = 0; i < vec_oprnds->length (); i += 2)
3500 /* Create demotion operation. */
3501 vop0 = (*vec_oprnds)[i];
3502 vop1 = (*vec_oprnds)[i + 1];
3503 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3504 new_tmp = make_ssa_name (vec_dest, new_stmt);
3505 gimple_assign_set_lhs (new_stmt, new_tmp);
3506 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3508 if (multi_step_cvt)
3509 /* Store the resulting vector for next recursive call. */
3510 (*vec_oprnds)[i/2] = new_tmp;
3511 else
3513 /* This is the last step of the conversion sequence. Store the
3514 vectors in SLP_NODE or in vector info of the scalar statement
3515 (or in STMT_VINFO_RELATED_STMT chain). */
3516 if (slp_node)
3517 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3518 else
3520 if (!*prev_stmt_info)
3521 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3522 else
3523 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3525 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3530 /* For multi-step demotion operations we first generate demotion operations
3531 from the source type to the intermediate types, and then combine the
3532 results (stored in VEC_OPRNDS) in demotion operation to the destination
3533 type. */
3534 if (multi_step_cvt)
3536 /* At each level of recursion we have half of the operands we had at the
3537 previous level. */
3538 vec_oprnds->truncate ((i+1)/2);
3539 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3540 stmt, vec_dsts, gsi, slp_node,
3541 VEC_PACK_TRUNC_EXPR,
3542 prev_stmt_info);
3545 vec_dsts.quick_push (vec_dest);
3549 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3550 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3551 the resulting vectors and call the function recursively. */
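/* For example (illustrative): each input vector yields two widened
   results, say one V16QI operand split into two V8HI vectors by the
   lo/hi pair CODE1/CODE2, so VEC_TMP below ends up twice as long as
   VEC_OPRNDS0.  */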
3553 static void
3554 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3555 vec<tree> *vec_oprnds1,
3556 gimple *stmt, tree vec_dest,
3557 gimple_stmt_iterator *gsi,
3558 enum tree_code code1,
3559 enum tree_code code2, tree decl1,
3560 tree decl2, int op_type)
3562 int i;
3563 tree vop0, vop1, new_tmp1, new_tmp2;
3564 gimple *new_stmt1, *new_stmt2;
3565 vec<tree> vec_tmp = vNULL;
3567 vec_tmp.create (vec_oprnds0->length () * 2);
3568 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3570 if (op_type == binary_op)
3571 vop1 = (*vec_oprnds1)[i];
3572 else
3573 vop1 = NULL_TREE;
3575 /* Generate the two halves of promotion operation. */
3576 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3577 op_type, vec_dest, gsi, stmt);
3578 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3579 op_type, vec_dest, gsi, stmt);
3580 if (is_gimple_call (new_stmt1))
3582 new_tmp1 = gimple_call_lhs (new_stmt1);
3583 new_tmp2 = gimple_call_lhs (new_stmt2);
3585 else
3587 new_tmp1 = gimple_assign_lhs (new_stmt1);
3588 new_tmp2 = gimple_assign_lhs (new_stmt2);
3591 /* Store the results for the next step. */
3592 vec_tmp.quick_push (new_tmp1);
3593 vec_tmp.quick_push (new_tmp2);
3596 vec_oprnds0->release ();
3597 *vec_oprnds0 = vec_tmp;
3601 /* Check if STMT performs a conversion operation, that can be vectorized.
3602 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3603 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3604 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
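/* A sketch of the three shapes handled below (the example modes are
   illustrative): NONE, e.g. V4SI to V4SF with the same number of lanes;
   WIDEN, e.g. V8HI inputs feeding V4SI outputs, built from a lo/hi pair
   of widening codes; and NARROW, e.g. V4SI inputs packed into V8HI
   outputs, possibly through intermediate types when a single step is not
   supported.  */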
3606 static bool
3607 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
3608 gimple **vec_stmt, slp_tree slp_node)
3610 tree vec_dest;
3611 tree scalar_dest;
3612 tree op0, op1 = NULL_TREE;
3613 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3614 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3615 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3616 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3617 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3618 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3619 tree new_temp;
3620 gimple *def_stmt;
3621 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3622 gimple *new_stmt = NULL;
3623 stmt_vec_info prev_stmt_info;
3624 int nunits_in;
3625 int nunits_out;
3626 tree vectype_out, vectype_in;
3627 int ncopies, i, j;
3628 tree lhs_type, rhs_type;
3629 enum { NARROW, NONE, WIDEN } modifier;
3630 vec<tree> vec_oprnds0 = vNULL;
3631 vec<tree> vec_oprnds1 = vNULL;
3632 tree vop0;
3633 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3634 vec_info *vinfo = stmt_info->vinfo;
3635 int multi_step_cvt = 0;
3636 vec<tree> vec_dsts = vNULL;
3637 vec<tree> interm_types = vNULL;
3638 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3639 int op_type;
3640 machine_mode rhs_mode;
3641 unsigned short fltsz;
3643 /* Is STMT a vectorizable conversion? */
3645 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3646 return false;
3648 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3649 && ! vec_stmt)
3650 return false;
3652 if (!is_gimple_assign (stmt))
3653 return false;
3655 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3656 return false;
3658 code = gimple_assign_rhs_code (stmt);
3659 if (!CONVERT_EXPR_CODE_P (code)
3660 && code != FIX_TRUNC_EXPR
3661 && code != FLOAT_EXPR
3662 && code != WIDEN_MULT_EXPR
3663 && code != WIDEN_LSHIFT_EXPR)
3664 return false;
3666 op_type = TREE_CODE_LENGTH (code);
3668 /* Check types of lhs and rhs. */
3669 scalar_dest = gimple_assign_lhs (stmt);
3670 lhs_type = TREE_TYPE (scalar_dest);
3671 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3673 op0 = gimple_assign_rhs1 (stmt);
3674 rhs_type = TREE_TYPE (op0);
3676 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3677 && !((INTEGRAL_TYPE_P (lhs_type)
3678 && INTEGRAL_TYPE_P (rhs_type))
3679 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3680 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3681 return false;
3683 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
3684 && ((INTEGRAL_TYPE_P (lhs_type)
3685 && (TYPE_PRECISION (lhs_type)
3686 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3687 || (INTEGRAL_TYPE_P (rhs_type)
3688 && (TYPE_PRECISION (rhs_type)
3689 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
3691 if (dump_enabled_p ())
3692 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3693 "type conversion to/from bit-precision unsupported."
3694 "\n");
3695 return false;
3698 /* Check the operands of the operation. */
3699 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
3701 if (dump_enabled_p ())
3702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3703 "use not simple.\n");
3704 return false;
3706 if (op_type == binary_op)
3708 bool ok;
3710 op1 = gimple_assign_rhs2 (stmt);
3711 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3712 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3713 OP1. */
3714 if (CONSTANT_CLASS_P (op0))
3715 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
3716 else
3717 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
3719 if (!ok)
3721 if (dump_enabled_p ())
3722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3723 "use not simple.\n");
3724 return false;
3728 /* If op0 is an external or constant def use a vector type of
3729 the same size as the output vector type. */
3730 if (!vectype_in)
3731 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3732 if (vec_stmt)
3733 gcc_assert (vectype_in);
3734 if (!vectype_in)
3736 if (dump_enabled_p ())
3738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3739 "no vectype for scalar type ");
3740 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3741 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3744 return false;
3747 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3748 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
3750 if (dump_enabled_p ())
3752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3753 "can't convert between boolean and non "
3754 "boolean vectors");
3755 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3756 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3759 return false;
3762 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3763 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3764 if (nunits_in < nunits_out)
3765 modifier = NARROW;
3766 else if (nunits_out == nunits_in)
3767 modifier = NONE;
3768 else
3769 modifier = WIDEN;
3771 /* Multiple types in SLP are handled by creating the appropriate number of
3772 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3773 case of SLP. */
3774 if (slp_node)
3775 ncopies = 1;
3776 else if (modifier == NARROW)
3777 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3778 else
3779 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3781 /* Sanity check: make sure that at least one copy of the vectorized stmt
3782 needs to be generated. */
3783 gcc_assert (ncopies >= 1);
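 /* Purely illustrative example (the exact numbers depend on the target's
    vector width and on the other statements in the loop): with 128-bit
    vectors and a vectorization factor of 16 dictated by a char operation
    elsewhere in the loop, a short -> int widening has nunits_in == 8,
    so ncopies == 16 / 8 == 2 copies of the vectorized conversion are
    generated.  */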
3785 /* Supportable by target? */
3786 switch (modifier)
3788 case NONE:
3789 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3790 return false;
3791 if (supportable_convert_operation (code, vectype_out, vectype_in,
3792 &decl1, &code1))
3793 break;
3794 /* FALLTHRU */
3795 unsupported:
3796 if (dump_enabled_p ())
3797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3798 "conversion not supported by target.\n");
3799 return false;
3801 case WIDEN:
3802 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3803 &code1, &code2, &multi_step_cvt,
3804 &interm_types))
3806 /* Binary widening operation can only be supported directly by the
3807 architecture. */
3808 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3809 break;
3812 if (code != FLOAT_EXPR
3813 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3814 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3815 goto unsupported;
3817 rhs_mode = TYPE_MODE (rhs_type);
3818 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3819 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3820 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3821 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3823 cvt_type
3824 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3825 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3826 if (cvt_type == NULL_TREE)
3827 goto unsupported;
3829 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3831 if (!supportable_convert_operation (code, vectype_out,
3832 cvt_type, &decl1, &codecvt1))
3833 goto unsupported;
3835 else if (!supportable_widening_operation (code, stmt, vectype_out,
3836 cvt_type, &codecvt1,
3837 &codecvt2, &multi_step_cvt,
3838 &interm_types))
3839 continue;
3840 else
3841 gcc_assert (multi_step_cvt == 0);
3843 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3844 vectype_in, &code1, &code2,
3845 &multi_step_cvt, &interm_types))
3846 break;
3849 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3850 goto unsupported;
3852 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3853 codecvt2 = ERROR_MARK;
3854 else
3856 multi_step_cvt++;
3857 interm_types.safe_push (cvt_type);
3858 cvt_type = NULL_TREE;
3860 break;
3862 case NARROW:
3863 gcc_assert (op_type == unary_op);
3864 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3865 &code1, &multi_step_cvt,
3866 &interm_types))
3867 break;
3869 if (code != FIX_TRUNC_EXPR
3870 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3871 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3872 goto unsupported;
3874 rhs_mode = TYPE_MODE (rhs_type);
3875 cvt_type
3876 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3877 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3878 if (cvt_type == NULL_TREE)
3879 goto unsupported;
3880 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3881 &decl1, &codecvt1))
3882 goto unsupported;
3883 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3884 &code1, &multi_step_cvt,
3885 &interm_types))
3886 break;
3887 goto unsupported;
3889 default:
3890 gcc_unreachable ();
3893 if (!vec_stmt) /* transformation not required. */
3895 if (dump_enabled_p ())
3896 dump_printf_loc (MSG_NOTE, vect_location,
3897 "=== vectorizable_conversion ===\n");
3898 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3900 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3901 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3903 else if (modifier == NARROW)
3905 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3906 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3908 else
3910 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3911 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3913 interm_types.release ();
3914 return true;
3917 /** Transform. **/
3918 if (dump_enabled_p ())
3919 dump_printf_loc (MSG_NOTE, vect_location,
3920 "transform conversion. ncopies = %d.\n", ncopies);
3922 if (op_type == binary_op)
3924 if (CONSTANT_CLASS_P (op0))
3925 op0 = fold_convert (TREE_TYPE (op1), op0);
3926 else if (CONSTANT_CLASS_P (op1))
3927 op1 = fold_convert (TREE_TYPE (op0), op1);
3930 /* In case of multi-step conversion, we first generate conversion operations
3931 to the intermediate types, and then from those types to the final one.
3932 We create vector destinations for the intermediate types (TYPES) received
3933 from supportable_*_operation, and store them in the correct order
3934 for future use in vect_create_vectorized_*_stmts (). */
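 /* Illustrative example: widening a vector of chars to a vector of ints
    on a target that only provides single-step widenings is performed as
    char -> short -> int; supportable_widening_operation then returns the
    intermediate short vector type in INTERM_TYPES, and it gets its own
    destination variable below.  */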
3935 vec_dsts.create (multi_step_cvt + 1);
3936 vec_dest = vect_create_destination_var (scalar_dest,
3937 (cvt_type && modifier == WIDEN)
3938 ? cvt_type : vectype_out);
3939 vec_dsts.quick_push (vec_dest);
3941 if (multi_step_cvt)
3943 for (i = interm_types.length () - 1;
3944 interm_types.iterate (i, &intermediate_type); i--)
3946 vec_dest = vect_create_destination_var (scalar_dest,
3947 intermediate_type);
3948 vec_dsts.quick_push (vec_dest);
3952 if (cvt_type)
3953 vec_dest = vect_create_destination_var (scalar_dest,
3954 modifier == WIDEN
3955 ? vectype_out : cvt_type);
3957 if (!slp_node)
3959 if (modifier == WIDEN)
3961 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3962 if (op_type == binary_op)
3963 vec_oprnds1.create (1);
3965 else if (modifier == NARROW)
3966 vec_oprnds0.create (
3967 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3969 else if (code == WIDEN_LSHIFT_EXPR)
3970 vec_oprnds1.create (slp_node->vec_stmts_size);
3972 last_oprnd = op0;
3973 prev_stmt_info = NULL;
3974 switch (modifier)
3976 case NONE:
3977 for (j = 0; j < ncopies; j++)
3979 if (j == 0)
3980 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3981 -1);
3982 else
3983 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3985 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3987 /* Arguments are ready. Create the new vector stmt. */
3988 if (code1 == CALL_EXPR)
3990 new_stmt = gimple_build_call (decl1, 1, vop0);
3991 new_temp = make_ssa_name (vec_dest, new_stmt);
3992 gimple_call_set_lhs (new_stmt, new_temp);
3994 else
3996 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3997 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3998 new_temp = make_ssa_name (vec_dest, new_stmt);
3999 gimple_assign_set_lhs (new_stmt, new_temp);
4002 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4003 if (slp_node)
4004 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4005 else
4007 if (!prev_stmt_info)
4008 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4009 else
4010 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4011 prev_stmt_info = vinfo_for_stmt (new_stmt);
4015 break;
4017 case WIDEN:
4018 /* In case the vectorization factor (VF) is bigger than the number
4019 of elements that we can fit in a vectype (nunits), we have to
4020 generate more than one vector stmt, i.e., we need to "unroll"
4021 the vector stmt by a factor VF/nunits. */
4022 for (j = 0; j < ncopies; j++)
4024 /* Handle uses. */
4025 if (j == 0)
4027 if (slp_node)
4029 if (code == WIDEN_LSHIFT_EXPR)
4031 unsigned int k;
4033 vec_oprnd1 = op1;
4034 /* Store vec_oprnd1 for every vector stmt to be created
4035 for SLP_NODE. We check during the analysis that all
4036 the shift arguments are the same. */
4037 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4038 vec_oprnds1.quick_push (vec_oprnd1);
4040 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4041 slp_node, -1);
4043 else
4044 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4045 &vec_oprnds1, slp_node, -1);
4047 else
4049 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4050 vec_oprnds0.quick_push (vec_oprnd0);
4051 if (op_type == binary_op)
4053 if (code == WIDEN_LSHIFT_EXPR)
4054 vec_oprnd1 = op1;
4055 else
4056 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4057 vec_oprnds1.quick_push (vec_oprnd1);
4061 else
4063 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4064 vec_oprnds0.truncate (0);
4065 vec_oprnds0.quick_push (vec_oprnd0);
4066 if (op_type == binary_op)
4068 if (code == WIDEN_LSHIFT_EXPR)
4069 vec_oprnd1 = op1;
4070 else
4071 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4072 vec_oprnd1);
4073 vec_oprnds1.truncate (0);
4074 vec_oprnds1.quick_push (vec_oprnd1);
4078 /* Arguments are ready. Create the new vector stmts. */
4079 for (i = multi_step_cvt; i >= 0; i--)
4081 tree this_dest = vec_dsts[i];
4082 enum tree_code c1 = code1, c2 = code2;
4083 if (i == 0 && codecvt2 != ERROR_MARK)
4085 c1 = codecvt1;
4086 c2 = codecvt2;
4088 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4089 &vec_oprnds1,
4090 stmt, this_dest, gsi,
4091 c1, c2, decl1, decl2,
4092 op_type);
4095 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4097 if (cvt_type)
4099 if (codecvt1 == CALL_EXPR)
4101 new_stmt = gimple_build_call (decl1, 1, vop0);
4102 new_temp = make_ssa_name (vec_dest, new_stmt);
4103 gimple_call_set_lhs (new_stmt, new_temp);
4105 else
4107 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4108 new_temp = make_ssa_name (vec_dest);
4109 new_stmt = gimple_build_assign (new_temp, codecvt1,
4110 vop0);
4113 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4115 else
4116 new_stmt = SSA_NAME_DEF_STMT (vop0);
4118 if (slp_node)
4119 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4120 else
4122 if (!prev_stmt_info)
4123 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4124 else
4125 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4126 prev_stmt_info = vinfo_for_stmt (new_stmt);
4131 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4132 break;
4134 case NARROW:
4135 /* In case the vectorization factor (VF) is bigger than the number
4136 of elements that we can fit in a vectype (nunits), we have to
4137 generate more than one vector stmt, i.e., we need to "unroll"
4138 the vector stmt by a factor VF/nunits. */
4139 for (j = 0; j < ncopies; j++)
4141 /* Handle uses. */
4142 if (slp_node)
4143 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4144 slp_node, -1);
4145 else
4147 vec_oprnds0.truncate (0);
4148 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4149 vect_pow2 (multi_step_cvt) - 1);
4152 /* Arguments are ready. Create the new vector stmts. */
4153 if (cvt_type)
4154 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4156 if (codecvt1 == CALL_EXPR)
4158 new_stmt = gimple_build_call (decl1, 1, vop0);
4159 new_temp = make_ssa_name (vec_dest, new_stmt);
4160 gimple_call_set_lhs (new_stmt, new_temp);
4162 else
4164 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4165 new_temp = make_ssa_name (vec_dest);
4166 new_stmt = gimple_build_assign (new_temp, codecvt1,
4167 vop0);
4170 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4171 vec_oprnds0[i] = new_temp;
4174 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4175 stmt, vec_dsts, gsi,
4176 slp_node, code1,
4177 &prev_stmt_info);
4180 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4181 break;
4184 vec_oprnds0.release ();
4185 vec_oprnds1.release ();
4186 vec_dsts.release ();
4187 interm_types.release ();
4189 return true;
4193 /* Function vectorizable_assignment.
4195 Check if STMT performs an assignment (copy) that can be vectorized.
4196 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4197 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4198 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4200 static bool
4201 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4202 gimple **vec_stmt, slp_tree slp_node)
4204 tree vec_dest;
4205 tree scalar_dest;
4206 tree op;
4207 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4208 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4209 tree new_temp;
4210 gimple *def_stmt;
4211 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4212 int ncopies;
4213 int i, j;
4214 vec<tree> vec_oprnds = vNULL;
4215 tree vop;
4216 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4217 vec_info *vinfo = stmt_info->vinfo;
4218 gimple *new_stmt = NULL;
4219 stmt_vec_info prev_stmt_info = NULL;
4220 enum tree_code code;
4221 tree vectype_in;
4223 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4224 return false;
4226 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4227 && ! vec_stmt)
4228 return false;
4230 /* Is vectorizable assignment? */
4231 if (!is_gimple_assign (stmt))
4232 return false;
4234 scalar_dest = gimple_assign_lhs (stmt);
4235 if (TREE_CODE (scalar_dest) != SSA_NAME)
4236 return false;
4238 code = gimple_assign_rhs_code (stmt);
4239 if (gimple_assign_single_p (stmt)
4240 || code == PAREN_EXPR
4241 || CONVERT_EXPR_CODE_P (code))
4242 op = gimple_assign_rhs1 (stmt);
4243 else
4244 return false;
4246 if (code == VIEW_CONVERT_EXPR)
4247 op = TREE_OPERAND (op, 0);
4249 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4250 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4252 /* Multiple types in SLP are handled by creating the appropriate number of
4253 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4254 case of SLP. */
4255 if (slp_node)
4256 ncopies = 1;
4257 else
4258 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4260 gcc_assert (ncopies >= 1);
4262 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4264 if (dump_enabled_p ())
4265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4266 "use not simple.\n");
4267 return false;
4270 /* We can handle NOP_EXPR conversions that do not change the number
4271 of elements or the vector size. */
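 /* Illustrative examples: a signed int -> unsigned int NOP_EXPR, or a
    VIEW_CONVERT_EXPR between V4SI and V4SF, keeps both the number of
    elements and the vector size and is therefore emitted as a plain
    vector copy (with a VIEW_CONVERT_EXPR wrapped around the operand).  */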
4272 if ((CONVERT_EXPR_CODE_P (code)
4273 || code == VIEW_CONVERT_EXPR)
4274 && (!vectype_in
4275 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4276 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4277 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4278 return false;
4280 /* We do not handle bit-precision changes. */
4281 if ((CONVERT_EXPR_CODE_P (code)
4282 || code == VIEW_CONVERT_EXPR)
4283 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4284 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4285 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4286 || ((TYPE_PRECISION (TREE_TYPE (op))
4287 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4288 /* But a conversion that does not change the bit-pattern is ok. */
4289 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4290 > TYPE_PRECISION (TREE_TYPE (op)))
4291 && TYPE_UNSIGNED (TREE_TYPE (op)))
4292 /* Conversion between boolean types of different sizes is
4293 a simple assignment in case their vectypes are same
4294 boolean vectors. */
4295 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4296 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4298 if (dump_enabled_p ())
4299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4300 "type conversion to/from bit-precision "
4301 "unsupported.\n");
4302 return false;
4305 if (!vec_stmt) /* transformation not required. */
4307 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4308 if (dump_enabled_p ())
4309 dump_printf_loc (MSG_NOTE, vect_location,
4310 "=== vectorizable_assignment ===\n");
4311 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4312 return true;
4315 /** Transform. **/
4316 if (dump_enabled_p ())
4317 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4319 /* Handle def. */
4320 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4322 /* Handle use. */
4323 for (j = 0; j < ncopies; j++)
4325 /* Handle uses. */
4326 if (j == 0)
4327 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4328 else
4329 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4331 /* Arguments are ready. Create the new vector stmt. */
4332 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4334 if (CONVERT_EXPR_CODE_P (code)
4335 || code == VIEW_CONVERT_EXPR)
4336 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4337 new_stmt = gimple_build_assign (vec_dest, vop);
4338 new_temp = make_ssa_name (vec_dest, new_stmt);
4339 gimple_assign_set_lhs (new_stmt, new_temp);
4340 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4341 if (slp_node)
4342 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4345 if (slp_node)
4346 continue;
4348 if (j == 0)
4349 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4350 else
4351 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4353 prev_stmt_info = vinfo_for_stmt (new_stmt);
4356 vec_oprnds.release ();
4357 return true;
4361 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4362 either as shift by a scalar or by a vector. */
4364 bool
4365 vect_supportable_shift (enum tree_code code, tree scalar_type)
4368 machine_mode vec_mode;
4369 optab optab;
4370 int icode;
4371 tree vectype;
4373 vectype = get_vectype_for_scalar_type (scalar_type);
4374 if (!vectype)
4375 return false;
4377 optab = optab_for_tree_code (code, vectype, optab_scalar);
4378 if (!optab
4379 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4381 optab = optab_for_tree_code (code, vectype, optab_vector);
4382 if (!optab
4383 || (optab_handler (optab, TYPE_MODE (vectype))
4384 == CODE_FOR_nothing))
4385 return false;
4388 vec_mode = TYPE_MODE (vectype);
4389 icode = (int) optab_handler (optab, vec_mode);
4390 if (icode == CODE_FOR_nothing)
4391 return false;
4393 return true;
4397 /* Function vectorizable_shift.
4399 Check if STMT performs a shift operation that can be vectorized.
4400 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4401 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4402 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4404 static bool
4405 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4406 gimple **vec_stmt, slp_tree slp_node)
4408 tree vec_dest;
4409 tree scalar_dest;
4410 tree op0, op1 = NULL;
4411 tree vec_oprnd1 = NULL_TREE;
4412 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4413 tree vectype;
4414 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4415 enum tree_code code;
4416 machine_mode vec_mode;
4417 tree new_temp;
4418 optab optab;
4419 int icode;
4420 machine_mode optab_op2_mode;
4421 gimple *def_stmt;
4422 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4423 gimple *new_stmt = NULL;
4424 stmt_vec_info prev_stmt_info;
4425 int nunits_in;
4426 int nunits_out;
4427 tree vectype_out;
4428 tree op1_vectype;
4429 int ncopies;
4430 int j, i;
4431 vec<tree> vec_oprnds0 = vNULL;
4432 vec<tree> vec_oprnds1 = vNULL;
4433 tree vop0, vop1;
4434 unsigned int k;
4435 bool scalar_shift_arg = true;
4436 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4437 vec_info *vinfo = stmt_info->vinfo;
4438 int vf;
4440 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4441 return false;
4443 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4444 && ! vec_stmt)
4445 return false;
4447 /* Is STMT a vectorizable binary/unary operation? */
4448 if (!is_gimple_assign (stmt))
4449 return false;
4451 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4452 return false;
4454 code = gimple_assign_rhs_code (stmt);
4456 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4457 || code == RROTATE_EXPR))
4458 return false;
4460 scalar_dest = gimple_assign_lhs (stmt);
4461 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4462 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4463 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4465 if (dump_enabled_p ())
4466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4467 "bit-precision shifts not supported.\n");
4468 return false;
4471 op0 = gimple_assign_rhs1 (stmt);
4472 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4474 if (dump_enabled_p ())
4475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4476 "use not simple.\n");
4477 return false;
4479 /* If op0 is an external or constant def use a vector type with
4480 the same size as the output vector type. */
4481 if (!vectype)
4482 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4483 if (vec_stmt)
4484 gcc_assert (vectype);
4485 if (!vectype)
4487 if (dump_enabled_p ())
4488 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4489 "no vectype for scalar type\n");
4490 return false;
4493 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4494 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4495 if (nunits_out != nunits_in)
4496 return false;
4498 op1 = gimple_assign_rhs2 (stmt);
4499 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4501 if (dump_enabled_p ())
4502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4503 "use not simple.\n");
4504 return false;
4507 if (loop_vinfo)
4508 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4509 else
4510 vf = 1;
4512 /* Multiple types in SLP are handled by creating the appropriate number of
4513 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4514 case of SLP. */
4515 if (slp_node)
4516 ncopies = 1;
4517 else
4518 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4520 gcc_assert (ncopies >= 1);
4522 /* Determine whether the shift amount is a vector, or scalar. If the
4523 shift/rotate amount is a vector, use the vector/vector shift optabs. */
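 /* Illustrative examples: in "a[i] = b[i] << c[i]" the shift amount is a
    loop-varying vector operand, while in "a[i] = b[i] << n" with N
    invariant in the loop the same scalar amount applies to every
    element.  */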
4525 if ((dt[1] == vect_internal_def
4526 || dt[1] == vect_induction_def)
4527 && !slp_node)
4528 scalar_shift_arg = false;
4529 else if (dt[1] == vect_constant_def
4530 || dt[1] == vect_external_def
4531 || dt[1] == vect_internal_def)
4533 /* In SLP, we need to check whether the shift count is the same
4534 for all statements; in a loop, if it is a constant or invariant,
4535 it is always a scalar shift. */
4536 if (slp_node)
4538 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4539 gimple *slpstmt;
4541 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4542 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4543 scalar_shift_arg = false;
4546 /* If the shift amount is computed by a pattern stmt we cannot
4547 use the scalar amount directly, thus give up and use a vector
4548 shift. */
4549 if (dt[1] == vect_internal_def)
4551 gimple *def = SSA_NAME_DEF_STMT (op1);
4552 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4553 scalar_shift_arg = false;
4556 else
4558 if (dump_enabled_p ())
4559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4560 "operand mode requires invariant argument.\n");
4561 return false;
4564 /* Vector shifted by vector. */
4565 if (!scalar_shift_arg)
4567 optab = optab_for_tree_code (code, vectype, optab_vector);
4568 if (dump_enabled_p ())
4569 dump_printf_loc (MSG_NOTE, vect_location,
4570 "vector/vector shift/rotate found.\n");
4572 if (!op1_vectype)
4573 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4574 if (op1_vectype == NULL_TREE
4575 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4577 if (dump_enabled_p ())
4578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4579 "unusable type for last operand in"
4580 " vector/vector shift/rotate.\n");
4581 return false;
4584 /* See if the machine has a vector shifted by scalar insn and if not
4585 then see if it has a vector shifted by vector insn. */
4586 else
4588 optab = optab_for_tree_code (code, vectype, optab_scalar);
4589 if (optab
4590 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4592 if (dump_enabled_p ())
4593 dump_printf_loc (MSG_NOTE, vect_location,
4594 "vector/scalar shift/rotate found.\n");
4596 else
4598 optab = optab_for_tree_code (code, vectype, optab_vector);
4599 if (optab
4600 && (optab_handler (optab, TYPE_MODE (vectype))
4601 != CODE_FOR_nothing))
4603 scalar_shift_arg = false;
4605 if (dump_enabled_p ())
4606 dump_printf_loc (MSG_NOTE, vect_location,
4607 "vector/vector shift/rotate found.\n");
4609 /* Unlike the other binary operators, shifts/rotates have
4610 the rhs being int, instead of the same type as the lhs,
4611 so make sure the scalar is the right type if we are
4612 dealing with vectors of long long/long/short/char. */
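 /* Illustrative example: for "long long v = ...; v <<= n" with an int N,
    the shift amount is converted to long long here so that it can be
    broadcast into a vector of the element type.  */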
4613 if (dt[1] == vect_constant_def)
4614 op1 = fold_convert (TREE_TYPE (vectype), op1);
4615 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4616 TREE_TYPE (op1)))
4618 if (slp_node
4619 && TYPE_MODE (TREE_TYPE (vectype))
4620 != TYPE_MODE (TREE_TYPE (op1)))
4622 if (dump_enabled_p ())
4623 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4624 "unusable type for last operand in"
4625 " vector/vector shift/rotate.\n");
4626 return false;
4628 if (vec_stmt && !slp_node)
4630 op1 = fold_convert (TREE_TYPE (vectype), op1);
4631 op1 = vect_init_vector (stmt, op1,
4632 TREE_TYPE (vectype), NULL);
4639 /* Supportable by target? */
4640 if (!optab)
4642 if (dump_enabled_p ())
4643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4644 "no optab.\n");
4645 return false;
4647 vec_mode = TYPE_MODE (vectype);
4648 icode = (int) optab_handler (optab, vec_mode);
4649 if (icode == CODE_FOR_nothing)
4651 if (dump_enabled_p ())
4652 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4653 "op not supported by target.\n");
4654 /* Check only during analysis. */
4655 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4656 || (vf < vect_min_worthwhile_factor (code)
4657 && !vec_stmt))
4658 return false;
4659 if (dump_enabled_p ())
4660 dump_printf_loc (MSG_NOTE, vect_location,
4661 "proceeding using word mode.\n");
4664 /* Worthwhile without SIMD support? Check only during analysis. */
4665 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4666 && vf < vect_min_worthwhile_factor (code)
4667 && !vec_stmt)
4669 if (dump_enabled_p ())
4670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4671 "not worthwhile without SIMD support.\n");
4672 return false;
4675 if (!vec_stmt) /* transformation not required. */
4677 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4678 if (dump_enabled_p ())
4679 dump_printf_loc (MSG_NOTE, vect_location,
4680 "=== vectorizable_shift ===\n");
4681 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4682 return true;
4685 /** Transform. **/
4687 if (dump_enabled_p ())
4688 dump_printf_loc (MSG_NOTE, vect_location,
4689 "transform binary/unary operation.\n");
4691 /* Handle def. */
4692 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4694 prev_stmt_info = NULL;
4695 for (j = 0; j < ncopies; j++)
4697 /* Handle uses. */
4698 if (j == 0)
4700 if (scalar_shift_arg)
4702 /* Vector shl and shr insn patterns can be defined with scalar
4703 operand 2 (shift operand). In this case, use constant or loop
4704 invariant op1 directly, without extending it to vector mode
4705 first. */
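 /* Illustrative example: on targets whose vector shift insns take a
    single scalar count applied to all elements, OPTAB_OP2_MODE below is
    not a vector mode and the original scalar OP1 can be used as-is.  */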
4706 optab_op2_mode = insn_data[icode].operand[2].mode;
4707 if (!VECTOR_MODE_P (optab_op2_mode))
4709 if (dump_enabled_p ())
4710 dump_printf_loc (MSG_NOTE, vect_location,
4711 "operand 1 using scalar mode.\n");
4712 vec_oprnd1 = op1;
4713 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4714 vec_oprnds1.quick_push (vec_oprnd1);
4715 if (slp_node)
4717 /* Store vec_oprnd1 for every vector stmt to be created
4718 for SLP_NODE. We check during the analysis that all
4719 the shift arguments are the same.
4720 TODO: Allow different constants for different vector
4721 stmts generated for an SLP instance. */
4722 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4723 vec_oprnds1.quick_push (vec_oprnd1);
4728 /* vec_oprnd1 is available if operand 1 should be of a scalar type
4729 (a special case for certain kinds of vector shifts); otherwise,
4730 operand 1 should be of a vector type (the usual case). */
4731 if (vec_oprnd1)
4732 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4733 slp_node, -1);
4734 else
4735 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4736 slp_node, -1);
4738 else
4739 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4741 /* Arguments are ready. Create the new vector stmt. */
4742 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4744 vop1 = vec_oprnds1[i];
4745 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4746 new_temp = make_ssa_name (vec_dest, new_stmt);
4747 gimple_assign_set_lhs (new_stmt, new_temp);
4748 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4749 if (slp_node)
4750 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4753 if (slp_node)
4754 continue;
4756 if (j == 0)
4757 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4758 else
4759 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4760 prev_stmt_info = vinfo_for_stmt (new_stmt);
4763 vec_oprnds0.release ();
4764 vec_oprnds1.release ();
4766 return true;
4770 /* Function vectorizable_operation.
4772 Check if STMT performs a binary, unary or ternary operation that can
4773 be vectorized.
4774 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4775 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4776 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4778 static bool
4779 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
4780 gimple **vec_stmt, slp_tree slp_node)
4782 tree vec_dest;
4783 tree scalar_dest;
4784 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4785 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4786 tree vectype;
4787 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4788 enum tree_code code;
4789 machine_mode vec_mode;
4790 tree new_temp;
4791 int op_type;
4792 optab optab;
4793 bool target_support_p;
4794 gimple *def_stmt;
4795 enum vect_def_type dt[3]
4796 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4797 gimple *new_stmt = NULL;
4798 stmt_vec_info prev_stmt_info;
4799 int nunits_in;
4800 int nunits_out;
4801 tree vectype_out;
4802 int ncopies;
4803 int j, i;
4804 vec<tree> vec_oprnds0 = vNULL;
4805 vec<tree> vec_oprnds1 = vNULL;
4806 vec<tree> vec_oprnds2 = vNULL;
4807 tree vop0, vop1, vop2;
4808 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4809 vec_info *vinfo = stmt_info->vinfo;
4810 int vf;
4812 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4813 return false;
4815 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4816 && ! vec_stmt)
4817 return false;
4819 /* Is STMT a vectorizable binary/unary operation? */
4820 if (!is_gimple_assign (stmt))
4821 return false;
4823 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4824 return false;
4826 code = gimple_assign_rhs_code (stmt);
4828 /* For pointer addition, we should use the normal plus for
4829 the vector addition. */
4830 if (code == POINTER_PLUS_EXPR)
4831 code = PLUS_EXPR;
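 /* Illustrative example: "q = p + 4" on pointers is a POINTER_PLUS_EXPR in
    GIMPLE; vectorizing it simply adds the (vectorized) offset to the
    vector of pointers, so plain PLUS_EXPR semantics are what we want.  */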
4833 /* Support only unary or binary operations. */
4834 op_type = TREE_CODE_LENGTH (code);
4835 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4837 if (dump_enabled_p ())
4838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4839 "num. args = %d (not unary/binary/ternary op).\n",
4840 op_type);
4841 return false;
4844 scalar_dest = gimple_assign_lhs (stmt);
4845 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4847 /* Most operations cannot handle bit-precision types without extra
4848 truncations. */
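 /* Illustrative example: an addition on a type whose precision is smaller
    than its machine mode (e.g. a bit-field type) would need an extra
    truncation after every vector operation, which we do not model.
    BIT_AND/BIT_IOR/BIT_XOR never set bits outside the operands'
    precision, hence the exception.  */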
4849 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4850 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4851 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4852 /* Exceptions are bitwise binary operations. */
4853 && code != BIT_IOR_EXPR
4854 && code != BIT_XOR_EXPR
4855 && code != BIT_AND_EXPR)
4857 if (dump_enabled_p ())
4858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4859 "bit-precision arithmetic not supported.\n");
4860 return false;
4863 op0 = gimple_assign_rhs1 (stmt);
4864 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4866 if (dump_enabled_p ())
4867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4868 "use not simple.\n");
4869 return false;
4871 /* If op0 is an external or constant def use a vector type with
4872 the same size as the output vector type. */
4873 if (!vectype)
4875 /* For a boolean type we cannot determine the vectype from an
4876 invariant value (we don't know whether it is a vector
4877 of booleans or a vector of integers). We use the output
4878 vectype because operations on booleans don't change
4879 the type. */
4880 if (TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
4882 if (TREE_CODE (TREE_TYPE (scalar_dest)) != BOOLEAN_TYPE)
4884 if (dump_enabled_p ())
4885 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4886 "not supported operation on bool value.\n");
4887 return false;
4889 vectype = vectype_out;
4891 else
4892 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4894 if (vec_stmt)
4895 gcc_assert (vectype);
4896 if (!vectype)
4898 if (dump_enabled_p ())
4900 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4901 "no vectype for scalar type ");
4902 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4903 TREE_TYPE (op0));
4904 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4907 return false;
4910 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4911 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4912 if (nunits_out != nunits_in)
4913 return false;
4915 if (op_type == binary_op || op_type == ternary_op)
4917 op1 = gimple_assign_rhs2 (stmt);
4918 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
4920 if (dump_enabled_p ())
4921 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4922 "use not simple.\n");
4923 return false;
4926 if (op_type == ternary_op)
4928 op2 = gimple_assign_rhs3 (stmt);
4929 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
4931 if (dump_enabled_p ())
4932 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4933 "use not simple.\n");
4934 return false;
4938 if (loop_vinfo)
4939 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4940 else
4941 vf = 1;
4943 /* Multiple types in SLP are handled by creating the appropriate number of
4944 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4945 case of SLP. */
4946 if (slp_node)
4947 ncopies = 1;
4948 else
4949 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4951 gcc_assert (ncopies >= 1);
4953 /* Shifts are handled in vectorizable_shift (). */
4954 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4955 || code == RROTATE_EXPR)
4956 return false;
4958 /* Supportable by target? */
4960 vec_mode = TYPE_MODE (vectype);
4961 if (code == MULT_HIGHPART_EXPR)
4962 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
4963 else
4965 optab = optab_for_tree_code (code, vectype, optab_default);
4966 if (!optab)
4968 if (dump_enabled_p ())
4969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4970 "no optab.\n");
4971 return false;
4973 target_support_p = (optab_handler (optab, vec_mode)
4974 != CODE_FOR_nothing);
4977 if (!target_support_p)
4979 if (dump_enabled_p ())
4980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4981 "op not supported by target.\n");
4982 /* Check only during analysis. */
4983 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4984 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4985 return false;
4986 if (dump_enabled_p ())
4987 dump_printf_loc (MSG_NOTE, vect_location,
4988 "proceeding using word mode.\n");
4991 /* Worthwhile without SIMD support? Check only during analysis. */
4992 if (!VECTOR_MODE_P (vec_mode)
4993 && !vec_stmt
4994 && vf < vect_min_worthwhile_factor (code))
4996 if (dump_enabled_p ())
4997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4998 "not worthwhile without SIMD support.\n");
4999 return false;
5002 if (!vec_stmt) /* transformation not required. */
5004 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5005 if (dump_enabled_p ())
5006 dump_printf_loc (MSG_NOTE, vect_location,
5007 "=== vectorizable_operation ===\n");
5008 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5009 return true;
5012 /** Transform. **/
5014 if (dump_enabled_p ())
5015 dump_printf_loc (MSG_NOTE, vect_location,
5016 "transform binary/unary operation.\n");
5018 /* Handle def. */
5019 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5021 /* In case the vectorization factor (VF) is bigger than the number
5022 of elements that we can fit in a vectype (nunits), we have to generate
5023 more than one vector stmt, i.e., we need to "unroll" the
5024 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5025 from one copy of the vector stmt to the next, in the field
5026 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5027 stages to find the correct vector defs to be used when vectorizing
5028 stmts that use the defs of the current stmt. The example below
5029 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5030 we need to create 4 vectorized stmts):
5032 before vectorization:
5033 RELATED_STMT VEC_STMT
5034 S1: x = memref - -
5035 S2: z = x + 1 - -
5037 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5038 there):
5039 RELATED_STMT VEC_STMT
5040 VS1_0: vx0 = memref0 VS1_1 -
5041 VS1_1: vx1 = memref1 VS1_2 -
5042 VS1_2: vx2 = memref2 VS1_3 -
5043 VS1_3: vx3 = memref3 - -
5044 S1: x = load - VS1_0
5045 S2: z = x + 1 - -
5047 step2: vectorize stmt S2 (done here):
5048 To vectorize stmt S2 we first need to find the relevant vector
5049 def for the first operand 'x'. This is, as usual, obtained from
5050 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5051 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5052 relevant vector def 'vx0'. Having found 'vx0' we can generate
5053 the vector stmt VS2_0, and as usual, record it in the
5054 STMT_VINFO_VEC_STMT of stmt S2.
5055 When creating the second copy (VS2_1), we obtain the relevant vector
5056 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5057 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5058 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5059 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5060 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5061 chain of stmts and pointers:
5062 RELATED_STMT VEC_STMT
5063 VS1_0: vx0 = memref0 VS1_1 -
5064 VS1_1: vx1 = memref1 VS1_2 -
5065 VS1_2: vx2 = memref2 VS1_3 -
5066 VS1_3: vx3 = memref3 - -
5067 S1: x = load - VS1_0
5068 VS2_0: vz0 = vx0 + v1 VS2_1 -
5069 VS2_1: vz1 = vx1 + v1 VS2_2 -
5070 VS2_2: vz2 = vx2 + v1 VS2_3 -
5071 VS2_3: vz3 = vx3 + v1 - -
5072 S2: z = x + 1 - VS2_0 */
5074 prev_stmt_info = NULL;
5075 for (j = 0; j < ncopies; j++)
5077 /* Handle uses. */
5078 if (j == 0)
5080 if (op_type == binary_op || op_type == ternary_op)
5081 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5082 slp_node, -1);
5083 else
5084 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5085 slp_node, -1);
5086 if (op_type == ternary_op)
5088 vec_oprnds2.create (1);
5089 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
5090 stmt));
5093 else
5095 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5096 if (op_type == ternary_op)
5098 tree vec_oprnd = vec_oprnds2.pop ();
5099 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5100 vec_oprnd));
5104 /* Arguments are ready. Create the new vector stmt. */
5105 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5107 vop1 = ((op_type == binary_op || op_type == ternary_op)
5108 ? vec_oprnds1[i] : NULL_TREE);
5109 vop2 = ((op_type == ternary_op)
5110 ? vec_oprnds2[i] : NULL_TREE);
5111 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5112 new_temp = make_ssa_name (vec_dest, new_stmt);
5113 gimple_assign_set_lhs (new_stmt, new_temp);
5114 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5115 if (slp_node)
5116 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5119 if (slp_node)
5120 continue;
5122 if (j == 0)
5123 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5124 else
5125 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5126 prev_stmt_info = vinfo_for_stmt (new_stmt);
5129 vec_oprnds0.release ();
5130 vec_oprnds1.release ();
5131 vec_oprnds2.release ();
5133 return true;
5136 /* A helper function to ensure data reference DR's base alignment
5137 for STMT_INFO. */
5139 static void
5140 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5142 if (!dr->aux)
5143 return;
5145 if (DR_VECT_AUX (dr)->base_misaligned)
5147 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5148 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5150 if (decl_in_symtab_p (base_decl))
5151 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5152 else
5154 SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
5155 DECL_USER_ALIGN (base_decl) = 1;
5157 DR_VECT_AUX (dr)->base_misaligned = false;
5162 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
5163 reversal of the vector elements. If that is impossible to do,
5164 returns NULL. */
5166 static tree
5167 perm_mask_for_reverse (tree vectype)
5169 int i, nunits;
5170 unsigned char *sel;
5172 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5173 sel = XALLOCAVEC (unsigned char, nunits);
5175 for (i = 0; i < nunits; ++i)
5176 sel[i] = nunits - 1 - i;
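 /* E.g. for a four-element vector the selector built here is {3, 2, 1, 0}
    (illustrative; the actual length follows NUNITS).  */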
5178 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5179 return NULL_TREE;
5180 return vect_gen_perm_mask_checked (vectype, sel);
5183 /* Function vectorizable_store.
5185 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5186 can be vectorized.
5187 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5188 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5189 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5191 static bool
5192 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5193 slp_tree slp_node)
5195 tree scalar_dest;
5196 tree data_ref;
5197 tree op;
5198 tree vec_oprnd = NULL_TREE;
5199 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5200 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5201 tree elem_type;
5202 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5203 struct loop *loop = NULL;
5204 machine_mode vec_mode;
5205 tree dummy;
5206 enum dr_alignment_support alignment_support_scheme;
5207 gimple *def_stmt;
5208 enum vect_def_type dt;
5209 stmt_vec_info prev_stmt_info = NULL;
5210 tree dataref_ptr = NULL_TREE;
5211 tree dataref_offset = NULL_TREE;
5212 gimple *ptr_incr = NULL;
5213 int ncopies;
5214 int j;
5215 gimple *next_stmt, *first_stmt = NULL;
5216 bool grouped_store = false;
5217 bool store_lanes_p = false;
5218 unsigned int group_size, i;
5219 vec<tree> dr_chain = vNULL;
5220 vec<tree> oprnds = vNULL;
5221 vec<tree> result_chain = vNULL;
5222 bool inv_p;
5223 bool negative = false;
5224 tree offset = NULL_TREE;
5225 vec<tree> vec_oprnds = vNULL;
5226 bool slp = (slp_node != NULL);
5227 unsigned int vec_num;
5228 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5229 vec_info *vinfo = stmt_info->vinfo;
5230 tree aggr_type;
5231 tree scatter_base = NULL_TREE, scatter_off = NULL_TREE;
5232 tree scatter_off_vectype = NULL_TREE, scatter_decl = NULL_TREE;
5233 int scatter_scale = 1;
5234 enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
5235 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5236 gimple *new_stmt;
5237 int vf;
5239 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5240 return false;
5242 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5243 && ! vec_stmt)
5244 return false;
5246 /* Is vectorizable store? */
5248 if (!is_gimple_assign (stmt))
5249 return false;
5251 scalar_dest = gimple_assign_lhs (stmt);
5252 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5253 && is_pattern_stmt_p (stmt_info))
5254 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5255 if (TREE_CODE (scalar_dest) != ARRAY_REF
5256 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5257 && TREE_CODE (scalar_dest) != INDIRECT_REF
5258 && TREE_CODE (scalar_dest) != COMPONENT_REF
5259 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5260 && TREE_CODE (scalar_dest) != REALPART_EXPR
5261 && TREE_CODE (scalar_dest) != MEM_REF)
5262 return false;
5264 /* Cannot have hybrid store SLP -- that would mean storing to the
5265 same location twice. */
5266 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5268 gcc_assert (gimple_assign_single_p (stmt));
5270 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5271 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5273 if (loop_vinfo)
5275 loop = LOOP_VINFO_LOOP (loop_vinfo);
5276 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5278 else
5279 vf = 1;
5281 /* Multiple types in SLP are handled by creating the appropriate number of
5282 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5283 case of SLP. */
5284 if (slp)
5285 ncopies = 1;
5286 else
5287 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5289 gcc_assert (ncopies >= 1);
5291 /* FORNOW. This restriction should be relaxed. */
5292 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5294 if (dump_enabled_p ())
5295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5296 "multiple types in nested loop.\n");
5297 return false;
5300 op = gimple_assign_rhs1 (stmt);
5302 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5304 if (dump_enabled_p ())
5305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5306 "use not simple.\n");
5307 return false;
5310 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5311 return false;
5313 elem_type = TREE_TYPE (vectype);
5314 vec_mode = TYPE_MODE (vectype);
5316 /* FORNOW. In some cases we can vectorize even if the data-type is not
5317 supported (e.g. array initialization with 0). */
5318 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5319 return false;
5321 if (!STMT_VINFO_DATA_REF (stmt_info))
5322 return false;
5324 if (!STMT_VINFO_STRIDED_P (stmt_info))
5326 negative =
5327 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5328 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5329 size_zero_node) < 0;
5330 if (negative && ncopies > 1)
5332 if (dump_enabled_p ())
5333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5334 "multiple types with negative step.\n");
5335 return false;
5337 if (negative)
5339 gcc_assert (!grouped_store);
5340 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5341 if (alignment_support_scheme != dr_aligned
5342 && alignment_support_scheme != dr_unaligned_supported)
5344 if (dump_enabled_p ())
5345 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5346 "negative step but alignment required.\n");
5347 return false;
5349 if (dt != vect_constant_def
5350 && dt != vect_external_def
5351 && !perm_mask_for_reverse (vectype))
5353 if (dump_enabled_p ())
5354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5355 "negative step and reversing not supported.\n");
5356 return false;
5361 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5363 grouped_store = true;
5364 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5365 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5366 if (!slp && !STMT_VINFO_STRIDED_P (stmt_info))
5368 if (vect_store_lanes_supported (vectype, group_size))
5369 store_lanes_p = true;
5370 else if (!vect_grouped_store_supported (vectype, group_size))
5371 return false;
5374 if (first_stmt == stmt)
5376 /* STMT is the leader of the group. Check the operands of all the
5377 stmts of the group. */
5378 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5379 while (next_stmt)
5381 gcc_assert (gimple_assign_single_p (next_stmt));
5382 op = gimple_assign_rhs1 (next_stmt);
5383 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
5385 if (dump_enabled_p ())
5386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5387 "use not simple.\n");
5388 return false;
5390 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5395 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5397 gimple *def_stmt;
5398 scatter_decl = vect_check_gather_scatter (stmt, loop_vinfo, &scatter_base,
5399 &scatter_off, &scatter_scale);
5400 gcc_assert (scatter_decl);
5401 if (!vect_is_simple_use (scatter_off, vinfo, &def_stmt, &scatter_idx_dt,
5402 &scatter_off_vectype))
5404 if (dump_enabled_p ())
5405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5406 "scatter index use not simple.");
5407 return false;
5411 if (!vec_stmt) /* transformation not required. */
5413 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5414 /* The SLP costs are calculated during SLP analysis. */
5415 if (!PURE_SLP_STMT (stmt_info))
5416 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5417 NULL, NULL, NULL);
5418 return true;
5421 /** Transform. **/
5423 ensure_base_align (stmt_info, dr);
5425 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5427 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5428 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (scatter_decl));
5429 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5430 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5431 edge pe = loop_preheader_edge (loop);
5432 gimple_seq seq;
5433 basic_block new_bb;
5434 enum { NARROW, NONE, WIDEN } modifier;
5435 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (scatter_off_vectype);
5437 if (nunits == (unsigned int) scatter_off_nunits)
5438 modifier = NONE;
5439 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5441 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5442 modifier = WIDEN;
5444 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5445 sel[i] = i | nunits;
5447 perm_mask = vect_gen_perm_mask_checked (scatter_off_vectype, sel);
5448 gcc_assert (perm_mask != NULL_TREE);
5450 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5452 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5453 modifier = NARROW;
5455 for (i = 0; i < (unsigned int) nunits; ++i)
5456 sel[i] = i | scatter_off_nunits;
5458 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5459 gcc_assert (perm_mask != NULL_TREE);
5460 ncopies *= 2;
5462 else
5463 gcc_unreachable ();
5465 rettype = TREE_TYPE (TREE_TYPE (scatter_decl));
5466 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5467 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5468 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5469 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5470 scaletype = TREE_VALUE (arglist);
5472 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5473 && TREE_CODE (rettype) == VOID_TYPE);
5475 ptr = fold_convert (ptrtype, scatter_base);
5476 if (!is_gimple_min_invariant (ptr))
5478 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5479 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5480 gcc_assert (!new_bb);
5483 /* Currently we support only unconditional scatter stores,
5484 so mask should be all ones. */
5485 mask = build_int_cst (masktype, -1);
5486 mask = vect_init_vector (stmt, mask, masktype, NULL);
5488 scale = build_int_cst (scaletype, scatter_scale);
5490 prev_stmt_info = NULL;
5491 for (j = 0; j < ncopies; ++j)
5493 if (j == 0)
5495 src = vec_oprnd1
5496 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5497 op = vec_oprnd0
5498 = vect_get_vec_def_for_operand (scatter_off, stmt);
5500 else if (modifier != NONE && (j & 1))
5502 if (modifier == WIDEN)
5504 src = vec_oprnd1
5505 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5506 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5507 stmt, gsi);
5509 else if (modifier == NARROW)
5511 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5512 stmt, gsi);
5513 op = vec_oprnd0
5514 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5516 else
5517 gcc_unreachable ();
5519 else
5521 src = vec_oprnd1
5522 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5523 op = vec_oprnd0
5524 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5527 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5529 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5530 == TYPE_VECTOR_SUBPARTS (srctype));
5531 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5532 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5533 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5534 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5535 src = var;
5538 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5540 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5541 == TYPE_VECTOR_SUBPARTS (idxtype));
5542 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5543 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5544 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5545 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5546 op = var;
5549 new_stmt
5550 = gimple_build_call (scatter_decl, 5, ptr, mask, op, src, scale);
5552 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5554 if (prev_stmt_info == NULL)
5555 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5556 else
5557 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5558 prev_stmt_info = vinfo_for_stmt (new_stmt);
5560 return true;
5563 if (grouped_store)
5565 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5566 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5568 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5570 /* FORNOW */
5571 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5573 /* We vectorize all the stmts of the interleaving group when we
5574 reach the last stmt in the group. */
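 /* Illustrative example: for the interleaved group
      a[2*i] = x;  a[2*i+1] = y;
    nothing is emitted when the first member is reached; the vector
    stores (and the interleaving permutation) are generated only when the
    second, last member of the group is vectorized.  */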
5575 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5576 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5577 && !slp)
5579 *vec_stmt = NULL;
5580 return true;
5583 if (slp)
5585 grouped_store = false;
5586 /* VEC_NUM is the number of vect stmts to be created for this
5587 group. */
5588 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5589 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5590 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5591 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5592 op = gimple_assign_rhs1 (first_stmt);
5594 else
5595 /* VEC_NUM is the number of vect stmts to be created for this
5596 group. */
5597 vec_num = group_size;
5599 else
5601 first_stmt = stmt;
5602 first_dr = dr;
5603 group_size = vec_num = 1;
5606 if (dump_enabled_p ())
5607 dump_printf_loc (MSG_NOTE, vect_location,
5608 "transform store. ncopies = %d\n", ncopies);
5610 if (STMT_VINFO_STRIDED_P (stmt_info))
5612 gimple_stmt_iterator incr_gsi;
5613 bool insert_after;
5614 gimple *incr;
5615 tree offvar;
5616 tree ivstep;
5617 tree running_off;
5618 gimple_seq stmts = NULL;
5619 tree stride_base, stride_step, alias_off;
5620 tree vec_oprnd;
5621 unsigned int g;
5623 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5625 stride_base
5626 = fold_build_pointer_plus
5627 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5628 size_binop (PLUS_EXPR,
5629 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5630 convert_to_ptrofftype (DR_INIT (first_dr))));
5631 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5633 /* For a store with a loop-invariant (but other than power-of-2)
5634 stride (i.e. not a grouped access) like so:
5636 for (i = 0; i < n; i += stride)
5637 array[i] = ...;
5639 we generate a new induction variable and new stores from
5640 the components of the (vectorized) rhs:
5642 for (j = 0; ; j += VF*stride)
5643 vectemp = ...;
5644 tmp1 = vectemp[0];
5645 array[j] = tmp1;
5646 tmp2 = vectemp[1];
5647 array[j + stride] = tmp2;
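     As a purely illustrative instance (the values are assumptions, not
     taken from any particular target): with nunits == 4 each copy stores
     vectemp[0..3] to array[j], array[j + stride], array[j + 2*stride] and
     array[j + 3*stride] before j advances by VF*stride.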
5651 unsigned nstores = nunits;
5652 unsigned lnel = 1;
5653 tree ltype = elem_type;
5654 if (slp)
5656 if (group_size < nunits
5657 && nunits % group_size == 0)
5659 nstores = nunits / group_size;
5660 lnel = group_size;
5661 ltype = build_vector_type (elem_type, group_size);
5663 else if (group_size >= nunits
5664 && group_size % nunits == 0)
5666 nstores = 1;
5667 lnel = nunits;
5668 ltype = vectype;
5670 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5671 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5674 ivstep = stride_step;
5675 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5676 build_int_cst (TREE_TYPE (ivstep), vf));
5678 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5680 create_iv (stride_base, ivstep, NULL,
5681 loop, &incr_gsi, insert_after,
5682 &offvar, NULL);
5683 incr = gsi_stmt (incr_gsi);
5684 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
5686 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5687 if (stmts)
5688 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5690 prev_stmt_info = NULL;
5691 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
5692 next_stmt = first_stmt;
5693 for (g = 0; g < group_size; g++)
5695 running_off = offvar;
5696 if (g)
5698 tree size = TYPE_SIZE_UNIT (ltype);
5699 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
5700 size);
5701 tree newoff = copy_ssa_name (running_off, NULL);
5702 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5703 running_off, pos);
5704 vect_finish_stmt_generation (stmt, incr, gsi);
5705 running_off = newoff;
5707 unsigned int group_el = 0;
5708 unsigned HOST_WIDE_INT
5709 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
5710 for (j = 0; j < ncopies; j++)
5712 /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
5713 and first_stmt == stmt. */
5714 if (j == 0)
5716 if (slp)
5718 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5719 slp_node, -1);
5720 vec_oprnd = vec_oprnds[0];
5722 else
5724 gcc_assert (gimple_assign_single_p (next_stmt));
5725 op = gimple_assign_rhs1 (next_stmt);
5726 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5729 else
5731 if (slp)
5732 vec_oprnd = vec_oprnds[j];
5733 else
5735 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
5736 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5740 for (i = 0; i < nstores; i++)
5742 tree newref, newoff;
5743 gimple *incr, *assign;
5744 tree size = TYPE_SIZE (ltype);
5745 /* Extract the i'th component. */
5746 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5747 bitsize_int (i), size);
5748 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5749 size, pos);
5751 elem = force_gimple_operand_gsi (gsi, elem, true,
5752 NULL_TREE, true,
5753 GSI_SAME_STMT);
5755 tree this_off = build_int_cst (TREE_TYPE (alias_off),
5756 group_el * elsz);
5757 newref = build2 (MEM_REF, ltype,
5758 running_off, this_off);
5760 /* And store it to *running_off. */
5761 assign = gimple_build_assign (newref, elem);
5762 vect_finish_stmt_generation (stmt, assign, gsi);
5764 group_el += lnel;
5765 if (! slp
5766 || group_el == group_size)
5768 newoff = copy_ssa_name (running_off, NULL);
5769 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5770 running_off, stride_step);
5771 vect_finish_stmt_generation (stmt, incr, gsi);
5773 running_off = newoff;
5774 group_el = 0;
5776 if (g == group_size - 1
5777 && !slp)
5779 if (j == 0 && i == 0)
5780 STMT_VINFO_VEC_STMT (stmt_info)
5781 = *vec_stmt = assign;
5782 else
5783 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5784 prev_stmt_info = vinfo_for_stmt (assign);
5788 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5789 if (slp)
5790 break;
5792 return true;
5795 dr_chain.create (group_size);
5796 oprnds.create (group_size);
5798 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5799 gcc_assert (alignment_support_scheme);
5800 /* Targets with store-lane instructions must not require explicit
5801 realignment. */
5802 gcc_assert (!store_lanes_p
5803 || alignment_support_scheme == dr_aligned
5804 || alignment_support_scheme == dr_unaligned_supported);
5806 if (negative)
5807 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5809 if (store_lanes_p)
5810 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5811 else
5812 aggr_type = vectype;
5814 /* In case the vectorization factor (VF) is bigger than the number
5815 of elements that we can fit in a vectype (nunits), we have to generate
5816 more than one vector stmt - i.e. - we need to "unroll" the
5817 vector stmt by a factor of VF/nunits. For more details see the documentation in
5818 vect_get_vec_def_for_copy_stmt. */
5820 /* In case of interleaving (non-unit grouped access):
5822 S1: &base + 2 = x2
5823 S2: &base = x0
5824 S3: &base + 1 = x1
5825 S4: &base + 3 = x3
5827 We create vectorized stores starting from the base address (the access of the
5828 first stmt in the chain - S2 in the above example), when the last store stmt
5829 of the chain (S4) is reached:
5831 VS1: &base = vx2
5832 VS2: &base + vec_size*1 = vx0
5833 VS3: &base + vec_size*2 = vx1
5834 VS4: &base + vec_size*3 = vx3
5836 Then permutation statements are generated:
5838 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5839 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5842 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5843 (the order of the data-refs in the output of vect_permute_store_chain
5844 corresponds to the order of scalar stmts in the interleaving chain - see
5845 the documentation of vect_permute_store_chain()).
5847 In case of both multiple types and interleaving, above vector stores and
5848 permutation stmts are created for every copy. The result vector stmts are
5849 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5850 STMT_VINFO_RELATED_STMT for the next copies.
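     (A purely illustrative smaller case: for a group of two V4SI vectors
     vx0 = {a0,a1,a2,a3} and vx1 = {b0,b1,b2,b3}, vect_permute_store_chain
     interleaves them into {a0,b0,a1,b1} and {a2,b2,a3,b3} using the masks
     {0,4,1,5} and {2,6,3,7}.)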
5853 prev_stmt_info = NULL;
5854 for (j = 0; j < ncopies; j++)
5857 if (j == 0)
5859 if (slp)
5861 /* Get vectorized arguments for SLP_NODE. */
5862 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5863 NULL, slp_node, -1);
5865 vec_oprnd = vec_oprnds[0];
5867 else
5869 /* For interleaved stores we collect vectorized defs for all the
5870 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5871 used as an input to vect_permute_store_chain(), and OPRNDS as
5872 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5874 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5875 OPRNDS are of size 1. */
5876 next_stmt = first_stmt;
5877 for (i = 0; i < group_size; i++)
5879 /* Since gaps are not supported for interleaved stores,
5880 GROUP_SIZE is the exact number of stmts in the chain.
5881 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5882 there is no interleaving, GROUP_SIZE is 1, and only one
5883 iteration of the loop will be executed. */
5884 gcc_assert (next_stmt
5885 && gimple_assign_single_p (next_stmt));
5886 op = gimple_assign_rhs1 (next_stmt);
5888 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5889 dr_chain.quick_push (vec_oprnd);
5890 oprnds.quick_push (vec_oprnd);
5891 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5895 /* We should have caught mismatched types earlier. */
5896 gcc_assert (useless_type_conversion_p (vectype,
5897 TREE_TYPE (vec_oprnd)));
5898 bool simd_lane_access_p
5899 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5900 if (simd_lane_access_p
5901 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5902 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5903 && integer_zerop (DR_OFFSET (first_dr))
5904 && integer_zerop (DR_INIT (first_dr))
5905 && alias_sets_conflict_p (get_alias_set (aggr_type),
5906 get_alias_set (DR_REF (first_dr))))
5908 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5909 dataref_offset = build_int_cst (reference_alias_ptr_type
5910 (DR_REF (first_dr)), 0);
5911 inv_p = false;
5913 else
5914 dataref_ptr
5915 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5916 simd_lane_access_p ? loop : NULL,
5917 offset, &dummy, gsi, &ptr_incr,
5918 simd_lane_access_p, &inv_p);
5919 gcc_assert (bb_vinfo || !inv_p);
5921 else
5923 /* For interleaved stores we created vectorized defs for all the
5924 defs stored in OPRNDS in the previous iteration (previous copy).
5925 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5926 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5927 next copy.
5928 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5929 OPRNDS are of size 1. */
5930 for (i = 0; i < group_size; i++)
5932 op = oprnds[i];
5933 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
5934 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5935 dr_chain[i] = vec_oprnd;
5936 oprnds[i] = vec_oprnd;
5938 if (dataref_offset)
5939 dataref_offset
5940 = int_const_binop (PLUS_EXPR, dataref_offset,
5941 TYPE_SIZE_UNIT (aggr_type));
5942 else
5943 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5944 TYPE_SIZE_UNIT (aggr_type));
5947 if (store_lanes_p)
5949 tree vec_array;
5951 /* Combine all the vectors into an array. */
5952 vec_array = create_vector_array (vectype, vec_num);
5953 for (i = 0; i < vec_num; i++)
5955 vec_oprnd = dr_chain[i];
5956 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5959 /* Emit:
5960 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5961 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5962 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5963 gimple_call_set_lhs (new_stmt, data_ref);
5964 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5966 else
5968 new_stmt = NULL;
5969 if (grouped_store)
5971 if (j == 0)
5972 result_chain.create (group_size);
5973 /* Permute. */
5974 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5975 &result_chain);
5978 next_stmt = first_stmt;
5979 for (i = 0; i < vec_num; i++)
5981 unsigned align, misalign;
5983 if (i > 0)
5984 /* Bump the vector pointer. */
5985 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5986 stmt, NULL_TREE);
5988 if (slp)
5989 vec_oprnd = vec_oprnds[i];
5990 else if (grouped_store)
5991 /* For grouped stores vectorized defs are interleaved in
5992 vect_permute_store_chain(). */
5993 vec_oprnd = result_chain[i];
5995 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
5996 dataref_ptr,
5997 dataref_offset
5998 ? dataref_offset
5999 : build_int_cst (reference_alias_ptr_type
6000 (DR_REF (first_dr)), 0));
6001 align = TYPE_ALIGN_UNIT (vectype);
6002 if (aligned_access_p (first_dr))
6003 misalign = 0;
6004 else if (DR_MISALIGNMENT (first_dr) == -1)
6006 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6007 align = TYPE_ALIGN_UNIT (elem_type);
6008 else
6009 align = get_object_alignment (DR_REF (first_dr))
6010 / BITS_PER_UNIT;
6011 misalign = 0;
6012 TREE_TYPE (data_ref)
6013 = build_aligned_type (TREE_TYPE (data_ref),
6014 align * BITS_PER_UNIT);
6016 else
6018 TREE_TYPE (data_ref)
6019 = build_aligned_type (TREE_TYPE (data_ref),
6020 TYPE_ALIGN (elem_type));
6021 misalign = DR_MISALIGNMENT (first_dr);
6023 if (dataref_offset == NULL_TREE
6024 && TREE_CODE (dataref_ptr) == SSA_NAME)
6025 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6026 misalign);
6028 if (negative
6029 && dt != vect_constant_def
6030 && dt != vect_external_def)
6032 tree perm_mask = perm_mask_for_reverse (vectype);
6033 tree perm_dest
6034 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6035 vectype);
6036 tree new_temp = make_ssa_name (perm_dest);
6038 /* Generate the permute statement. */
6039 gimple *perm_stmt
6040 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6041 vec_oprnd, perm_mask);
6042 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6044 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6045 vec_oprnd = new_temp;
6048 /* Arguments are ready. Create the new vector stmt. */
6049 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6050 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6052 if (slp)
6053 continue;
6055 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6056 if (!next_stmt)
6057 break;
6060 if (!slp)
6062 if (j == 0)
6063 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6064 else
6065 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6066 prev_stmt_info = vinfo_for_stmt (new_stmt);
6070 dr_chain.release ();
6071 oprnds.release ();
6072 result_chain.release ();
6073 vec_oprnds.release ();
6075 return true;
6078 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6079 VECTOR_CST mask. No checks are made that the target platform supports the
6080 mask, so callers may wish to test can_vec_perm_p separately, or use
6081 vect_gen_perm_mask_checked. */
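/* For example (purely illustrative values): for a four-element VECTYPE and
   SEL = {2, 3, 0, 1} the result is the VECTOR_CST {2, 3, 0, 1} built in the
   integer vector type that matches VECTYPE's element mode.  */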
6083 tree
6084 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6086 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6087 int i, nunits;
6089 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6091 mask_elt_type = lang_hooks.types.type_for_mode
6092 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6093 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6095 mask_elts = XALLOCAVEC (tree, nunits);
6096 for (i = nunits - 1; i >= 0; i--)
6097 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6098 mask_vec = build_vector (mask_type, mask_elts);
6100 return mask_vec;
6103 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6104 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6106 tree
6107 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6109 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6110 return vect_gen_perm_mask_any (vectype, sel);
6113 /* Given vector variables X and Y that were generated for the scalar
6114 STMT, generate instructions to permute the vector elements of X and Y
6115 using permutation mask MASK_VEC, insert them at *GSI and return the
6116 permuted vector variable. */
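/* For instance, the negative-step path in vectorizable_load calls this with
   X == Y and the mask from perm_mask_for_reverse to reverse a vector, and the
   gather code above uses it to merge or shuffle offset vectors.  */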
6118 static tree
6119 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6120 gimple_stmt_iterator *gsi)
6122 tree vectype = TREE_TYPE (x);
6123 tree perm_dest, data_ref;
6124 gimple *perm_stmt;
6126 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6127 data_ref = make_ssa_name (perm_dest);
6129 /* Generate the permute statement. */
6130 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6131 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6133 return data_ref;
6136 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6137 inserting them on the loop's preheader edge. Returns true if we
6138 were successful in doing so (and thus STMT can then be moved),
6139 otherwise returns false. */
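/* An illustrative case (the SSA names are made up): for an invariant load
   a[k_5] where k_5 = x_1 + y_2 is defined inside LOOP but x_1 and y_2 are
   defined outside of it, the definition of k_5 can be moved to the preheader;
   if k_5 were defined by a PHI, or from other values computed in LOOP, we
   give up.  */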
6141 static bool
6142 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6144 ssa_op_iter i;
6145 tree op;
6146 bool any = false;
6148 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6150 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6151 if (!gimple_nop_p (def_stmt)
6152 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6154 /* Make sure we don't need to recurse. While we could do
6155 so in simple cases, when there are more complex use webs
6156 we don't have an easy way to preserve stmt order to fulfil
6157 dependencies within them. */
6158 tree op2;
6159 ssa_op_iter i2;
6160 if (gimple_code (def_stmt) == GIMPLE_PHI)
6161 return false;
6162 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6164 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6165 if (!gimple_nop_p (def_stmt2)
6166 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6167 return false;
6169 any = true;
6173 if (!any)
6174 return true;
6176 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6178 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6179 if (!gimple_nop_p (def_stmt)
6180 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6182 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6183 gsi_remove (&gsi, false);
6184 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6188 return true;
6191 /* vectorizable_load.
6193 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6194 can be vectorized.
6195 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6196 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6197 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6199 static bool
6200 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6201 slp_tree slp_node, slp_instance slp_node_instance)
6203 tree scalar_dest;
6204 tree vec_dest = NULL;
6205 tree data_ref = NULL;
6206 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6207 stmt_vec_info prev_stmt_info;
6208 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6209 struct loop *loop = NULL;
6210 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6211 bool nested_in_vect_loop = false;
6212 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6213 tree elem_type;
6214 tree new_temp;
6215 machine_mode mode;
6216 gimple *new_stmt = NULL;
6217 tree dummy;
6218 enum dr_alignment_support alignment_support_scheme;
6219 tree dataref_ptr = NULL_TREE;
6220 tree dataref_offset = NULL_TREE;
6221 gimple *ptr_incr = NULL;
6222 int ncopies;
6223 int i, j, group_size = -1, group_gap_adj;
6224 tree msq = NULL_TREE, lsq;
6225 tree offset = NULL_TREE;
6226 tree byte_offset = NULL_TREE;
6227 tree realignment_token = NULL_TREE;
6228 gphi *phi = NULL;
6229 vec<tree> dr_chain = vNULL;
6230 bool grouped_load = false;
6231 bool load_lanes_p = false;
6232 gimple *first_stmt;
6233 gimple *first_stmt_for_drptr = NULL;
6234 bool inv_p;
6235 bool negative = false;
6236 bool compute_in_loop = false;
6237 struct loop *at_loop;
6238 int vec_num;
6239 bool slp = (slp_node != NULL);
6240 bool slp_perm = false;
6241 enum tree_code code;
6242 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6243 int vf;
6244 tree aggr_type;
6245 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
6246 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
6247 int gather_scale = 1;
6248 enum vect_def_type gather_dt = vect_unknown_def_type;
6249 vec_info *vinfo = stmt_info->vinfo;
6251 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6252 return false;
6254 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6255 && ! vec_stmt)
6256 return false;
6258 /* Is vectorizable load? */
6259 if (!is_gimple_assign (stmt))
6260 return false;
6262 scalar_dest = gimple_assign_lhs (stmt);
6263 if (TREE_CODE (scalar_dest) != SSA_NAME)
6264 return false;
6266 code = gimple_assign_rhs_code (stmt);
6267 if (code != ARRAY_REF
6268 && code != BIT_FIELD_REF
6269 && code != INDIRECT_REF
6270 && code != COMPONENT_REF
6271 && code != IMAGPART_EXPR
6272 && code != REALPART_EXPR
6273 && code != MEM_REF
6274 && TREE_CODE_CLASS (code) != tcc_declaration)
6275 return false;
6277 if (!STMT_VINFO_DATA_REF (stmt_info))
6278 return false;
6280 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6281 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6283 if (loop_vinfo)
6285 loop = LOOP_VINFO_LOOP (loop_vinfo);
6286 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6287 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6289 else
6290 vf = 1;
6292 /* Multiple types in SLP are handled by creating the appropriate number of
6293 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6294 case of SLP. */
6295 if (slp)
6296 ncopies = 1;
6297 else
6298 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6300 gcc_assert (ncopies >= 1);
6302 /* FORNOW. This restriction should be relaxed. */
6303 if (nested_in_vect_loop && ncopies > 1)
6305 if (dump_enabled_p ())
6306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6307 "multiple types in nested loop.\n");
6308 return false;
6311 /* Invalidate assumptions made by dependence analysis when vectorization
6312 on the unrolled body effectively re-orders stmts. */
6313 if (ncopies > 1
6314 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6315 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6316 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6318 if (dump_enabled_p ())
6319 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6320 "cannot perform implicit CSE when unrolling "
6321 "with negative dependence distance\n");
6322 return false;
6325 elem_type = TREE_TYPE (vectype);
6326 mode = TYPE_MODE (vectype);
6328 /* FORNOW. In some cases we can vectorize even if the data type is not supported
6329 (e.g. data copies).
6330 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6332 if (dump_enabled_p ())
6333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6334 "Aligned load, but unsupported type.\n");
6335 return false;
6338 /* Check if the load is a part of an interleaving chain. */
6339 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6341 grouped_load = true;
6342 /* FORNOW */
6343 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6345 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6346 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6348 if (!slp && !STMT_VINFO_STRIDED_P (stmt_info))
6350 if (vect_load_lanes_supported (vectype, group_size))
6351 load_lanes_p = true;
6352 else if (!vect_grouped_load_supported (vectype, group_size))
6353 return false;
6356 /* If this is single-element interleaving with an element distance
6357 that leaves unused vector loads around, punt - we would at least create
6358 very sub-optimal code in that case (and blow up memory,
6359 see PR65518). */
6360 if (first_stmt == stmt
6361 && !GROUP_NEXT_ELEMENT (stmt_info))
6363 if (GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
6365 if (dump_enabled_p ())
6366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6367 "single-element interleaving not supported "
6368 "for not adjacent vector loads\n");
6369 return false;
6372 /* Single-element interleaving requires peeling for gaps. */
6373 gcc_assert (GROUP_GAP (stmt_info));
6376 /* If there is a gap at the end of the group or the group size cannot
6377 be made a multiple of the vector element count then we access excess
6378 elements in the last iteration and thus need to peel that off. */
6379 if (loop_vinfo
6380 && ! STMT_VINFO_STRIDED_P (stmt_info)
6381 && (GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0
6382 || (!slp && !load_lanes_p && vf % group_size != 0)))
6384 if (dump_enabled_p ())
6385 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6386 "Data access with gaps requires scalar "
6387 "epilogue loop\n");
6388 if (loop->inner)
6390 if (dump_enabled_p ())
6391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6392 "Peeling for outer loop is not supported\n");
6393 return false;
6396 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
6399 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6400 slp_perm = true;
6402 /* ??? The following is overly pessimistic (as well as the loop
6403 case above) in the case we can statically determine the excess
6404 elements loaded are within the bounds of a decl that is accessed.
6405 Likewise, using masked loads is a possibility for BB vectorization. */
6406 if (bb_vinfo && slp_perm && group_size % nunits != 0)
6408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6409 "BB vectorization with gaps at the end of a load "
6410 "is not supported\n");
6411 return false;
6414 /* Invalidate assumptions made by dependence analysis when vectorization
6415 on the unrolled body effectively re-orders stmts. */
6416 if (!PURE_SLP_STMT (stmt_info)
6417 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6418 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6419 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6421 if (dump_enabled_p ())
6422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6423 "cannot perform implicit CSE when performing "
6424 "group loads with negative dependence distance\n");
6425 return false;
6428 /* Similarly, when the stmt is a load that is both part of an SLP
6429 instance and a loop-vectorized stmt via the same-dr mechanism,
6430 we have to give up. */
6431 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6432 && (STMT_SLP_TYPE (stmt_info)
6433 != STMT_SLP_TYPE (vinfo_for_stmt
6434 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6436 if (dump_enabled_p ())
6437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6438 "conflicting SLP types for CSEd load\n");
6439 return false;
6444 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6446 gimple *def_stmt;
6447 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
6448 &gather_off, &gather_scale);
6449 gcc_assert (gather_decl);
6450 if (!vect_is_simple_use (gather_off, vinfo, &def_stmt, &gather_dt,
6451 &gather_off_vectype))
6453 if (dump_enabled_p ())
6454 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6455 "gather index use not simple.\n");
6456 return false;
6459 else if (STMT_VINFO_STRIDED_P (stmt_info))
6461 else
6463 negative = tree_int_cst_compare (nested_in_vect_loop
6464 ? STMT_VINFO_DR_STEP (stmt_info)
6465 : DR_STEP (dr),
6466 size_zero_node) < 0;
6467 if (negative && ncopies > 1)
6469 if (dump_enabled_p ())
6470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6471 "multiple types with negative step.\n");
6472 return false;
6475 if (negative)
6477 if (grouped_load)
6479 if (dump_enabled_p ())
6480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6481 "negative step for group load not supported"
6482 "\n");
6483 return false;
6485 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6486 if (alignment_support_scheme != dr_aligned
6487 && alignment_support_scheme != dr_unaligned_supported)
6489 if (dump_enabled_p ())
6490 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6491 "negative step but alignment required.\n");
6492 return false;
6494 if (!perm_mask_for_reverse (vectype))
6496 if (dump_enabled_p ())
6497 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6498 "negative step and reversing not supported."
6499 "\n");
6500 return false;
6505 if (!vec_stmt) /* transformation not required. */
6507 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6508 /* The SLP costs are calculated during SLP analysis. */
6509 if (!PURE_SLP_STMT (stmt_info))
6510 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6511 NULL, NULL, NULL);
6512 return true;
6515 if (dump_enabled_p ())
6516 dump_printf_loc (MSG_NOTE, vect_location,
6517 "transform load. ncopies = %d\n", ncopies);
6519 /** Transform. **/
6521 ensure_base_align (stmt_info, dr);
6523 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6525 tree vec_oprnd0 = NULL_TREE, op;
6526 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6527 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6528 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6529 edge pe = loop_preheader_edge (loop);
6530 gimple_seq seq;
6531 basic_block new_bb;
6532 enum { NARROW, NONE, WIDEN } modifier;
6533 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6535 if (nunits == gather_off_nunits)
6536 modifier = NONE;
6537 else if (nunits == gather_off_nunits / 2)
6539 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6540 modifier = WIDEN;
6542 for (i = 0; i < gather_off_nunits; ++i)
6543 sel[i] = i | nunits;
6545 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6547 else if (nunits == gather_off_nunits * 2)
6549 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6550 modifier = NARROW;
6552 for (i = 0; i < nunits; ++i)
6553 sel[i] = i < gather_off_nunits
6554 ? i : i + nunits - gather_off_nunits;
6556 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6557 ncopies *= 2;
6559 else
6560 gcc_unreachable ();
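/* Illustrative example (the types are assumptions, not tied to a particular
   target): for a gather with VECTYPE V4DF and a V8SI offset vector, MODIFIER
   is WIDEN and PERM_MASK is {4, 5, 6, 7, 4, 5, 6, 7}; every odd copy permutes
   the upper four offsets into the low positions before issuing the gather.  */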
6562 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6563 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6564 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6565 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6566 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6567 scaletype = TREE_VALUE (arglist);
6568 gcc_checking_assert (types_compatible_p (srctype, rettype));
6570 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6572 ptr = fold_convert (ptrtype, gather_base);
6573 if (!is_gimple_min_invariant (ptr))
6575 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6576 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6577 gcc_assert (!new_bb);
6580 /* Currently we support only unconditional gather loads,
6581 so mask should be all ones. */
6582 if (TREE_CODE (masktype) == INTEGER_TYPE)
6583 mask = build_int_cst (masktype, -1);
6584 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6586 mask = build_int_cst (TREE_TYPE (masktype), -1);
6587 mask = build_vector_from_val (masktype, mask);
6588 mask = vect_init_vector (stmt, mask, masktype, NULL);
6590 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6592 REAL_VALUE_TYPE r;
6593 long tmp[6];
6594 for (j = 0; j < 6; ++j)
6595 tmp[j] = -1;
6596 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6597 mask = build_real (TREE_TYPE (masktype), r);
6598 mask = build_vector_from_val (masktype, mask);
6599 mask = vect_init_vector (stmt, mask, masktype, NULL);
6601 else
6602 gcc_unreachable ();
6604 scale = build_int_cst (scaletype, gather_scale);
6606 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6607 merge = build_int_cst (TREE_TYPE (rettype), 0);
6608 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6610 REAL_VALUE_TYPE r;
6611 long tmp[6];
6612 for (j = 0; j < 6; ++j)
6613 tmp[j] = 0;
6614 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6615 merge = build_real (TREE_TYPE (rettype), r);
6617 else
6618 gcc_unreachable ();
6619 merge = build_vector_from_val (rettype, merge);
6620 merge = vect_init_vector (stmt, merge, rettype, NULL);
6622 prev_stmt_info = NULL;
6623 for (j = 0; j < ncopies; ++j)
6625 if (modifier == WIDEN && (j & 1))
6626 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6627 perm_mask, stmt, gsi);
6628 else if (j == 0)
6629 op = vec_oprnd0
6630 = vect_get_vec_def_for_operand (gather_off, stmt);
6631 else
6632 op = vec_oprnd0
6633 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6635 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6637 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6638 == TYPE_VECTOR_SUBPARTS (idxtype));
6639 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6640 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6641 new_stmt
6642 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6643 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6644 op = var;
6647 new_stmt
6648 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6650 if (!useless_type_conversion_p (vectype, rettype))
6652 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6653 == TYPE_VECTOR_SUBPARTS (rettype));
6654 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6655 gimple_call_set_lhs (new_stmt, op);
6656 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6657 var = make_ssa_name (vec_dest);
6658 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6659 new_stmt
6660 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6662 else
6664 var = make_ssa_name (vec_dest, new_stmt);
6665 gimple_call_set_lhs (new_stmt, var);
6668 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6670 if (modifier == NARROW)
6672 if ((j & 1) == 0)
6674 prev_res = var;
6675 continue;
6677 var = permute_vec_elements (prev_res, var,
6678 perm_mask, stmt, gsi);
6679 new_stmt = SSA_NAME_DEF_STMT (var);
6682 if (prev_stmt_info == NULL)
6683 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6684 else
6685 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6686 prev_stmt_info = vinfo_for_stmt (new_stmt);
6688 return true;
6690 else if (STMT_VINFO_STRIDED_P (stmt_info))
6692 gimple_stmt_iterator incr_gsi;
6693 bool insert_after;
6694 gimple *incr;
6695 tree offvar;
6696 tree ivstep;
6697 tree running_off;
6698 vec<constructor_elt, va_gc> *v = NULL;
6699 gimple_seq stmts = NULL;
6700 tree stride_base, stride_step, alias_off;
6702 gcc_assert (!nested_in_vect_loop);
6704 if (slp && grouped_load)
6705 first_dr = STMT_VINFO_DATA_REF
6706 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
6707 else
6708 first_dr = dr;
6710 stride_base
6711 = fold_build_pointer_plus
6712 (DR_BASE_ADDRESS (first_dr),
6713 size_binop (PLUS_EXPR,
6714 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6715 convert_to_ptrofftype (DR_INIT (first_dr))));
6716 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6718 /* For a load with a loop-invariant (but other than power-of-2)
6719 stride (i.e. not a grouped access) like so:
6721 for (i = 0; i < n; i += stride)
6722 ... = array[i];
6724 we generate a new induction variable and new accesses to
6725 form a new vector (or vectors, depending on ncopies):
6727 for (j = 0; ; j += VF*stride)
6728 tmp1 = array[j];
6729 tmp2 = array[j + stride];
6731 vectemp = {tmp1, tmp2, ...}
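     As a purely illustrative instance with nunits == 4, the scalar loads
     array[j], array[j + stride], array[j + 2*stride] and array[j + 3*stride]
     are collected in a CONSTRUCTOR that initializes one vector per copy.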
6734 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6735 build_int_cst (TREE_TYPE (stride_step), vf));
6737 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6739 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6740 loop, &incr_gsi, insert_after,
6741 &offvar, NULL);
6742 incr = gsi_stmt (incr_gsi);
6743 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6745 stride_step = force_gimple_operand (unshare_expr (stride_step),
6746 &stmts, true, NULL_TREE);
6747 if (stmts)
6748 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6750 prev_stmt_info = NULL;
6751 running_off = offvar;
6752 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
6753 int nloads = nunits;
6754 int lnel = 1;
6755 tree ltype = TREE_TYPE (vectype);
6756 auto_vec<tree> dr_chain;
6757 if (slp)
6759 if (group_size < nunits
6760 && nunits % group_size == 0)
6762 nloads = nunits / group_size;
6763 lnel = group_size;
6764 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6765 ltype = build_aligned_type (ltype,
6766 TYPE_ALIGN (TREE_TYPE (vectype)));
6768 else if (group_size >= nunits
6769 && group_size % nunits == 0)
6771 nloads = 1;
6772 lnel = nunits;
6773 ltype = vectype;
6774 ltype = build_aligned_type (ltype,
6775 TYPE_ALIGN (TREE_TYPE (vectype)));
6777 /* For SLP permutation support we need to load the whole group,
6778 not only the number of vector stmts the permutation result
6779 fits in. */
6780 if (slp_perm)
6782 ncopies = (group_size * vf + nunits - 1) / nunits;
6783 dr_chain.create (ncopies);
6785 else
6786 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6788 int group_el = 0;
6789 unsigned HOST_WIDE_INT
6790 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6791 for (j = 0; j < ncopies; j++)
6793 if (nloads > 1)
6794 vec_alloc (v, nloads);
6795 for (i = 0; i < nloads; i++)
6797 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6798 group_el * elsz);
6799 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6800 build2 (MEM_REF, ltype,
6801 running_off, this_off));
6802 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6803 if (nloads > 1)
6804 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
6805 gimple_assign_lhs (new_stmt));
6807 group_el += lnel;
6808 if (! slp
6809 || group_el == group_size)
6811 tree newoff = copy_ssa_name (running_off);
6812 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6813 running_off, stride_step);
6814 vect_finish_stmt_generation (stmt, incr, gsi);
6816 running_off = newoff;
6817 group_el = 0;
6820 if (nloads > 1)
6822 tree vec_inv = build_constructor (vectype, v);
6823 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6824 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6827 if (slp)
6829 if (slp_perm)
6830 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6831 else
6832 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6834 else
6836 if (j == 0)
6837 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6838 else
6839 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6840 prev_stmt_info = vinfo_for_stmt (new_stmt);
6843 if (slp_perm)
6844 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6845 slp_node_instance, false);
6846 return true;
6849 if (grouped_load)
6851 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6852 /* For SLP vectorization we directly vectorize a subchain
6853 without permutation. */
6854 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6855 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6856 /* For BB vectorization always use the first stmt to base
6857 the data ref pointer on. */
6858 if (bb_vinfo)
6859 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6861 /* Check if the chain of loads is already vectorized. */
6862 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6863 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6864 ??? But we can only do so if there is exactly one
6865 as we have no way to get at the rest. Leave the CSE
6866 opportunity alone.
6867 ??? With the group load eventually participating
6868 in multiple different permutations (having multiple
6869 slp nodes which refer to the same group) the CSE
6870 is even wrong code. See PR56270. */
6871 && !slp)
6873 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6874 return true;
6876 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6877 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6878 group_gap_adj = 0;
6880 /* VEC_NUM is the number of vect stmts to be created for this group. */
6881 if (slp)
6883 grouped_load = false;
6884 /* For SLP permutation support we need to load the whole group,
6885 not only the number of vector stmts the permutation result
6886 fits in. */
6887 if (slp_perm)
6888 vec_num = (group_size * vf + nunits - 1) / nunits;
6889 else
6890 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6891 group_gap_adj = vf * group_size - nunits * vec_num;
6893 else
6894 vec_num = group_size;
6896 else
6898 first_stmt = stmt;
6899 first_dr = dr;
6900 group_size = vec_num = 1;
6901 group_gap_adj = 0;
6904 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6905 gcc_assert (alignment_support_scheme);
6906 /* Targets with load-lane instructions must not require explicit
6907 realignment. */
6908 gcc_assert (!load_lanes_p
6909 || alignment_support_scheme == dr_aligned
6910 || alignment_support_scheme == dr_unaligned_supported);
6912 /* In case the vectorization factor (VF) is bigger than the number
6913 of elements that we can fit in a vectype (nunits), we have to generate
6914 more than one vector stmt - i.e. - we need to "unroll" the
6915 vector stmt by a factor of VF/nunits. In doing so, we record a pointer
6916 from one copy of the vector stmt to the next, in the field
6917 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6918 stages to find the correct vector defs to be used when vectorizing
6919 stmts that use the defs of the current stmt. The example below
6920 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6921 need to create 4 vectorized stmts):
6923 before vectorization:
6924 RELATED_STMT VEC_STMT
6925 S1: x = memref - -
6926 S2: z = x + 1 - -
6928 step 1: vectorize stmt S1:
6929 We first create the vector stmt VS1_0, and, as usual, record a
6930 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6931 Next, we create the vector stmt VS1_1, and record a pointer to
6932 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6933 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6934 stmts and pointers:
6935 RELATED_STMT VEC_STMT
6936 VS1_0: vx0 = memref0 VS1_1 -
6937 VS1_1: vx1 = memref1 VS1_2 -
6938 VS1_2: vx2 = memref2 VS1_3 -
6939 VS1_3: vx3 = memref3 - -
6940 S1: x = load - VS1_0
6941 S2: z = x + 1 - -
6943 See the documentation of vect_get_vec_def_for_stmt_copy for how the
6944 information we recorded in the RELATED_STMT field is used to vectorize
6945 stmt S2. */
6947 /* In case of interleaving (non-unit grouped access):
6949 S1: x2 = &base + 2
6950 S2: x0 = &base
6951 S3: x1 = &base + 1
6952 S4: x3 = &base + 3
6954 Vectorized loads are created in the order of memory accesses
6955 starting from the access of the first stmt of the chain:
6957 VS1: vx0 = &base
6958 VS2: vx1 = &base + vec_size*1
6959 VS3: vx3 = &base + vec_size*2
6960 VS4: vx4 = &base + vec_size*3
6962 Then permutation statements are generated:
6964 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6965 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6968 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6969 (the order of the data-refs in the output of vect_permute_load_chain
6970 corresponds to the order of scalar stmts in the interleaving chain - see
6971 the documentation of vect_permute_load_chain()).
6972 The generation of permutation stmts and recording them in
6973 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6975 In case of both multiple types and interleaving, the vector loads and
6976 permutation stmts above are created for every copy. The result vector
6977 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6978 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6980 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6981 on a target that supports unaligned accesses (dr_unaligned_supported)
6982 we generate the following code:
6983 p = initial_addr;
6984 indx = 0;
6985 loop {
6986 p = p + indx * vectype_size;
6987 vec_dest = *(p);
6988 indx = indx + 1;
6991 Otherwise, the data reference is potentially unaligned on a target that
6992 does not support unaligned accesses (dr_explicit_realign_optimized) -
6993 then generate the following code, in which the data in each iteration is
6994 obtained by two vector loads, one from the previous iteration, and one
6995 from the current iteration:
6996 p1 = initial_addr;
6997 msq_init = *(floor(p1))
6998 p2 = initial_addr + VS - 1;
6999 realignment_token = call target_builtin;
7000 indx = 0;
7001 loop {
7002 p2 = p2 + indx * vectype_size
7003 lsq = *(floor(p2))
7004 vec_dest = realign_load (msq, lsq, realignment_token)
7005 indx = indx + 1;
7006 msq = lsq;
7007 } */
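/* An illustrative instance (sizes are assumptions): for 16-byte vectors and
   an address p with p % 16 == 8, msq is the aligned load at p - 8, lsq the
   aligned load at p + 8, and realign_load selects the 16 bytes that start at
   offset 8 of their concatenation.  */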
7009 /* If the misalignment remains the same throughout the execution of the
7010 loop, we can create the init_addr and permutation mask at the loop
7011 preheader. Otherwise, it needs to be created inside the loop.
7012 This can only occur when vectorizing memory accesses in the inner-loop
7013 nested within an outer-loop that is being vectorized. */
7015 if (nested_in_vect_loop
7016 && (TREE_INT_CST_LOW (DR_STEP (dr))
7017 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7019 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7020 compute_in_loop = true;
7023 if ((alignment_support_scheme == dr_explicit_realign_optimized
7024 || alignment_support_scheme == dr_explicit_realign)
7025 && !compute_in_loop)
7027 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7028 alignment_support_scheme, NULL_TREE,
7029 &at_loop);
7030 if (alignment_support_scheme == dr_explicit_realign_optimized)
7032 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7033 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7034 size_one_node);
7037 else
7038 at_loop = loop;
7040 if (negative)
7041 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7043 if (load_lanes_p)
7044 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7045 else
7046 aggr_type = vectype;
7048 prev_stmt_info = NULL;
7049 for (j = 0; j < ncopies; j++)
7051 /* 1. Create the vector or array pointer update chain. */
7052 if (j == 0)
7054 bool simd_lane_access_p
7055 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7056 if (simd_lane_access_p
7057 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7058 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7059 && integer_zerop (DR_OFFSET (first_dr))
7060 && integer_zerop (DR_INIT (first_dr))
7061 && alias_sets_conflict_p (get_alias_set (aggr_type),
7062 get_alias_set (DR_REF (first_dr)))
7063 && (alignment_support_scheme == dr_aligned
7064 || alignment_support_scheme == dr_unaligned_supported))
7066 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7067 dataref_offset = build_int_cst (reference_alias_ptr_type
7068 (DR_REF (first_dr)), 0);
7069 inv_p = false;
7071 else if (first_stmt_for_drptr
7072 && first_stmt != first_stmt_for_drptr)
7074 dataref_ptr
7075 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7076 at_loop, offset, &dummy, gsi,
7077 &ptr_incr, simd_lane_access_p,
7078 &inv_p, byte_offset);
7079 /* Adjust the pointer by the difference to first_stmt. */
7080 data_reference_p ptrdr
7081 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7082 tree diff = fold_convert (sizetype,
7083 size_binop (MINUS_EXPR,
7084 DR_INIT (first_dr),
7085 DR_INIT (ptrdr)));
7086 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7087 stmt, diff);
7089 else
7090 dataref_ptr
7091 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7092 offset, &dummy, gsi, &ptr_incr,
7093 simd_lane_access_p, &inv_p,
7094 byte_offset);
7096 else if (dataref_offset)
7097 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7098 TYPE_SIZE_UNIT (aggr_type));
7099 else
7100 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7101 TYPE_SIZE_UNIT (aggr_type));
7103 if (grouped_load || slp_perm)
7104 dr_chain.create (vec_num);
7106 if (load_lanes_p)
7108 tree vec_array;
7110 vec_array = create_vector_array (vectype, vec_num);
7112 /* Emit:
7113 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7114 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
7115 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7116 gimple_call_set_lhs (new_stmt, vec_array);
7117 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7119 /* Extract each vector into an SSA_NAME. */
7120 for (i = 0; i < vec_num; i++)
7122 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7123 vec_array, i);
7124 dr_chain.quick_push (new_temp);
7127 /* Record the mapping between SSA_NAMEs and statements. */
7128 vect_record_grouped_load_vectors (stmt, dr_chain);
7130 else
7132 for (i = 0; i < vec_num; i++)
7134 if (i > 0)
7135 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7136 stmt, NULL_TREE);
7138 /* 2. Create the vector-load in the loop. */
7139 switch (alignment_support_scheme)
7141 case dr_aligned:
7142 case dr_unaligned_supported:
7144 unsigned int align, misalign;
7146 data_ref
7147 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7148 dataref_offset
7149 ? dataref_offset
7150 : build_int_cst (reference_alias_ptr_type
7151 (DR_REF (first_dr)), 0));
7152 align = TYPE_ALIGN_UNIT (vectype);
7153 if (alignment_support_scheme == dr_aligned)
7155 gcc_assert (aligned_access_p (first_dr));
7156 misalign = 0;
7158 else if (DR_MISALIGNMENT (first_dr) == -1)
7160 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7161 align = TYPE_ALIGN_UNIT (elem_type);
7162 else
7163 align = (get_object_alignment (DR_REF (first_dr))
7164 / BITS_PER_UNIT);
7165 misalign = 0;
7166 TREE_TYPE (data_ref)
7167 = build_aligned_type (TREE_TYPE (data_ref),
7168 align * BITS_PER_UNIT);
7170 else
7172 TREE_TYPE (data_ref)
7173 = build_aligned_type (TREE_TYPE (data_ref),
7174 TYPE_ALIGN (elem_type));
7175 misalign = DR_MISALIGNMENT (first_dr);
7177 if (dataref_offset == NULL_TREE
7178 && TREE_CODE (dataref_ptr) == SSA_NAME)
7179 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7180 align, misalign);
7181 break;
7183 case dr_explicit_realign:
7185 tree ptr, bump;
7187 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7189 if (compute_in_loop)
7190 msq = vect_setup_realignment (first_stmt, gsi,
7191 &realignment_token,
7192 dr_explicit_realign,
7193 dataref_ptr, NULL);
7195 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7196 ptr = copy_ssa_name (dataref_ptr);
7197 else
7198 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7199 new_stmt = gimple_build_assign
7200 (ptr, BIT_AND_EXPR, dataref_ptr,
7201 build_int_cst
7202 (TREE_TYPE (dataref_ptr),
7203 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7204 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7205 data_ref
7206 = build2 (MEM_REF, vectype, ptr,
7207 build_int_cst (reference_alias_ptr_type
7208 (DR_REF (first_dr)), 0));
7209 vec_dest = vect_create_destination_var (scalar_dest,
7210 vectype);
7211 new_stmt = gimple_build_assign (vec_dest, data_ref);
7212 new_temp = make_ssa_name (vec_dest, new_stmt);
7213 gimple_assign_set_lhs (new_stmt, new_temp);
7214 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7215 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7216 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7217 msq = new_temp;
7219 bump = size_binop (MULT_EXPR, vs,
7220 TYPE_SIZE_UNIT (elem_type));
7221 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7222 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7223 new_stmt = gimple_build_assign
7224 (NULL_TREE, BIT_AND_EXPR, ptr,
7225 build_int_cst
7226 (TREE_TYPE (ptr),
7227 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7228 ptr = copy_ssa_name (ptr, new_stmt);
7229 gimple_assign_set_lhs (new_stmt, ptr);
7230 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7231 data_ref
7232 = build2 (MEM_REF, vectype, ptr,
7233 build_int_cst (reference_alias_ptr_type
7234 (DR_REF (first_dr)), 0));
7235 break;
7237 case dr_explicit_realign_optimized:
7238 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7239 new_temp = copy_ssa_name (dataref_ptr);
7240 else
7241 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7242 new_stmt = gimple_build_assign
7243 (new_temp, BIT_AND_EXPR, dataref_ptr,
7244 build_int_cst
7245 (TREE_TYPE (dataref_ptr),
7246 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7247 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7248 data_ref
7249 = build2 (MEM_REF, vectype, new_temp,
7250 build_int_cst (reference_alias_ptr_type
7251 (DR_REF (first_dr)), 0));
7252 break;
7253 default:
7254 gcc_unreachable ();
7256 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7257 new_stmt = gimple_build_assign (vec_dest, data_ref);
7258 new_temp = make_ssa_name (vec_dest, new_stmt);
7259 gimple_assign_set_lhs (new_stmt, new_temp);
7260 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7262 /* 3. Handle explicit realignment if necessary/supported.
7263 Create in loop:
7264 vec_dest = realign_load (msq, lsq, realignment_token) */
7265 if (alignment_support_scheme == dr_explicit_realign_optimized
7266 || alignment_support_scheme == dr_explicit_realign)
7268 lsq = gimple_assign_lhs (new_stmt);
7269 if (!realignment_token)
7270 realignment_token = dataref_ptr;
7271 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7272 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7273 msq, lsq, realignment_token);
7274 new_temp = make_ssa_name (vec_dest, new_stmt);
7275 gimple_assign_set_lhs (new_stmt, new_temp);
7276 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7278 if (alignment_support_scheme == dr_explicit_realign_optimized)
7280 gcc_assert (phi);
7281 if (i == vec_num - 1 && j == ncopies - 1)
7282 add_phi_arg (phi, lsq,
7283 loop_latch_edge (containing_loop),
7284 UNKNOWN_LOCATION);
7285 msq = lsq;
7289 /* 4. Handle invariant-load. */
7290 if (inv_p && !bb_vinfo)
7292 gcc_assert (!grouped_load);
7293 /* If we have versioned for aliasing or the loop doesn't
7294 have any data dependencies that would preclude this,
7295 then we are sure this is a loop invariant load and
7296 thus we can insert it on the preheader edge. */
7297 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7298 && !nested_in_vect_loop
7299 && hoist_defs_of_uses (stmt, loop))
7301 if (dump_enabled_p ())
7303 dump_printf_loc (MSG_NOTE, vect_location,
7304 "hoisting out of the vectorized "
7305 "loop: ");
7306 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7308 tree tem = copy_ssa_name (scalar_dest);
7309 gsi_insert_on_edge_immediate
7310 (loop_preheader_edge (loop),
7311 gimple_build_assign (tem,
7312 unshare_expr
7313 (gimple_assign_rhs1 (stmt))));
7314 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7315 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7316 set_vinfo_for_stmt (new_stmt,
7317 new_stmt_vec_info (new_stmt, vinfo));
7319 else
7321 gimple_stmt_iterator gsi2 = *gsi;
7322 gsi_next (&gsi2);
7323 new_temp = vect_init_vector (stmt, scalar_dest,
7324 vectype, &gsi2);
7325 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7329 if (negative)
7331 tree perm_mask = perm_mask_for_reverse (vectype);
7332 new_temp = permute_vec_elements (new_temp, new_temp,
7333 perm_mask, stmt, gsi);
7334 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7337 /* Collect vector loads and later create their permutation in
7338 vect_transform_grouped_load (). */
7339 if (grouped_load || slp_perm)
7340 dr_chain.quick_push (new_temp);
7342 /* Store vector loads in the corresponding SLP_NODE. */
7343 if (slp && !slp_perm)
7344 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7346 /* Bump the vector pointer to account for a gap or for excess
7347 elements loaded for a permuted SLP load. */
7348 if (group_gap_adj != 0)
7350 bool ovf;
7351 tree bump
7352 = wide_int_to_tree (sizetype,
7353 wi::smul (TYPE_SIZE_UNIT (elem_type),
7354 group_gap_adj, &ovf));
7355 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7356 stmt, bump);
7360 if (slp && !slp_perm)
7361 continue;
7363 if (slp_perm)
7365 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7366 slp_node_instance, false))
7368 dr_chain.release ();
7369 return false;
7372 else
7374 if (grouped_load)
7376 if (!load_lanes_p)
7377 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7378 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7380 else
7382 if (j == 0)
7383 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7384 else
7385 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7386 prev_stmt_info = vinfo_for_stmt (new_stmt);
7389 dr_chain.release ();
7392 return true;
7395 /* Function vect_is_simple_cond.
7397 Input:
7398 VINFO - the vect info for the loop or basic block that is being vectorized.
7399 COND - Condition that is checked for simple use.
7401 Output:
7402 *COMP_VECTYPE - the vector type for the comparison.
7404 Returns whether a COND can be vectorized. Checks whether
7405 condition operands are supportable using vect_is_simple_use. */
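/* COND may be either a comparison such as a_1 < b_2, or - for the mask case
   below - a boolean SSA name defined by such a comparison; the SSA names here
   are only illustrative.  */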
7407 static bool
7408 vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
7410 tree lhs, rhs;
7411 enum vect_def_type dt;
7412 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7414 /* Mask case. */
7415 if (TREE_CODE (cond) == SSA_NAME
7416 && TREE_CODE (TREE_TYPE (cond)) == BOOLEAN_TYPE)
7418 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7419 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7420 &dt, comp_vectype)
7421 || !*comp_vectype
7422 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7423 return false;
7424 return true;
7427 if (!COMPARISON_CLASS_P (cond))
7428 return false;
7430 lhs = TREE_OPERAND (cond, 0);
7431 rhs = TREE_OPERAND (cond, 1);
7433 if (TREE_CODE (lhs) == SSA_NAME)
7435 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7436 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
7437 return false;
7439 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7440 && TREE_CODE (lhs) != FIXED_CST)
7441 return false;
7443 if (TREE_CODE (rhs) == SSA_NAME)
7445 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7446 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
7447 return false;
7449 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7450 && TREE_CODE (rhs) != FIXED_CST)
7451 return false;
7453 if (vectype1 && vectype2
7454 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7455 return false;
7457 *comp_vectype = vectype1 ? vectype1 : vectype2;
7458 return true;
7461 /* vectorizable_condition.
7463 Check if STMT is a conditional modify expression that can be vectorized.
7464 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7465 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7466 at GSI.
7468 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7469 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7470 the else clause if it is 2).
7472 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
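/* For example (an illustrative sketch, not target-specific output), a scalar
   statement

     c_1 = a_2 < b_3 ? x_4 : y_5;

   is replaced by a vector comparison in COMP_VECTYPE followed by a
   VEC_COND_EXPR, roughly

     vcmp_6 = va_7 < vb_8;
     vc_9 = VEC_COND_EXPR <vcmp_6, vx_10, vy_11>;

   In the masked case the condition is already a boolean SSA name and is used
   directly as the VEC_COND_EXPR mask.  */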
7474 bool
7475 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7476 gimple **vec_stmt, tree reduc_def, int reduc_index,
7477 slp_tree slp_node)
7479 tree scalar_dest = NULL_TREE;
7480 tree vec_dest = NULL_TREE;
7481 tree cond_expr, then_clause, else_clause;
7482 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7483 tree comp_vectype = NULL_TREE;
7484 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7485 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7486 tree vec_compare;
7487 tree new_temp;
7488 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7489 enum vect_def_type dt, dts[4];
7490 int ncopies;
7491 enum tree_code code;
7492 stmt_vec_info prev_stmt_info = NULL;
7493 int i, j;
7494 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7495 vec<tree> vec_oprnds0 = vNULL;
7496 vec<tree> vec_oprnds1 = vNULL;
7497 vec<tree> vec_oprnds2 = vNULL;
7498 vec<tree> vec_oprnds3 = vNULL;
7499 tree vec_cmp_type;
7500 bool masked = false;
7502 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7503 return false;
7505 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7507 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7508 return false;
7510 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7511 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7512 && reduc_def))
7513 return false;
7515 /* FORNOW: not yet supported. */
7516 if (STMT_VINFO_LIVE_P (stmt_info))
7518 if (dump_enabled_p ())
7519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7520 "value used after loop.\n");
7521 return false;
7525 /* Is vectorizable conditional operation? */
7526 if (!is_gimple_assign (stmt))
7527 return false;
7529 code = gimple_assign_rhs_code (stmt);
7531 if (code != COND_EXPR)
7532 return false;
7534 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7535 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7536 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7538 if (slp_node)
7539 ncopies = 1;
7540 else
7541 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7543 gcc_assert (ncopies >= 1);
7544 if (reduc_index && ncopies > 1)
7545 return false; /* FORNOW */
7547 cond_expr = gimple_assign_rhs1 (stmt);
7548 then_clause = gimple_assign_rhs2 (stmt);
7549 else_clause = gimple_assign_rhs3 (stmt);
7551 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
7552 || !comp_vectype)
7553 return false;
7555 gimple *def_stmt;
7556 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt,
7557 &vectype1))
7558 return false;
7559 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt,
7560 &vectype2))
7561 return false;
7563 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7564 return false;
7566 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7567 return false;
7569 masked = !COMPARISON_CLASS_P (cond_expr);
7570 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7572 if (vec_cmp_type == NULL_TREE)
7573 return false;
7575 if (!vec_stmt)
7577 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7578 return expand_vec_cond_expr_p (vectype, comp_vectype);
7581 /* Transform. */
7583 if (!slp_node)
7585 vec_oprnds0.create (1);
7586 vec_oprnds1.create (1);
7587 vec_oprnds2.create (1);
7588 vec_oprnds3.create (1);
7591 /* Handle def. */
7592 scalar_dest = gimple_assign_lhs (stmt);
7593 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7595 /* Handle cond expr. */
7596 for (j = 0; j < ncopies; j++)
7598 gassign *new_stmt = NULL;
7599 if (j == 0)
7601 if (slp_node)
7603 auto_vec<tree, 4> ops;
7604 auto_vec<vec<tree>, 4> vec_defs;
7606 if (masked)
7607 ops.safe_push (cond_expr);
7608 else
7610 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7611 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7613 ops.safe_push (then_clause);
7614 ops.safe_push (else_clause);
7615 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7616 vec_oprnds3 = vec_defs.pop ();
7617 vec_oprnds2 = vec_defs.pop ();
7618 if (!masked)
7619 vec_oprnds1 = vec_defs.pop ();
7620 vec_oprnds0 = vec_defs.pop ();
7622 ops.release ();
7623 vec_defs.release ();
7625 else
7627 gimple *gtemp;
7628 if (masked)
7630 vec_cond_lhs
7631 = vect_get_vec_def_for_operand (cond_expr, stmt,
7632 comp_vectype);
7633 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7634 &gtemp, &dts[0]);
7636 else
7638 vec_cond_lhs =
7639 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7640 stmt, comp_vectype);
7641 vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
7642 loop_vinfo, &gtemp, &dts[0]);
7644 vec_cond_rhs =
7645 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7646 stmt, comp_vectype);
7647 vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
7648 loop_vinfo, &gtemp, &dts[1]);
7650 if (reduc_index == 1)
7651 vec_then_clause = reduc_def;
7652 else
7654 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7655 stmt);
7656 vect_is_simple_use (then_clause, loop_vinfo,
7657 &gtemp, &dts[2]);
7659 if (reduc_index == 2)
7660 vec_else_clause = reduc_def;
7661 else
7663 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7664 stmt);
7665 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
7669 else
7671 vec_cond_lhs
7672 = vect_get_vec_def_for_stmt_copy (dts[0],
7673 vec_oprnds0.pop ());
7674 if (!masked)
7675 vec_cond_rhs
7676 = vect_get_vec_def_for_stmt_copy (dts[1],
7677 vec_oprnds1.pop ());
7679 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7680 vec_oprnds2.pop ());
7681 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7682 vec_oprnds3.pop ());
7685 if (!slp_node)
7687 vec_oprnds0.quick_push (vec_cond_lhs);
7688 if (!masked)
7689 vec_oprnds1.quick_push (vec_cond_rhs);
7690 vec_oprnds2.quick_push (vec_then_clause);
7691 vec_oprnds3.quick_push (vec_else_clause);
7694 /* Arguments are ready. Create the new vector stmt. */
7695 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7697 vec_then_clause = vec_oprnds2[i];
7698 vec_else_clause = vec_oprnds3[i];
7700 if (masked)
7701 vec_compare = vec_cond_lhs;
7702 else
7704 vec_cond_rhs = vec_oprnds1[i];
7705 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7706 vec_cond_lhs, vec_cond_rhs);
7708 new_temp = make_ssa_name (vec_dest);
7709 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
7710 vec_compare, vec_then_clause,
7711 vec_else_clause);
7712 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7713 if (slp_node)
7714 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7717 if (slp_node)
7718 continue;
7720 if (j == 0)
7721 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7722 else
7723 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7725 prev_stmt_info = vinfo_for_stmt (new_stmt);
7728 vec_oprnds0.release ();
7729 vec_oprnds1.release ();
7730 vec_oprnds2.release ();
7731 vec_oprnds3.release ();
7733 return true;
7736 /* vectorizable_comparison.
7738 Check if STMT is a comparison expression that can be vectorized.
7739 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7740 comparison, put it in VEC_STMT, and insert it at GSI.
7742 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
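/* For instance (illustrative only), a scalar mask definition

     m_1 = a_2 <= b_3;

   is vectorized either as a direct vector comparison producing a vector
   boolean result, or, when the operands are themselves vector booleans, as
   the bit operations chosen below (e.g. ~va | vb for <= on masks).  */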
7744 static bool
7745 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
7746 gimple **vec_stmt, tree reduc_def,
7747 slp_tree slp_node)
7749 tree lhs, rhs1, rhs2;
7750 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7751 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7752 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7753 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
7754 tree new_temp;
7755 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7756 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
7757 unsigned nunits;
7758 int ncopies;
7759 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7760 stmt_vec_info prev_stmt_info = NULL;
7761 int i, j;
7762 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7763 vec<tree> vec_oprnds0 = vNULL;
7764 vec<tree> vec_oprnds1 = vNULL;
7765 gimple *def_stmt;
7766 tree mask_type;
7767 tree mask;
7769 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7770 return false;
7772 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
7773 return false;
7775 mask_type = vectype;
7776 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7778 if (slp_node)
7779 ncopies = 1;
7780 else
7781 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7783 gcc_assert (ncopies >= 1);
7784 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7785 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7786 && reduc_def))
7787 return false;
7789 if (STMT_VINFO_LIVE_P (stmt_info))
7791 if (dump_enabled_p ())
7792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7793 "value used after loop.\n");
7794 return false;
7797 if (!is_gimple_assign (stmt))
7798 return false;
7800 code = gimple_assign_rhs_code (stmt);
7802 if (TREE_CODE_CLASS (code) != tcc_comparison)
7803 return false;
7805 rhs1 = gimple_assign_rhs1 (stmt);
7806 rhs2 = gimple_assign_rhs2 (stmt);
7808 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
7809 &dts[0], &vectype1))
7810 return false;
7812 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
7813 &dts[1], &vectype2))
7814 return false;
7816 if (vectype1 && vectype2
7817 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7818 return false;
7820 vectype = vectype1 ? vectype1 : vectype2;
7822 /* Invariant comparison. */
7823 if (!vectype)
7825 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
7826 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
7827 return false;
7829 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
7830 return false;
7832 /* Can't compare mask and non-mask types. */
7833 if (vectype1 && vectype2
7834 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
7835 return false;
7837 /* Boolean values may have another representation in vectors
7838 and therefore we prefer bit operations over comparison for
7839 them (which also works for scalar masks). We store opcodes
7840 to use in bitop1 and bitop2. The statement is vectorized as
7841 BITOP2 (rhs1 BITOP1 rhs2) or
7842 rhs1 BITOP2 (BITOP1 rhs2)
7843 depending on the arity of bitop1 and bitop2. */
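/* Concretely, for boolean operands the selections below correspond to
     a >  b  ->  a & ~b          (bitop1 = NOT,  bitop2 = AND)
     a >= b  ->  a | ~b          (bitop1 = NOT,  bitop2 = IOR)
     a <  b  ->  b & ~a          (operands swapped)
     a <= b  ->  b | ~a          (operands swapped)
     a == b  ->  ~(a ^ b)        (bitop1 = XOR,  bitop2 = NOT)
     a != b  ->  a ^ b           (bitop1 = XOR only).  */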
7844 if (VECTOR_BOOLEAN_TYPE_P (vectype))
7846 if (code == GT_EXPR)
7848 bitop1 = BIT_NOT_EXPR;
7849 bitop2 = BIT_AND_EXPR;
7851 else if (code == GE_EXPR)
7853 bitop1 = BIT_NOT_EXPR;
7854 bitop2 = BIT_IOR_EXPR;
7856 else if (code == LT_EXPR)
7858 bitop1 = BIT_NOT_EXPR;
7859 bitop2 = BIT_AND_EXPR;
7860 std::swap (rhs1, rhs2);
7862 else if (code == LE_EXPR)
7864 bitop1 = BIT_NOT_EXPR;
7865 bitop2 = BIT_IOR_EXPR;
7866 std::swap (rhs1, rhs2);
7868 else
7870 bitop1 = BIT_XOR_EXPR;
7871 if (code == EQ_EXPR)
7872 bitop2 = BIT_NOT_EXPR;
7876 if (!vec_stmt)
7878 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
7879 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
7880 dts, NULL, NULL);
7881 if (bitop1 == NOP_EXPR)
7882 return expand_vec_cmp_expr_p (vectype, mask_type);
7883 else
7885 machine_mode mode = TYPE_MODE (vectype);
7886 optab optab;
7888 optab = optab_for_tree_code (bitop1, vectype, optab_default);
7889 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7890 return false;
7892 if (bitop2 != NOP_EXPR)
7894 optab = optab_for_tree_code (bitop2, vectype, optab_default);
7895 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7896 return false;
7898 return true;
7902 /* Transform. */
7903 if (!slp_node)
7905 vec_oprnds0.create (1);
7906 vec_oprnds1.create (1);
7909 /* Handle def. */
7910 lhs = gimple_assign_lhs (stmt);
7911 mask = vect_create_destination_var (lhs, mask_type);
7913 /* Handle cmp expr. */
7914 for (j = 0; j < ncopies; j++)
7916 gassign *new_stmt = NULL;
7917 if (j == 0)
7919 if (slp_node)
7921 auto_vec<tree, 2> ops;
7922 auto_vec<vec<tree>, 2> vec_defs;
7924 ops.safe_push (rhs1);
7925 ops.safe_push (rhs2);
7926 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7927 vec_oprnds1 = vec_defs.pop ();
7928 vec_oprnds0 = vec_defs.pop ();
7930 else
7932 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
7933 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
7936 else
7938 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
7939 vec_oprnds0.pop ());
7940 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
7941 vec_oprnds1.pop ());
7944 if (!slp_node)
7946 vec_oprnds0.quick_push (vec_rhs1);
7947 vec_oprnds1.quick_push (vec_rhs2);
7950 /* Arguments are ready. Create the new vector stmt. */
7951 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
7953 vec_rhs2 = vec_oprnds1[i];
7955 new_temp = make_ssa_name (mask);
7956 if (bitop1 == NOP_EXPR)
7958 new_stmt = gimple_build_assign (new_temp, code,
7959 vec_rhs1, vec_rhs2);
7960 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7962 else
7964 if (bitop1 == BIT_NOT_EXPR)
7965 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
7966 else
7967 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
7968 vec_rhs2);
7969 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7970 if (bitop2 != NOP_EXPR)
7972 tree res = make_ssa_name (mask);
7973 if (bitop2 == BIT_NOT_EXPR)
7974 new_stmt = gimple_build_assign (res, bitop2, new_temp);
7975 else
7976 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
7977 new_temp);
7978 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7981 if (slp_node)
7982 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7985 if (slp_node)
7986 continue;
7988 if (j == 0)
7989 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7990 else
7991 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7993 prev_stmt_info = vinfo_for_stmt (new_stmt);
7996 vec_oprnds0.release ();
7997 vec_oprnds1.release ();
7999 return true;
8002 /* Make sure the statement is vectorizable. */
8004 bool
8005 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
8007 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8008 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8009 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8010 bool ok;
8011 tree scalar_type, vectype;
8012 gimple *pattern_stmt;
8013 gimple_seq pattern_def_seq;
8015 if (dump_enabled_p ())
8017 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8018 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8021 if (gimple_has_volatile_ops (stmt))
8023 if (dump_enabled_p ())
8024 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8025 "not vectorized: stmt has volatile operands\n");
8027 return false;
8030 /* Skip stmts that do not need to be vectorized. In loops this is expected
8031 to include:
8032 - the COND_EXPR which is the loop exit condition
8033 - any LABEL_EXPRs in the loop
8034 - computations that are used only for array indexing or loop control.
8035 In basic blocks we only analyze statements that are a part of some SLP
8036 instance; therefore, all the statements are relevant.
8038 The pattern statement needs to be analyzed instead of the original statement
8039 if the original statement is not relevant. Otherwise, we analyze both
8040 statements. In basic blocks we are called from some SLP instance
8041 traversal; don't analyze pattern stmts in their stead, since the pattern
8042 stmts will already be part of an SLP instance. */
8044 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8045 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8046 && !STMT_VINFO_LIVE_P (stmt_info))
8048 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8049 && pattern_stmt
8050 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8051 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8053 /* Analyze PATTERN_STMT instead of the original stmt. */
8054 stmt = pattern_stmt;
8055 stmt_info = vinfo_for_stmt (pattern_stmt);
8056 if (dump_enabled_p ())
8058 dump_printf_loc (MSG_NOTE, vect_location,
8059 "==> examining pattern statement: ");
8060 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8063 else
8065 if (dump_enabled_p ())
8066 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8068 return true;
8071 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8072 && node == NULL
8073 && pattern_stmt
8074 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8075 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8077 /* Analyze PATTERN_STMT too. */
8078 if (dump_enabled_p ())
8080 dump_printf_loc (MSG_NOTE, vect_location,
8081 "==> examining pattern statement: ");
8082 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8085 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
8086 return false;
8089 if (is_pattern_stmt_p (stmt_info)
8090 && node == NULL
8091 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8093 gimple_stmt_iterator si;
8095 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8097 gimple *pattern_def_stmt = gsi_stmt (si);
8098 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8099 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8101 /* Analyze def stmt of STMT if it's a pattern stmt. */
8102 if (dump_enabled_p ())
8104 dump_printf_loc (MSG_NOTE, vect_location,
8105 "==> examining pattern def statement: ");
8106 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8109 if (!vect_analyze_stmt (pattern_def_stmt,
8110 need_to_vectorize, node))
8111 return false;
8116 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8118 case vect_internal_def:
8119 break;
8121 case vect_reduction_def:
8122 case vect_nested_cycle:
8123 gcc_assert (!bb_vinfo
8124 && (relevance == vect_used_in_outer
8125 || relevance == vect_used_in_outer_by_reduction
8126 || relevance == vect_used_by_reduction
8127 || relevance == vect_unused_in_scope
8128 || relevance == vect_used_only_live));
8129 break;
8131 case vect_induction_def:
8132 case vect_constant_def:
8133 case vect_external_def:
8134 case vect_unknown_def_type:
8135 default:
8136 gcc_unreachable ();
8139 if (bb_vinfo)
8141 gcc_assert (PURE_SLP_STMT (stmt_info));
8143 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
8144 if (dump_enabled_p ())
8146 dump_printf_loc (MSG_NOTE, vect_location,
8147 "get vectype for scalar type: ");
8148 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
8149 dump_printf (MSG_NOTE, "\n");
8152 vectype = get_vectype_for_scalar_type (scalar_type);
8153 if (!vectype)
8155 if (dump_enabled_p ())
8157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8158 "not SLPed: unsupported data-type ");
8159 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8160 scalar_type);
8161 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8163 return false;
8166 if (dump_enabled_p ())
8168 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8169 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
8170 dump_printf (MSG_NOTE, "\n");
8173 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8176 if (STMT_VINFO_RELEVANT_P (stmt_info))
8178 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8179 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8180 || (is_gimple_call (stmt)
8181 && gimple_call_lhs (stmt) == NULL_TREE));
8182 *need_to_vectorize = true;
8185 if (PURE_SLP_STMT (stmt_info) && !node)
8187 dump_printf_loc (MSG_NOTE, vect_location,
8188 "handled only by SLP analysis\n");
8189 return true;
8192 ok = true;
8193 if (!bb_vinfo
8194 && (STMT_VINFO_RELEVANT_P (stmt_info)
8195 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8196 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8197 || vectorizable_conversion (stmt, NULL, NULL, node)
8198 || vectorizable_shift (stmt, NULL, NULL, node)
8199 || vectorizable_operation (stmt, NULL, NULL, node)
8200 || vectorizable_assignment (stmt, NULL, NULL, node)
8201 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8202 || vectorizable_call (stmt, NULL, NULL, node)
8203 || vectorizable_store (stmt, NULL, NULL, node)
8204 || vectorizable_reduction (stmt, NULL, NULL, node)
8205 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8206 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8207 else
8209 if (bb_vinfo)
8210 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8211 || vectorizable_conversion (stmt, NULL, NULL, node)
8212 || vectorizable_shift (stmt, NULL, NULL, node)
8213 || vectorizable_operation (stmt, NULL, NULL, node)
8214 || vectorizable_assignment (stmt, NULL, NULL, node)
8215 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8216 || vectorizable_call (stmt, NULL, NULL, node)
8217 || vectorizable_store (stmt, NULL, NULL, node)
8218 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8219 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8222 if (!ok)
8224 if (dump_enabled_p ())
8226 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8227 "not vectorized: relevant stmt not ");
8228 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8229 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8232 return false;
8235 if (bb_vinfo)
8236 return true;
8238 /* Stmts that are (also) "live" (i.e., used outside of the loop) need
8239 extra handling, except for vectorizable reductions. */
8240 if (STMT_VINFO_LIVE_P (stmt_info)
8241 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8242 ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);
8244 if (!ok)
8246 if (dump_enabled_p ())
8248 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8249 "not vectorized: live stmt not ");
8250 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8251 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8254 return false;
8257 return true;
8261 /* Function vect_transform_stmt.
8263 Create a vectorized stmt to replace STMT, and insert it at GSI. */
8265 bool
8266 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8267 bool *grouped_store, slp_tree slp_node,
8268 slp_instance slp_node_instance)
8270 bool is_store = false;
8271 gimple *vec_stmt = NULL;
8272 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8273 bool done;
8275 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8276 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8278 switch (STMT_VINFO_TYPE (stmt_info))
8280 case type_demotion_vec_info_type:
8281 case type_promotion_vec_info_type:
8282 case type_conversion_vec_info_type:
8283 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8284 gcc_assert (done);
8285 break;
8287 case induc_vec_info_type:
8288 gcc_assert (!slp_node);
8289 done = vectorizable_induction (stmt, gsi, &vec_stmt);
8290 gcc_assert (done);
8291 break;
8293 case shift_vec_info_type:
8294 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8295 gcc_assert (done);
8296 break;
8298 case op_vec_info_type:
8299 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8300 gcc_assert (done);
8301 break;
8303 case assignment_vec_info_type:
8304 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8305 gcc_assert (done);
8306 break;
8308 case load_vec_info_type:
8309 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8310 slp_node_instance);
8311 gcc_assert (done);
8312 break;
8314 case store_vec_info_type:
8315 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8316 gcc_assert (done);
8317 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8319 /* In case of interleaving, the whole chain is vectorized when the
8320 last store in the chain is reached. Store stmts before the last
8321 one are skipped, and their vec_stmt_info shouldn't be freed
8322 meanwhile. */
8323 *grouped_store = true;
8324 if (STMT_VINFO_VEC_STMT (stmt_info))
8325 is_store = true;
8327 else
8328 is_store = true;
8329 break;
8331 case condition_vec_info_type:
8332 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8333 gcc_assert (done);
8334 break;
8336 case comparison_vec_info_type:
8337 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8338 gcc_assert (done);
8339 break;
8341 case call_vec_info_type:
8342 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8343 stmt = gsi_stmt (*gsi);
8344 if (is_gimple_call (stmt)
8345 && gimple_call_internal_p (stmt)
8346 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
8347 is_store = true;
8348 break;
8350 case call_simd_clone_vec_info_type:
8351 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8352 stmt = gsi_stmt (*gsi);
8353 break;
8355 case reduc_vec_info_type:
8356 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8357 gcc_assert (done);
8358 break;
8360 default:
8361 if (!STMT_VINFO_LIVE_P (stmt_info))
8363 if (dump_enabled_p ())
8364 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8365 "stmt not supported.\n");
8366 gcc_unreachable ();
8370 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8371 This would break hybrid SLP vectorization. */
8372 if (slp_node)
8373 gcc_assert (!vec_stmt
8374 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8376 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8377 is being vectorized, but outside the immediately enclosing loop. */
8378 if (vec_stmt
8379 && STMT_VINFO_LOOP_VINFO (stmt_info)
8380 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8381 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8382 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8383 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8384 || STMT_VINFO_RELEVANT (stmt_info) ==
8385 vect_used_in_outer_by_reduction))
8387 struct loop *innerloop = LOOP_VINFO_LOOP (
8388 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8389 imm_use_iterator imm_iter;
8390 use_operand_p use_p;
8391 tree scalar_dest;
8392 gimple *exit_phi;
8394 if (dump_enabled_p ())
8395 dump_printf_loc (MSG_NOTE, vect_location,
8396 "Record the vdef for outer-loop vectorization.\n");
8398 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8399 (to be used when vectorizing outer-loop stmts that use the DEF of
8400 STMT). */
8401 if (gimple_code (stmt) == GIMPLE_PHI)
8402 scalar_dest = PHI_RESULT (stmt);
8403 else
8404 scalar_dest = gimple_assign_lhs (stmt);
8406 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8408 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8410 exit_phi = USE_STMT (use_p);
8411 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8416 /* Handle stmts whose DEF is used outside the loop-nest that is
8417 being vectorized. */
8418 if (slp_node)
8420 gimple *slp_stmt;
8421 int i;
8422 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8424 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8425 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8426 && STMT_VINFO_TYPE (slp_stmt_info) != reduc_vec_info_type)
8428 done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8429 &vec_stmt);
8430 gcc_assert (done);
8434 else if (STMT_VINFO_LIVE_P (stmt_info)
8435 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8437 done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);
8438 gcc_assert (done);
8441 if (vec_stmt)
8442 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8444 return is_store;
8448 /* Remove a group of stores (for SLP or interleaving), free their
8449 stmt_vec_info. */
8451 void
8452 vect_remove_stores (gimple *first_stmt)
8454 gimple *next = first_stmt;
8455 gimple *tmp;
8456 gimple_stmt_iterator next_si;
8458 while (next)
8460 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8462 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8463 if (is_pattern_stmt_p (stmt_info))
8464 next = STMT_VINFO_RELATED_STMT (stmt_info);
8465 /* Free the attached stmt_vec_info and remove the stmt. */
8466 next_si = gsi_for_stmt (next);
8467 unlink_stmt_vdef (next);
8468 gsi_remove (&next_si, true);
8469 release_defs (next);
8470 free_stmt_vec_info (next);
8471 next = tmp;
8476 /* Function new_stmt_vec_info.
8478 Create and initialize a new stmt_vec_info struct for STMT. */
8480 stmt_vec_info
8481 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8483 stmt_vec_info res;
8484 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8486 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8487 STMT_VINFO_STMT (res) = stmt;
8488 res->vinfo = vinfo;
8489 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8490 STMT_VINFO_LIVE_P (res) = false;
8491 STMT_VINFO_VECTYPE (res) = NULL;
8492 STMT_VINFO_VEC_STMT (res) = NULL;
8493 STMT_VINFO_VECTORIZABLE (res) = true;
8494 STMT_VINFO_IN_PATTERN_P (res) = false;
8495 STMT_VINFO_RELATED_STMT (res) = NULL;
8496 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8497 STMT_VINFO_DATA_REF (res) = NULL;
8498 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8500 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8501 STMT_VINFO_DR_OFFSET (res) = NULL;
8502 STMT_VINFO_DR_INIT (res) = NULL;
8503 STMT_VINFO_DR_STEP (res) = NULL;
8504 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8506 if (gimple_code (stmt) == GIMPLE_PHI
8507 && is_loop_header_bb_p (gimple_bb (stmt)))
8508 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8509 else
8510 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8512 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8513 STMT_SLP_TYPE (res) = loop_vect;
8514 STMT_VINFO_NUM_SLP_USES (res) = 0;
8516 GROUP_FIRST_ELEMENT (res) = NULL;
8517 GROUP_NEXT_ELEMENT (res) = NULL;
8518 GROUP_SIZE (res) = 0;
8519 GROUP_STORE_COUNT (res) = 0;
8520 GROUP_GAP (res) = 0;
8521 GROUP_SAME_DR_STMT (res) = NULL;
8523 return res;
8527 /* Create a hash table for stmt_vec_info. */
8529 void
8530 init_stmt_vec_info_vec (void)
8532 gcc_assert (!stmt_vec_info_vec.exists ());
8533 stmt_vec_info_vec.create (50);
8537 /* Free hash table for stmt_vec_info. */
8539 void
8540 free_stmt_vec_info_vec (void)
8542 unsigned int i;
8543 stmt_vec_info info;
8544 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8545 if (info != NULL)
8546 free_stmt_vec_info (STMT_VINFO_STMT (info));
8547 gcc_assert (stmt_vec_info_vec.exists ());
8548 stmt_vec_info_vec.release ();
8552 /* Free stmt vectorization related info. */
8554 void
8555 free_stmt_vec_info (gimple *stmt)
8557 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8559 if (!stmt_info)
8560 return;
8562 /* Check if this statement has a related "pattern stmt"
8563 (introduced by the vectorizer during the pattern recognition
8564 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8565 too. */
8566 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8568 stmt_vec_info patt_info
8569 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8570 if (patt_info)
8572 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8573 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8574 gimple_set_bb (patt_stmt, NULL);
8575 tree lhs = gimple_get_lhs (patt_stmt);
8576 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8577 release_ssa_name (lhs);
8578 if (seq)
8580 gimple_stmt_iterator si;
8581 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8583 gimple *seq_stmt = gsi_stmt (si);
8584 gimple_set_bb (seq_stmt, NULL);
8585 lhs = gimple_get_lhs (seq_stmt);
8586 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8587 release_ssa_name (lhs);
8588 free_stmt_vec_info (seq_stmt);
8591 free_stmt_vec_info (patt_stmt);
8595 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8596 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8597 set_vinfo_for_stmt (stmt, NULL);
8598 free (stmt_info);
8602 /* Function get_vectype_for_scalar_type_and_size.
8604 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8605 by the target. */
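/* For example, for SCALAR_TYPE 'int' (4 bytes) and SIZE 16 this would return
   a four-element integer vector type, assuming the target provides such a
   16-byte vector mode; with SIZE 0 the target's preferred SIMD mode is used
   instead.  */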
8607 static tree
8608 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8610 machine_mode inner_mode = TYPE_MODE (scalar_type);
8611 machine_mode simd_mode;
8612 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8613 int nunits;
8614 tree vectype;
8616 if (nbytes == 0)
8617 return NULL_TREE;
8619 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8620 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8621 return NULL_TREE;
8623 /* For vector types of elements whose mode precision doesn't
8624 match their type's precision we use an element type of mode
8625 precision. The vectorization routines will have to make sure
8626 they support the proper result truncation/extension.
8627 We also make sure to build vector types with INTEGER_TYPE
8628 component type only. */
8629 if (INTEGRAL_TYPE_P (scalar_type)
8630 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8631 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8632 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8633 TYPE_UNSIGNED (scalar_type));
8635 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8636 When the component mode passes the above test simply use a type
8637 corresponding to that mode. The theory is that any use that
8638 would cause problems with this will disable vectorization anyway. */
8639 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
8640 && !INTEGRAL_TYPE_P (scalar_type))
8641 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
8643 /* We can't build a vector type of elements with alignment bigger than
8644 their size. */
8645 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
8646 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
8647 TYPE_UNSIGNED (scalar_type));
8649 /* If we fell back to using the mode, fail if there was
8650 no scalar type for it. */
8651 if (scalar_type == NULL_TREE)
8652 return NULL_TREE;
8654 /* If no size was supplied use the mode the target prefers. Otherwise
8655 look up a vector mode of the specified size. */
8656 if (size == 0)
8657 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
8658 else
8659 simd_mode = mode_for_vector (inner_mode, size / nbytes);
8660 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
8661 if (nunits <= 1)
8662 return NULL_TREE;
8664 vectype = build_vector_type (scalar_type, nunits);
8666 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8667 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
8668 return NULL_TREE;
8670 return vectype;
8673 unsigned int current_vector_size;
8675 /* Function get_vectype_for_scalar_type.
8677 Returns the vector type corresponding to SCALAR_TYPE as supported
8678 by the target. */
8680 tree
8681 get_vectype_for_scalar_type (tree scalar_type)
8683 tree vectype;
8684 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
8685 current_vector_size);
8686 if (vectype
8687 && current_vector_size == 0)
8688 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
8689 return vectype;
8692 /* Function get_mask_type_for_scalar_type.
8694 Returns the mask type corresponding to a result of comparison
8695 of vectors of specified SCALAR_TYPE as supported by target. */
8697 tree
8698 get_mask_type_for_scalar_type (tree scalar_type)
8700 tree vectype = get_vectype_for_scalar_type (scalar_type);
8702 if (!vectype)
8703 return NULL;
8705 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
8706 current_vector_size);
8709 /* Function get_same_sized_vectype
8711 Returns a vector type corresponding to SCALAR_TYPE of size
8712 VECTOR_TYPE if supported by the target. */
8714 tree
8715 get_same_sized_vectype (tree scalar_type, tree vector_type)
8717 if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
8718 return build_same_sized_truth_vector_type (vector_type);
8720 return get_vectype_for_scalar_type_and_size
8721 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
8724 /* Function vect_is_simple_use.
8726 Input:
8727 VINFO - the vect info of the loop or basic block that is being vectorized.
8728 OPERAND - operand in the loop or bb.
8729 Output:
8730 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
8731 DT - the type of definition
8733 Returns whether a stmt with OPERAND can be vectorized.
8734 For loops, supportable operands are constants, loop invariants, and operands
8735 that are defined by the current iteration of the loop. Unsupportable
8736 operands are those that are defined by a previous iteration of the loop (as
8737 is the case in reduction/induction computations).
8738 For basic blocks, supportable operands are constants and bb invariants.
8739 For now, operands defined outside the basic block are not supported. */
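/* As an illustrative example, in a loop like

     for (i = 0; i < n; i++)
       a[i] = b[i] * c + 1;

   the constant 1 gives a vect_constant_def, the loop invariant 'c' a
   vect_external_def, and the value loaded from b[i] a vect_internal_def.  */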
8741 bool
8742 vect_is_simple_use (tree operand, vec_info *vinfo,
8743 gimple **def_stmt, enum vect_def_type *dt)
8745 *def_stmt = NULL;
8746 *dt = vect_unknown_def_type;
8748 if (dump_enabled_p ())
8750 dump_printf_loc (MSG_NOTE, vect_location,
8751 "vect_is_simple_use: operand ");
8752 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
8753 dump_printf (MSG_NOTE, "\n");
8756 if (CONSTANT_CLASS_P (operand))
8758 *dt = vect_constant_def;
8759 return true;
8762 if (is_gimple_min_invariant (operand))
8764 *dt = vect_external_def;
8765 return true;
8768 if (TREE_CODE (operand) != SSA_NAME)
8770 if (dump_enabled_p ())
8771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8772 "not ssa-name.\n");
8773 return false;
8776 if (SSA_NAME_IS_DEFAULT_DEF (operand))
8778 *dt = vect_external_def;
8779 return true;
8782 *def_stmt = SSA_NAME_DEF_STMT (operand);
8783 if (dump_enabled_p ())
8785 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
8786 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
8789 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
8790 *dt = vect_external_def;
8791 else
8793 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
8794 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
8797 if (dump_enabled_p ())
8799 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
8800 switch (*dt)
8802 case vect_uninitialized_def:
8803 dump_printf (MSG_NOTE, "uninitialized\n");
8804 break;
8805 case vect_constant_def:
8806 dump_printf (MSG_NOTE, "constant\n");
8807 break;
8808 case vect_external_def:
8809 dump_printf (MSG_NOTE, "external\n");
8810 break;
8811 case vect_internal_def:
8812 dump_printf (MSG_NOTE, "internal\n");
8813 break;
8814 case vect_induction_def:
8815 dump_printf (MSG_NOTE, "induction\n");
8816 break;
8817 case vect_reduction_def:
8818 dump_printf (MSG_NOTE, "reduction\n");
8819 break;
8820 case vect_double_reduction_def:
8821 dump_printf (MSG_NOTE, "double reduction\n");
8822 break;
8823 case vect_nested_cycle:
8824 dump_printf (MSG_NOTE, "nested cycle\n");
8825 break;
8826 case vect_unknown_def_type:
8827 dump_printf (MSG_NOTE, "unknown\n");
8828 break;
8832 if (*dt == vect_unknown_def_type)
8834 if (dump_enabled_p ())
8835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8836 "Unsupported pattern.\n");
8837 return false;
8840 switch (gimple_code (*def_stmt))
8842 case GIMPLE_PHI:
8843 case GIMPLE_ASSIGN:
8844 case GIMPLE_CALL:
8845 break;
8846 default:
8847 if (dump_enabled_p ())
8848 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8849 "unsupported defining stmt:\n");
8850 return false;
8853 return true;
8856 /* Function vect_is_simple_use.
8858 Same as vect_is_simple_use but also determines the vector operand
8859 type of OPERAND and stores it to *VECTYPE. If the definition of
8860 OPERAND is vect_uninitialized_def, vect_constant_def or
8861 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
8862 is responsible for computing the best-suited vector type for the
8863 scalar operand. */
8865 bool
8866 vect_is_simple_use (tree operand, vec_info *vinfo,
8867 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
8869 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
8870 return false;
8872 /* Now get a vector type if the def is internal, otherwise supply
8873 NULL_TREE and leave it up to the caller to figure out a proper
8874 type for the use stmt. */
8875 if (*dt == vect_internal_def
8876 || *dt == vect_induction_def
8877 || *dt == vect_reduction_def
8878 || *dt == vect_double_reduction_def
8879 || *dt == vect_nested_cycle)
8881 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8883 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8884 && !STMT_VINFO_RELEVANT (stmt_info)
8885 && !STMT_VINFO_LIVE_P (stmt_info))
8886 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8888 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8889 gcc_assert (*vectype != NULL_TREE);
8891 else if (*dt == vect_uninitialized_def
8892 || *dt == vect_constant_def
8893 || *dt == vect_external_def)
8894 *vectype = NULL_TREE;
8895 else
8896 gcc_unreachable ();
8898 return true;
8902 /* Function supportable_widening_operation
8904 Check whether an operation represented by the code CODE is a
8905 widening operation that is supported by the target platform in
8906 vector form (i.e., when operating on arguments of type VECTYPE_IN
8907 producing a result of type VECTYPE_OUT).
8909 Widening operations we currently support are NOP (CONVERT), FLOAT
8910 and WIDEN_MULT. This function checks if these operations are supported
8911 by the target platform either directly (via vector tree-codes), or via
8912 target builtins.
8914 Output:
8915 - CODE1 and CODE2 are codes of vector operations to be used when
8916 vectorizing the operation, if available.
8917 - MULTI_STEP_CVT determines the number of required intermediate steps in
8918 case of multi-step conversion (like char->short->int - in that case
8919 MULTI_STEP_CVT will be 1).
8920 - INTERM_TYPES contains the intermediate type required to perform the
8921 widening operation (short in the above example). */
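/* For instance (a sketch of the char->short->int case mentioned above), the
   conversion would be done in two widening steps, e.g.

     short_lo = VEC_UNPACK_LO_EXPR <char_vec>;
     short_hi = VEC_UNPACK_HI_EXPR <char_vec>;
     int_lo   = VEC_UNPACK_LO_EXPR <short_lo>;   ... and so on,

   with *MULTI_STEP_CVT set to 1 and the intermediate short vector type
   recorded in INTERM_TYPES.  */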
8923 bool
8924 supportable_widening_operation (enum tree_code code, gimple *stmt,
8925 tree vectype_out, tree vectype_in,
8926 enum tree_code *code1, enum tree_code *code2,
8927 int *multi_step_cvt,
8928 vec<tree> *interm_types)
8930 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8931 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8932 struct loop *vect_loop = NULL;
8933 machine_mode vec_mode;
8934 enum insn_code icode1, icode2;
8935 optab optab1, optab2;
8936 tree vectype = vectype_in;
8937 tree wide_vectype = vectype_out;
8938 enum tree_code c1, c2;
8939 int i;
8940 tree prev_type, intermediate_type;
8941 machine_mode intermediate_mode, prev_mode;
8942 optab optab3, optab4;
8944 *multi_step_cvt = 0;
8945 if (loop_info)
8946 vect_loop = LOOP_VINFO_LOOP (loop_info);
8948 switch (code)
8950 case WIDEN_MULT_EXPR:
8951 /* The result of a vectorized widening operation usually requires
8952 two vectors (because the widened results do not fit into one vector).
8953 The generated vector results would normally be expected to be
8954 generated in the same order as in the original scalar computation,
8955 i.e. if 8 results are generated in each vector iteration, they are
8956 to be organized as follows:
8957 vect1: [res1,res2,res3,res4],
8958 vect2: [res5,res6,res7,res8].
8960 However, in the special case that the result of the widening
8961 operation is used in a reduction computation only, the order doesn't
8962 matter (because when vectorizing a reduction we change the order of
8963 the computation). Some targets can take advantage of this and
8964 generate more efficient code. For example, targets like Altivec,
8965 that support widen_mult using a sequence of {mult_even,mult_odd}
8966 generate the following vectors:
8967 vect1: [res1,res3,res5,res7],
8968 vect2: [res2,res4,res6,res8].
8970 When vectorizing outer-loops, we execute the inner-loop sequentially
8971 (each vectorized inner-loop iteration contributes to VF outer-loop
8972 iterations in parallel). We therefore don't allow changing the
8973 order of the computation in the inner-loop during outer-loop
8974 vectorization. */
8975 /* TODO: Another case in which order doesn't *really* matter is when we
8976 widen and then contract again, e.g. (short)((int)x * y >> 8).
8977 Normally, pack_trunc performs an even/odd permute, whereas the
8978 repack from an even/odd expansion would be an interleave, which
8979 would be significantly simpler for e.g. AVX2. */
8980 /* In any case, in order to avoid duplicating the code below, recurse
8981 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8982 are properly set up for the caller. If we fail, we'll continue with
8983 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8984 if (vect_loop
8985 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8986 && !nested_in_vect_loop_p (vect_loop, stmt)
8987 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8988 stmt, vectype_out, vectype_in,
8989 code1, code2, multi_step_cvt,
8990 interm_types))
8992 /* Elements in a vector with the vect_used_by_reduction property cannot
8993 be reordered if the use chain with this property does not have the
8994 same operation. One such example is s += a * b, where elements
8995 in a and b cannot be reordered. Here we check if the vector defined
8996 by STMT is only directly used in the reduction statement. */
8997 tree lhs = gimple_assign_lhs (stmt);
8998 use_operand_p dummy;
8999 gimple *use_stmt;
9000 stmt_vec_info use_stmt_info = NULL;
9001 if (single_imm_use (lhs, &dummy, &use_stmt)
9002 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9003 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9004 return true;
9006 c1 = VEC_WIDEN_MULT_LO_EXPR;
9007 c2 = VEC_WIDEN_MULT_HI_EXPR;
9008 break;
9010 case DOT_PROD_EXPR:
9011 c1 = DOT_PROD_EXPR;
9012 c2 = DOT_PROD_EXPR;
9013 break;
9015 case SAD_EXPR:
9016 c1 = SAD_EXPR;
9017 c2 = SAD_EXPR;
9018 break;
9020 case VEC_WIDEN_MULT_EVEN_EXPR:
9021 /* Support the recursion induced just above. */
9022 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9023 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9024 break;
9026 case WIDEN_LSHIFT_EXPR:
9027 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9028 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9029 break;
9031 CASE_CONVERT:
9032 c1 = VEC_UNPACK_LO_EXPR;
9033 c2 = VEC_UNPACK_HI_EXPR;
9034 break;
9036 case FLOAT_EXPR:
9037 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9038 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9039 break;
9041 case FIX_TRUNC_EXPR:
9042 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9043 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9044 computing the operation. */
9045 return false;
9047 default:
9048 gcc_unreachable ();
9051 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9052 std::swap (c1, c2);
9054 if (code == FIX_TRUNC_EXPR)
9056 /* The signedness is determined from the output operand. */
9057 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9058 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9060 else
9062 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9063 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9066 if (!optab1 || !optab2)
9067 return false;
9069 vec_mode = TYPE_MODE (vectype);
9070 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9071 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9072 return false;
9074 *code1 = c1;
9075 *code2 = c2;
9077 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9078 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9079 /* For scalar masks we may have different boolean
9080 vector types having the same QImode. Thus we
9081 add an additional check on the number of elements. */
9082 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9083 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9084 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9086 /* Check if it's a multi-step conversion that can be done using intermediate
9087 types. */
9089 prev_type = vectype;
9090 prev_mode = vec_mode;
9092 if (!CONVERT_EXPR_CODE_P (code))
9093 return false;
9095 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9096 intermediate steps in the promotion sequence. We try
9097 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9098 not. */
9099 interm_types->create (MAX_INTERM_CVT_STEPS);
9100 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9102 intermediate_mode = insn_data[icode1].operand[0].mode;
9103 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9105 intermediate_type
9106 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9107 current_vector_size);
9108 if (intermediate_mode != TYPE_MODE (intermediate_type))
9109 return false;
9111 else
9112 intermediate_type
9113 = lang_hooks.types.type_for_mode (intermediate_mode,
9114 TYPE_UNSIGNED (prev_type));
9116 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9117 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9119 if (!optab3 || !optab4
9120 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9121 || insn_data[icode1].operand[0].mode != intermediate_mode
9122 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9123 || insn_data[icode2].operand[0].mode != intermediate_mode
9124 || ((icode1 = optab_handler (optab3, intermediate_mode))
9125 == CODE_FOR_nothing)
9126 || ((icode2 = optab_handler (optab4, intermediate_mode))
9127 == CODE_FOR_nothing))
9128 break;
9130 interm_types->quick_push (intermediate_type);
9131 (*multi_step_cvt)++;
9133 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9134 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9135 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9136 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9137 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9139 prev_type = intermediate_type;
9140 prev_mode = intermediate_mode;
9143 interm_types->release ();
9144 return false;
9148 /* Function supportable_narrowing_operation
9150 Check whether an operation represented by the code CODE is a
9151 narrowing operation that is supported by the target platform in
9152 vector form (i.e., when operating on arguments of type VECTYPE_IN
9153 and producing a result of type VECTYPE_OUT).
9155 Narrowing operations we currently support are NOP (CONVERT) and
9156 FIX_TRUNC. This function checks if these operations are supported by
9157 the target platform directly via vector tree-codes.
9159 Output:
9160 - CODE1 is the code of a vector operation to be used when
9161 vectorizing the operation, if available.
9162 - MULTI_STEP_CVT determines the number of required intermediate steps in
9163 case of multi-step conversion (like int->short->char - in that case
9164 MULTI_STEP_CVT will be 1).
9165 - INTERM_TYPES contains the intermediate type required to perform the
9166 narrowing operation (short in the above example). */
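/* For instance (a sketch of the int->short->char case mentioned above), the
   conversion would use two VEC_PACK_TRUNC_EXPR steps, first packing pairs of
   int vectors into short vectors and then pairs of short vectors into char
   vectors, with *MULTI_STEP_CVT set to 1 and the intermediate short vector
   type recorded in INTERM_TYPES.  */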
9168 bool
9169 supportable_narrowing_operation (enum tree_code code,
9170 tree vectype_out, tree vectype_in,
9171 enum tree_code *code1, int *multi_step_cvt,
9172 vec<tree> *interm_types)
9174 machine_mode vec_mode;
9175 enum insn_code icode1;
9176 optab optab1, interm_optab;
9177 tree vectype = vectype_in;
9178 tree narrow_vectype = vectype_out;
9179 enum tree_code c1;
9180 tree intermediate_type, prev_type;
9181 machine_mode intermediate_mode, prev_mode;
9182 int i;
9183 bool uns;
9185 *multi_step_cvt = 0;
9186 switch (code)
9188 CASE_CONVERT:
9189 c1 = VEC_PACK_TRUNC_EXPR;
9190 break;
9192 case FIX_TRUNC_EXPR:
9193 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9194 break;
9196 case FLOAT_EXPR:
9197 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9198 tree code and optabs used for computing the operation. */
9199 return false;
9201 default:
9202 gcc_unreachable ();
9205 if (code == FIX_TRUNC_EXPR)
9206 /* The signedness is determined from the output operand. */
9207 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9208 else
9209 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9211 if (!optab1)
9212 return false;
9214 vec_mode = TYPE_MODE (vectype);
9215 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9216 return false;
9218 *code1 = c1;
9220 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9221 /* For scalar masks we may have different boolean
9222 vector types having the same QImode. Thus we
9223 add an additional check on the number of elements. */
9224 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9225 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9226 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9228 /* Check if it's a multi-step conversion that can be done using intermediate
9229 types. */
9230 prev_mode = vec_mode;
9231 prev_type = vectype;
9232 if (code == FIX_TRUNC_EXPR)
9233 uns = TYPE_UNSIGNED (vectype_out);
9234 else
9235 uns = TYPE_UNSIGNED (vectype);
9237 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9238 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9239 costly than signed. */
9240 if (code == FIX_TRUNC_EXPR && uns)
9242 enum insn_code icode2;
9244 intermediate_type
9245 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9246 interm_optab
9247 = optab_for_tree_code (c1, intermediate_type, optab_default);
9248 if (interm_optab != unknown_optab
9249 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9250 && insn_data[icode1].operand[0].mode
9251 == insn_data[icode2].operand[0].mode)
9253 uns = false;
9254 optab1 = interm_optab;
9255 icode1 = icode2;
9259 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9260 intermediate steps in the narrowing sequence. We try
9261 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9262 interm_types->create (MAX_INTERM_CVT_STEPS);
9263 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9265 intermediate_mode = insn_data[icode1].operand[0].mode;
9266 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9268 intermediate_type
9269 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9270 current_vector_size);
9271 if (intermediate_mode != TYPE_MODE (intermediate_type))
9272 return false;
9274 else
9275 intermediate_type
9276 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9277 interm_optab
9278 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9279 optab_default);
9280 if (!interm_optab
9281 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9282 || insn_data[icode1].operand[0].mode != intermediate_mode
9283 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9284 == CODE_FOR_nothing))
9285 break;
9287 interm_types->quick_push (intermediate_type);
9288 (*multi_step_cvt)++;
9290 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9291 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9292 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9293 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9295 prev_mode = intermediate_mode;
9296 prev_type = intermediate_type;
9297 optab1 = interm_optab;
9300 interm_types->release ();
9301 return false;