[official-gcc.git] / gcc / tree-vect-stmts.c
blob 07dc222b0dcedc2c5a33f072470a802d51139d0b
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
64 return STMT_VINFO_VECTYPE (stmt_info);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
77 if (!loop_vinfo)
78 return false;
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
82 return (bb->loop_father == loop->inner);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 int misalign, enum vect_cost_model_location where)
94 if ((kind == vector_load || kind == unaligned_load)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96 kind = vector_gather_load;
97 if ((kind == vector_store || kind == unaligned_store)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_scatter_store;
101 stmt_info_for_cost si = { count, kind, where,
102 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
103 misalign };
104 body_cost_vec->safe_push (si);
106 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
107 return (unsigned)
108 (builtin_vectorization_cost (kind, vectype, misalign) * count);
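/* Illustrative sketch (not part of the original file): a typical caller
   records one unaligned load for the loop body and accumulates the
   target's preliminary estimate, e.g.

     inside_cost += record_stmt_cost (cost_vec, 1, unaligned_load,
                                      stmt_info, DR_MISALIGNMENT (dr),
                                      vect_body);

   as done by vect_get_load_cost further down in this file.  */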
111 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113 static tree
114 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
117 "vect_array");
120 /* ARRAY is an array of vectors created by create_vector_array.
121 Return an SSA_NAME for the vector in index N. The reference
122 is part of the vectorization of STMT and the vector is associated
123 with scalar destination SCALAR_DEST. */
125 static tree
126 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
127 tree array, unsigned HOST_WIDE_INT n)
129 tree vect_type, vect, vect_name, array_ref;
130 gimple *new_stmt;
132 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
133 vect_type = TREE_TYPE (TREE_TYPE (array));
134 vect = vect_create_destination_var (scalar_dest, vect_type);
135 array_ref = build4 (ARRAY_REF, vect_type, array,
136 build_int_cst (size_type_node, n),
137 NULL_TREE, NULL_TREE);
139 new_stmt = gimple_build_assign (vect, array_ref);
140 vect_name = make_ssa_name (vect, new_stmt);
141 gimple_assign_set_lhs (new_stmt, vect_name);
142 vect_finish_stmt_generation (stmt, new_stmt, gsi);
144 return vect_name;
147 /* ARRAY is an array of vectors created by create_vector_array.
148 Emit code to store SSA_NAME VECT in index N of the array.
149 The store is part of the vectorization of STMT. */
151 static void
152 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
153 tree array, unsigned HOST_WIDE_INT n)
155 tree array_ref;
156 gimple *new_stmt;
158 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
159 build_int_cst (size_type_node, n),
160 NULL_TREE, NULL_TREE);
162 new_stmt = gimple_build_assign (array_ref, vect);
163 vect_finish_stmt_generation (stmt, new_stmt, gsi);
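/* Illustrative sketch (not part of the original file): for an array of,
   say, four V4SI vectors these two helpers emit GIMPLE of the form

     vect_10 = vect_array[2];      <- read_vector_array
     vect_array[3] = vect_11;      <- write_vector_array

   where the SSA names are invented here for illustration.  */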
166 /* PTR is a pointer to an array of type TYPE. Return a representation
167 of *PTR. The memory reference replaces those in FIRST_DR
168 (and its group). */
170 static tree
171 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
173 tree mem_ref;
175 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
176 /* Arrays have the same alignment as their type. */
177 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
178 return mem_ref;
181 /* Add a clobber of variable VAR to the vectorization of STMT.
182 Emit the clobber before *GSI. */
184 static void
185 vect_clobber_variable (gimple *stmt, gimple_stmt_iterator *gsi, tree var)
187 tree clobber = build_clobber (TREE_TYPE (var));
188 gimple *new_stmt = gimple_build_assign (var, clobber);
189 vect_finish_stmt_generation (stmt, new_stmt, gsi);
192 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
194 /* Function vect_mark_relevant.
196 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
198 static void
199 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
200 enum vect_relevant relevant, bool live_p)
202 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
203 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
204 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
205 gimple *pattern_stmt;
207 if (dump_enabled_p ())
209 dump_printf_loc (MSG_NOTE, vect_location,
210 "mark relevant %d, live %d: ", relevant, live_p);
211 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern; in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
227 if (dump_enabled_p ())
228 dump_printf_loc (MSG_NOTE, vect_location,
229 "last stmt in pattern. don't mark"
230 " relevant/live.\n");
231 stmt_info = vinfo_for_stmt (pattern_stmt);
232 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
233 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
234 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
235 stmt = pattern_stmt;
238 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
239 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
240 STMT_VINFO_RELEVANT (stmt_info) = relevant;
242 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
243 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
245 if (dump_enabled_p ())
246 dump_printf_loc (MSG_NOTE, vect_location,
247 "already marked relevant/live.\n");
248 return;
251 worklist->safe_push (stmt);
255 /* Function is_simple_and_all_uses_invariant
257 Return true if STMT is simple and all uses of it are invariant. */
259 bool
260 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
262 tree op;
263 gimple *def_stmt;
264 ssa_op_iter iter;
266 if (!is_gimple_assign (stmt))
267 return false;
269 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
271 enum vect_def_type dt = vect_uninitialized_def;
273 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
275 if (dump_enabled_p ())
276 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
277 "use not simple.\n");
278 return false;
281 if (dt != vect_external_def && dt != vect_constant_def)
282 return false;
284 return true;
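/* Example (a sketch, not part of the original file): inside the loop,
   x_3 = n_7(D) * 4 is a simple assignment and all of its uses are
   loop-invariant when n_7 is defined outside the loop (vect_external_def),
   so the function returns true for it.  The names are invented.  */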
287 /* Function vect_stmt_relevant_p.
289 Return true if STMT in loop that is represented by LOOP_VINFO is
290 "relevant for vectorization".
292 A stmt is considered "relevant for vectorization" if:
293 - it has uses outside the loop.
294 - it has vdefs (it alters memory).
295 - it is a control stmt in the loop (other than the loop exit condition).
297 CHECKME: what other side effects would the vectorizer allow? */
299 static bool
300 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
301 enum vect_relevant *relevant, bool *live_p)
303 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
304 ssa_op_iter op_iter;
305 imm_use_iterator imm_iter;
306 use_operand_p use_p;
307 def_operand_p def_p;
309 *relevant = vect_unused_in_scope;
310 *live_p = false;
312 /* cond stmt other than loop exit cond. */
313 if (is_ctrl_stmt (stmt)
314 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
315 != loop_exit_ctrl_vec_info_type)
316 *relevant = vect_used_in_scope;
318 /* changing memory. */
319 if (gimple_code (stmt) != GIMPLE_PHI)
320 if (gimple_vdef (stmt)
321 && !gimple_clobber_p (stmt))
323 if (dump_enabled_p ())
324 dump_printf_loc (MSG_NOTE, vect_location,
325 "vec_stmt_relevant_p: stmt has vdefs.\n");
326 *relevant = vect_used_in_scope;
329 /* uses outside the loop. */
330 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
332 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
334 basic_block bb = gimple_bb (USE_STMT (use_p));
335 if (!flow_bb_inside_loop_p (loop, bb))
337 if (dump_enabled_p ())
338 dump_printf_loc (MSG_NOTE, vect_location,
339 "vec_stmt_relevant_p: used out of loop.\n");
341 if (is_gimple_debug (USE_STMT (use_p)))
342 continue;
344 /* We expect all such uses to be in the loop exit phis
345 (because of loop-closed SSA form).  */
346 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
347 gcc_assert (bb == single_exit (loop)->dest);
349 *live_p = true;
354 if (*live_p && *relevant == vect_unused_in_scope
355 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
357 if (dump_enabled_p ())
358 dump_printf_loc (MSG_NOTE, vect_location,
359 "vec_stmt_relevant_p: stmt live but not relevant.\n");
360 *relevant = vect_used_only_live;
363 return (*live_p || *relevant);
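/* A minimal sketch of the two common cases above (not part of the
   original file; names invented):

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;    <- has a vdef: relevant
         t = b[i];           <- t used after the loop: live
       }
     ... = t;
 */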
367 /* Function exist_non_indexing_operands_for_use_p
369 USE is one of the uses attached to STMT. Check if USE is
370 used in STMT for anything other than indexing an array. */
372 static bool
373 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
375 tree operand;
376 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
378 /* USE corresponds to some operand in STMT. If there is no data
379 reference in STMT, then any operand that corresponds to USE
380 is not indexing an array. */
381 if (!STMT_VINFO_DATA_REF (stmt_info))
382 return true;
384 /* STMT has a data_ref. FORNOW this means that it is one of
385 the following forms:
386 -1- ARRAY_REF = var
387 -2- var = ARRAY_REF
388 (This should have been verified in analyze_data_refs).
390 'var' in the second case corresponds to a def, not a use,
391 so USE cannot correspond to any operands that are not used
392 for array indexing.
394 Therefore, all we need to check is if STMT falls into the
395 first case, and whether var corresponds to USE. */
397 if (!gimple_assign_copy_p (stmt))
399 if (is_gimple_call (stmt)
400 && gimple_call_internal_p (stmt))
402 internal_fn ifn = gimple_call_internal_fn (stmt);
403 int mask_index = internal_fn_mask_index (ifn);
404 if (mask_index >= 0
405 && use == gimple_call_arg (stmt, mask_index))
406 return true;
407 int stored_value_index = internal_fn_stored_value_index (ifn);
408 if (stored_value_index >= 0
409 && use == gimple_call_arg (stmt, stored_value_index))
410 return true;
411 if (internal_gather_scatter_fn_p (ifn)
412 && use == gimple_call_arg (stmt, 1))
413 return true;
415 return false;
418 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
419 return false;
420 operand = gimple_assign_rhs1 (stmt);
421 if (TREE_CODE (operand) != SSA_NAME)
422 return false;
424 if (operand == use)
425 return true;
427 return false;
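/* Example (a sketch, not part of the original file): for the store
   a[i_1] = x_2 the use x_2 is the stored value (rhs1 of the copy), so it
   has a non-indexing use, whereas i_1 only feeds the address computation
   and so is reported as indexing-only.  The names are invented.  */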
432 /* Function process_use.
434 Inputs:
435 - a USE in STMT in a loop represented by LOOP_VINFO
436 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
437 that defined USE. This is done by calling mark_relevant and passing it
438 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
439 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
440 be performed.
442 Outputs:
443 Generally, LIVE_P and RELEVANT are used to define the liveness and
444 relevance info of the DEF_STMT of this USE:
445 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
446 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
447 Exceptions:
448 - case 1: If USE is used only for address computations (e.g. array indexing),
449 which does not need to be directly vectorized, then the liveness/relevance
450 of the respective DEF_STMT is left unchanged.
451 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
452 skip DEF_STMT because it has already been processed.
453 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
454 be modified accordingly.
456 Return true if everything is as expected. Return false otherwise. */
458 static bool
459 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
460 enum vect_relevant relevant, vec<gimple *> *worklist,
461 bool force)
463 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
464 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
465 stmt_vec_info dstmt_vinfo;
466 basic_block bb, def_bb;
467 gimple *def_stmt;
468 enum vect_def_type dt;
470 /* case 1: we are only interested in uses that need to be vectorized. Uses
471 that are used for address computation are not considered relevant. */
472 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
473 return true;
475 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
477 if (dump_enabled_p ())
478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
479 "not vectorized: unsupported use in stmt.\n");
480 return false;
483 if (!def_stmt || gimple_nop_p (def_stmt))
484 return true;
486 def_bb = gimple_bb (def_stmt);
487 if (!flow_bb_inside_loop_p (loop, def_bb))
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
491 return true;
494 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
495 DEF_STMT must have already been processed, because this should be the
496 only way that STMT, which is a reduction-phi, was put in the worklist,
497 as there should be no other uses for DEF_STMT in the loop. So we just
498 check that everything is as expected, and we are done. */
499 dstmt_vinfo = vinfo_for_stmt (def_stmt);
500 bb = gimple_bb (stmt);
501 if (gimple_code (stmt) == GIMPLE_PHI
502 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
503 && gimple_code (def_stmt) != GIMPLE_PHI
504 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
505 && bb->loop_father == def_bb->loop_father)
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_NOTE, vect_location,
509 "reduc-stmt defining reduc-phi in the same nest.\n");
510 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
511 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
512 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
513 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
514 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
515 return true;
518 /* case 3a: outer-loop stmt defining an inner-loop stmt:
519 outer-loop-header-bb:
520 d = def_stmt
521 inner-loop:
522 stmt # use (d)
523 outer-loop-tail-bb:
524 ... */
525 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
527 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE, vect_location,
529 "outer-loop def-stmt defining inner-loop stmt.\n");
531 switch (relevant)
533 case vect_unused_in_scope:
534 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
535 vect_used_in_scope : vect_unused_in_scope;
536 break;
538 case vect_used_in_outer_by_reduction:
539 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
540 relevant = vect_used_by_reduction;
541 break;
543 case vect_used_in_outer:
544 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
545 relevant = vect_used_in_scope;
546 break;
548 case vect_used_in_scope:
549 break;
551 default:
552 gcc_unreachable ();
556 /* case 3b: inner-loop stmt defining an outer-loop stmt:
557 outer-loop-header-bb:
559 inner-loop:
560 d = def_stmt
561 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
562 stmt # use (d) */
563 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
565 if (dump_enabled_p ())
566 dump_printf_loc (MSG_NOTE, vect_location,
567 "inner-loop def-stmt defining outer-loop stmt.\n");
569 switch (relevant)
571 case vect_unused_in_scope:
572 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
573 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
574 vect_used_in_outer_by_reduction : vect_unused_in_scope;
575 break;
577 case vect_used_by_reduction:
578 case vect_used_only_live:
579 relevant = vect_used_in_outer_by_reduction;
580 break;
582 case vect_used_in_scope:
583 relevant = vect_used_in_outer;
584 break;
586 default:
587 gcc_unreachable ();
590 /* We are also not interested in uses on loop PHI backedges that are
591 inductions. Otherwise we'll needlessly vectorize the IV increment
592 and cause hybrid SLP for SLP inductions. Unless the PHI is live
593 of course. */
594 else if (gimple_code (stmt) == GIMPLE_PHI
595 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
596 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
597 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
598 == use))
600 if (dump_enabled_p ())
601 dump_printf_loc (MSG_NOTE, vect_location,
602 "induction value on backedge.\n");
603 return true;
607 vect_mark_relevant (worklist, def_stmt, relevant, false);
608 return true;
612 /* Function vect_mark_stmts_to_be_vectorized.
614 Not all stmts in the loop need to be vectorized. For example:
616 for i...
617 for j...
618 1. T0 = i + j
619 2. T1 = a[T0]
621 3. j = j + 1
623 Stmts 1 and 3 do not need to be vectorized, because loop control and
624 addressing of vectorized data-refs are handled differently.
626 This pass detects such stmts. */
628 bool
629 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
631 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
632 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
633 unsigned int nbbs = loop->num_nodes;
634 gimple_stmt_iterator si;
635 gimple *stmt;
636 unsigned int i;
637 stmt_vec_info stmt_vinfo;
638 basic_block bb;
639 gimple *phi;
640 bool live_p;
641 enum vect_relevant relevant;
643 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
645 auto_vec<gimple *, 64> worklist;
647 /* 1. Init worklist. */
648 for (i = 0; i < nbbs; i++)
650 bb = bbs[i];
651 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
653 phi = gsi_stmt (si);
654 if (dump_enabled_p ())
656 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
657 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
660 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
661 vect_mark_relevant (&worklist, phi, relevant, live_p);
663 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
665 stmt = gsi_stmt (si);
666 if (dump_enabled_p ())
668 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
669 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
672 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
673 vect_mark_relevant (&worklist, stmt, relevant, live_p);
677 /* 2. Process_worklist */
678 while (worklist.length () > 0)
680 use_operand_p use_p;
681 ssa_op_iter iter;
683 stmt = worklist.pop ();
684 if (dump_enabled_p ())
686 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
687 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
690 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
691 (DEF_STMT) as relevant/irrelevant according to the relevance property
692 of STMT. */
693 stmt_vinfo = vinfo_for_stmt (stmt);
694 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
696 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
697 propagated as is to the DEF_STMTs of its USEs.
699 One exception is when STMT has been identified as defining a reduction
700 variable; in this case we set the relevance to vect_used_by_reduction.
701 This is because we distinguish between two kinds of relevant stmts -
702 those that are used by a reduction computation, and those that are
703 (also) used by a regular computation. This allows us later on to
704 identify stmts that are used solely by a reduction, and therefore the
705 order of the results that they produce does not have to be kept. */
707 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
709 case vect_reduction_def:
710 gcc_assert (relevant != vect_unused_in_scope);
711 if (relevant != vect_unused_in_scope
712 && relevant != vect_used_in_scope
713 && relevant != vect_used_by_reduction
714 && relevant != vect_used_only_live)
716 if (dump_enabled_p ())
717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
718 "unsupported use of reduction.\n");
719 return false;
721 break;
723 case vect_nested_cycle:
724 if (relevant != vect_unused_in_scope
725 && relevant != vect_used_in_outer_by_reduction
726 && relevant != vect_used_in_outer)
728 if (dump_enabled_p ())
729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
730 "unsupported use of nested cycle.\n");
732 return false;
734 break;
736 case vect_double_reduction_def:
737 if (relevant != vect_unused_in_scope
738 && relevant != vect_used_by_reduction
739 && relevant != vect_used_only_live)
741 if (dump_enabled_p ())
742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
743 "unsupported use of double reduction.\n");
745 return false;
747 break;
749 default:
750 break;
753 if (is_pattern_stmt_p (stmt_vinfo))
755 /* Pattern statements are not inserted into the code, so
756 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
757 have to scan the RHS or function arguments instead. */
758 if (is_gimple_assign (stmt))
760 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
761 tree op = gimple_assign_rhs1 (stmt);
763 i = 1;
764 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
766 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
767 relevant, &worklist, false)
768 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
769 relevant, &worklist, false))
770 return false;
771 i = 2;
773 for (; i < gimple_num_ops (stmt); i++)
775 op = gimple_op (stmt, i);
776 if (TREE_CODE (op) == SSA_NAME
777 && !process_use (stmt, op, loop_vinfo, relevant,
778 &worklist, false))
779 return false;
782 else if (is_gimple_call (stmt))
784 for (i = 0; i < gimple_call_num_args (stmt); i++)
786 tree arg = gimple_call_arg (stmt, i);
787 if (!process_use (stmt, arg, loop_vinfo, relevant,
788 &worklist, false))
789 return false;
793 else
794 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
796 tree op = USE_FROM_PTR (use_p);
797 if (!process_use (stmt, op, loop_vinfo, relevant,
798 &worklist, false))
799 return false;
802 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
804 gather_scatter_info gs_info;
805 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
806 gcc_unreachable ();
807 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
808 &worklist, true))
809 return false;
811 } /* while worklist */
813 return true;
816 /* Compute the prologue cost for invariant or constant operands. */
818 static unsigned
819 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
820 unsigned opno, enum vect_def_type dt,
821 stmt_vector_for_cost *cost_vec)
823 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
824 tree op = gimple_op (stmt, opno);
825 unsigned prologue_cost = 0;
827 /* Without looking at the actual initializer, a vector of
828 constants can be implemented as a load from the constant pool.
829 When all elements are the same we can use a splat. */
830 tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
831 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
832 unsigned num_vects_to_check;
833 unsigned HOST_WIDE_INT const_nunits;
834 unsigned nelt_limit;
835 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
836 && ! multiple_p (const_nunits, group_size))
838 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
839 nelt_limit = const_nunits;
841 else
843 /* If either the vector has variable length or the vectors
844 are composed of repeated whole groups we only need to
845 cost construction once. All vectors will be the same. */
846 num_vects_to_check = 1;
847 nelt_limit = group_size;
849 tree elt = NULL_TREE;
850 unsigned nelt = 0;
851 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
853 unsigned si = j % group_size;
854 if (nelt == 0)
855 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], opno);
856 /* ??? We're just tracking whether all operands of a single
857 vector initializer are the same; ideally we'd check if
858 we emitted the same one already. */
859 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si],
860 opno))
861 elt = NULL_TREE;
862 nelt++;
863 if (nelt == nelt_limit)
865 /* ??? We need to pass down stmt_info for a vector type
866 even if it points to the wrong stmt. */
867 prologue_cost += record_stmt_cost
868 (cost_vec, 1,
869 dt == vect_external_def
870 ? (elt ? scalar_to_vec : vec_construct)
871 : vector_load,
872 stmt_info, 0, vect_prologue);
873 nelt = 0;
877 return prologue_cost;
880 /* Function vect_model_simple_cost.
882 Models cost for simple operations, i.e. those that only emit ncopies of a
883 single op. Right now, this does not account for multiple insns that could
884 be generated for the single vector op. We will handle that shortly. */
886 static void
887 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
888 enum vect_def_type *dt,
889 int ndts,
890 slp_tree node,
891 stmt_vector_for_cost *cost_vec)
893 int inside_cost = 0, prologue_cost = 0;
895 gcc_assert (cost_vec != NULL);
897 /* ??? Somehow we need to fix this at the callers. */
898 if (node)
899 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
901 if (node)
903 /* Scan operands and account for prologue cost of constants/externals.
904 ??? This over-estimates cost for multiple uses and should be
905 re-engineered. */
906 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
907 tree lhs = gimple_get_lhs (stmt);
908 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
910 tree op = gimple_op (stmt, i);
911 gimple *def_stmt;
912 enum vect_def_type dt;
913 if (!op || op == lhs)
914 continue;
915 if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt)
916 && (dt == vect_constant_def || dt == vect_external_def))
917 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
918 i, dt, cost_vec);
921 else
922 /* Cost the "broadcast" of a scalar operand into a vector operand.
923 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
924 cost model. */
925 for (int i = 0; i < ndts; i++)
926 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
927 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
928 stmt_info, 0, vect_prologue);
930 /* Adjust for two-operator SLP nodes. */
931 if (node && SLP_TREE_TWO_OPERATORS (node))
933 ncopies *= 2;
934 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
935 stmt_info, 0, vect_body);
938 /* Pass the inside-of-loop statements to the target-specific cost model. */
939 inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
940 stmt_info, 0, vect_body);
942 if (dump_enabled_p ())
943 dump_printf_loc (MSG_NOTE, vect_location,
944 "vect_model_simple_cost: inside_cost = %d, "
945 "prologue_cost = %d .\n", inside_cost, prologue_cost);
949 /* Model cost for type demotion and promotion operations. PWR is normally
950 zero for single-step promotions and demotions. It will be one if
951 two-step promotion/demotion is required, and so on. Each additional
952 step doubles the number of instructions required. */
954 static void
955 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
956 enum vect_def_type *dt, int pwr,
957 stmt_vector_for_cost *cost_vec)
959 int i, tmp;
960 int inside_cost = 0, prologue_cost = 0;
962 for (i = 0; i < pwr + 1; i++)
964 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
965 (i + 1) : i;
966 inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
967 vec_promote_demote, stmt_info, 0,
968 vect_body);
971 /* FORNOW: Assuming a maximum of 2 args per stmt. */
972 for (i = 0; i < 2; i++)
973 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
974 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
975 stmt_info, 0, vect_prologue);
977 if (dump_enabled_p ())
978 dump_printf_loc (MSG_NOTE, vect_location,
979 "vect_model_promotion_demotion_cost: inside_cost = %d, "
980 "prologue_cost = %d .\n", inside_cost, prologue_cost);
983 /* Function vect_model_store_cost
985 Models cost for stores. In the case of grouped accesses, one access
986 has the overhead of the grouped access attributed to it. */
988 static void
989 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
990 enum vect_def_type dt,
991 vect_memory_access_type memory_access_type,
992 vec_load_store_type vls_type, slp_tree slp_node,
993 stmt_vector_for_cost *cost_vec)
995 unsigned int inside_cost = 0, prologue_cost = 0;
996 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
997 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
999 /* ??? Somehow we need to fix this at the callers. */
1000 if (slp_node)
1001 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1003 if (vls_type == VLS_STORE_INVARIANT)
1005 if (slp_node)
1006 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
1007 1, dt, cost_vec);
1008 else
1009 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
1010 stmt_info, 0, vect_prologue);
1013 /* Grouped stores update all elements in the group at once,
1014 so we want the DR for the first statement. */
1015 if (!slp_node && grouped_access_p)
1016 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
1018 /* True if we should include any once-per-group costs as well as
1019 the cost of the statement itself. For SLP we only get called
1020 once per group anyhow. */
1021 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1023 /* We assume that the cost of a single store-lanes instruction is
1024 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1025 access is instead being provided by a permute-and-store operation,
1026 include the cost of the permutes. */
1027 if (first_stmt_p
1028 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1030 /* Uses high and low interleave or shuffle operations for each
1031 needed permute. */
1032 int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
1033 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1034 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1035 stmt_info, 0, vect_body);
1037 if (dump_enabled_p ())
1038 dump_printf_loc (MSG_NOTE, vect_location,
1039 "vect_model_store_cost: strided group_size = %d .\n",
1040 group_size);
1043 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1044 /* Costs of the stores. */
1045 if (memory_access_type == VMAT_ELEMENTWISE
1046 || memory_access_type == VMAT_GATHER_SCATTER)
1048 /* N scalar stores plus extracting the elements. */
1049 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1050 inside_cost += record_stmt_cost (cost_vec,
1051 ncopies * assumed_nunits,
1052 scalar_store, stmt_info, 0, vect_body);
1054 else
1055 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1057 if (memory_access_type == VMAT_ELEMENTWISE
1058 || memory_access_type == VMAT_STRIDED_SLP)
1060 /* N scalar stores plus extracting the elements. */
1061 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1062 inside_cost += record_stmt_cost (cost_vec,
1063 ncopies * assumed_nunits,
1064 vec_to_scalar, stmt_info, 0, vect_body);
1067 if (dump_enabled_p ())
1068 dump_printf_loc (MSG_NOTE, vect_location,
1069 "vect_model_store_cost: inside_cost = %d, "
1070 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1074 /* Calculate cost of DR's memory access. */
1075 void
1076 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1077 unsigned int *inside_cost,
1078 stmt_vector_for_cost *body_cost_vec)
1080 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1081 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1083 switch (alignment_support_scheme)
1085 case dr_aligned:
1087 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1088 vector_store, stmt_info, 0,
1089 vect_body);
1091 if (dump_enabled_p ())
1092 dump_printf_loc (MSG_NOTE, vect_location,
1093 "vect_model_store_cost: aligned.\n");
1094 break;
1097 case dr_unaligned_supported:
1099 /* Here, we assign an additional cost for the unaligned store. */
1100 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1101 unaligned_store, stmt_info,
1102 DR_MISALIGNMENT (dr), vect_body);
1103 if (dump_enabled_p ())
1104 dump_printf_loc (MSG_NOTE, vect_location,
1105 "vect_model_store_cost: unaligned supported by "
1106 "hardware.\n");
1107 break;
1110 case dr_unaligned_unsupported:
1112 *inside_cost = VECT_MAX_COST;
1114 if (dump_enabled_p ())
1115 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1116 "vect_model_store_cost: unsupported access.\n");
1117 break;
1120 default:
1121 gcc_unreachable ();
1126 /* Function vect_model_load_cost
1128 Models cost for loads. In the case of grouped accesses, one access has
1129 the overhead of the grouped access attributed to it. Since unaligned
1130 accesses are supported for loads, we also account for the costs of the
1131 access scheme chosen. */
1133 static void
1134 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1135 vect_memory_access_type memory_access_type,
1136 slp_instance instance,
1137 slp_tree slp_node,
1138 stmt_vector_for_cost *cost_vec)
1140 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1141 unsigned int inside_cost = 0, prologue_cost = 0;
1142 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1144 gcc_assert (cost_vec);
1146 /* ??? Somehow we need to fix this at the callers. */
1147 if (slp_node)
1148 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1150 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1152 /* If the load is permuted then the alignment is determined by
1153 the first group element, not by the first scalar stmt DR. */
1154 gimple *stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
1155 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1156 /* Record the cost for the permutation. */
1157 unsigned n_perms;
1158 unsigned assumed_nunits
1159 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
1160 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1161 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1162 slp_vf, instance, true,
1163 &n_perms);
1164 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1165 stmt_info, 0, vect_body);
1166 /* And adjust the number of loads performed. This handles
1167 redundancies as well as loads that are later dead. */
1168 auto_sbitmap perm (DR_GROUP_SIZE (stmt_info));
1169 bitmap_clear (perm);
1170 for (unsigned i = 0;
1171 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1172 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1173 ncopies = 0;
1174 bool load_seen = false;
1175 for (unsigned i = 0; i < DR_GROUP_SIZE (stmt_info); ++i)
1177 if (i % assumed_nunits == 0)
1179 if (load_seen)
1180 ncopies++;
1181 load_seen = false;
1183 if (bitmap_bit_p (perm, i))
1184 load_seen = true;
1186 if (load_seen)
1187 ncopies++;
1188 gcc_assert (ncopies
1189 <= (DR_GROUP_SIZE (stmt_info) - DR_GROUP_GAP (stmt_info)
1190 + assumed_nunits - 1) / assumed_nunits);
1193 /* Grouped loads read all elements in the group at once,
1194 so we want the DR for the first statement. */
1195 if (!slp_node && grouped_access_p)
1196 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
1198 /* True if we should include any once-per-group costs as well as
1199 the cost of the statement itself. For SLP we only get called
1200 once per group anyhow. */
1201 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1203 /* We assume that the cost of a single load-lanes instruction is
1204 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1205 access is instead being provided by a load-and-permute operation,
1206 include the cost of the permutes. */
1207 if (first_stmt_p
1208 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1210 /* Uses even and odd extract operations or shuffle operations
1211 for each needed permute. */
1212 int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
1213 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1214 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1215 stmt_info, 0, vect_body);
1217 if (dump_enabled_p ())
1218 dump_printf_loc (MSG_NOTE, vect_location,
1219 "vect_model_load_cost: strided group_size = %d .\n",
1220 group_size);
1223 /* The loads themselves. */
1224 if (memory_access_type == VMAT_ELEMENTWISE
1225 || memory_access_type == VMAT_GATHER_SCATTER)
1227 /* N scalar loads plus gathering them into a vector. */
1228 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1229 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1230 inside_cost += record_stmt_cost (cost_vec,
1231 ncopies * assumed_nunits,
1232 scalar_load, stmt_info, 0, vect_body);
1234 else
1235 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1236 &inside_cost, &prologue_cost,
1237 cost_vec, cost_vec, true);
1238 if (memory_access_type == VMAT_ELEMENTWISE
1239 || memory_access_type == VMAT_STRIDED_SLP)
1240 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1241 stmt_info, 0, vect_body);
1243 if (dump_enabled_p ())
1244 dump_printf_loc (MSG_NOTE, vect_location,
1245 "vect_model_load_cost: inside_cost = %d, "
1246 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1250 /* Calculate cost of DR's memory access. */
1251 void
1252 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1253 bool add_realign_cost, unsigned int *inside_cost,
1254 unsigned int *prologue_cost,
1255 stmt_vector_for_cost *prologue_cost_vec,
1256 stmt_vector_for_cost *body_cost_vec,
1257 bool record_prologue_costs)
1259 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1260 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1262 switch (alignment_support_scheme)
1264 case dr_aligned:
1266 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1267 stmt_info, 0, vect_body);
1269 if (dump_enabled_p ())
1270 dump_printf_loc (MSG_NOTE, vect_location,
1271 "vect_model_load_cost: aligned.\n");
1273 break;
1275 case dr_unaligned_supported:
1277 /* Here, we assign an additional cost for the unaligned load. */
1278 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1279 unaligned_load, stmt_info,
1280 DR_MISALIGNMENT (dr), vect_body);
1282 if (dump_enabled_p ())
1283 dump_printf_loc (MSG_NOTE, vect_location,
1284 "vect_model_load_cost: unaligned supported by "
1285 "hardware.\n");
1287 break;
1289 case dr_explicit_realign:
1291 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1292 vector_load, stmt_info, 0, vect_body);
1293 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1294 vec_perm, stmt_info, 0, vect_body);
1296 /* FIXME: If the misalignment remains fixed across the iterations of
1297 the containing loop, the following cost should be added to the
1298 prologue costs. */
1299 if (targetm.vectorize.builtin_mask_for_load)
1300 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1301 stmt_info, 0, vect_body);
1303 if (dump_enabled_p ())
1304 dump_printf_loc (MSG_NOTE, vect_location,
1305 "vect_model_load_cost: explicit realign\n");
1307 break;
1309 case dr_explicit_realign_optimized:
1311 if (dump_enabled_p ())
1312 dump_printf_loc (MSG_NOTE, vect_location,
1313 "vect_model_load_cost: unaligned software "
1314 "pipelined.\n");
1316 /* Unaligned software pipeline has a load of an address, an initial
1317 load, and possibly a mask operation to "prime" the loop. However,
1318 if this is an access in a group of loads, which provide grouped
1319 access, then the above cost should only be considered for one
1320 access in the group. Inside the loop, there is a load op
1321 and a realignment op. */
1323 if (add_realign_cost && record_prologue_costs)
1325 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1326 vector_stmt, stmt_info,
1327 0, vect_prologue);
1328 if (targetm.vectorize.builtin_mask_for_load)
1329 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1330 vector_stmt, stmt_info,
1331 0, vect_prologue);
1334 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1335 stmt_info, 0, vect_body);
1336 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1337 stmt_info, 0, vect_body);
1339 if (dump_enabled_p ())
1340 dump_printf_loc (MSG_NOTE, vect_location,
1341 "vect_model_load_cost: explicit realign optimized"
1342 "\n");
1344 break;
1347 case dr_unaligned_unsupported:
1349 *inside_cost = VECT_MAX_COST;
1351 if (dump_enabled_p ())
1352 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1353 "vect_model_load_cost: unsupported access.\n");
1354 break;
1357 default:
1358 gcc_unreachable ();
1362 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1363 the loop preheader for the vectorized stmt STMT. */
1365 static void
1366 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1368 if (gsi)
1369 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1370 else
1372 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1373 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1375 if (loop_vinfo)
1377 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1378 basic_block new_bb;
1379 edge pe;
1381 if (nested_in_vect_loop_p (loop, stmt))
1382 loop = loop->inner;
1384 pe = loop_preheader_edge (loop);
1385 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1386 gcc_assert (!new_bb);
1388 else
1390 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1391 basic_block bb;
1392 gimple_stmt_iterator gsi_bb_start;
1394 gcc_assert (bb_vinfo);
1395 bb = BB_VINFO_BB (bb_vinfo);
1396 gsi_bb_start = gsi_after_labels (bb);
1397 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1401 if (dump_enabled_p ())
1403 dump_printf_loc (MSG_NOTE, vect_location,
1404 "created new init_stmt: ");
1405 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1409 /* Function vect_init_vector.
1411 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1412 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1413 vector type a vector with all elements equal to VAL is created first.
1414 Place the initialization at BSI if it is not NULL. Otherwise, place the
1415 initialization at the loop preheader.
1416 Return the DEF of INIT_STMT.
1417 It will be used in the vectorization of STMT. */
1419 tree
1420 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1422 gimple *init_stmt;
1423 tree new_temp;
1425 /* We abuse this function to push something to an SSA name with initial 'val'. */
1426 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1428 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1429 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1431 /* A scalar boolean value should be transformed into an
1432 all-zeros or all-ones value before building a vector. */
1433 if (VECTOR_BOOLEAN_TYPE_P (type))
1435 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1436 tree false_val = build_zero_cst (TREE_TYPE (type));
1438 if (CONSTANT_CLASS_P (val))
1439 val = integer_zerop (val) ? false_val : true_val;
1440 else
1442 new_temp = make_ssa_name (TREE_TYPE (type));
1443 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1444 val, true_val, false_val);
1445 vect_init_vector_1 (stmt, init_stmt, gsi);
1446 val = new_temp;
1449 else if (CONSTANT_CLASS_P (val))
1450 val = fold_convert (TREE_TYPE (type), val);
1451 else
1453 new_temp = make_ssa_name (TREE_TYPE (type));
1454 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1455 init_stmt = gimple_build_assign (new_temp,
1456 fold_build1 (VIEW_CONVERT_EXPR,
1457 TREE_TYPE (type),
1458 val));
1459 else
1460 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1461 vect_init_vector_1 (stmt, init_stmt, gsi);
1462 val = new_temp;
1465 val = build_vector_from_val (type, val);
1468 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1469 init_stmt = gimple_build_assign (new_temp, val);
1470 vect_init_vector_1 (stmt, init_stmt, gsi);
1471 return new_temp;
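/* For instance (a sketch, not part of the original file): with a V4SI TYPE
   and the constant VAL = 3, the code above builds { 3, 3, 3, 3 } and,
   when GSI is NULL, emits a loop-preheader statement along the lines of

     cst_5 = { 3, 3, 3, 3 };

   whose LHS (an invented name here) is returned for use as the invariant
   vector operand.  */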
1474 /* Function vect_get_vec_def_for_operand_1.
1476 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1477 DT that will be used in the vectorized stmt. */
1479 tree
1480 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1482 tree vec_oprnd;
1483 gimple *vec_stmt;
1484 stmt_vec_info def_stmt_info = NULL;
1486 switch (dt)
1488 /* operand is a constant or a loop invariant. */
1489 case vect_constant_def:
1490 case vect_external_def:
1491 /* Code should use vect_get_vec_def_for_operand. */
1492 gcc_unreachable ();
1494 /* operand is defined inside the loop. */
1495 case vect_internal_def:
1497 /* Get the def from the vectorized stmt. */
1498 def_stmt_info = vinfo_for_stmt (def_stmt);
1500 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1501 /* Get vectorized pattern statement. */
1502 if (!vec_stmt
1503 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1504 && !STMT_VINFO_RELEVANT (def_stmt_info))
1505 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1506 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1507 gcc_assert (vec_stmt);
1508 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1509 vec_oprnd = PHI_RESULT (vec_stmt);
1510 else if (is_gimple_call (vec_stmt))
1511 vec_oprnd = gimple_call_lhs (vec_stmt);
1512 else
1513 vec_oprnd = gimple_assign_lhs (vec_stmt);
1514 return vec_oprnd;
1517 /* operand is defined by a loop header phi. */
1518 case vect_reduction_def:
1519 case vect_double_reduction_def:
1520 case vect_nested_cycle:
1521 case vect_induction_def:
1523 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1525 /* Get the def from the vectorized stmt. */
1526 def_stmt_info = vinfo_for_stmt (def_stmt);
1527 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1528 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1529 vec_oprnd = PHI_RESULT (vec_stmt);
1530 else
1531 vec_oprnd = gimple_get_lhs (vec_stmt);
1532 return vec_oprnd;
1535 default:
1536 gcc_unreachable ();
1541 /* Function vect_get_vec_def_for_operand.
1543 OP is an operand in STMT. This function returns a (vector) def that will be
1544 used in the vectorized stmt for STMT.
1546 In the case that OP is an SSA_NAME which is defined in the loop, then
1547 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1549 In case OP is an invariant or constant, a new stmt that creates a vector def
1550 needs to be introduced. VECTYPE may be used to specify a required type for
1551 vector invariant. */
1553 tree
1554 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1556 gimple *def_stmt;
1557 enum vect_def_type dt;
1558 bool is_simple_use;
1559 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1560 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1562 if (dump_enabled_p ())
1564 dump_printf_loc (MSG_NOTE, vect_location,
1565 "vect_get_vec_def_for_operand: ");
1566 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1567 dump_printf (MSG_NOTE, "\n");
1570 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1571 gcc_assert (is_simple_use);
1572 if (def_stmt && dump_enabled_p ())
1574 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1575 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1578 if (dt == vect_constant_def || dt == vect_external_def)
1580 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1581 tree vector_type;
1583 if (vectype)
1584 vector_type = vectype;
1585 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1586 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1587 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1588 else
1589 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1591 gcc_assert (vector_type);
1592 return vect_init_vector (stmt, op, vector_type, NULL);
1594 else
1595 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1599 /* Function vect_get_vec_def_for_stmt_copy
1601 Return a vector-def for an operand. This function is used when the
1602 vectorized stmt to be created (by the caller to this function) is a "copy"
1603 created in case the vectorized result cannot fit in one vector, and several
1604 copies of the vector-stmt are required. In this case the vector-def is
1605 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1606 of the stmt that defines VEC_OPRND.
1607 DT is the type of the vector def VEC_OPRND.
1609 Context:
1610 In case the vectorization factor (VF) is bigger than the number
1611 of elements that can fit in a vectype (nunits), we have to generate
1612 more than one vector stmt to vectorize the scalar stmt. This situation
1613 arises when there are multiple data-types operated upon in the loop; the
1614 smallest data-type determines the VF, and as a result, when vectorizing
1615 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1616 vector stmt (each computing a vector of 'nunits' results, and together
1617 computing 'VF' results in each iteration). This function is called when
1618 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1619 which VF=16 and nunits=4, so the number of copies required is 4):
1621 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1623 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1624 VS1.1: vx.1 = memref1 VS1.2
1625 VS1.2: vx.2 = memref2 VS1.3
1626 VS1.3: vx.3 = memref3
1628 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1629 VSnew.1: vz1 = vx.1 + ... VSnew.2
1630 VSnew.2: vz2 = vx.2 + ... VSnew.3
1631 VSnew.3: vz3 = vx.3 + ...
1633 The vectorization of S1 is explained in vectorizable_load.
1634 The vectorization of S2:
1635 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1636 the function 'vect_get_vec_def_for_operand' is called to
1637 get the relevant vector-def for each operand of S2. For operand x it
1638 returns the vector-def 'vx.0'.
1640 To create the remaining copies of the vector-stmt (VSnew.j), this
1641 function is called to get the relevant vector-def for each operand. It is
1642 obtained from the respective VS1.j stmt, which is recorded in the
1643 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1645 For example, to obtain the vector-def 'vx.1' in order to create the
1646 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1647 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1648 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1649 and return its def ('vx.1').
1650 Overall, to create the above sequence this function will be called 3 times:
1651 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1652 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1653 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1655 tree
1656 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1658 gimple *vec_stmt_for_operand;
1659 stmt_vec_info def_stmt_info;
1661 /* Do nothing; can reuse same def. */
1662 if (dt == vect_external_def || dt == vect_constant_def )
1663 return vec_oprnd;
1665 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1666 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1667 gcc_assert (def_stmt_info);
1668 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1669 gcc_assert (vec_stmt_for_operand);
1670 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1671 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1672 else
1673 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1674 return vec_oprnd;
1678 /* Get vectorized definitions for the operands to create a copy of an original
1679 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1681 void
1682 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1683 vec<tree> *vec_oprnds0,
1684 vec<tree> *vec_oprnds1)
1686 tree vec_oprnd = vec_oprnds0->pop ();
1688 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1689 vec_oprnds0->quick_push (vec_oprnd);
1691 if (vec_oprnds1 && vec_oprnds1->length ())
1693 vec_oprnd = vec_oprnds1->pop ();
1694 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1695 vec_oprnds1->quick_push (vec_oprnd);
1700 /* Get vectorized definitions for OP0 and OP1. */
1702 void
1703 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1704 vec<tree> *vec_oprnds0,
1705 vec<tree> *vec_oprnds1,
1706 slp_tree slp_node)
1708 if (slp_node)
1710 int nops = (op1 == NULL_TREE) ? 1 : 2;
1711 auto_vec<tree> ops (nops);
1712 auto_vec<vec<tree> > vec_defs (nops);
1714 ops.quick_push (op0);
1715 if (op1)
1716 ops.quick_push (op1);
1718 vect_get_slp_defs (ops, slp_node, &vec_defs);
1720 *vec_oprnds0 = vec_defs[0];
1721 if (op1)
1722 *vec_oprnds1 = vec_defs[1];
1724 else
1726 tree vec_oprnd;
1728 vec_oprnds0->create (1);
1729 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1730 vec_oprnds0->quick_push (vec_oprnd);
1732 if (op1)
1734 vec_oprnds1->create (1);
1735 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1736 vec_oprnds1->quick_push (vec_oprnd);
1741 /* Helper function called by vect_finish_replace_stmt and
1742 vect_finish_stmt_generation. Set the location of the new
1743 statement and create a stmt_vec_info for it. */
1745 static void
1746 vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1748 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1749 vec_info *vinfo = stmt_info->vinfo;
1751 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1753 if (dump_enabled_p ())
1755 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1756 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1759 gimple_set_location (vec_stmt, gimple_location (stmt));
1761 /* While EH edges will generally prevent vectorization, stmt might
1762 e.g. be in a must-not-throw region. Ensure newly created stmts
1763 that could throw are part of the same region. */
1764 int lp_nr = lookup_stmt_eh_lp (stmt);
1765 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1766 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1769 /* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1770 which sets the same scalar result as STMT did. */
1772 void
1773 vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1775 gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1777 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1778 gsi_replace (&gsi, vec_stmt, false);
1780 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1783 /* Function vect_finish_stmt_generation.
1785 Insert a new stmt. */
1787 void
1788 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1789 gimple_stmt_iterator *gsi)
1791 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1793 if (!gsi_end_p (*gsi)
1794 && gimple_has_mem_ops (vec_stmt))
1796 gimple *at_stmt = gsi_stmt (*gsi);
1797 tree vuse = gimple_vuse (at_stmt);
1798 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1800 tree vdef = gimple_vdef (at_stmt);
1801 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1802 /* If we have an SSA vuse and insert a store, update virtual
1803 SSA form to avoid triggering the renamer. Do so only
1804 if we can easily see all uses - which is what almost always
1805 happens with the way vectorized stmts are inserted. */
1806 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1807 && ((is_gimple_assign (vec_stmt)
1808 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1809 || (is_gimple_call (vec_stmt)
1810 && !(gimple_call_flags (vec_stmt)
1811 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1813 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1814 gimple_set_vdef (vec_stmt, new_vdef);
1815 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1819 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1820 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1823 /* We want to vectorize a call to combined function CFN with function
1824 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1825 as the types of all inputs. Check whether this is possible using
1826 an internal function, returning its code if so or IFN_LAST if not. */
1828 static internal_fn
1829 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1830 tree vectype_out, tree vectype_in)
1832 internal_fn ifn;
1833 if (internal_fn_p (cfn))
1834 ifn = as_internal_fn (cfn);
1835 else
1836 ifn = associated_internal_fn (fndecl);
1837 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1839 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1840 if (info.vectorizable)
1842 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1843 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1844 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1845 OPTIMIZE_FOR_SPEED))
1846 return ifn;
1849 return IFN_LAST;
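/* For illustration (a hypothetical example, not implied by the code above):
   a call to the sqrt built-in with V2DF as both VECTYPE_OUT and VECTYPE_IN
   has a combined_fn that is not itself an internal function, so
   associated_internal_fn maps the built-in decl to IFN_SQRT.  IFN_SQRT is a
   direct, vectorizable internal function, so it is returned here whenever
   direct_internal_fn_supported_p says the target can do a V2DF square
   root.  */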
1853 static tree permute_vec_elements (tree, tree, tree, gimple *,
1854 gimple_stmt_iterator *);
1856 /* Check whether a load or store statement in the loop described by
1857 LOOP_VINFO is possible in a fully-masked loop. This is testing
1858 whether the vectorizer pass has the appropriate support, as well as
1859 whether the target does.
1861 VLS_TYPE says whether the statement is a load or store and VECTYPE
1862 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1863 says how the load or store is going to be implemented and GROUP_SIZE
1864 is the number of load or store statements in the containing group.
1865 If the access is a gather load or scatter store, GS_INFO describes
1866 its arguments.
1868 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1869 supported, otherwise record the required mask types. */
1871 static void
1872 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1873 vec_load_store_type vls_type, int group_size,
1874 vect_memory_access_type memory_access_type,
1875 gather_scatter_info *gs_info)
1877 /* Invariant loads need no special support. */
1878 if (memory_access_type == VMAT_INVARIANT)
1879 return;
1881 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1882 machine_mode vecmode = TYPE_MODE (vectype);
1883 bool is_load = (vls_type == VLS_LOAD);
1884 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1886 if (is_load
1887 ? !vect_load_lanes_supported (vectype, group_size, true)
1888 : !vect_store_lanes_supported (vectype, group_size, true))
1890 if (dump_enabled_p ())
1891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1892 "can't use a fully-masked loop because the"
1893 " target doesn't have an appropriate masked"
1894 " load/store-lanes instruction.\n");
1895 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1896 return;
1898 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1899 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1900 return;
1903 if (memory_access_type == VMAT_GATHER_SCATTER)
1905 internal_fn ifn = (is_load
1906 ? IFN_MASK_GATHER_LOAD
1907 : IFN_MASK_SCATTER_STORE);
1908 tree offset_type = TREE_TYPE (gs_info->offset);
1909 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1910 gs_info->memory_type,
1911 TYPE_SIGN (offset_type),
1912 gs_info->scale))
1914 if (dump_enabled_p ())
1915 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1916 "can't use a fully-masked loop because the"
1917 " target doesn't have an appropriate masked"
1918 " gather load or scatter store instruction.\n");
1919 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1920 return;
1922 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1923 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1924 return;
1927 if (memory_access_type != VMAT_CONTIGUOUS
1928 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1930 /* Element X of the data must come from iteration i * VF + X of the
1931 scalar loop. We need more work to support other mappings. */
1932 if (dump_enabled_p ())
1933 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1934 "can't use a fully-masked loop because an access"
1935 " isn't contiguous.\n");
1936 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1937 return;
1940 machine_mode mask_mode;
1941 if (!(targetm.vectorize.get_mask_mode
1942 (GET_MODE_NUNITS (vecmode),
1943 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1944 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1946 if (dump_enabled_p ())
1947 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1948 "can't use a fully-masked loop because the target"
1949 " doesn't have the appropriate masked load or"
1950 " store.\n");
1951 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1952 return;
1954 /* We might load more scalars than we need for permuting SLP loads.
1955 We checked in get_group_load_store_type that the extra elements
1956 don't leak into a new vector. */
1957 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1958 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1959 unsigned int nvectors;
1960 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1961 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1962 else
1963 gcc_unreachable ();
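/* Worked example (numbers are illustrative only): with GROUP_SIZE = 2, a
   vectorization factor of 8 and 4-element vectors, each iteration of the
   vector loop performs 2 * 8 = 16 scalar accesses, i.e. NVECTORS = 4 vector
   accesses, so four masks of the given VECTYPE are recorded via
   vect_record_loop_mask.  */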
1966 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1967 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1968 that needs to be applied to all loads and stores in a vectorized loop.
1969 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1971 MASK_TYPE is the type of both masks. If new statements are needed,
1972 insert them before GSI. */
1974 static tree
1975 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1976 gimple_stmt_iterator *gsi)
1978 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1979 if (!loop_mask)
1980 return vec_mask;
1982 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1983 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1984 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1985 vec_mask, loop_mask);
1986 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1987 return and_res;
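/* For example (SSA version numbers below are arbitrary), when both masks are
   present this emits a single statement of the form

       vec_mask_and_7 = vec_mask_3 & loop_mask_5;

   and the combined mask is what the masked load or store then uses.  */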
1990 /* Determine whether we can use a gather load or scatter store to vectorize
1991 strided load or store STMT by truncating the current offset to a smaller
1992 width. We need to be able to construct an offset vector:
1994 { 0, X, X*2, X*3, ... }
1996 without loss of precision, where X is STMT's DR_STEP.
1998 Return true if this is possible, describing the gather load or scatter
1999 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
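/* Illustrative example (values invented for the sake of exposition): with
   DR_STEP = 20 and 4-byte elements (so 32-bit offset elements), the scale
   candidate 4 gives X = 5, so the offset vector is { 0, 5, 10, 15, ... } and
   the gather or scatter applies scale 4 to recover the byte offsets
   { 0, 20, 40, 60, ... }.  The code below accepts this only if COUNT * 5
   still fits in 32 bits and the target supports such a gather/scatter.  */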
2001 static bool
2002 vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
2003 bool masked_p,
2004 gather_scatter_info *gs_info)
2006 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2007 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2008 tree step = DR_STEP (dr);
2009 if (TREE_CODE (step) != INTEGER_CST)
2011 /* ??? Perhaps we could use range information here? */
2012 if (dump_enabled_p ())
2013 dump_printf_loc (MSG_NOTE, vect_location,
2014 "cannot truncate variable step.\n");
2015 return false;
2018 /* Get the number of bits in an element. */
2019 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2020 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2021 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2023 /* Set COUNT to the upper limit on the number of elements - 1.
2024 Start with the maximum vectorization factor. */
2025 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2027 /* Try lowering COUNT to the number of scalar latch iterations. */
2028 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2029 widest_int max_iters;
2030 if (max_loop_iterations (loop, &max_iters)
2031 && max_iters < count)
2032 count = max_iters.to_shwi ();
2034 /* Try scales of 1 and the element size. */
2035 int scales[] = { 1, vect_get_scalar_dr_size (dr) };
2036 bool overflow_p = false;
2037 for (int i = 0; i < 2; ++i)
2039 int scale = scales[i];
2040 widest_int factor;
2041 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2042 continue;
2044 /* See whether COUNT * STEP / SCALE can be represented in
2045 ELEMENT_BITS bits, the precision chosen for the truncated offset. */
2046 widest_int range = wi::mul (count, factor, SIGNED, &overflow_p);
2047 if (overflow_p)
2048 continue;
2049 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2050 if (wi::min_precision (range, sign) > element_bits)
2052 overflow_p = true;
2053 continue;
2056 /* See whether the target supports the operation. */
2057 tree memory_type = TREE_TYPE (DR_REF (dr));
2058 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2059 memory_type, element_bits, sign, scale,
2060 &gs_info->ifn, &gs_info->element_type))
2061 continue;
2063 tree offset_type = build_nonstandard_integer_type (element_bits,
2064 sign == UNSIGNED);
2066 gs_info->decl = NULL_TREE;
2067 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2068 but we don't need to store that here. */
2069 gs_info->base = NULL_TREE;
2070 gs_info->offset = fold_convert (offset_type, step);
2071 gs_info->offset_dt = vect_constant_def;
2072 gs_info->offset_vectype = NULL_TREE;
2073 gs_info->scale = scale;
2074 gs_info->memory_type = memory_type;
2075 return true;
2078 if (overflow_p && dump_enabled_p ())
2079 dump_printf_loc (MSG_NOTE, vect_location,
2080 "truncating gather/scatter offset to %d bits"
2081 " might change its value.\n", element_bits);
2083 return false;
2086 /* Return true if we can use gather/scatter internal functions to
2087 vectorize STMT, which is a grouped or strided load or store.
2088 MASKED_P is true if load or store is conditional. When returning
2089 true, fill in GS_INFO with the information required to perform the
2090 operation. */
2092 static bool
2093 vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
2094 bool masked_p,
2095 gather_scatter_info *gs_info)
2097 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
2098 || gs_info->decl)
2099 return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
2100 masked_p, gs_info);
2102 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2103 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2104 tree offset_type = TREE_TYPE (gs_info->offset);
2105 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2107 /* Enforced by vect_check_gather_scatter. */
2108 gcc_assert (element_bits >= offset_bits);
2110 /* If the elements are wider than the offset, convert the offset to the
2111 same width, without changing its sign. */
2112 if (element_bits > offset_bits)
2114 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2115 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2116 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2119 if (dump_enabled_p ())
2120 dump_printf_loc (MSG_NOTE, vect_location,
2121 "using gather/scatter for strided/grouped access,"
2122 " scale = %d\n", gs_info->scale);
2124 return true;
2127 /* STMT is a non-strided load or store, meaning that it accesses
2128 elements with a known constant step. Return -1 if that step
2129 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2131 static int
2132 compare_step_with_zero (gimple *stmt)
2134 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2135 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2136 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
2137 size_zero_node);
2140 /* If the target supports a permute mask that reverses the elements in
2141 a vector of type VECTYPE, return that mask, otherwise return null. */
2143 static tree
2144 perm_mask_for_reverse (tree vectype)
2146 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2148 /* The encoding has a single stepped pattern. */
2149 vec_perm_builder sel (nunits, 1, 3);
2150 for (int i = 0; i < 3; ++i)
2151 sel.quick_push (nunits - 1 - i);
2153 vec_perm_indices indices (sel, 1, nunits);
2154 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2155 return NULL_TREE;
2156 return vect_gen_perm_mask_checked (vectype, indices);
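/* Example: for a 4-element vector the three encoded elements are { 3, 2, 1 },
   which the single stepped pattern extends to the full reversal
   { 3, 2, 1, 0 }; for a variable-length vector the same three elements stand
   for { N-1, N-2, ..., 1, 0 }.  */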
2159 /* STMT is either a masked or unconditional store. Return the value
2160 being stored. */
2162 tree
2163 vect_get_store_rhs (gimple *stmt)
2165 if (gassign *assign = dyn_cast <gassign *> (stmt))
2167 gcc_assert (gimple_assign_single_p (assign));
2168 return gimple_assign_rhs1 (assign);
2170 if (gcall *call = dyn_cast <gcall *> (stmt))
2172 internal_fn ifn = gimple_call_internal_fn (call);
2173 int index = internal_fn_stored_value_index (ifn);
2174 gcc_assert (index >= 0);
2175 return gimple_call_arg (stmt, index);
2177 gcc_unreachable ();
2180 /* A subroutine of get_load_store_type, with a subset of the same
2181 arguments. Handle the case where STMT is part of a grouped load
2182 or store.
2184 For stores, the statements in the group are all consecutive
2185 and there is no gap at the end. For loads, the statements in the
2186 group might not be consecutive; there can be gaps between statements
2187 as well as at the end. */
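/* For instance (an illustrative loop, not taken from the code below): the
   loads a[3*i] and a[3*i + 1] in one loop body form a group whose members
   are consecutive but whose stride is 3, leaving one unused element at the
   end of each group; that trailing gap is what the code below has to reason
   about.  */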
2189 static bool
2190 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
2191 bool masked_p, vec_load_store_type vls_type,
2192 vect_memory_access_type *memory_access_type,
2193 gather_scatter_info *gs_info)
2195 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2196 vec_info *vinfo = stmt_info->vinfo;
2197 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2198 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2199 gimple *first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
2200 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2201 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
2202 bool single_element_p = (stmt == first_stmt
2203 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2204 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (vinfo_for_stmt (first_stmt));
2205 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2207 /* True if the vectorized statements would access beyond the last
2208 statement in the group. */
2209 bool overrun_p = false;
2211 /* True if we can cope with such overrun by peeling for gaps, so that
2212 there is at least one final scalar iteration after the vector loop. */
2213 bool can_overrun_p = (!masked_p
2214 && vls_type == VLS_LOAD
2215 && loop_vinfo
2216 && !loop->inner);
2218 /* There can only be a gap at the end of the group if the stride is
2219 known at compile time. */
2220 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2222 /* Stores can't yet have gaps. */
2223 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2225 if (slp)
2227 if (STMT_VINFO_STRIDED_P (stmt_info))
2229 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2230 separated by the stride, until we have a complete vector.
2231 Fall back to scalar accesses if that isn't possible. */
2232 if (multiple_p (nunits, group_size))
2233 *memory_access_type = VMAT_STRIDED_SLP;
2234 else
2235 *memory_access_type = VMAT_ELEMENTWISE;
2237 else
2239 overrun_p = loop_vinfo && gap != 0;
2240 if (overrun_p && vls_type != VLS_LOAD)
2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2243 "Grouped store with gaps requires"
2244 " non-consecutive accesses\n");
2245 return false;
2247 /* An overrun is fine if the trailing elements are smaller
2248 than the alignment boundary B. Every vector access will
2249 be a multiple of B and so we are guaranteed to access a
2250 non-gap element in the same B-sized block. */
2251 if (overrun_p
2252 && gap < (vect_known_alignment_in_bytes (first_dr)
2253 / vect_get_scalar_dr_size (first_dr)))
2254 overrun_p = false;
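/* Worked example (illustrative numbers): with 4-byte elements and a known
   alignment of 16 bytes the threshold is 16 / 4 = 4 elements, so a trailing
   gap of 1 element stays within the same 16-byte block as accessed data and
   the overrun cannot fault.  */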
2255 if (overrun_p && !can_overrun_p)
2257 if (dump_enabled_p ())
2258 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2259 "Peeling for outer loop is not supported\n");
2260 return false;
2262 *memory_access_type = VMAT_CONTIGUOUS;
2265 else
2267 /* We can always handle this case using elementwise accesses,
2268 but see if something more efficient is available. */
2269 *memory_access_type = VMAT_ELEMENTWISE;
2271 /* If there is a gap at the end of the group then these optimizations
2272 would access excess elements in the last iteration. */
2273 bool would_overrun_p = (gap != 0);
2274 /* An overrun is fine if the trailing elements are smaller than the
2275 alignment boundary B. Every vector access will be a multiple of B
2276 and so we are guaranteed to access a non-gap element in the
2277 same B-sized block. */
2278 if (would_overrun_p
2279 && !masked_p
2280 && gap < (vect_known_alignment_in_bytes (first_dr)
2281 / vect_get_scalar_dr_size (first_dr)))
2282 would_overrun_p = false;
2284 if (!STMT_VINFO_STRIDED_P (stmt_info)
2285 && (can_overrun_p || !would_overrun_p)
2286 && compare_step_with_zero (stmt) > 0)
2288 /* First cope with the degenerate case of a single-element
2289 vector. */
2290 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2291 *memory_access_type = VMAT_CONTIGUOUS;
2293 /* Otherwise try using LOAD/STORE_LANES. */
2294 if (*memory_access_type == VMAT_ELEMENTWISE
2295 && (vls_type == VLS_LOAD
2296 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2297 : vect_store_lanes_supported (vectype, group_size,
2298 masked_p)))
2300 *memory_access_type = VMAT_LOAD_STORE_LANES;
2301 overrun_p = would_overrun_p;
2304 /* If that fails, try using permuting loads. */
2305 if (*memory_access_type == VMAT_ELEMENTWISE
2306 && (vls_type == VLS_LOAD
2307 ? vect_grouped_load_supported (vectype, single_element_p,
2308 group_size)
2309 : vect_grouped_store_supported (vectype, group_size)))
2311 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2312 overrun_p = would_overrun_p;
2316 /* As a last resort, try using a gather load or scatter store.
2318 ??? Although the code can handle all group sizes correctly,
2319 it probably isn't a win to use separate strided accesses based
2320 on nearby locations. Or, even if it's a win over scalar code,
2321 it might not be a win over vectorizing at a lower VF, if that
2322 allows us to use contiguous accesses. */
2323 if (*memory_access_type == VMAT_ELEMENTWISE
2324 && single_element_p
2325 && loop_vinfo
2326 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2327 masked_p, gs_info))
2328 *memory_access_type = VMAT_GATHER_SCATTER;
2331 if (vls_type != VLS_LOAD && first_stmt == stmt)
2333 /* STMT is the leader of the group. Check the operands of all the
2334 stmts of the group. */
2335 gimple *next_stmt = DR_GROUP_NEXT_ELEMENT (stmt_info);
2336 while (next_stmt)
2338 tree op = vect_get_store_rhs (next_stmt);
2339 gimple *def_stmt;
2340 enum vect_def_type dt;
2341 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
2343 if (dump_enabled_p ())
2344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2345 "use not simple.\n");
2346 return false;
2348 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2352 if (overrun_p)
2354 gcc_assert (can_overrun_p);
2355 if (dump_enabled_p ())
2356 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2357 "Data access with gaps requires scalar "
2358 "epilogue loop\n");
2359 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2362 return true;
2365 /* A subroutine of get_load_store_type, with a subset of the same
2366 arguments. Handle the case where STMT is a load or store that
2367 accesses consecutive elements with a negative step. */
2369 static vect_memory_access_type
2370 get_negative_load_store_type (gimple *stmt, tree vectype,
2371 vec_load_store_type vls_type,
2372 unsigned int ncopies)
2374 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2375 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2376 dr_alignment_support alignment_support_scheme;
2378 if (ncopies > 1)
2380 if (dump_enabled_p ())
2381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2382 "multiple types with negative step.\n");
2383 return VMAT_ELEMENTWISE;
2386 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2387 if (alignment_support_scheme != dr_aligned
2388 && alignment_support_scheme != dr_unaligned_supported)
2390 if (dump_enabled_p ())
2391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2392 "negative step but alignment required.\n");
2393 return VMAT_ELEMENTWISE;
2396 if (vls_type == VLS_STORE_INVARIANT)
2398 if (dump_enabled_p ())
2399 dump_printf_loc (MSG_NOTE, vect_location,
2400 "negative step with invariant source;"
2401 " no permute needed.\n");
2402 return VMAT_CONTIGUOUS_DOWN;
2405 if (!perm_mask_for_reverse (vectype))
2407 if (dump_enabled_p ())
2408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2409 "negative step and reversing not supported.\n");
2410 return VMAT_ELEMENTWISE;
2413 return VMAT_CONTIGUOUS_REVERSE;
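/* For example (hypothetical access): a single-copy load with 4-byte elements
   and DR_STEP = -4 whose alignment is supported and for which
   perm_mask_for_reverse found a mask is classified as
   VMAT_CONTIGUOUS_REVERSE: it is loaded as a contiguous vector and then
   reversed with that permutation.  */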
2416 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
2417 if there is a memory access type that the vectorized form can use,
2418 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2419 or scatters, fill in GS_INFO accordingly.
2421 SLP says whether we're performing SLP rather than loop vectorization.
2422 MASKED_P is true if the statement is conditional on a vectorized mask.
2423 VECTYPE is the vector type that the vectorized statements will use.
2424 NCOPIES is the number of vector statements that will be needed. */
2426 static bool
2427 get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
2428 vec_load_store_type vls_type, unsigned int ncopies,
2429 vect_memory_access_type *memory_access_type,
2430 gather_scatter_info *gs_info)
2432 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2433 vec_info *vinfo = stmt_info->vinfo;
2434 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2435 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2436 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2438 *memory_access_type = VMAT_GATHER_SCATTER;
2439 gimple *def_stmt;
2440 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2441 gcc_unreachable ();
2442 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
2443 &gs_info->offset_dt,
2444 &gs_info->offset_vectype))
2446 if (dump_enabled_p ())
2447 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2448 "%s index use not simple.\n",
2449 vls_type == VLS_LOAD ? "gather" : "scatter");
2450 return false;
2453 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2455 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
2456 memory_access_type, gs_info))
2457 return false;
2459 else if (STMT_VINFO_STRIDED_P (stmt_info))
2461 gcc_assert (!slp);
2462 if (loop_vinfo
2463 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2464 masked_p, gs_info))
2465 *memory_access_type = VMAT_GATHER_SCATTER;
2466 else
2467 *memory_access_type = VMAT_ELEMENTWISE;
2469 else
2471 int cmp = compare_step_with_zero (stmt);
2472 if (cmp < 0)
2473 *memory_access_type = get_negative_load_store_type
2474 (stmt, vectype, vls_type, ncopies);
2475 else if (cmp == 0)
2477 gcc_assert (vls_type == VLS_LOAD);
2478 *memory_access_type = VMAT_INVARIANT;
2480 else
2481 *memory_access_type = VMAT_CONTIGUOUS;
2484 if ((*memory_access_type == VMAT_ELEMENTWISE
2485 || *memory_access_type == VMAT_STRIDED_SLP)
2486 && !nunits.is_constant ())
2488 if (dump_enabled_p ())
2489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2490 "Not using elementwise accesses due to variable "
2491 "vectorization factor.\n");
2492 return false;
2495 /* FIXME: At the moment the cost model seems to underestimate the
2496 cost of using elementwise accesses. This check preserves the
2497 traditional behavior until that can be fixed. */
2498 if (*memory_access_type == VMAT_ELEMENTWISE
2499 && !STMT_VINFO_STRIDED_P (stmt_info)
2500 && !(stmt == DR_GROUP_FIRST_ELEMENT (stmt_info)
2501 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2502 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2504 if (dump_enabled_p ())
2505 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2506 "not falling back to elementwise accesses\n");
2507 return false;
2509 return true;
2512 /* Return true if boolean argument MASK is suitable for vectorizing
2513 conditional load or store STMT. When returning true, store the type
2514 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2515 in *MASK_VECTYPE_OUT. */
2517 static bool
2518 vect_check_load_store_mask (gimple *stmt, tree mask,
2519 vect_def_type *mask_dt_out,
2520 tree *mask_vectype_out)
2522 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2524 if (dump_enabled_p ())
2525 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2526 "mask argument is not a boolean.\n");
2527 return false;
2530 if (TREE_CODE (mask) != SSA_NAME)
2532 if (dump_enabled_p ())
2533 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2534 "mask argument is not an SSA name.\n");
2535 return false;
2538 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2539 gimple *def_stmt;
2540 enum vect_def_type mask_dt;
2541 tree mask_vectype;
2542 if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &mask_dt,
2543 &mask_vectype))
2545 if (dump_enabled_p ())
2546 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2547 "mask use not simple.\n");
2548 return false;
2551 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2552 if (!mask_vectype)
2553 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2555 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2557 if (dump_enabled_p ())
2558 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2559 "could not find an appropriate vector mask type.\n");
2560 return false;
2563 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2564 TYPE_VECTOR_SUBPARTS (vectype)))
2566 if (dump_enabled_p ())
2568 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2569 "vector mask type ");
2570 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2571 dump_printf (MSG_MISSED_OPTIMIZATION,
2572 " does not match vector data type ");
2573 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2574 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2576 return false;
2579 *mask_dt_out = mask_dt;
2580 *mask_vectype_out = mask_vectype;
2581 return true;
2584 /* Return true if stored value RHS is suitable for vectorizing store
2585 statement STMT. When returning true, store the type of the
2586 definition in *RHS_DT_OUT, the type of the vectorized store value in
2587 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2589 static bool
2590 vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2591 tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
2593 /* In case this is a store from a constant, make sure
2594 native_encode_expr can handle it. */
2595 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2597 if (dump_enabled_p ())
2598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2599 "cannot encode constant as a byte sequence.\n");
2600 return false;
2603 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2604 gimple *def_stmt;
2605 enum vect_def_type rhs_dt;
2606 tree rhs_vectype;
2607 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &rhs_dt,
2608 &rhs_vectype))
2610 if (dump_enabled_p ())
2611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2612 "use not simple.\n");
2613 return false;
2616 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2617 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2619 if (dump_enabled_p ())
2620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2621 "incompatible vector types.\n");
2622 return false;
2625 *rhs_dt_out = rhs_dt;
2626 *rhs_vectype_out = rhs_vectype;
2627 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2628 *vls_type_out = VLS_STORE_INVARIANT;
2629 else
2630 *vls_type_out = VLS_STORE;
2631 return true;
2634 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2635 Note that we support masks with floating-point type, in which case the
2636 floats are interpreted as a bitmask. */
2638 static tree
2639 vect_build_all_ones_mask (gimple *stmt, tree masktype)
2641 if (TREE_CODE (masktype) == INTEGER_TYPE)
2642 return build_int_cst (masktype, -1);
2643 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2645 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2646 mask = build_vector_from_val (masktype, mask);
2647 return vect_init_vector (stmt, mask, masktype, NULL);
2649 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2651 REAL_VALUE_TYPE r;
2652 long tmp[6];
2653 for (int j = 0; j < 6; ++j)
2654 tmp[j] = -1;
2655 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2656 tree mask = build_real (TREE_TYPE (masktype), r);
2657 mask = build_vector_from_val (masktype, mask);
2658 return vect_init_vector (stmt, mask, masktype, NULL);
2660 gcc_unreachable ();
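/* The floating-point case matters for gather built-ins whose mask argument
   has a float vector type (e.g. some x86 gathers): the value built above has
   every bit set, and it is that bit pattern, not the numeric value, which
   the instruction inspects to enable all lanes.  */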
2663 /* Build an all-zero merge value of type VECTYPE while vectorizing
2664 STMT as a gather load. */
2666 static tree
2667 vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2669 tree merge;
2670 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2671 merge = build_int_cst (TREE_TYPE (vectype), 0);
2672 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2674 REAL_VALUE_TYPE r;
2675 long tmp[6];
2676 for (int j = 0; j < 6; ++j)
2677 tmp[j] = 0;
2678 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2679 merge = build_real (TREE_TYPE (vectype), r);
2681 else
2682 gcc_unreachable ();
2683 merge = build_vector_from_val (vectype, merge);
2684 return vect_init_vector (stmt, merge, vectype, NULL);
2687 /* Build a gather load call while vectorizing STMT. Insert new instructions
2688 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2689 operation. If the load is conditional, MASK is the unvectorized
2690 condition and MASK_DT is its definition type, otherwise MASK is null. */
2692 static void
2693 vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2694 gimple **vec_stmt, gather_scatter_info *gs_info,
2695 tree mask, vect_def_type mask_dt)
2697 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2698 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2699 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2700 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2701 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2702 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2703 edge pe = loop_preheader_edge (loop);
2704 enum { NARROW, NONE, WIDEN } modifier;
2705 poly_uint64 gather_off_nunits
2706 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2708 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2709 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2710 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2711 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2712 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2713 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2714 tree scaletype = TREE_VALUE (arglist);
2715 gcc_checking_assert (types_compatible_p (srctype, rettype)
2716 && (!mask || types_compatible_p (srctype, masktype)));
2718 tree perm_mask = NULL_TREE;
2719 tree mask_perm_mask = NULL_TREE;
2720 if (known_eq (nunits, gather_off_nunits))
2721 modifier = NONE;
2722 else if (known_eq (nunits * 2, gather_off_nunits))
2724 modifier = WIDEN;
2726 /* Currently widening gathers and scatters are only supported for
2727 fixed-length vectors. */
2728 int count = gather_off_nunits.to_constant ();
2729 vec_perm_builder sel (count, count, 1);
2730 for (int i = 0; i < count; ++i)
2731 sel.quick_push (i | (count / 2));
2733 vec_perm_indices indices (sel, 1, count);
2734 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2735 indices);
2737 else if (known_eq (nunits, gather_off_nunits * 2))
2739 modifier = NARROW;
2741 /* Currently narrowing gathers and scatters are only supported for
2742 fixed-length vectors. */
2743 int count = nunits.to_constant ();
2744 vec_perm_builder sel (count, count, 1);
2745 sel.quick_grow (count);
2746 for (int i = 0; i < count; ++i)
2747 sel[i] = i < count / 2 ? i : i + count / 2;
2748 vec_perm_indices indices (sel, 2, count);
2749 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2751 ncopies *= 2;
2753 if (mask)
2755 for (int i = 0; i < count; ++i)
2756 sel[i] = i | (count / 2);
2757 indices.new_vector (sel, 2, count);
2758 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2761 else
2762 gcc_unreachable ();
2764 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2765 vectype);
2767 tree ptr = fold_convert (ptrtype, gs_info->base);
2768 if (!is_gimple_min_invariant (ptr))
2770 gimple_seq seq;
2771 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2772 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2773 gcc_assert (!new_bb);
2776 tree scale = build_int_cst (scaletype, gs_info->scale);
2778 tree vec_oprnd0 = NULL_TREE;
2779 tree vec_mask = NULL_TREE;
2780 tree src_op = NULL_TREE;
2781 tree mask_op = NULL_TREE;
2782 tree prev_res = NULL_TREE;
2783 stmt_vec_info prev_stmt_info = NULL;
2785 if (!mask)
2787 src_op = vect_build_zero_merge_argument (stmt, rettype);
2788 mask_op = vect_build_all_ones_mask (stmt, masktype);
2791 for (int j = 0; j < ncopies; ++j)
2793 tree op, var;
2794 gimple *new_stmt;
2795 if (modifier == WIDEN && (j & 1))
2796 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2797 perm_mask, stmt, gsi);
2798 else if (j == 0)
2799 op = vec_oprnd0
2800 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2801 else
2802 op = vec_oprnd0
2803 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2805 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2807 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2808 TYPE_VECTOR_SUBPARTS (idxtype)));
2809 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2810 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2811 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2812 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2813 op = var;
2816 if (mask)
2818 if (mask_perm_mask && (j & 1))
2819 mask_op = permute_vec_elements (mask_op, mask_op,
2820 mask_perm_mask, stmt, gsi);
2821 else
2823 if (j == 0)
2824 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2825 else
2826 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
2828 mask_op = vec_mask;
2829 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2831 gcc_assert
2832 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2833 TYPE_VECTOR_SUBPARTS (masktype)));
2834 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2835 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2836 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2837 mask_op);
2838 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2839 mask_op = var;
2842 src_op = mask_op;
2845 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2846 mask_op, scale);
2848 if (!useless_type_conversion_p (vectype, rettype))
2850 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2851 TYPE_VECTOR_SUBPARTS (rettype)));
2852 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2853 gimple_call_set_lhs (new_stmt, op);
2854 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2855 var = make_ssa_name (vec_dest);
2856 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2857 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2859 else
2861 var = make_ssa_name (vec_dest, new_stmt);
2862 gimple_call_set_lhs (new_stmt, var);
2865 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2867 if (modifier == NARROW)
2869 if ((j & 1) == 0)
2871 prev_res = var;
2872 continue;
2874 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2875 new_stmt = SSA_NAME_DEF_STMT (var);
2878 if (prev_stmt_info == NULL)
2879 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2880 else
2881 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2882 prev_stmt_info = vinfo_for_stmt (new_stmt);
2886 /* Prepare the base and offset in GS_INFO for vectorization.
2887 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2888 to the vectorized offset argument for the first copy of STMT. STMT
2889 is the statement described by GS_INFO and LOOP is the containing loop. */
2891 static void
2892 vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2893 gather_scatter_info *gs_info,
2894 tree *dataref_ptr, tree *vec_offset)
2896 gimple_seq stmts = NULL;
2897 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2898 if (stmts != NULL)
2900 basic_block new_bb;
2901 edge pe = loop_preheader_edge (loop);
2902 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2903 gcc_assert (!new_bb);
2905 tree offset_type = TREE_TYPE (gs_info->offset);
2906 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2907 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2908 offset_vectype);
2911 /* Prepare to implement a grouped or strided load or store using
2912 the gather load or scatter store operation described by GS_INFO.
2913 STMT is the load or store statement.
2915 Set *DATAREF_BUMP to the amount that should be added to the base
2916 address after each copy of the vectorized statement. Set *VEC_OFFSET
2917 to an invariant offset vector in which element I has the value
2918 I * DR_STEP / SCALE. */
2920 static void
2921 vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2922 gather_scatter_info *gs_info,
2923 tree *dataref_bump, tree *vec_offset)
2925 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2926 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2927 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2928 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2929 gimple_seq stmts;
2931 tree bump = size_binop (MULT_EXPR,
2932 fold_convert (sizetype, DR_STEP (dr)),
2933 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2934 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2935 if (stmts)
2936 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2938 /* The offset given in GS_INFO can have pointer type, so use the element
2939 type of the vector instead. */
2940 tree offset_type = TREE_TYPE (gs_info->offset);
2941 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2942 offset_type = TREE_TYPE (offset_vectype);
2944 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2945 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2946 ssize_int (gs_info->scale));
2947 step = fold_convert (offset_type, step);
2948 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2950 /* Create {0, X, X*2, X*3, ...}. */
2951 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2952 build_zero_cst (offset_type), step);
2953 if (stmts)
2954 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
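/* Worked example (illustrative values): DR_STEP = 8 with a 4-element vector
   gives *DATAREF_BUMP = 32; with SCALE = 4 the series step is X = 2, so
   *VEC_OFFSET = { 0, 2, 4, 6 }, which the gather or scatter rescales by 4 to
   reach the byte offsets { 0, 8, 16, 24 }.  */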
2957 /* Return the amount that should be added to a vector pointer to move
2958 to the next or previous copy of AGGR_TYPE. DR is the data reference
2959 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2960 vectorization. */
2962 static tree
2963 vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2964 vect_memory_access_type memory_access_type)
2966 if (memory_access_type == VMAT_INVARIANT)
2967 return size_zero_node;
2969 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2970 tree step = vect_dr_behavior (dr)->step;
2971 if (tree_int_cst_sgn (step) == -1)
2972 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2973 return iv_step;
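/* E.g. for a 16-byte AGGR_TYPE the increment is 16, negated to -16 when the
   access runs downwards (a negative DR step).  */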
2976 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2978 static bool
2979 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2980 gimple **vec_stmt, slp_tree slp_node,
2981 tree vectype_in, enum vect_def_type *dt,
2982 stmt_vector_for_cost *cost_vec)
2984 tree op, vectype;
2985 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2986 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2987 unsigned ncopies;
2988 unsigned HOST_WIDE_INT nunits, num_bytes;
2990 op = gimple_call_arg (stmt, 0);
2991 vectype = STMT_VINFO_VECTYPE (stmt_info);
2993 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2994 return false;
2996 /* Multiple types in SLP are handled by creating the appropriate number of
2997 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2998 case of SLP. */
2999 if (slp_node)
3000 ncopies = 1;
3001 else
3002 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3004 gcc_assert (ncopies >= 1);
3006 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3007 if (! char_vectype)
3008 return false;
3010 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
3011 return false;
3013 unsigned word_bytes = num_bytes / nunits;
3015 /* The encoding uses one stepped pattern for each byte in the word. */
3016 vec_perm_builder elts (num_bytes, word_bytes, 3);
3017 for (unsigned i = 0; i < 3; ++i)
3018 for (unsigned j = 0; j < word_bytes; ++j)
3019 elts.quick_push ((i + 1) * word_bytes - j - 1);
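/* Illustrative result: for BUILT_IN_BSWAP64 on a 16-byte vector
   (num_bytes = 16, word_bytes = 8) the selector expands to
   { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 },
   i.e. a byte reversal within each 64-bit word.  */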
3021 vec_perm_indices indices (elts, 1, num_bytes);
3022 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3023 return false;
3025 if (! vec_stmt)
3027 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3028 DUMP_VECT_SCOPE ("vectorizable_bswap");
3029 if (! slp_node)
3031 record_stmt_cost (cost_vec,
3032 1, vector_stmt, stmt_info, 0, vect_prologue);
3033 record_stmt_cost (cost_vec,
3034 ncopies, vec_perm, stmt_info, 0, vect_body);
3036 return true;
3039 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3041 /* Transform. */
3042 vec<tree> vec_oprnds = vNULL;
3043 gimple *new_stmt = NULL;
3044 stmt_vec_info prev_stmt_info = NULL;
3045 for (unsigned j = 0; j < ncopies; j++)
3047 /* Handle uses. */
3048 if (j == 0)
3049 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
3050 else
3051 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
3053 /* Arguments are ready. Create the new vector stmt. */
3054 unsigned i;
3055 tree vop;
3056 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3058 tree tem = make_ssa_name (char_vectype);
3059 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3060 char_vectype, vop));
3061 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3062 tree tem2 = make_ssa_name (char_vectype);
3063 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3064 tem, tem, bswap_vconst);
3065 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3066 tem = make_ssa_name (vectype);
3067 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3068 vectype, tem2));
3069 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3070 if (slp_node)
3071 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3074 if (slp_node)
3075 continue;
3077 if (j == 0)
3078 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3079 else
3080 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3082 prev_stmt_info = vinfo_for_stmt (new_stmt);
3085 vec_oprnds.release ();
3086 return true;
3089 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3090 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3091 in a single step. On success, store the binary pack code in
3092 *CONVERT_CODE. */
3094 static bool
3095 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3096 tree_code *convert_code)
3098 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3099 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3100 return false;
3102 tree_code code;
3103 int multi_step_cvt = 0;
3104 auto_vec <tree, 8> interm_types;
3105 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3106 &code, &multi_step_cvt,
3107 &interm_types)
3108 || multi_step_cvt)
3109 return false;
3111 *convert_code = code;
3112 return true;
3115 /* Function vectorizable_call.
3117 Check if GS performs a function call that can be vectorized.
3118 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3119 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3120 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3122 static bool
3123 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
3124 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
3126 gcall *stmt;
3127 tree vec_dest;
3128 tree scalar_dest;
3129 tree op, type;
3130 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3131 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
3132 tree vectype_out, vectype_in;
3133 poly_uint64 nunits_in;
3134 poly_uint64 nunits_out;
3135 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3136 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3137 vec_info *vinfo = stmt_info->vinfo;
3138 tree fndecl, new_temp, rhs_type;
3139 gimple *def_stmt;
3140 enum vect_def_type dt[3]
3141 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3142 int ndts = 3;
3143 gimple *new_stmt = NULL;
3144 int ncopies, j;
3145 vec<tree> vargs = vNULL;
3146 enum { NARROW, NONE, WIDEN } modifier;
3147 size_t i, nargs;
3148 tree lhs;
3150 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3151 return false;
3153 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3154 && ! vec_stmt)
3155 return false;
3157 /* Is GS a vectorizable call? */
3158 stmt = dyn_cast <gcall *> (gs);
3159 if (!stmt)
3160 return false;
3162 if (gimple_call_internal_p (stmt)
3163 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3164 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3165 /* Handled by vectorizable_load and vectorizable_store. */
3166 return false;
3168 if (gimple_call_lhs (stmt) == NULL_TREE
3169 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3170 return false;
3172 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3174 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3176 /* Process function arguments. */
3177 rhs_type = NULL_TREE;
3178 vectype_in = NULL_TREE;
3179 nargs = gimple_call_num_args (stmt);
3181 /* Bail out if the function has more than three arguments; we do not have
3182 interesting builtin functions to vectorize with more than two arguments
3183 except for fma. Calls with no arguments are not handled either. */
3184 if (nargs == 0 || nargs > 3)
3185 return false;
3187 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3188 if (gimple_call_internal_p (stmt)
3189 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3191 nargs = 0;
3192 rhs_type = unsigned_type_node;
3195 for (i = 0; i < nargs; i++)
3197 tree opvectype;
3199 op = gimple_call_arg (stmt, i);
3201 /* We can only handle calls with arguments of the same type. */
3202 if (rhs_type
3203 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3205 if (dump_enabled_p ())
3206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3207 "argument types differ.\n");
3208 return false;
3210 if (!rhs_type)
3211 rhs_type = TREE_TYPE (op);
3213 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
3215 if (dump_enabled_p ())
3216 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3217 "use not simple.\n");
3218 return false;
3221 if (!vectype_in)
3222 vectype_in = opvectype;
3223 else if (opvectype
3224 && opvectype != vectype_in)
3226 if (dump_enabled_p ())
3227 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3228 "argument vector types differ.\n");
3229 return false;
3232 /* If all arguments are external or constant defs, use a vector type with
3233 the same size as the output vector type. */
3234 if (!vectype_in)
3235 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3236 if (vec_stmt)
3237 gcc_assert (vectype_in);
3238 if (!vectype_in)
3240 if (dump_enabled_p ())
3242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3243 "no vectype for scalar type ");
3244 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3245 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3248 return false;
3251 /* FORNOW */
3252 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3253 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3254 if (known_eq (nunits_in * 2, nunits_out))
3255 modifier = NARROW;
3256 else if (known_eq (nunits_out, nunits_in))
3257 modifier = NONE;
3258 else if (known_eq (nunits_out * 2, nunits_in))
3259 modifier = WIDEN;
3260 else
3261 return false;
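/* For instance (hypothetical types): V2DI arguments with a V4SI result give
   nunits_in = 2 and nunits_out = 4, hence NARROW; on the internal-function
   path two half-width results are then packed using the code found by
   simple_integer_narrowing.  Equal counts give NONE and the reverse ratio
   gives WIDEN.  */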
3263 /* We only handle functions that do not read or clobber memory. */
3264 if (gimple_vuse (stmt))
3266 if (dump_enabled_p ())
3267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3268 "function reads from or writes to memory.\n");
3269 return false;
3272 /* For now, we only vectorize functions if a target specific builtin
3273 is available. TODO -- in some cases, it might be profitable to
3274 insert the calls for pieces of the vector, in order to be able
3275 to vectorize other operations in the loop. */
3276 fndecl = NULL_TREE;
3277 internal_fn ifn = IFN_LAST;
3278 combined_fn cfn = gimple_call_combined_fn (stmt);
3279 tree callee = gimple_call_fndecl (stmt);
3281 /* First try using an internal function. */
3282 tree_code convert_code = ERROR_MARK;
3283 if (cfn != CFN_LAST
3284 && (modifier == NONE
3285 || (modifier == NARROW
3286 && simple_integer_narrowing (vectype_out, vectype_in,
3287 &convert_code))))
3288 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3289 vectype_in);
3291 /* If that fails, try asking for a target-specific built-in function. */
3292 if (ifn == IFN_LAST)
3294 if (cfn != CFN_LAST)
3295 fndecl = targetm.vectorize.builtin_vectorized_function
3296 (cfn, vectype_out, vectype_in);
3297 else if (callee)
3298 fndecl = targetm.vectorize.builtin_md_vectorized_function
3299 (callee, vectype_out, vectype_in);
3302 if (ifn == IFN_LAST && !fndecl)
3304 if (cfn == CFN_GOMP_SIMD_LANE
3305 && !slp_node
3306 && loop_vinfo
3307 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3308 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3309 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3310 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3312 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3313 { 0, 1, 2, ... vf - 1 } vector. */
3314 gcc_assert (nargs == 0);
3316 else if (modifier == NONE
3317 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3318 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3319 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3320 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
3321 vectype_in, dt, cost_vec);
3322 else
3324 if (dump_enabled_p ())
3325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3326 "function is not vectorizable.\n");
3327 return false;
3331 if (slp_node)
3332 ncopies = 1;
3333 else if (modifier == NARROW && ifn == IFN_LAST)
3334 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3335 else
3336 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3338 /* Sanity check: make sure that at least one copy of the vectorized stmt
3339 needs to be generated. */
3340 gcc_assert (ncopies >= 1);
3342 if (!vec_stmt) /* transformation not required. */
3344 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3345 DUMP_VECT_SCOPE ("vectorizable_call");
3346 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3347 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3348 record_stmt_cost (cost_vec, ncopies / 2,
3349 vec_promote_demote, stmt_info, 0, vect_body);
3351 return true;
3354 /* Transform. */
3356 if (dump_enabled_p ())
3357 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3359 /* Handle def. */
3360 scalar_dest = gimple_call_lhs (stmt);
3361 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3363 prev_stmt_info = NULL;
3364 if (modifier == NONE || ifn != IFN_LAST)
3366 tree prev_res = NULL_TREE;
3367 for (j = 0; j < ncopies; ++j)
3369 /* Build argument list for the vectorized call. */
3370 if (j == 0)
3371 vargs.create (nargs);
3372 else
3373 vargs.truncate (0);
3375 if (slp_node)
3377 auto_vec<vec<tree> > vec_defs (nargs);
3378 vec<tree> vec_oprnds0;
3380 for (i = 0; i < nargs; i++)
3381 vargs.quick_push (gimple_call_arg (stmt, i));
3382 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3383 vec_oprnds0 = vec_defs[0];
3385 /* Arguments are ready. Create the new vector stmt. */
3386 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3388 size_t k;
3389 for (k = 0; k < nargs; k++)
3391 vec<tree> vec_oprndsk = vec_defs[k];
3392 vargs[k] = vec_oprndsk[i];
3394 if (modifier == NARROW)
3396 tree half_res = make_ssa_name (vectype_in);
3397 gcall *call
3398 = gimple_build_call_internal_vec (ifn, vargs);
3399 gimple_call_set_lhs (call, half_res);
3400 gimple_call_set_nothrow (call, true);
3401 new_stmt = call;
3402 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3403 if ((i & 1) == 0)
3405 prev_res = half_res;
3406 continue;
3408 new_temp = make_ssa_name (vec_dest);
3409 new_stmt = gimple_build_assign (new_temp, convert_code,
3410 prev_res, half_res);
3412 else
3414 gcall *call;
3415 if (ifn != IFN_LAST)
3416 call = gimple_build_call_internal_vec (ifn, vargs);
3417 else
3418 call = gimple_build_call_vec (fndecl, vargs);
3419 new_temp = make_ssa_name (vec_dest, call);
3420 gimple_call_set_lhs (call, new_temp);
3421 gimple_call_set_nothrow (call, true);
3422 new_stmt = call;
3424 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3425 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3428 for (i = 0; i < nargs; i++)
3430 vec<tree> vec_oprndsi = vec_defs[i];
3431 vec_oprndsi.release ();
3433 continue;
3436 for (i = 0; i < nargs; i++)
3438 op = gimple_call_arg (stmt, i);
3439 if (j == 0)
3440 vec_oprnd0
3441 = vect_get_vec_def_for_operand (op, stmt);
3442 else
3444 vec_oprnd0 = gimple_call_arg (new_stmt, i);
3445 vec_oprnd0
3446 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3449 vargs.quick_push (vec_oprnd0);
3452 if (gimple_call_internal_p (stmt)
3453 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3455 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3456 tree new_var
3457 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3458 gimple *init_stmt = gimple_build_assign (new_var, cst);
3459 vect_init_vector_1 (stmt, init_stmt, NULL);
3460 new_temp = make_ssa_name (vec_dest);
3461 new_stmt = gimple_build_assign (new_temp, new_var);
3463 else if (modifier == NARROW)
3465 tree half_res = make_ssa_name (vectype_in);
3466 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3467 gimple_call_set_lhs (call, half_res);
3468 gimple_call_set_nothrow (call, true);
3469 new_stmt = call;
3470 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3471 if ((j & 1) == 0)
3473 prev_res = half_res;
3474 continue;
3476 new_temp = make_ssa_name (vec_dest);
3477 new_stmt = gimple_build_assign (new_temp, convert_code,
3478 prev_res, half_res);
3480 else
3482 gcall *call;
3483 if (ifn != IFN_LAST)
3484 call = gimple_build_call_internal_vec (ifn, vargs);
3485 else
3486 call = gimple_build_call_vec (fndecl, vargs);
3487 new_temp = make_ssa_name (vec_dest, call);
3488 gimple_call_set_lhs (call, new_temp);
3489 gimple_call_set_nothrow (call, true);
3490 new_stmt = call;
3492 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3494 if (j == (modifier == NARROW ? 1 : 0))
3495 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3496 else
3497 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3499 prev_stmt_info = vinfo_for_stmt (new_stmt);
3502 else if (modifier == NARROW)
3504 for (j = 0; j < ncopies; ++j)
3506 /* Build argument list for the vectorized call. */
3507 if (j == 0)
3508 vargs.create (nargs * 2);
3509 else
3510 vargs.truncate (0);
3512 if (slp_node)
3514 auto_vec<vec<tree> > vec_defs (nargs);
3515 vec<tree> vec_oprnds0;
3517 for (i = 0; i < nargs; i++)
3518 vargs.quick_push (gimple_call_arg (stmt, i));
3519 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3520 vec_oprnds0 = vec_defs[0];
3522 /* Arguments are ready. Create the new vector stmt. */
3523 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3525 size_t k;
3526 vargs.truncate (0);
3527 for (k = 0; k < nargs; k++)
3529 vec<tree> vec_oprndsk = vec_defs[k];
3530 vargs.quick_push (vec_oprndsk[i]);
3531 vargs.quick_push (vec_oprndsk[i + 1]);
3533 gcall *call;
3534 if (ifn != IFN_LAST)
3535 call = gimple_build_call_internal_vec (ifn, vargs);
3536 else
3537 call = gimple_build_call_vec (fndecl, vargs);
3538 new_temp = make_ssa_name (vec_dest, call);
3539 gimple_call_set_lhs (call, new_temp);
3540 gimple_call_set_nothrow (call, true);
3541 new_stmt = call;
3542 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3543 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3546 for (i = 0; i < nargs; i++)
3548 vec<tree> vec_oprndsi = vec_defs[i];
3549 vec_oprndsi.release ();
3551 continue;
3554 for (i = 0; i < nargs; i++)
3556 op = gimple_call_arg (stmt, i);
3557 if (j == 0)
3559 vec_oprnd0
3560 = vect_get_vec_def_for_operand (op, stmt);
3561 vec_oprnd1
3562 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3564 else
3566 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3567 vec_oprnd0
3568 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3569 vec_oprnd1
3570 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3573 vargs.quick_push (vec_oprnd0);
3574 vargs.quick_push (vec_oprnd1);
3577 new_stmt = gimple_build_call_vec (fndecl, vargs);
3578 new_temp = make_ssa_name (vec_dest, new_stmt);
3579 gimple_call_set_lhs (new_stmt, new_temp);
3580 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3582 if (j == 0)
3583 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3584 else
3585 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3587 prev_stmt_info = vinfo_for_stmt (new_stmt);
3590 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3592 else
3593 /* No current target implements this case. */
3594 return false;
3596 vargs.release ();
3598 /* The call in STMT might prevent it from being removed in dce.
3599 We cannot, however, remove it here, due to the way the ssa name
3600 it defines is mapped to the new definition. So just replace the
3601 rhs of the statement with something harmless. */
3603 if (slp_node)
3604 return true;
3606 type = TREE_TYPE (scalar_dest);
3607 if (is_pattern_stmt_p (stmt_info))
3608 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
3609 lhs = gimple_get_lhs (stmt_info->stmt);
3611 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3612 set_vinfo_for_stmt (new_stmt, stmt_info);
3613 set_vinfo_for_stmt (stmt_info->stmt, NULL);
3614 STMT_VINFO_STMT (stmt_info) = new_stmt;
3615 gsi_replace (gsi, new_stmt, false);
3617 return true;
3621 struct simd_call_arg_info
3623 tree vectype;
3624 tree op;
3625 HOST_WIDE_INT linear_step;
3626 enum vect_def_type dt;
3627 unsigned int align;
3628 bool simd_lane_linear;
3631 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3632 is linear within a simd lane (but not within the whole loop), note it
3633 in *ARGINFO. */
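/* For illustration only (SSA names and the array are hypothetical), the
   kind of sequence recognized below is

       _1 = GOMP_SIMD_LANE (simduid.0_7);
       _2 = _1 * 4;
       _3 = (sizetype) _2;
       op_4 = &a + _3;

   for which base &a and linear_step 4 would be recorded in *ARGINFO.  */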
3635 static void
3636 vect_simd_lane_linear (tree op, struct loop *loop,
3637 struct simd_call_arg_info *arginfo)
3639 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3641 if (!is_gimple_assign (def_stmt)
3642 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3643 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3644 return;
3646 tree base = gimple_assign_rhs1 (def_stmt);
3647 HOST_WIDE_INT linear_step = 0;
3648 tree v = gimple_assign_rhs2 (def_stmt);
3649 while (TREE_CODE (v) == SSA_NAME)
3651 tree t;
3652 def_stmt = SSA_NAME_DEF_STMT (v);
3653 if (is_gimple_assign (def_stmt))
3654 switch (gimple_assign_rhs_code (def_stmt))
3656 case PLUS_EXPR:
3657 t = gimple_assign_rhs2 (def_stmt);
3658 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3659 return;
3660 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3661 v = gimple_assign_rhs1 (def_stmt);
3662 continue;
3663 case MULT_EXPR:
3664 t = gimple_assign_rhs2 (def_stmt);
3665 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3666 return;
3667 linear_step = tree_to_shwi (t);
3668 v = gimple_assign_rhs1 (def_stmt);
3669 continue;
3670 CASE_CONVERT:
3671 t = gimple_assign_rhs1 (def_stmt);
3672 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3673 || (TYPE_PRECISION (TREE_TYPE (v))
3674 < TYPE_PRECISION (TREE_TYPE (t))))
3675 return;
3676 if (!linear_step)
3677 linear_step = 1;
3678 v = t;
3679 continue;
3680 default:
3681 return;
3683 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3684 && loop->simduid
3685 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3686 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3687 == loop->simduid))
3689 if (!linear_step)
3690 linear_step = 1;
3691 arginfo->linear_step = linear_step;
3692 arginfo->op = base;
3693 arginfo->simd_lane_linear = true;
3694 return;
3699 /* Return the number of elements in vector type VECTYPE, which is associated
3700 with a SIMD clone. At present these vectors always have a constant
3701 length. */
3703 static unsigned HOST_WIDE_INT
3704 simd_clone_subparts (tree vectype)
3706 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3709 /* Function vectorizable_simd_clone_call.
3711 Check if STMT performs a function call that can be vectorized
3712 by calling a simd clone of the function.
3713 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3714 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3715 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
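/* A minimal sketch of the transformation (clone name and vector types are
   hypothetical): in a loop vectorized with VF == 4, a scalar call

       x_1 = foo (a_2);

   is replaced by a call to a matching simd clone, e.g.

       vect_x_3 = _ZGVbN4v_foo (vect_a_4);

   where the clone consumes and produces whole four-element vectors.  */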
3717 static bool
3718 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3719 gimple **vec_stmt, slp_tree slp_node,
3720 stmt_vector_for_cost *)
3722 tree vec_dest;
3723 tree scalar_dest;
3724 tree op, type;
3725 tree vec_oprnd0 = NULL_TREE;
3726 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3727 tree vectype;
3728 unsigned int nunits;
3729 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3730 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3731 vec_info *vinfo = stmt_info->vinfo;
3732 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3733 tree fndecl, new_temp;
3734 gimple *def_stmt;
3735 gimple *new_stmt = NULL;
3736 int ncopies, j;
3737 auto_vec<simd_call_arg_info> arginfo;
3738 vec<tree> vargs = vNULL;
3739 size_t i, nargs;
3740 tree lhs, rtype, ratype;
3741 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3743 /* Is STMT a vectorizable call? */
3744 if (!is_gimple_call (stmt))
3745 return false;
3747 fndecl = gimple_call_fndecl (stmt);
3748 if (fndecl == NULL_TREE)
3749 return false;
3751 struct cgraph_node *node = cgraph_node::get (fndecl);
3752 if (node == NULL || node->simd_clones == NULL)
3753 return false;
3755 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3756 return false;
3758 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3759 && ! vec_stmt)
3760 return false;
3762 if (gimple_call_lhs (stmt)
3763 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3764 return false;
3766 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3768 vectype = STMT_VINFO_VECTYPE (stmt_info);
3770 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3771 return false;
3773 /* FORNOW */
3774 if (slp_node)
3775 return false;
3777 /* Process function arguments. */
3778 nargs = gimple_call_num_args (stmt);
3780 /* Bail out if the function has zero arguments. */
3781 if (nargs == 0)
3782 return false;
3784 arginfo.reserve (nargs, true);
3786 for (i = 0; i < nargs; i++)
3788 simd_call_arg_info thisarginfo;
3789 affine_iv iv;
3791 thisarginfo.linear_step = 0;
3792 thisarginfo.align = 0;
3793 thisarginfo.op = NULL_TREE;
3794 thisarginfo.simd_lane_linear = false;
3796 op = gimple_call_arg (stmt, i);
3797 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3798 &thisarginfo.vectype)
3799 || thisarginfo.dt == vect_uninitialized_def)
3801 if (dump_enabled_p ())
3802 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3803 "use not simple.\n");
3804 return false;
3807 if (thisarginfo.dt == vect_constant_def
3808 || thisarginfo.dt == vect_external_def)
3809 gcc_assert (thisarginfo.vectype == NULL_TREE);
3810 else
3811 gcc_assert (thisarginfo.vectype != NULL_TREE);
3813 /* For linear arguments, the analysis phase should have saved
3814 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3815 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3816 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3818 gcc_assert (vec_stmt);
3819 thisarginfo.linear_step
3820 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3821 thisarginfo.op
3822 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3823 thisarginfo.simd_lane_linear
3824 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3825 == boolean_true_node);
3826 /* If the loop has been peeled for alignment, we need to adjust the recorded base accordingly. */
3827 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3828 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3829 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3831 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3832 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3833 tree opt = TREE_TYPE (thisarginfo.op);
3834 bias = fold_convert (TREE_TYPE (step), bias);
3835 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3836 thisarginfo.op
3837 = fold_build2 (POINTER_TYPE_P (opt)
3838 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3839 thisarginfo.op, bias);
3842 else if (!vec_stmt
3843 && thisarginfo.dt != vect_constant_def
3844 && thisarginfo.dt != vect_external_def
3845 && loop_vinfo
3846 && TREE_CODE (op) == SSA_NAME
3847 && simple_iv (loop, loop_containing_stmt (stmt), op,
3848 &iv, false)
3849 && tree_fits_shwi_p (iv.step))
3851 thisarginfo.linear_step = tree_to_shwi (iv.step);
3852 thisarginfo.op = iv.base;
3854 else if ((thisarginfo.dt == vect_constant_def
3855 || thisarginfo.dt == vect_external_def)
3856 && POINTER_TYPE_P (TREE_TYPE (op)))
3857 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3858 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3859 linear too. */
3860 if (POINTER_TYPE_P (TREE_TYPE (op))
3861 && !thisarginfo.linear_step
3862 && !vec_stmt
3863 && thisarginfo.dt != vect_constant_def
3864 && thisarginfo.dt != vect_external_def
3865 && loop_vinfo
3866 && !slp_node
3867 && TREE_CODE (op) == SSA_NAME)
3868 vect_simd_lane_linear (op, loop, &thisarginfo);
3870 arginfo.quick_push (thisarginfo);
3873 unsigned HOST_WIDE_INT vf;
3874 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3876 if (dump_enabled_p ())
3877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3878 "not considering SIMD clones; not yet supported"
3879 " for variable-width vectors.\n");
3880 return false;
3883 unsigned int badness = 0;
3884 struct cgraph_node *bestn = NULL;
3885 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3886 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3887 else
3888 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3889 n = n->simdclone->next_clone)
3891 unsigned int this_badness = 0;
3892 if (n->simdclone->simdlen > vf
3893 || n->simdclone->nargs != nargs)
3894 continue;
3895 if (n->simdclone->simdlen < vf)
3896 this_badness += (exact_log2 (vf)
3897 - exact_log2 (n->simdclone->simdlen)) * 1024;
3898 if (n->simdclone->inbranch)
3899 this_badness += 2048;
3900 int target_badness = targetm.simd_clone.usable (n);
3901 if (target_badness < 0)
3902 continue;
3903 this_badness += target_badness * 512;
3904 /* FORNOW: Have to add code to add the mask argument. */
3905 if (n->simdclone->inbranch)
3906 continue;
3907 for (i = 0; i < nargs; i++)
3909 switch (n->simdclone->args[i].arg_type)
3911 case SIMD_CLONE_ARG_TYPE_VECTOR:
3912 if (!useless_type_conversion_p
3913 (n->simdclone->args[i].orig_type,
3914 TREE_TYPE (gimple_call_arg (stmt, i))))
3915 i = -1;
3916 else if (arginfo[i].dt == vect_constant_def
3917 || arginfo[i].dt == vect_external_def
3918 || arginfo[i].linear_step)
3919 this_badness += 64;
3920 break;
3921 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3922 if (arginfo[i].dt != vect_constant_def
3923 && arginfo[i].dt != vect_external_def)
3924 i = -1;
3925 break;
3926 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3927 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3928 if (arginfo[i].dt == vect_constant_def
3929 || arginfo[i].dt == vect_external_def
3930 || (arginfo[i].linear_step
3931 != n->simdclone->args[i].linear_step))
3932 i = -1;
3933 break;
3934 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3935 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3936 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3937 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3938 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3939 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3940 /* FORNOW */
3941 i = -1;
3942 break;
3943 case SIMD_CLONE_ARG_TYPE_MASK:
3944 gcc_unreachable ();
3946 if (i == (size_t) -1)
3947 break;
3948 if (n->simdclone->args[i].alignment > arginfo[i].align)
3950 i = -1;
3951 break;
3953 if (arginfo[i].align)
3954 this_badness += (exact_log2 (arginfo[i].align)
3955 - exact_log2 (n->simdclone->args[i].alignment));
3957 if (i == (size_t) -1)
3958 continue;
3959 if (bestn == NULL || this_badness < badness)
3961 bestn = n;
3962 badness = this_badness;
3966 if (bestn == NULL)
3967 return false;
3969 for (i = 0; i < nargs; i++)
3970 if ((arginfo[i].dt == vect_constant_def
3971 || arginfo[i].dt == vect_external_def)
3972 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3974 arginfo[i].vectype
3975 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3976 i)));
3977 if (arginfo[i].vectype == NULL
3978 || (simd_clone_subparts (arginfo[i].vectype)
3979 > bestn->simdclone->simdlen))
3980 return false;
3983 fndecl = bestn->decl;
3984 nunits = bestn->simdclone->simdlen;
3985 ncopies = vf / nunits;
3987 /* If the function isn't const, only allow it in simd loops where the
3988 user has asserted that at least nunits consecutive iterations can be
3989 performed using SIMD instructions. */
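/* E.g. (illustrative) a loop annotated with "#pragma omp simd safelen(8)"
   has loop->safelen == 8, so clones with simdlen up to 8 remain usable
   below even when the call has a virtual use.  */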
3990 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3991 && gimple_vuse (stmt))
3992 return false;
3994 /* Sanity check: make sure that at least one copy of the vectorized stmt
3995 needs to be generated. */
3996 gcc_assert (ncopies >= 1);
3998 if (!vec_stmt) /* transformation not required. */
4000 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4001 for (i = 0; i < nargs; i++)
4002 if ((bestn->simdclone->args[i].arg_type
4003 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4004 || (bestn->simdclone->args[i].arg_type
4005 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4007 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4008 + 1);
4009 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4010 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4011 ? size_type_node : TREE_TYPE (arginfo[i].op);
4012 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4013 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4014 tree sll = arginfo[i].simd_lane_linear
4015 ? boolean_true_node : boolean_false_node;
4016 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4018 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4019 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4020 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4021 return true;
4024 /* Transform. */
4026 if (dump_enabled_p ())
4027 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4029 /* Handle def. */
4030 scalar_dest = gimple_call_lhs (stmt);
4031 vec_dest = NULL_TREE;
4032 rtype = NULL_TREE;
4033 ratype = NULL_TREE;
4034 if (scalar_dest)
4036 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4037 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4038 if (TREE_CODE (rtype) == ARRAY_TYPE)
4040 ratype = rtype;
4041 rtype = TREE_TYPE (ratype);
4045 prev_stmt_info = NULL;
4046 for (j = 0; j < ncopies; ++j)
4048 /* Build argument list for the vectorized call. */
4049 if (j == 0)
4050 vargs.create (nargs);
4051 else
4052 vargs.truncate (0);
4054 for (i = 0; i < nargs; i++)
4056 unsigned int k, l, m, o;
4057 tree atype;
4058 op = gimple_call_arg (stmt, i);
4059 switch (bestn->simdclone->args[i].arg_type)
4061 case SIMD_CLONE_ARG_TYPE_VECTOR:
4062 atype = bestn->simdclone->args[i].vector_type;
4063 o = nunits / simd_clone_subparts (atype);
4064 for (m = j * o; m < (j + 1) * o; m++)
4066 if (simd_clone_subparts (atype)
4067 < simd_clone_subparts (arginfo[i].vectype))
4069 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4070 k = (simd_clone_subparts (arginfo[i].vectype)
4071 / simd_clone_subparts (atype));
4072 gcc_assert ((k & (k - 1)) == 0);
4073 if (m == 0)
4074 vec_oprnd0
4075 = vect_get_vec_def_for_operand (op, stmt);
4076 else
4078 vec_oprnd0 = arginfo[i].op;
4079 if ((m & (k - 1)) == 0)
4080 vec_oprnd0
4081 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4082 vec_oprnd0);
4084 arginfo[i].op = vec_oprnd0;
4085 vec_oprnd0
4086 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4087 bitsize_int (prec),
4088 bitsize_int ((m & (k - 1)) * prec));
4089 new_stmt
4090 = gimple_build_assign (make_ssa_name (atype),
4091 vec_oprnd0);
4092 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4093 vargs.safe_push (gimple_assign_lhs (new_stmt));
4095 else
4097 k = (simd_clone_subparts (atype)
4098 / simd_clone_subparts (arginfo[i].vectype));
4099 gcc_assert ((k & (k - 1)) == 0);
4100 vec<constructor_elt, va_gc> *ctor_elts;
4101 if (k != 1)
4102 vec_alloc (ctor_elts, k);
4103 else
4104 ctor_elts = NULL;
4105 for (l = 0; l < k; l++)
4107 if (m == 0 && l == 0)
4108 vec_oprnd0
4109 = vect_get_vec_def_for_operand (op, stmt);
4110 else
4111 vec_oprnd0
4112 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4113 arginfo[i].op);
4114 arginfo[i].op = vec_oprnd0;
4115 if (k == 1)
4116 break;
4117 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4118 vec_oprnd0);
4120 if (k == 1)
4121 vargs.safe_push (vec_oprnd0);
4122 else
4124 vec_oprnd0 = build_constructor (atype, ctor_elts);
4125 new_stmt
4126 = gimple_build_assign (make_ssa_name (atype),
4127 vec_oprnd0);
4128 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4129 vargs.safe_push (gimple_assign_lhs (new_stmt));
4133 break;
4134 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4135 vargs.safe_push (op);
4136 break;
4137 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4138 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4139 if (j == 0)
4141 gimple_seq stmts;
4142 arginfo[i].op
4143 = force_gimple_operand (arginfo[i].op, &stmts, true,
4144 NULL_TREE);
4145 if (stmts != NULL)
4147 basic_block new_bb;
4148 edge pe = loop_preheader_edge (loop);
4149 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4150 gcc_assert (!new_bb);
4152 if (arginfo[i].simd_lane_linear)
4154 vargs.safe_push (arginfo[i].op);
4155 break;
4157 tree phi_res = copy_ssa_name (op);
4158 gphi *new_phi = create_phi_node (phi_res, loop->header);
4159 set_vinfo_for_stmt (new_phi,
4160 new_stmt_vec_info (new_phi, loop_vinfo));
4161 add_phi_arg (new_phi, arginfo[i].op,
4162 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4163 enum tree_code code
4164 = POINTER_TYPE_P (TREE_TYPE (op))
4165 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4166 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4167 ? sizetype : TREE_TYPE (op);
4168 widest_int cst
4169 = wi::mul (bestn->simdclone->args[i].linear_step,
4170 ncopies * nunits);
4171 tree tcst = wide_int_to_tree (type, cst);
4172 tree phi_arg = copy_ssa_name (op);
4173 new_stmt
4174 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4175 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4176 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4177 set_vinfo_for_stmt (new_stmt,
4178 new_stmt_vec_info (new_stmt, loop_vinfo));
4179 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4180 UNKNOWN_LOCATION);
4181 arginfo[i].op = phi_res;
4182 vargs.safe_push (phi_res);
4184 else
4186 enum tree_code code
4187 = POINTER_TYPE_P (TREE_TYPE (op))
4188 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4189 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4190 ? sizetype : TREE_TYPE (op);
4191 widest_int cst
4192 = wi::mul (bestn->simdclone->args[i].linear_step,
4193 j * nunits);
4194 tree tcst = wide_int_to_tree (type, cst);
4195 new_temp = make_ssa_name (TREE_TYPE (op));
4196 new_stmt = gimple_build_assign (new_temp, code,
4197 arginfo[i].op, tcst);
4198 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4199 vargs.safe_push (new_temp);
4201 break;
4202 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4203 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4204 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4205 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4206 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4207 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4208 default:
4209 gcc_unreachable ();
4213 new_stmt = gimple_build_call_vec (fndecl, vargs);
4214 if (vec_dest)
4216 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4217 if (ratype)
4218 new_temp = create_tmp_var (ratype);
4219 else if (simd_clone_subparts (vectype)
4220 == simd_clone_subparts (rtype))
4221 new_temp = make_ssa_name (vec_dest, new_stmt);
4222 else
4223 new_temp = make_ssa_name (rtype, new_stmt);
4224 gimple_call_set_lhs (new_stmt, new_temp);
4226 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4228 if (vec_dest)
4230 if (simd_clone_subparts (vectype) < nunits)
4232 unsigned int k, l;
4233 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4234 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4235 k = nunits / simd_clone_subparts (vectype);
4236 gcc_assert ((k & (k - 1)) == 0);
4237 for (l = 0; l < k; l++)
4239 tree t;
4240 if (ratype)
4242 t = build_fold_addr_expr (new_temp);
4243 t = build2 (MEM_REF, vectype, t,
4244 build_int_cst (TREE_TYPE (t), l * bytes));
4246 else
4247 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4248 bitsize_int (prec), bitsize_int (l * prec));
4249 new_stmt
4250 = gimple_build_assign (make_ssa_name (vectype), t);
4251 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4252 if (j == 0 && l == 0)
4253 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4254 else
4255 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4257 prev_stmt_info = vinfo_for_stmt (new_stmt);
4260 if (ratype)
4261 vect_clobber_variable (stmt, gsi, new_temp);
4262 continue;
4264 else if (simd_clone_subparts (vectype) > nunits)
4266 unsigned int k = (simd_clone_subparts (vectype)
4267 / simd_clone_subparts (rtype));
4268 gcc_assert ((k & (k - 1)) == 0);
4269 if ((j & (k - 1)) == 0)
4270 vec_alloc (ret_ctor_elts, k);
4271 if (ratype)
4273 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4274 for (m = 0; m < o; m++)
4276 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4277 size_int (m), NULL_TREE, NULL_TREE);
4278 new_stmt
4279 = gimple_build_assign (make_ssa_name (rtype), tem);
4280 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4281 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4282 gimple_assign_lhs (new_stmt));
4284 vect_clobber_variable (stmt, gsi, new_temp);
4286 else
4287 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4288 if ((j & (k - 1)) != k - 1)
4289 continue;
4290 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4291 new_stmt
4292 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4293 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4295 if ((unsigned) j == k - 1)
4296 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4297 else
4298 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4300 prev_stmt_info = vinfo_for_stmt (new_stmt);
4301 continue;
4303 else if (ratype)
4305 tree t = build_fold_addr_expr (new_temp);
4306 t = build2 (MEM_REF, vectype, t,
4307 build_int_cst (TREE_TYPE (t), 0));
4308 new_stmt
4309 = gimple_build_assign (make_ssa_name (vec_dest), t);
4310 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4311 vect_clobber_variable (stmt, gsi, new_temp);
4315 if (j == 0)
4316 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4317 else
4318 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4320 prev_stmt_info = vinfo_for_stmt (new_stmt);
4323 vargs.release ();
4325 /* The call in STMT might prevent it from being removed in dce.
4326 We cannot, however, remove it here, due to the way the ssa name
4327 it defines is mapped to the new definition. So just replace the
4328 rhs of the statement with something harmless. */
4330 if (slp_node)
4331 return true;
4333 if (scalar_dest)
4335 type = TREE_TYPE (scalar_dest);
4336 if (is_pattern_stmt_p (stmt_info))
4337 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
4338 else
4339 lhs = gimple_call_lhs (stmt);
4340 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4342 else
4343 new_stmt = gimple_build_nop ();
4344 set_vinfo_for_stmt (new_stmt, stmt_info);
4345 set_vinfo_for_stmt (stmt, NULL);
4346 STMT_VINFO_STMT (stmt_info) = new_stmt;
4347 gsi_replace (gsi, new_stmt, true);
4348 unlink_stmt_vdef (stmt);
4350 return true;
4354 /* Function vect_gen_widened_results_half
4356 Create a vector stmt whose code is CODE, whose number of arguments is
4357 OP_TYPE and whose result variable is VEC_DEST, and whose arguments are
4358 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4359 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4360 needs to be created (DECL is a function-decl of a target-builtin).
4361 STMT is the original scalar stmt that we are vectorizing. */
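/* For instance (purely illustrative), with CODE == VEC_UNPACK_LO_EXPR,
   OP_TYPE == unary_op and a V4SI VEC_DEST this emits

       vect_dest_1 = VEC_UNPACK_LO_EXPR <vec_oprnd0_2>;

   the matching *_HI_EXPR half comes from a second call to this function.  */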
4363 static gimple *
4364 vect_gen_widened_results_half (enum tree_code code,
4365 tree decl,
4366 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4367 tree vec_dest, gimple_stmt_iterator *gsi,
4368 gimple *stmt)
4370 gimple *new_stmt;
4371 tree new_temp;
4373 /* Generate half of the widened result: */
4374 if (code == CALL_EXPR)
4376 /* Target specific support */
4377 if (op_type == binary_op)
4378 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4379 else
4380 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4381 new_temp = make_ssa_name (vec_dest, new_stmt);
4382 gimple_call_set_lhs (new_stmt, new_temp);
4384 else
4386 /* Generic support */
4387 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4388 if (op_type != binary_op)
4389 vec_oprnd1 = NULL;
4390 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4391 new_temp = make_ssa_name (vec_dest, new_stmt);
4392 gimple_assign_set_lhs (new_stmt, new_temp);
4394 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4396 return new_stmt;
4400 /* Get vectorized definitions for loop-based vectorization. For the first
4401 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4402 the scalar operand), and for the rest we get a copy with
4403 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4404 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4405 The vectors are collected into VEC_OPRNDS. */
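/* A sketch of the bookkeeping only: each invocation pushes two vector defs,
   so a call with MULTI_STEP_CVT == N collects 2 * (N + 1) operands; the
   NARROW case of vectorizable_conversion below passes
   vect_pow2 (multi_step_cvt) - 1 so that all inputs of the first demotion
   step are gathered at once.  */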
4407 static void
4408 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
4409 vec<tree> *vec_oprnds, int multi_step_cvt)
4411 tree vec_oprnd;
4413 /* Get first vector operand. */
4414 /* All the vector operands except the very first one (which is the scalar
4415 operand) are stmt copies. */
4416 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4417 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4418 else
4419 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4421 vec_oprnds->quick_push (vec_oprnd);
4423 /* Get second vector operand. */
4424 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
4425 vec_oprnds->quick_push (vec_oprnd);
4427 *oprnd = vec_oprnd;
4429 /* For conversion in multiple steps, continue to get operands
4430 recursively. */
4431 if (multi_step_cvt)
4432 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4436 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4437 For multi-step conversions store the resulting vectors and call the function
4438 recursively. */
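/* An illustrative single-step case (types hypothetical): demoting int to
   short pairs up two V4SI operands as

       vect_dest_1 = VEC_PACK_TRUNC_EXPR <vop0, vop1>;

   giving a V8HI result; a multi-step demotion (e.g. int to char) feeds such
   results into a further recursive VEC_PACK_TRUNC_EXPR round.  */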
4440 static void
4441 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4442 int multi_step_cvt, gimple *stmt,
4443 vec<tree> vec_dsts,
4444 gimple_stmt_iterator *gsi,
4445 slp_tree slp_node, enum tree_code code,
4446 stmt_vec_info *prev_stmt_info)
4448 unsigned int i;
4449 tree vop0, vop1, new_tmp, vec_dest;
4450 gimple *new_stmt;
4451 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4453 vec_dest = vec_dsts.pop ();
4455 for (i = 0; i < vec_oprnds->length (); i += 2)
4457 /* Create demotion operation. */
4458 vop0 = (*vec_oprnds)[i];
4459 vop1 = (*vec_oprnds)[i + 1];
4460 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4461 new_tmp = make_ssa_name (vec_dest, new_stmt);
4462 gimple_assign_set_lhs (new_stmt, new_tmp);
4463 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4465 if (multi_step_cvt)
4466 /* Store the resulting vector for the next recursive call. */
4467 (*vec_oprnds)[i/2] = new_tmp;
4468 else
4470 /* This is the last step of the conversion sequence. Store the
4471 vectors in SLP_NODE or in the vector info of the scalar statement
4472 (or in the STMT_VINFO_RELATED_STMT chain). */
4473 if (slp_node)
4474 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4475 else
4477 if (!*prev_stmt_info)
4478 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4479 else
4480 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4482 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4487 /* For multi-step demotion operations we first generate demotion operations
4488 from the source type to the intermediate types, and then combine the
4489 results (stored in VEC_OPRNDS) in a further demotion operation to the
4490 destination type. */
4491 if (multi_step_cvt)
4493 /* At each level of recursion we have half of the operands we had at the
4494 previous level. */
4495 vec_oprnds->truncate ((i+1)/2);
4496 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4497 stmt, vec_dsts, gsi, slp_node,
4498 VEC_PACK_TRUNC_EXPR,
4499 prev_stmt_info);
4502 vec_dsts.quick_push (vec_dest);
4506 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4507 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4508 the resulting vectors and call the function recursively. */
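/* Roughly (the codes depend on the target and are chosen by
   supportable_widening_operation), widening one V8HI operand into two V4SI
   results looks like

       new_tmp1 = VEC_UNPACK_LO_EXPR <vop0>;
       new_tmp2 = VEC_UNPACK_HI_EXPR <vop0>;

   and both temporaries are stored back into VEC_OPRNDS0 for the next step.  */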
4510 static void
4511 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4512 vec<tree> *vec_oprnds1,
4513 gimple *stmt, tree vec_dest,
4514 gimple_stmt_iterator *gsi,
4515 enum tree_code code1,
4516 enum tree_code code2, tree decl1,
4517 tree decl2, int op_type)
4519 int i;
4520 tree vop0, vop1, new_tmp1, new_tmp2;
4521 gimple *new_stmt1, *new_stmt2;
4522 vec<tree> vec_tmp = vNULL;
4524 vec_tmp.create (vec_oprnds0->length () * 2);
4525 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4527 if (op_type == binary_op)
4528 vop1 = (*vec_oprnds1)[i];
4529 else
4530 vop1 = NULL_TREE;
4532 /* Generate the two halves of the promotion operation. */
4533 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4534 op_type, vec_dest, gsi, stmt);
4535 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4536 op_type, vec_dest, gsi, stmt);
4537 if (is_gimple_call (new_stmt1))
4539 new_tmp1 = gimple_call_lhs (new_stmt1);
4540 new_tmp2 = gimple_call_lhs (new_stmt2);
4542 else
4544 new_tmp1 = gimple_assign_lhs (new_stmt1);
4545 new_tmp2 = gimple_assign_lhs (new_stmt2);
4548 /* Store the results for the next step. */
4549 vec_tmp.quick_push (new_tmp1);
4550 vec_tmp.quick_push (new_tmp2);
4553 vec_oprnds0->release ();
4554 *vec_oprnds0 = vec_tmp;
4558 /* Check if STMT performs a conversion operation that can be vectorized.
4559 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4560 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4561 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
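/* A sketch of the three shapes handled below (vector types illustrative):
     NONE:   int -> float, V4SI to V4SF: a single FLOAT_EXPR per copy.
     WIDEN:  short -> int, one V8HI to two V4SI: VEC_UNPACK_LO_EXPR and
             VEC_UNPACK_HI_EXPR produce the widened halves.
     NARROW: int -> short, two V4SI to one V8HI: VEC_PACK_TRUNC_EXPR packs
             a pair of inputs into one result.  */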
4563 static bool
4564 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4565 gimple **vec_stmt, slp_tree slp_node,
4566 stmt_vector_for_cost *cost_vec)
4568 tree vec_dest;
4569 tree scalar_dest;
4570 tree op0, op1 = NULL_TREE;
4571 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4572 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4573 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4574 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4575 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4576 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4577 tree new_temp;
4578 gimple *def_stmt;
4579 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4580 int ndts = 2;
4581 gimple *new_stmt = NULL;
4582 stmt_vec_info prev_stmt_info;
4583 poly_uint64 nunits_in;
4584 poly_uint64 nunits_out;
4585 tree vectype_out, vectype_in;
4586 int ncopies, i, j;
4587 tree lhs_type, rhs_type;
4588 enum { NARROW, NONE, WIDEN } modifier;
4589 vec<tree> vec_oprnds0 = vNULL;
4590 vec<tree> vec_oprnds1 = vNULL;
4591 tree vop0;
4592 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4593 vec_info *vinfo = stmt_info->vinfo;
4594 int multi_step_cvt = 0;
4595 vec<tree> interm_types = vNULL;
4596 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4597 int op_type;
4598 unsigned short fltsz;
4600 /* Is STMT a vectorizable conversion? */
4602 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4603 return false;
4605 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4606 && ! vec_stmt)
4607 return false;
4609 if (!is_gimple_assign (stmt))
4610 return false;
4612 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4613 return false;
4615 code = gimple_assign_rhs_code (stmt);
4616 if (!CONVERT_EXPR_CODE_P (code)
4617 && code != FIX_TRUNC_EXPR
4618 && code != FLOAT_EXPR
4619 && code != WIDEN_MULT_EXPR
4620 && code != WIDEN_LSHIFT_EXPR)
4621 return false;
4623 op_type = TREE_CODE_LENGTH (code);
4625 /* Check types of lhs and rhs. */
4626 scalar_dest = gimple_assign_lhs (stmt);
4627 lhs_type = TREE_TYPE (scalar_dest);
4628 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4630 op0 = gimple_assign_rhs1 (stmt);
4631 rhs_type = TREE_TYPE (op0);
4633 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4634 && !((INTEGRAL_TYPE_P (lhs_type)
4635 && INTEGRAL_TYPE_P (rhs_type))
4636 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4637 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4638 return false;
4640 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4641 && ((INTEGRAL_TYPE_P (lhs_type)
4642 && !type_has_mode_precision_p (lhs_type))
4643 || (INTEGRAL_TYPE_P (rhs_type)
4644 && !type_has_mode_precision_p (rhs_type))))
4646 if (dump_enabled_p ())
4647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4648 "type conversion to/from bit-precision unsupported."
4649 "\n");
4650 return false;
4653 /* Check the operands of the operation. */
4654 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4656 if (dump_enabled_p ())
4657 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4658 "use not simple.\n");
4659 return false;
4661 if (op_type == binary_op)
4663 bool ok;
4665 op1 = gimple_assign_rhs2 (stmt);
4666 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4667 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4668 OP1. */
4669 if (CONSTANT_CLASS_P (op0))
4670 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4671 else
4672 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4674 if (!ok)
4676 if (dump_enabled_p ())
4677 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4678 "use not simple.\n");
4679 return false;
4683 /* If op0 is an external or constant def, use a vector type of
4684 the same size as the output vector type. */
4685 if (!vectype_in)
4686 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4687 if (vec_stmt)
4688 gcc_assert (vectype_in);
4689 if (!vectype_in)
4691 if (dump_enabled_p ())
4693 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4694 "no vectype for scalar type ");
4695 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4696 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4699 return false;
4702 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4703 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4705 if (dump_enabled_p ())
4707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4708 "can't convert between boolean and non "
4709 "boolean vectors");
4710 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4711 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4714 return false;
4717 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4718 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4719 if (known_eq (nunits_out, nunits_in))
4720 modifier = NONE;
4721 else if (multiple_p (nunits_out, nunits_in))
4722 modifier = NARROW;
4723 else
4725 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4726 modifier = WIDEN;
4729 /* Multiple types in SLP are handled by creating the appropriate number of
4730 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4731 case of SLP. */
4732 if (slp_node)
4733 ncopies = 1;
4734 else if (modifier == NARROW)
4735 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4736 else
4737 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4739 /* Sanity check: make sure that at least one copy of the vectorized stmt
4740 needs to be generated. */
4741 gcc_assert (ncopies >= 1);
4743 bool found_mode = false;
4744 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4745 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4746 opt_scalar_mode rhs_mode_iter;
4748 /* Supportable by target? */
4749 switch (modifier)
4751 case NONE:
4752 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4753 return false;
4754 if (supportable_convert_operation (code, vectype_out, vectype_in,
4755 &decl1, &code1))
4756 break;
4757 /* FALLTHRU */
4758 unsupported:
4759 if (dump_enabled_p ())
4760 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4761 "conversion not supported by target.\n");
4762 return false;
4764 case WIDEN:
4765 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4766 &code1, &code2, &multi_step_cvt,
4767 &interm_types))
4769 /* A binary widening operation can only be supported directly by the
4770 architecture. */
4771 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4772 break;
4775 if (code != FLOAT_EXPR
4776 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4777 goto unsupported;
4779 fltsz = GET_MODE_SIZE (lhs_mode);
4780 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4782 rhs_mode = rhs_mode_iter.require ();
4783 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4784 break;
4786 cvt_type
4787 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4788 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4789 if (cvt_type == NULL_TREE)
4790 goto unsupported;
4792 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4794 if (!supportable_convert_operation (code, vectype_out,
4795 cvt_type, &decl1, &codecvt1))
4796 goto unsupported;
4798 else if (!supportable_widening_operation (code, stmt, vectype_out,
4799 cvt_type, &codecvt1,
4800 &codecvt2, &multi_step_cvt,
4801 &interm_types))
4802 continue;
4803 else
4804 gcc_assert (multi_step_cvt == 0);
4806 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4807 vectype_in, &code1, &code2,
4808 &multi_step_cvt, &interm_types))
4810 found_mode = true;
4811 break;
4815 if (!found_mode)
4816 goto unsupported;
4818 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4819 codecvt2 = ERROR_MARK;
4820 else
4822 multi_step_cvt++;
4823 interm_types.safe_push (cvt_type);
4824 cvt_type = NULL_TREE;
4826 break;
4828 case NARROW:
4829 gcc_assert (op_type == unary_op);
4830 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4831 &code1, &multi_step_cvt,
4832 &interm_types))
4833 break;
4835 if (code != FIX_TRUNC_EXPR
4836 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4837 goto unsupported;
4839 cvt_type
4840 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4841 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4842 if (cvt_type == NULL_TREE)
4843 goto unsupported;
4844 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4845 &decl1, &codecvt1))
4846 goto unsupported;
4847 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4848 &code1, &multi_step_cvt,
4849 &interm_types))
4850 break;
4851 goto unsupported;
4853 default:
4854 gcc_unreachable ();
4857 if (!vec_stmt) /* transformation not required. */
4859 DUMP_VECT_SCOPE ("vectorizable_conversion");
4860 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4862 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4863 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4864 cost_vec);
4866 else if (modifier == NARROW)
4868 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4869 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4870 cost_vec);
4872 else
4874 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4875 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4876 cost_vec);
4878 interm_types.release ();
4879 return true;
4882 /* Transform. */
4883 if (dump_enabled_p ())
4884 dump_printf_loc (MSG_NOTE, vect_location,
4885 "transform conversion. ncopies = %d.\n", ncopies);
4887 if (op_type == binary_op)
4889 if (CONSTANT_CLASS_P (op0))
4890 op0 = fold_convert (TREE_TYPE (op1), op0);
4891 else if (CONSTANT_CLASS_P (op1))
4892 op1 = fold_convert (TREE_TYPE (op0), op1);
4895 /* In case of multi-step conversion, we first generate conversion operations
4896 to the intermediate types, and then from those types to the final one.
4897 We create vector destinations for the intermediate types (TYPES) received
4898 from supportable_*_operation, and store them in the correct order
4899 for future use in vect_create_vectorized_*_stmts (). */
4900 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4901 vec_dest = vect_create_destination_var (scalar_dest,
4902 (cvt_type && modifier == WIDEN)
4903 ? cvt_type : vectype_out);
4904 vec_dsts.quick_push (vec_dest);
4906 if (multi_step_cvt)
4908 for (i = interm_types.length () - 1;
4909 interm_types.iterate (i, &intermediate_type); i--)
4911 vec_dest = vect_create_destination_var (scalar_dest,
4912 intermediate_type);
4913 vec_dsts.quick_push (vec_dest);
4917 if (cvt_type)
4918 vec_dest = vect_create_destination_var (scalar_dest,
4919 modifier == WIDEN
4920 ? vectype_out : cvt_type);
4922 if (!slp_node)
4924 if (modifier == WIDEN)
4926 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4927 if (op_type == binary_op)
4928 vec_oprnds1.create (1);
4930 else if (modifier == NARROW)
4931 vec_oprnds0.create (
4932 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4934 else if (code == WIDEN_LSHIFT_EXPR)
4935 vec_oprnds1.create (slp_node->vec_stmts_size);
4937 last_oprnd = op0;
4938 prev_stmt_info = NULL;
4939 switch (modifier)
4941 case NONE:
4942 for (j = 0; j < ncopies; j++)
4944 if (j == 0)
4945 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4946 else
4947 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4949 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4951 /* Arguments are ready. Create the new vector stmt. */
4952 if (code1 == CALL_EXPR)
4954 new_stmt = gimple_build_call (decl1, 1, vop0);
4955 new_temp = make_ssa_name (vec_dest, new_stmt);
4956 gimple_call_set_lhs (new_stmt, new_temp);
4958 else
4960 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4961 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4962 new_temp = make_ssa_name (vec_dest, new_stmt);
4963 gimple_assign_set_lhs (new_stmt, new_temp);
4966 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4967 if (slp_node)
4968 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4969 else
4971 if (!prev_stmt_info)
4972 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4973 else
4974 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4975 prev_stmt_info = vinfo_for_stmt (new_stmt);
4979 break;
4981 case WIDEN:
4982 /* In case the vectorization factor (VF) is bigger than the number
4983 of elements that we can fit in a vectype (nunits), we have to
4984 generate more than one vector stmt, i.e., we need to "unroll"
4985 the vector stmt by a factor of VF/nunits. */
4986 for (j = 0; j < ncopies; j++)
4988 /* Handle uses. */
4989 if (j == 0)
4991 if (slp_node)
4993 if (code == WIDEN_LSHIFT_EXPR)
4995 unsigned int k;
4997 vec_oprnd1 = op1;
4998 /* Store vec_oprnd1 for every vector stmt to be created
4999 for SLP_NODE. We check during the analysis that all
5000 the shift arguments are the same. */
5001 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5002 vec_oprnds1.quick_push (vec_oprnd1);
5004 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5005 slp_node);
5007 else
5008 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
5009 &vec_oprnds1, slp_node);
5011 else
5013 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
5014 vec_oprnds0.quick_push (vec_oprnd0);
5015 if (op_type == binary_op)
5017 if (code == WIDEN_LSHIFT_EXPR)
5018 vec_oprnd1 = op1;
5019 else
5020 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
5021 vec_oprnds1.quick_push (vec_oprnd1);
5025 else
5027 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
5028 vec_oprnds0.truncate (0);
5029 vec_oprnds0.quick_push (vec_oprnd0);
5030 if (op_type == binary_op)
5032 if (code == WIDEN_LSHIFT_EXPR)
5033 vec_oprnd1 = op1;
5034 else
5035 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
5036 vec_oprnd1);
5037 vec_oprnds1.truncate (0);
5038 vec_oprnds1.quick_push (vec_oprnd1);
5042 /* Arguments are ready. Create the new vector stmts. */
5043 for (i = multi_step_cvt; i >= 0; i--)
5045 tree this_dest = vec_dsts[i];
5046 enum tree_code c1 = code1, c2 = code2;
5047 if (i == 0 && codecvt2 != ERROR_MARK)
5049 c1 = codecvt1;
5050 c2 = codecvt2;
5052 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5053 &vec_oprnds1,
5054 stmt, this_dest, gsi,
5055 c1, c2, decl1, decl2,
5056 op_type);
5059 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5061 if (cvt_type)
5063 if (codecvt1 == CALL_EXPR)
5065 new_stmt = gimple_build_call (decl1, 1, vop0);
5066 new_temp = make_ssa_name (vec_dest, new_stmt);
5067 gimple_call_set_lhs (new_stmt, new_temp);
5069 else
5071 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5072 new_temp = make_ssa_name (vec_dest);
5073 new_stmt = gimple_build_assign (new_temp, codecvt1,
5074 vop0);
5077 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5079 else
5080 new_stmt = SSA_NAME_DEF_STMT (vop0);
5082 if (slp_node)
5083 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5084 else
5086 if (!prev_stmt_info)
5087 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
5088 else
5089 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5090 prev_stmt_info = vinfo_for_stmt (new_stmt);
5095 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5096 break;
5098 case NARROW:
5099 /* In case the vectorization factor (VF) is bigger than the number
5100 of elements that we can fit in a vectype (nunits), we have to
5101 generate more than one vector stmt, i.e., we need to "unroll"
5102 the vector stmt by a factor of VF/nunits. */
5103 for (j = 0; j < ncopies; j++)
5105 /* Handle uses. */
5106 if (slp_node)
5107 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5108 slp_node);
5109 else
5111 vec_oprnds0.truncate (0);
5112 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
5113 vect_pow2 (multi_step_cvt) - 1);
5116 /* Arguments are ready. Create the new vector stmts. */
5117 if (cvt_type)
5118 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5120 if (codecvt1 == CALL_EXPR)
5122 new_stmt = gimple_build_call (decl1, 1, vop0);
5123 new_temp = make_ssa_name (vec_dest, new_stmt);
5124 gimple_call_set_lhs (new_stmt, new_temp);
5126 else
5128 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5129 new_temp = make_ssa_name (vec_dest);
5130 new_stmt = gimple_build_assign (new_temp, codecvt1,
5131 vop0);
5134 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5135 vec_oprnds0[i] = new_temp;
5138 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5139 stmt, vec_dsts, gsi,
5140 slp_node, code1,
5141 &prev_stmt_info);
5144 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5145 break;
5148 vec_oprnds0.release ();
5149 vec_oprnds1.release ();
5150 interm_types.release ();
5152 return true;
5156 /* Function vectorizable_assignment.
5158 Check if STMT performs an assignment (copy) that can be vectorized.
5159 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5160 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5161 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
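/* E.g. (illustrative) a conversion that only changes the sign, such as

       us_1 = (unsigned short) s_2;

   where both sides use a V8HI-sized vectype, becomes a plain vector copy
   through a VIEW_CONVERT_EXPR of the operand.  */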
5163 static bool
5164 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
5165 gimple **vec_stmt, slp_tree slp_node,
5166 stmt_vector_for_cost *cost_vec)
5168 tree vec_dest;
5169 tree scalar_dest;
5170 tree op;
5171 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5172 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5173 tree new_temp;
5174 gimple *def_stmt;
5175 enum vect_def_type dt[1] = {vect_unknown_def_type};
5176 int ndts = 1;
5177 int ncopies;
5178 int i, j;
5179 vec<tree> vec_oprnds = vNULL;
5180 tree vop;
5181 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5182 vec_info *vinfo = stmt_info->vinfo;
5183 gimple *new_stmt = NULL;
5184 stmt_vec_info prev_stmt_info = NULL;
5185 enum tree_code code;
5186 tree vectype_in;
5188 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5189 return false;
5191 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5192 && ! vec_stmt)
5193 return false;
5195 /* Is vectorizable assignment? */
5196 if (!is_gimple_assign (stmt))
5197 return false;
5199 scalar_dest = gimple_assign_lhs (stmt);
5200 if (TREE_CODE (scalar_dest) != SSA_NAME)
5201 return false;
5203 code = gimple_assign_rhs_code (stmt);
5204 if (gimple_assign_single_p (stmt)
5205 || code == PAREN_EXPR
5206 || CONVERT_EXPR_CODE_P (code))
5207 op = gimple_assign_rhs1 (stmt);
5208 else
5209 return false;
5211 if (code == VIEW_CONVERT_EXPR)
5212 op = TREE_OPERAND (op, 0);
5214 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5215 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5217 /* Multiple types in SLP are handled by creating the appropriate number of
5218 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5219 case of SLP. */
5220 if (slp_node)
5221 ncopies = 1;
5222 else
5223 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5225 gcc_assert (ncopies >= 1);
5227 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
5229 if (dump_enabled_p ())
5230 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5231 "use not simple.\n");
5232 return false;
5235 /* We can handle NOP_EXPR conversions that do not change the number
5236 of elements or the vector size. */
5237 if ((CONVERT_EXPR_CODE_P (code)
5238 || code == VIEW_CONVERT_EXPR)
5239 && (!vectype_in
5240 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5241 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5242 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5243 return false;
5245 /* We do not handle bit-precision changes. */
5246 if ((CONVERT_EXPR_CODE_P (code)
5247 || code == VIEW_CONVERT_EXPR)
5248 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5249 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5250 || !type_has_mode_precision_p (TREE_TYPE (op)))
5251 /* But a conversion that does not change the bit-pattern is ok. */
5252 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5253 > TYPE_PRECISION (TREE_TYPE (op)))
5254 && TYPE_UNSIGNED (TREE_TYPE (op)))
5255 /* Conversion between boolean types of different sizes is
5256 a simple assignment in case their vectypes are the same
5257 boolean vector type. */
5258 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5259 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5261 if (dump_enabled_p ())
5262 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5263 "type conversion to/from bit-precision "
5264 "unsupported.\n");
5265 return false;
5268 if (!vec_stmt) /* transformation not required. */
5270 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5271 DUMP_VECT_SCOPE ("vectorizable_assignment");
5272 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5273 return true;
5276 /* Transform. */
5277 if (dump_enabled_p ())
5278 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5280 /* Handle def. */
5281 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5283 /* Handle use. */
5284 for (j = 0; j < ncopies; j++)
5286 /* Handle uses. */
5287 if (j == 0)
5288 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
5289 else
5290 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
5292 /* Arguments are ready. Create the new vector stmt. */
5293 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5295 if (CONVERT_EXPR_CODE_P (code)
5296 || code == VIEW_CONVERT_EXPR)
5297 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5298 new_stmt = gimple_build_assign (vec_dest, vop);
5299 new_temp = make_ssa_name (vec_dest, new_stmt);
5300 gimple_assign_set_lhs (new_stmt, new_temp);
5301 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5302 if (slp_node)
5303 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5306 if (slp_node)
5307 continue;
5309 if (j == 0)
5310 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5311 else
5312 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5314 prev_stmt_info = vinfo_for_stmt (new_stmt);
5317 vec_oprnds.release ();
5318 return true;
5322 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5323 either as shift by a scalar or by a vector. */
5325 bool
5326 vect_supportable_shift (enum tree_code code, tree scalar_type)
5329 machine_mode vec_mode;
5330 optab optab;
5331 int icode;
5332 tree vectype;
5334 vectype = get_vectype_for_scalar_type (scalar_type);
5335 if (!vectype)
5336 return false;
5338 optab = optab_for_tree_code (code, vectype, optab_scalar);
5339 if (!optab
5340 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5342 optab = optab_for_tree_code (code, vectype, optab_vector);
5343 if (!optab
5344 || (optab_handler (optab, TYPE_MODE (vectype))
5345 == CODE_FOR_nothing))
5346 return false;
5349 vec_mode = TYPE_MODE (vectype);
5350 icode = (int) optab_handler (optab, vec_mode);
5351 if (icode == CODE_FOR_nothing)
5352 return false;
5354 return true;
5358 /* Function vectorizable_shift.
5360 Check if STMT performs a shift operation that can be vectorized.
5361 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5362 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5363 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
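/* Two illustrative shapes of the shift amount handled below:

       x_1 = y_2 << 3;      invariant amount, vector/scalar shift optab
       x_3 = y_4 << z_5;    loop-varying amount, vector/vector shift optab

   the analysis picks whichever form the target actually implements.  */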
5365 static bool
5366 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
5367 gimple **vec_stmt, slp_tree slp_node,
5368 stmt_vector_for_cost *cost_vec)
5370 tree vec_dest;
5371 tree scalar_dest;
5372 tree op0, op1 = NULL;
5373 tree vec_oprnd1 = NULL_TREE;
5374 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5375 tree vectype;
5376 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5377 enum tree_code code;
5378 machine_mode vec_mode;
5379 tree new_temp;
5380 optab optab;
5381 int icode;
5382 machine_mode optab_op2_mode;
5383 gimple *def_stmt;
5384 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5385 int ndts = 2;
5386 gimple *new_stmt = NULL;
5387 stmt_vec_info prev_stmt_info;
5388 poly_uint64 nunits_in;
5389 poly_uint64 nunits_out;
5390 tree vectype_out;
5391 tree op1_vectype;
5392 int ncopies;
5393 int j, i;
5394 vec<tree> vec_oprnds0 = vNULL;
5395 vec<tree> vec_oprnds1 = vNULL;
5396 tree vop0, vop1;
5397 unsigned int k;
5398 bool scalar_shift_arg = true;
5399 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5400 vec_info *vinfo = stmt_info->vinfo;
5402 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5403 return false;
5405 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5406 && ! vec_stmt)
5407 return false;
5409 /* Is STMT a vectorizable binary/unary operation? */
5410 if (!is_gimple_assign (stmt))
5411 return false;
5413 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5414 return false;
5416 code = gimple_assign_rhs_code (stmt);
5418 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5419 || code == RROTATE_EXPR))
5420 return false;
5422 scalar_dest = gimple_assign_lhs (stmt);
5423 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5424 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5426 if (dump_enabled_p ())
5427 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5428 "bit-precision shifts not supported.\n");
5429 return false;
5432 op0 = gimple_assign_rhs1 (stmt);
5433 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5435 if (dump_enabled_p ())
5436 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5437 "use not simple.\n");
5438 return false;
5440 /* If op0 is an external or constant def, use a vector type with
5441 the same size as the output vector type. */
5442 if (!vectype)
5443 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5444 if (vec_stmt)
5445 gcc_assert (vectype);
5446 if (!vectype)
5448 if (dump_enabled_p ())
5449 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5450 "no vectype for scalar type\n");
5451 return false;
5454 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5455 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5456 if (maybe_ne (nunits_out, nunits_in))
5457 return false;
5459 op1 = gimple_assign_rhs2 (stmt);
5460 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
5462 if (dump_enabled_p ())
5463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5464 "use not simple.\n");
5465 return false;
5468 /* Multiple types in SLP are handled by creating the appropriate number of
5469 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5470 case of SLP. */
5471 if (slp_node)
5472 ncopies = 1;
5473 else
5474 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5476 gcc_assert (ncopies >= 1);
5478 /* Determine whether the shift amount is a vector or a scalar. If the
5479 shift/rotate amount is a vector, use the vector/vector shift optabs. */
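/* For example (a source-level sketch, not from a testcase):

     a[i] = b[i] << s;      <-- S loop invariant: scalar shift argument,
                                handled by the vector/scalar optab.
     a[i] = b[i] << c[i];   <-- per-element amount: vector shift argument,
                                handled by the vector/vector optab.

   In SLP the amounts additionally have to agree across the whole group
   for the scalar form to be usable (checked below).  */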
5481 if ((dt[1] == vect_internal_def
5482 || dt[1] == vect_induction_def)
5483 && !slp_node)
5484 scalar_shift_arg = false;
5485 else if (dt[1] == vect_constant_def
5486 || dt[1] == vect_external_def
5487 || dt[1] == vect_internal_def)
5489 /* In SLP we need to check whether the shift count is the same
5490 for all the stmts in the node; in loops, if it is a constant or
5491 invariant, it is always a scalar shift. */
5492 if (slp_node)
5494 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5495 gimple *slpstmt;
5497 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
5498 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5499 scalar_shift_arg = false;
5502 /* If the shift amount is computed by a pattern stmt we cannot
5503 use the scalar amount directly, so give up and use a vector
5504 shift. */
5505 if (dt[1] == vect_internal_def)
5507 gimple *def = SSA_NAME_DEF_STMT (op1);
5508 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5509 scalar_shift_arg = false;
5512 else
5514 if (dump_enabled_p ())
5515 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5516 "operand mode requires invariant argument.\n");
5517 return false;
5520 /* Vector shifted by vector. */
5521 if (!scalar_shift_arg)
5523 optab = optab_for_tree_code (code, vectype, optab_vector);
5524 if (dump_enabled_p ())
5525 dump_printf_loc (MSG_NOTE, vect_location,
5526 "vector/vector shift/rotate found.\n");
5528 if (!op1_vectype)
5529 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5530 if (op1_vectype == NULL_TREE
5531 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5533 if (dump_enabled_p ())
5534 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5535 "unusable type for last operand in"
5536 " vector/vector shift/rotate.\n");
5537 return false;
5540 /* See if the machine has a vector shifted by scalar insn and if not
5541 then see if it has a vector shifted by vector insn. */
5542 else
5544 optab = optab_for_tree_code (code, vectype, optab_scalar);
5545 if (optab
5546 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5548 if (dump_enabled_p ())
5549 dump_printf_loc (MSG_NOTE, vect_location,
5550 "vector/scalar shift/rotate found.\n");
5552 else
5554 optab = optab_for_tree_code (code, vectype, optab_vector);
5555 if (optab
5556 && (optab_handler (optab, TYPE_MODE (vectype))
5557 != CODE_FOR_nothing))
5559 scalar_shift_arg = false;
5561 if (dump_enabled_p ())
5562 dump_printf_loc (MSG_NOTE, vect_location,
5563 "vector/vector shift/rotate found.\n");
5565 /* Unlike the other binary operators, shifts/rotates have an int
5566 rhs rather than one of the same type as the lhs, so make sure
5567 the scalar has the right type if we are dealing with vectors
5568 of long long/long/short/char. */
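/* E.g. (a sketch): for a short destination shifted as  d = s << 3  with a
   V8HI vectype, the int constant 3 is folded to (short) 3 below so that
   the elements of the shift-amount vector have the same mode as the
   elements of the shifted vector.  */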
5569 if (dt[1] == vect_constant_def)
5570 op1 = fold_convert (TREE_TYPE (vectype), op1);
5571 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5572 TREE_TYPE (op1)))
5574 if (slp_node
5575 && TYPE_MODE (TREE_TYPE (vectype))
5576 != TYPE_MODE (TREE_TYPE (op1)))
5578 if (dump_enabled_p ())
5579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5580 "unusable type for last operand in"
5581 " vector/vector shift/rotate.\n");
5582 return false;
5584 if (vec_stmt && !slp_node)
5586 op1 = fold_convert (TREE_TYPE (vectype), op1);
5587 op1 = vect_init_vector (stmt, op1,
5588 TREE_TYPE (vectype), NULL);
5595 /* Supportable by target? */
5596 if (!optab)
5598 if (dump_enabled_p ())
5599 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5600 "no optab.\n");
5601 return false;
5603 vec_mode = TYPE_MODE (vectype);
5604 icode = (int) optab_handler (optab, vec_mode);
5605 if (icode == CODE_FOR_nothing)
5607 if (dump_enabled_p ())
5608 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5609 "op not supported by target.\n");
5610 /* Check only during analysis. */
5611 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5612 || (!vec_stmt
5613 && !vect_worthwhile_without_simd_p (vinfo, code)))
5614 return false;
5615 if (dump_enabled_p ())
5616 dump_printf_loc (MSG_NOTE, vect_location,
5617 "proceeding using word mode.\n");
5620 /* Worthwhile without SIMD support? Check only during analysis. */
5621 if (!vec_stmt
5622 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5623 && !vect_worthwhile_without_simd_p (vinfo, code))
5625 if (dump_enabled_p ())
5626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5627 "not worthwhile without SIMD support.\n");
5628 return false;
5631 if (!vec_stmt) /* transformation not required. */
5633 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5634 DUMP_VECT_SCOPE ("vectorizable_shift");
5635 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5636 return true;
5639 /* Transform. */
5641 if (dump_enabled_p ())
5642 dump_printf_loc (MSG_NOTE, vect_location,
5643 "transform binary/unary operation.\n");
5645 /* Handle def. */
5646 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5648 prev_stmt_info = NULL;
5649 for (j = 0; j < ncopies; j++)
5651 /* Handle uses. */
5652 if (j == 0)
5654 if (scalar_shift_arg)
5656 /* Vector shl and shr insn patterns can be defined with scalar
5657 operand 2 (shift operand). In this case, use constant or loop
5658 invariant op1 directly, without extending it to vector mode
5659 first. */
5660 optab_op2_mode = insn_data[icode].operand[2].mode;
5661 if (!VECTOR_MODE_P (optab_op2_mode))
5663 if (dump_enabled_p ())
5664 dump_printf_loc (MSG_NOTE, vect_location,
5665 "operand 1 using scalar mode.\n");
5666 vec_oprnd1 = op1;
5667 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5668 vec_oprnds1.quick_push (vec_oprnd1);
5669 if (slp_node)
5671 /* Store vec_oprnd1 for every vector stmt to be created
5672 for SLP_NODE. We check during the analysis that all
5673 the shift arguments are the same.
5674 TODO: Allow different constants for different vector
5675 stmts generated for an SLP instance. */
5676 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5677 vec_oprnds1.quick_push (vec_oprnd1);
5682 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5683 (a special case for certain kinds of vector shifts); otherwise,
5684 operand 1 should be of a vector type (the usual case). */
5685 if (vec_oprnd1)
5686 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5687 slp_node);
5688 else
5689 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5690 slp_node);
5692 else
5693 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5695 /* Arguments are ready. Create the new vector stmt. */
5696 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5698 vop1 = vec_oprnds1[i];
5699 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5700 new_temp = make_ssa_name (vec_dest, new_stmt);
5701 gimple_assign_set_lhs (new_stmt, new_temp);
5702 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5703 if (slp_node)
5704 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5707 if (slp_node)
5708 continue;
5710 if (j == 0)
5711 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5712 else
5713 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5714 prev_stmt_info = vinfo_for_stmt (new_stmt);
5717 vec_oprnds0.release ();
5718 vec_oprnds1.release ();
5720 return true;
5724 /* Function vectorizable_operation.
5726 Check if STMT performs a binary, unary or ternary operation that can
5727 be vectorized.
5728 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5729 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5730 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5732 static bool
5733 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5734 gimple **vec_stmt, slp_tree slp_node,
5735 stmt_vector_for_cost *cost_vec)
5737 tree vec_dest;
5738 tree scalar_dest;
5739 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5740 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5741 tree vectype;
5742 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5743 enum tree_code code, orig_code;
5744 machine_mode vec_mode;
5745 tree new_temp;
5746 int op_type;
5747 optab optab;
5748 bool target_support_p;
5749 gimple *def_stmt;
5750 enum vect_def_type dt[3]
5751 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5752 int ndts = 3;
5753 gimple *new_stmt = NULL;
5754 stmt_vec_info prev_stmt_info;
5755 poly_uint64 nunits_in;
5756 poly_uint64 nunits_out;
5757 tree vectype_out;
5758 int ncopies;
5759 int j, i;
5760 vec<tree> vec_oprnds0 = vNULL;
5761 vec<tree> vec_oprnds1 = vNULL;
5762 vec<tree> vec_oprnds2 = vNULL;
5763 tree vop0, vop1, vop2;
5764 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5765 vec_info *vinfo = stmt_info->vinfo;
5767 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5768 return false;
5770 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5771 && ! vec_stmt)
5772 return false;
5774 /* Is STMT a vectorizable binary/unary operation? */
5775 if (!is_gimple_assign (stmt))
5776 return false;
5778 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5779 return false;
5781 orig_code = code = gimple_assign_rhs_code (stmt);
5783 /* For pointer addition and subtraction, we should use the normal
5784 plus and minus for the vector operation. */
5785 if (code == POINTER_PLUS_EXPR)
5786 code = PLUS_EXPR;
5787 if (code == POINTER_DIFF_EXPR)
5788 code = MINUS_EXPR;
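/* E.g. (illustration only):

     p_2 = p_1 + 4;        <-- POINTER_PLUS_EXPR, vectorized as PLUS_EXPR
     d_3 = p_1 - q_1;      <-- POINTER_DIFF_EXPR, vectorized as MINUS_EXPR

   on vectors whose elements are the unsigned integer view of the
   pointers; see the POINTER_DIFF_EXPR handling further down.  */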
5790 /* Support only unary or binary operations. */
5791 op_type = TREE_CODE_LENGTH (code);
5792 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5794 if (dump_enabled_p ())
5795 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5796 "num. args = %d (not unary/binary/ternary op).\n",
5797 op_type);
5798 return false;
5801 scalar_dest = gimple_assign_lhs (stmt);
5802 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5804 /* Most operations cannot handle bit-precision types without extra
5805 truncations. */
5806 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5807 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5808 /* The exceptions are bitwise binary operations. */
5809 && code != BIT_IOR_EXPR
5810 && code != BIT_XOR_EXPR
5811 && code != BIT_AND_EXPR)
5813 if (dump_enabled_p ())
5814 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5815 "bit-precision arithmetic not supported.\n");
5816 return false;
5819 op0 = gimple_assign_rhs1 (stmt);
5820 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5822 if (dump_enabled_p ())
5823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5824 "use not simple.\n");
5825 return false;
5827 /* If op0 is an external or constant def, use a vector type with
5828 the same size as the output vector type. */
5829 if (!vectype)
5831 /* For a boolean type we cannot determine the vectype from an
5832 invariant value (we don't know whether it is a vector
5833 of booleans or a vector of integers). We use the output
5834 vectype because operations on booleans don't change the
5835 type. */
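/* E.g. (a sketch): for  res_2 = b_1 & flag  where FLAG is a loop-invariant
   _Bool, the invariant operand alone does not tell us whether a vector of
   booleans or a vector of integers is wanted, so the output vectype is
   reused for it.  */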
5836 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5838 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5840 if (dump_enabled_p ())
5841 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5842 "not supported operation on bool value.\n");
5843 return false;
5845 vectype = vectype_out;
5847 else
5848 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5850 if (vec_stmt)
5851 gcc_assert (vectype);
5852 if (!vectype)
5854 if (dump_enabled_p ())
5856 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5857 "no vectype for scalar type ");
5858 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5859 TREE_TYPE (op0));
5860 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5863 return false;
5866 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5867 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5868 if (maybe_ne (nunits_out, nunits_in))
5869 return false;
5871 if (op_type == binary_op || op_type == ternary_op)
5873 op1 = gimple_assign_rhs2 (stmt);
5874 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5876 if (dump_enabled_p ())
5877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5878 "use not simple.\n");
5879 return false;
5882 if (op_type == ternary_op)
5884 op2 = gimple_assign_rhs3 (stmt);
5885 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5887 if (dump_enabled_p ())
5888 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5889 "use not simple.\n");
5890 return false;
5894 /* Multiple types in SLP are handled by creating the appropriate number of
5895 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5896 case of SLP. */
5897 if (slp_node)
5898 ncopies = 1;
5899 else
5900 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5902 gcc_assert (ncopies >= 1);
5904 /* Shifts are handled in vectorizable_shift (). */
5905 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5906 || code == RROTATE_EXPR)
5907 return false;
5909 /* Supportable by target? */
5911 vec_mode = TYPE_MODE (vectype);
5912 if (code == MULT_HIGHPART_EXPR)
5913 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5914 else
5916 optab = optab_for_tree_code (code, vectype, optab_default);
5917 if (!optab)
5919 if (dump_enabled_p ())
5920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5921 "no optab.\n");
5922 return false;
5924 target_support_p = (optab_handler (optab, vec_mode)
5925 != CODE_FOR_nothing);
5928 if (!target_support_p)
5930 if (dump_enabled_p ())
5931 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5932 "op not supported by target.\n");
5933 /* Check only during analysis. */
5934 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5935 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5936 return false;
5937 if (dump_enabled_p ())
5938 dump_printf_loc (MSG_NOTE, vect_location,
5939 "proceeding using word mode.\n");
5942 /* Worthwhile without SIMD support? Check only during analysis. */
5943 if (!VECTOR_MODE_P (vec_mode)
5944 && !vec_stmt
5945 && !vect_worthwhile_without_simd_p (vinfo, code))
5947 if (dump_enabled_p ())
5948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5949 "not worthwhile without SIMD support.\n");
5950 return false;
5953 if (!vec_stmt) /* transformation not required. */
5955 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5956 DUMP_VECT_SCOPE ("vectorizable_operation");
5957 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5958 return true;
5961 /* Transform. */
5963 if (dump_enabled_p ())
5964 dump_printf_loc (MSG_NOTE, vect_location,
5965 "transform binary/unary operation.\n");
5967 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5968 vectors with unsigned elements, but the result is signed. So, we
5969 need to compute the MINUS_EXPR into a vectype temporary and
5970 VIEW_CONVERT_EXPR it into the final vectype_out result. */
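/* Sketch of what is generated below (SSA names are illustrative only):

     vect_diff = vect_p - vect_q;                            <-- unsigned VECTYPE
     vect_res = VIEW_CONVERT_EXPR<vectype_out> (vect_diff);  <-- signed result  */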
5971 tree vec_cvt_dest = NULL_TREE;
5972 if (orig_code == POINTER_DIFF_EXPR)
5974 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5975 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5977 /* Handle def. */
5978 else
5979 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5981 /* In case the vectorization factor (VF) is bigger than the number
5982 of elements that we can fit in a vectype (nunits), we have to generate
5983 more than one vector stmt, i.e., we need to "unroll" the
5984 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5985 from one copy of the vector stmt to the next, in the field
5986 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5987 stages to find the correct vector defs to be used when vectorizing
5988 stmts that use the defs of the current stmt. The example below
5989 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5990 we need to create 4 vectorized stmts):
5992 before vectorization:
5993 RELATED_STMT VEC_STMT
5994 S1: x = memref - -
5995 S2: z = x + 1 - -
5997 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5998 there):
5999 RELATED_STMT VEC_STMT
6000 VS1_0: vx0 = memref0 VS1_1 -
6001 VS1_1: vx1 = memref1 VS1_2 -
6002 VS1_2: vx2 = memref2 VS1_3 -
6003 VS1_3: vx3 = memref3 - -
6004 S1: x = load - VS1_0
6005 S2: z = x + 1 - -
6007 step2: vectorize stmt S2 (done here):
6008 To vectorize stmt S2 we first need to find the relevant vector
6009 def for the first operand 'x'. This is, as usual, obtained from
6010 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6011 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6012 relevant vector def 'vx0'. Having found 'vx0' we can generate
6013 the vector stmt VS2_0, and as usual, record it in the
6014 STMT_VINFO_VEC_STMT of stmt S2.
6015 When creating the second copy (VS2_1), we obtain the relevant vector
6016 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6017 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6018 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6019 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6020 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6021 chain of stmts and pointers:
6022 RELATED_STMT VEC_STMT
6023 VS1_0: vx0 = memref0 VS1_1 -
6024 VS1_1: vx1 = memref1 VS1_2 -
6025 VS1_2: vx2 = memref2 VS1_3 -
6026 VS1_3: vx3 = memref3 - -
6027 S1: x = load - VS1_0
6028 VS2_0: vz0 = vx0 + v1 VS2_1 -
6029 VS2_1: vz1 = vx1 + v1 VS2_2 -
6030 VS2_2: vz2 = vx2 + v1 VS2_3 -
6031 VS2_3: vz3 = vx3 + v1 - -
6032 S2: z = x + 1 - VS2_0 */
6034 prev_stmt_info = NULL;
6035 for (j = 0; j < ncopies; j++)
6037 /* Handle uses. */
6038 if (j == 0)
6040 if (op_type == binary_op)
6041 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
6042 slp_node);
6043 else if (op_type == ternary_op)
6045 if (slp_node)
6047 auto_vec<tree> ops(3);
6048 ops.quick_push (op0);
6049 ops.quick_push (op1);
6050 ops.quick_push (op2);
6051 auto_vec<vec<tree> > vec_defs(3);
6052 vect_get_slp_defs (ops, slp_node, &vec_defs);
6053 vec_oprnds0 = vec_defs[0];
6054 vec_oprnds1 = vec_defs[1];
6055 vec_oprnds2 = vec_defs[2];
6057 else
6059 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
6060 NULL);
6061 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
6062 NULL);
6065 else
6066 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
6067 slp_node);
6069 else
6071 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
6072 if (op_type == ternary_op)
6074 tree vec_oprnd = vec_oprnds2.pop ();
6075 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
6076 vec_oprnd));
6080 /* Arguments are ready. Create the new vector stmt. */
6081 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6083 vop1 = ((op_type == binary_op || op_type == ternary_op)
6084 ? vec_oprnds1[i] : NULL_TREE);
6085 vop2 = ((op_type == ternary_op)
6086 ? vec_oprnds2[i] : NULL_TREE);
6087 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6088 new_temp = make_ssa_name (vec_dest, new_stmt);
6089 gimple_assign_set_lhs (new_stmt, new_temp);
6090 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6091 if (vec_cvt_dest)
6093 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6094 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6095 new_temp);
6096 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6097 gimple_assign_set_lhs (new_stmt, new_temp);
6098 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6100 if (slp_node)
6101 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6104 if (slp_node)
6105 continue;
6107 if (j == 0)
6108 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6109 else
6110 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6111 prev_stmt_info = vinfo_for_stmt (new_stmt);
6114 vec_oprnds0.release ();
6115 vec_oprnds1.release ();
6116 vec_oprnds2.release ();
6118 return true;
6121 /* A helper function to ensure data reference DR's base alignment. */
6123 static void
6124 ensure_base_align (struct data_reference *dr)
6126 if (DR_VECT_AUX (dr)->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6127 return;
6129 if (DR_VECT_AUX (dr)->base_misaligned)
6131 tree base_decl = DR_VECT_AUX (dr)->base_decl;
6133 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6135 if (decl_in_symtab_p (base_decl))
6136 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6137 else
6139 SET_DECL_ALIGN (base_decl, align_base_to);
6140 DECL_USER_ALIGN (base_decl) = 1;
6142 DR_VECT_AUX (dr)->base_misaligned = false;
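/* E.g. (illustration): if the vectorizer assumed DR_TARGET_ALIGNMENT-aligned
   accesses to  static int a[N];  whose declared alignment is smaller, the
   alignment of the decl (or of its symtab node) is raised here so that the
   assumption actually holds at run time.  */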
6147 /* Function get_group_alias_ptr_type.
6149 Return the alias type for the group starting at FIRST_STMT. */
6151 static tree
6152 get_group_alias_ptr_type (gimple *first_stmt)
6154 struct data_reference *first_dr, *next_dr;
6155 gimple *next_stmt;
6157 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6158 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
6159 while (next_stmt)
6161 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
6162 if (get_alias_set (DR_REF (first_dr))
6163 != get_alias_set (DR_REF (next_dr)))
6165 if (dump_enabled_p ())
6166 dump_printf_loc (MSG_NOTE, vect_location,
6167 "conflicting alias set types.\n");
6168 return ptr_type_node;
6170 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6172 return reference_alias_ptr_type (DR_REF (first_dr));
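/* E.g. (illustration): a group interleaving stores to an int field and a
   float field of the same struct has members with different alias sets, so
   ptr_type_node (which aliases everything) is used for the whole group;
   otherwise the alias pointer type of the first reference is precise
   enough.  */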
6176 /* Function vectorizable_store.
6178 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
6179 can be vectorized.
6180 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6181 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6182 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6184 static bool
6185 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6186 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
6188 tree data_ref;
6189 tree op;
6190 tree vec_oprnd = NULL_TREE;
6191 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6192 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6193 tree elem_type;
6194 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6195 struct loop *loop = NULL;
6196 machine_mode vec_mode;
6197 tree dummy;
6198 enum dr_alignment_support alignment_support_scheme;
6199 gimple *def_stmt;
6200 enum vect_def_type rhs_dt = vect_unknown_def_type;
6201 enum vect_def_type mask_dt = vect_unknown_def_type;
6202 stmt_vec_info prev_stmt_info = NULL;
6203 tree dataref_ptr = NULL_TREE;
6204 tree dataref_offset = NULL_TREE;
6205 gimple *ptr_incr = NULL;
6206 int ncopies;
6207 int j;
6208 gimple *next_stmt, *first_stmt;
6209 bool grouped_store;
6210 unsigned int group_size, i;
6211 vec<tree> oprnds = vNULL;
6212 vec<tree> result_chain = vNULL;
6213 bool inv_p;
6214 tree offset = NULL_TREE;
6215 vec<tree> vec_oprnds = vNULL;
6216 bool slp = (slp_node != NULL);
6217 unsigned int vec_num;
6218 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6219 vec_info *vinfo = stmt_info->vinfo;
6220 tree aggr_type;
6221 gather_scatter_info gs_info;
6222 gimple *new_stmt;
6223 poly_uint64 vf;
6224 vec_load_store_type vls_type;
6225 tree ref_type;
6227 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6228 return false;
6230 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6231 && ! vec_stmt)
6232 return false;
6234 /* Is vectorizable store? */
6236 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6237 if (is_gimple_assign (stmt))
6239 tree scalar_dest = gimple_assign_lhs (stmt);
6240 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6241 && is_pattern_stmt_p (stmt_info))
6242 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6243 if (TREE_CODE (scalar_dest) != ARRAY_REF
6244 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6245 && TREE_CODE (scalar_dest) != INDIRECT_REF
6246 && TREE_CODE (scalar_dest) != COMPONENT_REF
6247 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6248 && TREE_CODE (scalar_dest) != REALPART_EXPR
6249 && TREE_CODE (scalar_dest) != MEM_REF)
6250 return false;
6252 else
6254 gcall *call = dyn_cast <gcall *> (stmt);
6255 if (!call || !gimple_call_internal_p (call))
6256 return false;
6258 internal_fn ifn = gimple_call_internal_fn (call);
6259 if (!internal_store_fn_p (ifn))
6260 return false;
6262 if (slp_node != NULL)
6264 if (dump_enabled_p ())
6265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6266 "SLP of masked stores not supported.\n");
6267 return false;
6270 int mask_index = internal_fn_mask_index (ifn);
6271 if (mask_index >= 0)
6273 mask = gimple_call_arg (call, mask_index);
6274 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6275 &mask_vectype))
6276 return false;
6280 op = vect_get_store_rhs (stmt);
6282 /* Cannot have hybrid store SLP -- that would mean storing to the
6283 same location twice. */
6284 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6286 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6287 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6289 if (loop_vinfo)
6291 loop = LOOP_VINFO_LOOP (loop_vinfo);
6292 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6294 else
6295 vf = 1;
6297 /* Multiple types in SLP are handled by creating the appropriate number of
6298 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6299 case of SLP. */
6300 if (slp)
6301 ncopies = 1;
6302 else
6303 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6305 gcc_assert (ncopies >= 1);
6307 /* FORNOW. This restriction should be relaxed. */
6308 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6310 if (dump_enabled_p ())
6311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6312 "multiple types in nested loop.\n");
6313 return false;
6316 if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
6317 return false;
6319 elem_type = TREE_TYPE (vectype);
6320 vec_mode = TYPE_MODE (vectype);
6322 if (!STMT_VINFO_DATA_REF (stmt_info))
6323 return false;
6325 vect_memory_access_type memory_access_type;
6326 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
6327 &memory_access_type, &gs_info))
6328 return false;
6330 if (mask)
6332 if (memory_access_type == VMAT_CONTIGUOUS)
6334 if (!VECTOR_MODE_P (vec_mode)
6335 || !can_vec_mask_load_store_p (vec_mode,
6336 TYPE_MODE (mask_vectype), false))
6337 return false;
6339 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6340 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
6342 if (dump_enabled_p ())
6343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6344 "unsupported access type for masked store.\n");
6345 return false;
6348 else
6350 /* FORNOW. In some cases we can vectorize even if the data type is
6351 not supported (e.g. array initialization with 0). */
6352 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6353 return false;
6356 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6357 && memory_access_type != VMAT_GATHER_SCATTER
6358 && (slp || memory_access_type != VMAT_CONTIGUOUS));
6359 if (grouped_store)
6361 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
6362 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6363 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
6365 else
6367 first_stmt = stmt;
6368 first_dr = dr;
6369 group_size = vec_num = 1;
6372 if (!vec_stmt) /* transformation not required. */
6374 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6376 if (loop_vinfo
6377 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6378 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6379 memory_access_type, &gs_info);
6381 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6382 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6383 vls_type, slp_node, cost_vec);
6384 return true;
6386 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6388 /* Transform. */
6390 ensure_base_align (dr);
6392 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6394 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6395 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6396 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6397 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6398 edge pe = loop_preheader_edge (loop);
6399 gimple_seq seq;
6400 basic_block new_bb;
6401 enum { NARROW, NONE, WIDEN } modifier;
6402 poly_uint64 scatter_off_nunits
6403 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6405 if (known_eq (nunits, scatter_off_nunits))
6406 modifier = NONE;
6407 else if (known_eq (nunits * 2, scatter_off_nunits))
6409 modifier = WIDEN;
6411 /* Currently gathers and scatters are only supported for
6412 fixed-length vectors. */
6413 unsigned int count = scatter_off_nunits.to_constant ();
6414 vec_perm_builder sel (count, count, 1);
6415 for (i = 0; i < (unsigned int) count; ++i)
6416 sel.quick_push (i | (count / 2));
6418 vec_perm_indices indices (sel, 1, count);
6419 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6420 indices);
6421 gcc_assert (perm_mask != NULL_TREE);
6423 else if (known_eq (nunits, scatter_off_nunits * 2))
6425 modifier = NARROW;
6427 /* Currently gathers and scatters are only supported for
6428 fixed-length vectors. */
6429 unsigned int count = nunits.to_constant ();
6430 vec_perm_builder sel (count, count, 1);
6431 for (i = 0; i < (unsigned int) count; ++i)
6432 sel.quick_push (i | (count / 2));
6434 vec_perm_indices indices (sel, 2, count);
6435 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6436 gcc_assert (perm_mask != NULL_TREE);
6437 ncopies *= 2;
6439 else
6440 gcc_unreachable ();
6442 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6443 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6444 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6445 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6446 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6447 scaletype = TREE_VALUE (arglist);
6449 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6450 && TREE_CODE (rettype) == VOID_TYPE);
6452 ptr = fold_convert (ptrtype, gs_info.base);
6453 if (!is_gimple_min_invariant (ptr))
6455 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6456 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6457 gcc_assert (!new_bb);
6460 /* Currently we support only unconditional scatter stores,
6461 so the mask should be all ones. */
6462 mask = build_int_cst (masktype, -1);
6463 mask = vect_init_vector (stmt, mask, masktype, NULL);
6465 scale = build_int_cst (scaletype, gs_info.scale);
6467 prev_stmt_info = NULL;
6468 for (j = 0; j < ncopies; ++j)
6470 if (j == 0)
6472 src = vec_oprnd1
6473 = vect_get_vec_def_for_operand (op, stmt);
6474 op = vec_oprnd0
6475 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6477 else if (modifier != NONE && (j & 1))
6479 if (modifier == WIDEN)
6481 src = vec_oprnd1
6482 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6483 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6484 stmt, gsi);
6486 else if (modifier == NARROW)
6488 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6489 stmt, gsi);
6490 op = vec_oprnd0
6491 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6492 vec_oprnd0);
6494 else
6495 gcc_unreachable ();
6497 else
6499 src = vec_oprnd1
6500 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6501 op = vec_oprnd0
6502 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6503 vec_oprnd0);
6506 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6508 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6509 TYPE_VECTOR_SUBPARTS (srctype)));
6510 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6511 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6512 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6513 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6514 src = var;
6517 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6519 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6520 TYPE_VECTOR_SUBPARTS (idxtype)));
6521 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6522 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6523 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6524 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6525 op = var;
6528 new_stmt
6529 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
6531 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6533 if (prev_stmt_info == NULL)
6534 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6535 else
6536 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6537 prev_stmt_info = vinfo_for_stmt (new_stmt);
6539 return true;
6542 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6544 gimple *group_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
6545 DR_GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
6548 if (grouped_store)
6550 /* FORNOW */
6551 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
6553 /* We vectorize all the stmts of the interleaving group when we
6554 reach the last stmt in the group. */
6555 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6556 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
6557 && !slp)
6559 *vec_stmt = NULL;
6560 return true;
6563 if (slp)
6565 grouped_store = false;
6566 /* VEC_NUM is the number of vect stmts to be created for this
6567 group. */
6568 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6569 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6570 gcc_assert (DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
6571 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6572 op = vect_get_store_rhs (first_stmt);
6574 else
6575 /* VEC_NUM is the number of vect stmts to be created for this
6576 group. */
6577 vec_num = group_size;
6579 ref_type = get_group_alias_ptr_type (first_stmt);
6581 else
6582 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6584 if (dump_enabled_p ())
6585 dump_printf_loc (MSG_NOTE, vect_location,
6586 "transform store. ncopies = %d\n", ncopies);
6588 if (memory_access_type == VMAT_ELEMENTWISE
6589 || memory_access_type == VMAT_STRIDED_SLP)
6591 gimple_stmt_iterator incr_gsi;
6592 bool insert_after;
6593 gimple *incr;
6594 tree offvar;
6595 tree ivstep;
6596 tree running_off;
6597 tree stride_base, stride_step, alias_off;
6598 tree vec_oprnd;
6599 unsigned int g;
6600 /* Checked by get_load_store_type. */
6601 unsigned int const_nunits = nunits.to_constant ();
6603 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6604 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6606 stride_base
6607 = fold_build_pointer_plus
6608 (DR_BASE_ADDRESS (first_dr),
6609 size_binop (PLUS_EXPR,
6610 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6611 convert_to_ptrofftype (DR_INIT (first_dr))));
6612 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6614 /* For a store with loop-invariant (but other than power-of-2)
6615 stride (i.e. not a grouped access) like so:
6617 for (i = 0; i < n; i += stride)
6618 array[i] = ...;
6620 we generate a new induction variable and new stores from
6621 the components of the (vectorized) rhs:
6623 for (j = 0; ; j += VF*stride)
6624 vectemp = ...;
6625 tmp1 = vectemp[0];
6626 array[j] = tmp1;
6627 tmp2 = vectemp[1];
6628 array[j + stride] = tmp2;
6632 unsigned nstores = const_nunits;
6633 unsigned lnel = 1;
6634 tree ltype = elem_type;
6635 tree lvectype = vectype;
6636 if (slp)
6638 if (group_size < const_nunits
6639 && const_nunits % group_size == 0)
6641 nstores = const_nunits / group_size;
6642 lnel = group_size;
6643 ltype = build_vector_type (elem_type, group_size);
6644 lvectype = vectype;
6646 /* First check whether the vec_extract optab can extract the
6647 vector elts directly; if not, try the fallbacks below. */
6648 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6649 machine_mode vmode;
6650 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6651 || !VECTOR_MODE_P (vmode)
6652 || !targetm.vector_mode_supported_p (vmode)
6653 || (convert_optab_handler (vec_extract_optab,
6654 TYPE_MODE (vectype), vmode)
6655 == CODE_FOR_nothing))
6657 /* Try to avoid emitting extracts of vector elements by instead
6658 performing the extracts using an integer type of the same
6659 size, extracting from a vector of those and then
6660 re-interpreting the result as the original vector type, if
6661 the target supports that. */
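/* E.g. (a sketch): storing groups of two SFmode elements from a V4SF
   vector when V2SF extraction is not supported; the V4SF value can be
   viewed as V2DI instead and DImode elements extracted and stored,
   halving the number of stores.  */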
6662 unsigned lsize
6663 = group_size * GET_MODE_BITSIZE (elmode);
6664 elmode = int_mode_for_size (lsize, 0).require ();
6665 unsigned int lnunits = const_nunits / group_size;
6666 /* If we can't construct such a vector fall back to
6667 element extracts from the original vector type and
6668 element size stores. */
6669 if (mode_for_vector (elmode, lnunits).exists (&vmode)
6670 && VECTOR_MODE_P (vmode)
6671 && targetm.vector_mode_supported_p (vmode)
6672 && (convert_optab_handler (vec_extract_optab,
6673 vmode, elmode)
6674 != CODE_FOR_nothing))
6676 nstores = lnunits;
6677 lnel = group_size;
6678 ltype = build_nonstandard_integer_type (lsize, 1);
6679 lvectype = build_vector_type (ltype, nstores);
6681 /* Else fall back to vector extraction anyway.
6682 Fewer stores are more important than avoiding spilling
6683 of the vector we extract from. Compared to the
6684 construction case in vectorizable_load no store-forwarding
6685 issue exists here for reasonable archs. */
6688 else if (group_size >= const_nunits
6689 && group_size % const_nunits == 0)
6691 nstores = 1;
6692 lnel = const_nunits;
6693 ltype = vectype;
6694 lvectype = vectype;
6696 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6697 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6700 ivstep = stride_step;
6701 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6702 build_int_cst (TREE_TYPE (ivstep), vf));
6704 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6706 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6707 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6708 create_iv (stride_base, ivstep, NULL,
6709 loop, &incr_gsi, insert_after,
6710 &offvar, NULL);
6711 incr = gsi_stmt (incr_gsi);
6712 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6714 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6716 prev_stmt_info = NULL;
6717 alias_off = build_int_cst (ref_type, 0);
6718 next_stmt = first_stmt;
6719 for (g = 0; g < group_size; g++)
6721 running_off = offvar;
6722 if (g)
6724 tree size = TYPE_SIZE_UNIT (ltype);
6725 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6726 size);
6727 tree newoff = copy_ssa_name (running_off, NULL);
6728 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6729 running_off, pos);
6730 vect_finish_stmt_generation (stmt, incr, gsi);
6731 running_off = newoff;
6733 unsigned int group_el = 0;
6734 unsigned HOST_WIDE_INT
6735 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6736 for (j = 0; j < ncopies; j++)
6738 /* We've set op and dt above, from vect_get_store_rhs,
6739 and first_stmt == stmt. */
6740 if (j == 0)
6742 if (slp)
6744 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6745 slp_node);
6746 vec_oprnd = vec_oprnds[0];
6748 else
6750 op = vect_get_store_rhs (next_stmt);
6751 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6754 else
6756 if (slp)
6757 vec_oprnd = vec_oprnds[j];
6758 else
6760 vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
6761 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
6762 vec_oprnd);
6765 /* Pun the vector to extract from if necessary. */
6766 if (lvectype != vectype)
6768 tree tem = make_ssa_name (lvectype);
6769 gimple *pun
6770 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6771 lvectype, vec_oprnd));
6772 vect_finish_stmt_generation (stmt, pun, gsi);
6773 vec_oprnd = tem;
6775 for (i = 0; i < nstores; i++)
6777 tree newref, newoff;
6778 gimple *incr, *assign;
6779 tree size = TYPE_SIZE (ltype);
6780 /* Extract the i'th component. */
6781 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6782 bitsize_int (i), size);
6783 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6784 size, pos);
6786 elem = force_gimple_operand_gsi (gsi, elem, true,
6787 NULL_TREE, true,
6788 GSI_SAME_STMT);
6790 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6791 group_el * elsz);
6792 newref = build2 (MEM_REF, ltype,
6793 running_off, this_off);
6794 vect_copy_ref_info (newref, DR_REF (first_dr));
6796 /* And store it to *running_off. */
6797 assign = gimple_build_assign (newref, elem);
6798 vect_finish_stmt_generation (stmt, assign, gsi);
6800 group_el += lnel;
6801 if (! slp
6802 || group_el == group_size)
6804 newoff = copy_ssa_name (running_off, NULL);
6805 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6806 running_off, stride_step);
6807 vect_finish_stmt_generation (stmt, incr, gsi);
6809 running_off = newoff;
6810 group_el = 0;
6812 if (g == group_size - 1
6813 && !slp)
6815 if (j == 0 && i == 0)
6816 STMT_VINFO_VEC_STMT (stmt_info)
6817 = *vec_stmt = assign;
6818 else
6819 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6820 prev_stmt_info = vinfo_for_stmt (assign);
6824 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6825 if (slp)
6826 break;
6829 vec_oprnds.release ();
6830 return true;
6833 auto_vec<tree> dr_chain (group_size);
6834 oprnds.create (group_size);
6836 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6837 gcc_assert (alignment_support_scheme);
6838 vec_loop_masks *loop_masks
6839 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6840 ? &LOOP_VINFO_MASKS (loop_vinfo)
6841 : NULL);
6842 /* Targets with store-lane instructions must not require explicit
6843 realignment. vect_supportable_dr_alignment always returns either
6844 dr_aligned or dr_unaligned_supported for masked operations. */
6845 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6846 && !mask
6847 && !loop_masks)
6848 || alignment_support_scheme == dr_aligned
6849 || alignment_support_scheme == dr_unaligned_supported);
6851 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6852 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6853 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6855 tree bump;
6856 tree vec_offset = NULL_TREE;
6857 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6859 aggr_type = NULL_TREE;
6860 bump = NULL_TREE;
6862 else if (memory_access_type == VMAT_GATHER_SCATTER)
6864 aggr_type = elem_type;
6865 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
6866 &bump, &vec_offset);
6868 else
6870 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6871 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6872 else
6873 aggr_type = vectype;
6874 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6877 if (mask)
6878 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6880 /* In case the vectorization factor (VF) is bigger than the number
6881 of elements that we can fit in a vectype (nunits), we have to generate
6882 more than one vector stmt, i.e., we need to "unroll" the
6883 vector stmt by a factor VF/nunits. For more details see documentation in
6884 vect_get_vec_def_for_copy_stmt. */
6886 /* In case of interleaving (non-unit grouped access):
6888 S1: &base + 2 = x2
6889 S2: &base = x0
6890 S3: &base + 1 = x1
6891 S4: &base + 3 = x3
6893 We create vectorized stores starting from the base address (the access
6894 of the first stmt in the chain, S2 in the above example) when the last
6895 store stmt of the chain (S4) is reached:
6897 VS1: &base = vx2
6898 VS2: &base + vec_size*1 = vx0
6899 VS3: &base + vec_size*2 = vx1
6900 VS4: &base + vec_size*3 = vx3
6902 Then permutation statements are generated:
6904 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6905 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6908 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6909 (the order of the data-refs in the output of vect_permute_store_chain
6910 corresponds to the order of scalar stmts in the interleaving chain - see
6911 the documentation of vect_permute_store_chain()).
6913 In case of both multiple types and interleaving, above vector stores and
6914 permutation stmts are created for every copy. The result vector stmts are
6915 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6916 STMT_VINFO_RELATED_STMT for the next copies.
6919 prev_stmt_info = NULL;
6920 tree vec_mask = NULL_TREE;
6921 for (j = 0; j < ncopies; j++)
6924 if (j == 0)
6926 if (slp)
6928 /* Get vectorized arguments for SLP_NODE. */
6929 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6930 NULL, slp_node);
6932 vec_oprnd = vec_oprnds[0];
6934 else
6936 /* For interleaved stores we collect vectorized defs for all the
6937 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6938 used as an input to vect_permute_store_chain(), and OPRNDS as
6939 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6941 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6942 OPRNDS are of size 1. */
6943 next_stmt = first_stmt;
6944 for (i = 0; i < group_size; i++)
6946 /* Since gaps are not supported for interleaved stores,
6947 DR_GROUP_SIZE is the exact number of stmts in the chain.
6948 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6949 there is no interleaving, DR_GROUP_SIZE is 1, and only one
6950 iteration of the loop will be executed. */
6951 op = vect_get_store_rhs (next_stmt);
6952 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6953 dr_chain.quick_push (vec_oprnd);
6954 oprnds.quick_push (vec_oprnd);
6955 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6957 if (mask)
6958 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6959 mask_vectype);
6962 /* We should have caught mismatched types earlier. */
6963 gcc_assert (useless_type_conversion_p (vectype,
6964 TREE_TYPE (vec_oprnd)));
6965 bool simd_lane_access_p
6966 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6967 if (simd_lane_access_p
6968 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6969 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6970 && integer_zerop (DR_OFFSET (first_dr))
6971 && integer_zerop (DR_INIT (first_dr))
6972 && alias_sets_conflict_p (get_alias_set (aggr_type),
6973 get_alias_set (TREE_TYPE (ref_type))))
6975 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6976 dataref_offset = build_int_cst (ref_type, 0);
6977 inv_p = false;
6979 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6981 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
6982 &dataref_ptr, &vec_offset);
6983 inv_p = false;
6985 else
6986 dataref_ptr
6987 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6988 simd_lane_access_p ? loop : NULL,
6989 offset, &dummy, gsi, &ptr_incr,
6990 simd_lane_access_p, &inv_p,
6991 NULL_TREE, bump);
6992 gcc_assert (bb_vinfo || !inv_p);
6994 else
6996 /* For interleaved stores we created vectorized defs for all the
6997 defs stored in OPRNDS in the previous iteration (previous copy).
6998 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6999 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
7000 next copy.
7001 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7002 OPRNDS are of size 1. */
7003 for (i = 0; i < group_size; i++)
7005 op = oprnds[i];
7006 vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
7007 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
7008 dr_chain[i] = vec_oprnd;
7009 oprnds[i] = vec_oprnd;
7011 if (mask)
7012 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
7013 if (dataref_offset)
7014 dataref_offset
7015 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7016 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7017 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
7018 vec_offset);
7019 else
7020 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7021 bump);
7024 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7026 tree vec_array;
7028 /* Get an array into which we can store the individual vectors. */
7029 vec_array = create_vector_array (vectype, vec_num);
7031 /* Invalidate the current contents of VEC_ARRAY. This should
7032 become an RTL clobber too, which prevents the vector registers
7033 from being upward-exposed. */
7034 vect_clobber_variable (stmt, gsi, vec_array);
7036 /* Store the individual vectors into the array. */
7037 for (i = 0; i < vec_num; i++)
7039 vec_oprnd = dr_chain[i];
7040 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
7043 tree final_mask = NULL;
7044 if (loop_masks)
7045 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7046 vectype, j);
7047 if (vec_mask)
7048 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7049 vec_mask, gsi);
7051 gcall *call;
7052 if (final_mask)
7054 /* Emit:
7055 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7056 VEC_ARRAY). */
7057 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7058 tree alias_ptr = build_int_cst (ref_type, align);
7059 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7060 dataref_ptr, alias_ptr,
7061 final_mask, vec_array);
7063 else
7065 /* Emit:
7066 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7067 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7068 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7069 vec_array);
7070 gimple_call_set_lhs (call, data_ref);
7072 gimple_call_set_nothrow (call, true);
7073 new_stmt = call;
7074 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7076 /* Record that VEC_ARRAY is now dead. */
7077 vect_clobber_variable (stmt, gsi, vec_array);
7079 else
7081 new_stmt = NULL;
7082 if (grouped_store)
7084 if (j == 0)
7085 result_chain.create (group_size);
7086 /* Permute. */
7087 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
7088 &result_chain);
7091 next_stmt = first_stmt;
7092 for (i = 0; i < vec_num; i++)
7094 unsigned align, misalign;
7096 tree final_mask = NULL_TREE;
7097 if (loop_masks)
7098 final_mask = vect_get_loop_mask (gsi, loop_masks,
7099 vec_num * ncopies,
7100 vectype, vec_num * j + i);
7101 if (vec_mask)
7102 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7103 vec_mask, gsi);
7105 if (memory_access_type == VMAT_GATHER_SCATTER)
7107 tree scale = size_int (gs_info.scale);
7108 gcall *call;
7109 if (loop_masks)
7110 call = gimple_build_call_internal
7111 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7112 scale, vec_oprnd, final_mask);
7113 else
7114 call = gimple_build_call_internal
7115 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7116 scale, vec_oprnd);
7117 gimple_call_set_nothrow (call, true);
7118 new_stmt = call;
7119 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7120 break;
7123 if (i > 0)
7124 /* Bump the vector pointer. */
7125 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7126 stmt, bump);
7128 if (slp)
7129 vec_oprnd = vec_oprnds[i];
7130 else if (grouped_store)
7131 /* For grouped stores vectorized defs are interleaved in
7132 vect_permute_store_chain(). */
7133 vec_oprnd = result_chain[i];
7135 align = DR_TARGET_ALIGNMENT (first_dr);
7136 if (aligned_access_p (first_dr))
7137 misalign = 0;
7138 else if (DR_MISALIGNMENT (first_dr) == -1)
7140 align = dr_alignment (vect_dr_behavior (first_dr));
7141 misalign = 0;
7143 else
7144 misalign = DR_MISALIGNMENT (first_dr);
7145 if (dataref_offset == NULL_TREE
7146 && TREE_CODE (dataref_ptr) == SSA_NAME)
7147 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7148 misalign);
7150 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7152 tree perm_mask = perm_mask_for_reverse (vectype);
7153 tree perm_dest
7154 = vect_create_destination_var (vect_get_store_rhs (stmt),
7155 vectype);
7156 tree new_temp = make_ssa_name (perm_dest);
7158 /* Generate the permute statement. */
7159 gimple *perm_stmt
7160 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7161 vec_oprnd, perm_mask);
7162 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7164 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7165 vec_oprnd = new_temp;
7168 /* Arguments are ready. Create the new vector stmt. */
7169 if (final_mask)
7171 align = least_bit_hwi (misalign | align);
7172 tree ptr = build_int_cst (ref_type, align);
7173 gcall *call
7174 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7175 dataref_ptr, ptr,
7176 final_mask, vec_oprnd);
7177 gimple_call_set_nothrow (call, true);
7178 new_stmt = call;
7180 else
7182 data_ref = fold_build2 (MEM_REF, vectype,
7183 dataref_ptr,
7184 dataref_offset
7185 ? dataref_offset
7186 : build_int_cst (ref_type, 0));
7187 if (aligned_access_p (first_dr))
7189 else if (DR_MISALIGNMENT (first_dr) == -1)
7190 TREE_TYPE (data_ref)
7191 = build_aligned_type (TREE_TYPE (data_ref),
7192 align * BITS_PER_UNIT);
7193 else
7194 TREE_TYPE (data_ref)
7195 = build_aligned_type (TREE_TYPE (data_ref),
7196 TYPE_ALIGN (elem_type));
7197 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7198 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
7200 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7202 if (slp)
7203 continue;
7205 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
7206 if (!next_stmt)
7207 break;
7210 if (!slp)
7212 if (j == 0)
7213 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7214 else
7215 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7216 prev_stmt_info = vinfo_for_stmt (new_stmt);
7220 oprnds.release ();
7221 result_chain.release ();
7222 vec_oprnds.release ();
7224 return true;
7227 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7228 VECTOR_CST mask. No checks are made that the target platform supports the
7229 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7230 vect_gen_perm_mask_checked. */
7232 tree
7233 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7235 tree mask_type;
7237 poly_uint64 nunits = sel.length ();
7238 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7240 mask_type = build_vector_type (ssizetype, nunits);
7241 return vec_perm_indices_to_tree (mask_type, sel);
7244 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7245 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7247 tree
7248 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7250 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7251 return vect_gen_perm_mask_any (vectype, sel);
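/* Usage sketch (illustrative, in the same style as the scatter-store code
   above): building a mask that reverses a 4-element vector.

     vec_perm_builder sel (4, 4, 1);
     for (unsigned int i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   Here VECTYPE is assumed to be a 4-element vector type for which the
   target supports the permutation.  */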
7254 /* Given vector variables X and Y that were generated for the scalar
7255 STMT, generate instructions to permute the vector elements of X and Y
7256 using the permutation mask MASK_VEC, insert them at *GSI and return
7257 the permuted vector variable. */
7259 static tree
7260 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
7261 gimple_stmt_iterator *gsi)
7263 tree vectype = TREE_TYPE (x);
7264 tree perm_dest, data_ref;
7265 gimple *perm_stmt;
7267 tree scalar_dest = gimple_get_lhs (stmt);
7268 if (TREE_CODE (scalar_dest) == SSA_NAME)
7269 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7270 else
7271 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7272 data_ref = make_ssa_name (perm_dest);
7274 /* Generate the permute statement. */
7275 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7276 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7278 return data_ref;
7281 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
7282 inserting them on the loop's preheader edge. Returns true if we
7283 were successful in doing so (and thus STMT can then be moved),
7284 otherwise returns false. */
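/* E.g. (illustration): for an invariant load whose address computation

     addr_2 = base_1 + 16;
     x_3 = *addr_2;

   is defined inside LOOP but depends only on defs from outside it, the
   definition of ADDR_2 is moved to the preheader so that the caller can
   then hoist the load itself.  */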
7286 static bool
7287 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
7289 ssa_op_iter i;
7290 tree op;
7291 bool any = false;
7293 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7295 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7296 if (!gimple_nop_p (def_stmt)
7297 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7299 /* Make sure we don't need to recurse. While we could do
7300 so in simple cases, when there are more complex use webs
7301 we don't have an easy way to preserve stmt order to fulfil
7302 dependencies within them. */
7303 tree op2;
7304 ssa_op_iter i2;
7305 if (gimple_code (def_stmt) == GIMPLE_PHI)
7306 return false;
7307 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7309 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7310 if (!gimple_nop_p (def_stmt2)
7311 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7312 return false;
7314 any = true;
7318 if (!any)
7319 return true;
7321 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7323 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7324 if (!gimple_nop_p (def_stmt)
7325 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7327 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7328 gsi_remove (&gsi, false);
7329 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7333 return true;
7336 /* vectorizable_load.
7338 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
7339 can be vectorized.
7340 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7341 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
7342 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
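/* Illustrative sketch (not from the original sources): in the simplest
   contiguous, unmasked case a scalar load inside the loop such as

     x_1 = a[i_2];

   is replaced by a vector load of nunits consecutive elements through a
   data-ref pointer, conceptually

     vect_x_1 = MEM <vector(4) int> [(int *) vectp_a];

   with the pointer bumped by the vector size between copies.  */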
7344 static bool
7345 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
7346 slp_tree slp_node, slp_instance slp_node_instance,
7347 stmt_vector_for_cost *cost_vec)
7349 tree scalar_dest;
7350 tree vec_dest = NULL;
7351 tree data_ref = NULL;
7352 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7353 stmt_vec_info prev_stmt_info;
7354 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7355 struct loop *loop = NULL;
7356 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
7357 bool nested_in_vect_loop = false;
7358 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
7359 tree elem_type;
7360 tree new_temp;
7361 machine_mode mode;
7362 gimple *new_stmt = NULL;
7363 tree dummy;
7364 enum dr_alignment_support alignment_support_scheme;
7365 tree dataref_ptr = NULL_TREE;
7366 tree dataref_offset = NULL_TREE;
7367 gimple *ptr_incr = NULL;
7368 int ncopies;
7369 int i, j;
7370 unsigned int group_size;
7371 poly_uint64 group_gap_adj;
7372 tree msq = NULL_TREE, lsq;
7373 tree offset = NULL_TREE;
7374 tree byte_offset = NULL_TREE;
7375 tree realignment_token = NULL_TREE;
7376 gphi *phi = NULL;
7377 vec<tree> dr_chain = vNULL;
7378 bool grouped_load = false;
7379 gimple *first_stmt;
7380 gimple *first_stmt_for_drptr = NULL;
7381 bool inv_p;
7382 bool compute_in_loop = false;
7383 struct loop *at_loop;
7384 int vec_num;
7385 bool slp = (slp_node != NULL);
7386 bool slp_perm = false;
7387 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7388 poly_uint64 vf;
7389 tree aggr_type;
7390 gather_scatter_info gs_info;
7391 vec_info *vinfo = stmt_info->vinfo;
7392 tree ref_type;
7393 enum vect_def_type mask_dt = vect_unknown_def_type;
7395 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7396 return false;
7398 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7399 && ! vec_stmt)
7400 return false;
7402 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7403 if (is_gimple_assign (stmt))
7405 scalar_dest = gimple_assign_lhs (stmt);
7406 if (TREE_CODE (scalar_dest) != SSA_NAME)
7407 return false;
7409 tree_code code = gimple_assign_rhs_code (stmt);
7410 if (code != ARRAY_REF
7411 && code != BIT_FIELD_REF
7412 && code != INDIRECT_REF
7413 && code != COMPONENT_REF
7414 && code != IMAGPART_EXPR
7415 && code != REALPART_EXPR
7416 && code != MEM_REF
7417 && TREE_CODE_CLASS (code) != tcc_declaration)
7418 return false;
7420 else
7422 gcall *call = dyn_cast <gcall *> (stmt);
7423 if (!call || !gimple_call_internal_p (call))
7424 return false;
7426 internal_fn ifn = gimple_call_internal_fn (call);
7427 if (!internal_load_fn_p (ifn))
7428 return false;
7430 scalar_dest = gimple_call_lhs (call);
7431 if (!scalar_dest)
7432 return false;
7434 if (slp_node != NULL)
7436 if (dump_enabled_p ())
7437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7438 "SLP of masked loads not supported.\n");
7439 return false;
7442 int mask_index = internal_fn_mask_index (ifn);
7443 if (mask_index >= 0)
7445 mask = gimple_call_arg (call, mask_index);
7446 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7447 &mask_vectype))
7448 return false;
7452 if (!STMT_VINFO_DATA_REF (stmt_info))
7453 return false;
7455 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7456 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7458 if (loop_vinfo)
7460 loop = LOOP_VINFO_LOOP (loop_vinfo);
7461 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7462 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7464 else
7465 vf = 1;
7467 /* Multiple types in SLP are handled by creating the appropriate number of
7468 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7469 case of SLP. */
7470 if (slp)
7471 ncopies = 1;
7472 else
7473 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7475 gcc_assert (ncopies >= 1);
7477 /* FORNOW. This restriction should be relaxed. */
7478 if (nested_in_vect_loop && ncopies > 1)
7480 if (dump_enabled_p ())
7481 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7482 "multiple types in nested loop.\n");
7483 return false;
7486 /* Invalidate assumptions made by dependence analysis when vectorization
7487 on the unrolled body effectively re-orders stmts. */
7488 if (ncopies > 1
7489 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7490 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7491 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7493 if (dump_enabled_p ())
7494 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7495 "cannot perform implicit CSE when unrolling "
7496 "with negative dependence distance\n");
7497 return false;
7500 elem_type = TREE_TYPE (vectype);
7501 mode = TYPE_MODE (vectype);
7503 /* FORNOW. In some cases we can vectorize even if the data type is not
7504 supported (e.g. data copies). */
7505 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7507 if (dump_enabled_p ())
7508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7509 "Aligned load, but unsupported type.\n");
7510 return false;
7513 /* Check if the load is a part of an interleaving chain. */
7514 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7516 grouped_load = true;
7517 /* FORNOW */
7518 gcc_assert (!nested_in_vect_loop);
7519 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7521 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7522 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
7524 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7525 slp_perm = true;
7527 /* Invalidate assumptions made by dependence analysis when vectorization
7528 on the unrolled body effectively re-orders stmts. */
7529 if (!PURE_SLP_STMT (stmt_info)
7530 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7531 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7532 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7534 if (dump_enabled_p ())
7535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7536 "cannot perform implicit CSE when performing "
7537 "group loads with negative dependence distance\n");
7538 return false;
7541 /* Similarly when the stmt is a load that is both part of a SLP
7542 instance and a loop vectorized stmt via the same-dr mechanism
7543 we have to give up. */
7544 if (DR_GROUP_SAME_DR_STMT (stmt_info)
7545 && (STMT_SLP_TYPE (stmt_info)
7546 != STMT_SLP_TYPE (vinfo_for_stmt
7547 (DR_GROUP_SAME_DR_STMT (stmt_info)))))
7549 if (dump_enabled_p ())
7550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7551 "conflicting SLP types for CSEd load\n");
7552 return false;
7555 else
7556 group_size = 1;
7558 vect_memory_access_type memory_access_type;
7559 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
7560 &memory_access_type, &gs_info))
7561 return false;
7563 if (mask)
7565 if (memory_access_type == VMAT_CONTIGUOUS)
7567 machine_mode vec_mode = TYPE_MODE (vectype);
7568 if (!VECTOR_MODE_P (vec_mode)
7569 || !can_vec_mask_load_store_p (vec_mode,
7570 TYPE_MODE (mask_vectype), true))
7571 return false;
7573 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7575 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7576 tree masktype
7577 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7578 if (TREE_CODE (masktype) == INTEGER_TYPE)
7580 if (dump_enabled_p ())
7581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7582 "masked gather with integer mask not"
7583 " supported.");
7584 return false;
7587 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7588 && memory_access_type != VMAT_GATHER_SCATTER)
7590 if (dump_enabled_p ())
7591 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7592 "unsupported access type for masked load.\n");
7593 return false;
7597 if (!vec_stmt) /* transformation not required. */
7599 if (!slp)
7600 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7602 if (loop_vinfo
7603 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7604 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7605 memory_access_type, &gs_info);
7607 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7608 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7609 slp_node_instance, slp_node, cost_vec);
7610 return true;
7613 if (!slp)
7614 gcc_assert (memory_access_type
7615 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7617 if (dump_enabled_p ())
7618 dump_printf_loc (MSG_NOTE, vect_location,
7619 "transform load. ncopies = %d\n", ncopies);
7621 /* Transform. */
7623 ensure_base_align (dr);
7625 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7627 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7628 mask_dt);
7629 return true;
7632 if (memory_access_type == VMAT_ELEMENTWISE
7633 || memory_access_type == VMAT_STRIDED_SLP)
7635 gimple_stmt_iterator incr_gsi;
7636 bool insert_after;
7637 gimple *incr;
7638 tree offvar;
7639 tree ivstep;
7640 tree running_off;
7641 vec<constructor_elt, va_gc> *v = NULL;
7642 tree stride_base, stride_step, alias_off;
7643 /* Checked by get_load_store_type. */
7644 unsigned int const_nunits = nunits.to_constant ();
7645 unsigned HOST_WIDE_INT cst_offset = 0;
7647 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7648 gcc_assert (!nested_in_vect_loop);
7650 if (grouped_load)
7652 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7653 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7655 else
7657 first_stmt = stmt;
7658 first_dr = dr;
7660 if (slp && grouped_load)
7662 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
7663 ref_type = get_group_alias_ptr_type (first_stmt);
7665 else
7667 if (grouped_load)
7668 cst_offset
7669 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7670 * vect_get_place_in_interleaving_chain (stmt, first_stmt));
7671 group_size = 1;
7672 ref_type = reference_alias_ptr_type (DR_REF (dr));
7675 stride_base
7676 = fold_build_pointer_plus
7677 (DR_BASE_ADDRESS (first_dr),
7678 size_binop (PLUS_EXPR,
7679 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7680 convert_to_ptrofftype (DR_INIT (first_dr))));
7681 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7683 /* For a load with loop-invariant (but other than power-of-2)
7684 stride (i.e. not a grouped access) like so:
7686 for (i = 0; i < n; i += stride)
7687 ... = array[i];
7689 we generate a new induction variable and new accesses to
7690 form a new vector (or vectors, depending on ncopies):
7692 for (j = 0; ; j += VF*stride)
7693 tmp1 = array[j];
7694 tmp2 = array[j + stride];
7696 vectemp = {tmp1, tmp2, ...}
7699 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7700 build_int_cst (TREE_TYPE (stride_step), vf));
7702 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7704 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7705 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7706 create_iv (stride_base, ivstep, NULL,
7707 loop, &incr_gsi, insert_after,
7708 &offvar, NULL);
7709 incr = gsi_stmt (incr_gsi);
7710 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7712 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7714 prev_stmt_info = NULL;
7715 running_off = offvar;
7716 alias_off = build_int_cst (ref_type, 0);
7717 int nloads = const_nunits;
7718 int lnel = 1;
7719 tree ltype = TREE_TYPE (vectype);
7720 tree lvectype = vectype;
7721 auto_vec<tree> dr_chain;
7722 if (memory_access_type == VMAT_STRIDED_SLP)
7724 if (group_size < const_nunits)
7726 /* First check if vec_init optab supports construction from
7727 vector elts directly. */
7728 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7729 machine_mode vmode;
7730 if (mode_for_vector (elmode, group_size).exists (&vmode)
7731 && VECTOR_MODE_P (vmode)
7732 && targetm.vector_mode_supported_p (vmode)
7733 && (convert_optab_handler (vec_init_optab,
7734 TYPE_MODE (vectype), vmode)
7735 != CODE_FOR_nothing))
7737 nloads = const_nunits / group_size;
7738 lnel = group_size;
7739 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7741 else
7743 /* Otherwise avoid emitting a constructor of vector elements
7744 by performing the loads using an integer type of the same
7745 size, constructing a vector of those and then
7746 re-interpreting it as the original vector type.
7747 This avoids a huge runtime penalty due to the general
7748 inability to perform store forwarding from smaller stores
7749 to a larger load. */
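/* Illustrative example (assuming the target supports the modes involved):
   for a group of two 32-bit floats with a V4SF vectype, each pair is
   loaded as a single 64-bit integer, two such integers are assembled
   into a V2DI vector, and the result is VIEW_CONVERTed back to V4SF.  */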
7750 unsigned lsize
7751 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7752 elmode = int_mode_for_size (lsize, 0).require ();
7753 unsigned int lnunits = const_nunits / group_size;
7754 /* If we can't construct such a vector fall back to
7755 element loads of the original vector type. */
7756 if (mode_for_vector (elmode, lnunits).exists (&vmode)
7757 && VECTOR_MODE_P (vmode)
7758 && targetm.vector_mode_supported_p (vmode)
7759 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7760 != CODE_FOR_nothing))
7762 nloads = lnunits;
7763 lnel = group_size;
7764 ltype = build_nonstandard_integer_type (lsize, 1);
7765 lvectype = build_vector_type (ltype, nloads);
7769 else
7771 nloads = 1;
7772 lnel = const_nunits;
7773 ltype = vectype;
7775 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7777 /* Load vector(1) scalar_type if the vectype has just one element. */
7778 else if (nloads == 1)
7779 ltype = vectype;
7781 if (slp)
7783 /* For SLP permutation support we need to load the whole group,
7784 not only the number of vector stmts the permutation result
7785 fits in. */
7786 if (slp_perm)
7788 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7789 variable VF. */
7790 unsigned int const_vf = vf.to_constant ();
7791 ncopies = CEIL (group_size * const_vf, const_nunits);
7792 dr_chain.create (ncopies);
7794 else
7795 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7797 unsigned int group_el = 0;
7798 unsigned HOST_WIDE_INT
7799 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7800 for (j = 0; j < ncopies; j++)
7802 if (nloads > 1)
7803 vec_alloc (v, nloads);
7804 for (i = 0; i < nloads; i++)
7806 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7807 group_el * elsz + cst_offset);
7808 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7809 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7810 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
7811 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7812 if (nloads > 1)
7813 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7814 gimple_assign_lhs (new_stmt));
7816 group_el += lnel;
7817 if (! slp
7818 || group_el == group_size)
7820 tree newoff = copy_ssa_name (running_off);
7821 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7822 running_off, stride_step);
7823 vect_finish_stmt_generation (stmt, incr, gsi);
7825 running_off = newoff;
7826 group_el = 0;
7829 if (nloads > 1)
7831 tree vec_inv = build_constructor (lvectype, v);
7832 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7833 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7834 if (lvectype != vectype)
7836 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7837 VIEW_CONVERT_EXPR,
7838 build1 (VIEW_CONVERT_EXPR,
7839 vectype, new_temp));
7840 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7844 if (slp)
7846 if (slp_perm)
7847 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7848 else
7849 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7851 else
7853 if (j == 0)
7854 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7855 else
7856 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7857 prev_stmt_info = vinfo_for_stmt (new_stmt);
7860 if (slp_perm)
7862 unsigned n_perms;
7863 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7864 slp_node_instance, false, &n_perms);
7866 return true;
7869 if (memory_access_type == VMAT_GATHER_SCATTER
7870 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
7871 grouped_load = false;
7873 if (grouped_load)
7875 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7876 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
7877 /* For SLP vectorization we directly vectorize a subchain
7878 without permutation. */
7879 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7880 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7881 /* For BB vectorization always use the first stmt to base
7882 the data ref pointer on. */
7883 if (bb_vinfo)
7884 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7886 /* Check if the chain of loads is already vectorized. */
7887 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7888 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7889 ??? But we can only do so if there is exactly one
7890 as we have no way to get at the rest. Leave the CSE
7891 opportunity alone.
7892 ??? With the group load eventually participating
7893 in multiple different permutations (having multiple
7894 slp nodes which refer to the same group) the CSE
7895 is even wrong code. See PR56270. */
7896 && !slp)
7898 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7899 return true;
7901 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7902 group_gap_adj = 0;
7904 /* VEC_NUM is the number of vect stmts to be created for this group. */
7905 if (slp)
7907 grouped_load = false;
7908 /* For SLP permutation support we need to load the whole group,
7909 not only the number of vector stmts the permutation result
7910 fits in. */
7911 if (slp_perm)
7913 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7914 variable VF. */
7915 unsigned int const_vf = vf.to_constant ();
7916 unsigned int const_nunits = nunits.to_constant ();
7917 vec_num = CEIL (group_size * const_vf, const_nunits);
7918 group_gap_adj = vf * group_size - nunits * vec_num;
7920 else
7922 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7923 group_gap_adj
7924 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7927 else
7928 vec_num = group_size;
7930 ref_type = get_group_alias_ptr_type (first_stmt);
7932 else
7934 first_stmt = stmt;
7935 first_dr = dr;
7936 group_size = vec_num = 1;
7937 group_gap_adj = 0;
7938 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7941 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7942 gcc_assert (alignment_support_scheme);
7943 vec_loop_masks *loop_masks
7944 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7945 ? &LOOP_VINFO_MASKS (loop_vinfo)
7946 : NULL);
7947 /* Targets with load-lanes instructions must not require explicit
7948 realignment. vect_supportable_dr_alignment always returns either
7949 dr_aligned or dr_unaligned_supported for masked operations. */
7950 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7951 && !mask
7952 && !loop_masks)
7953 || alignment_support_scheme == dr_aligned
7954 || alignment_support_scheme == dr_unaligned_supported);
7956 /* In case the vectorization factor (VF) is bigger than the number
7957 of elements that we can fit in a vectype (nunits), we have to generate
7958 more than one vector stmt - i.e. we need to "unroll" the
7959 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7960 from one copy of the vector stmt to the next, in the field
7961 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7962 stages to find the correct vector defs to be used when vectorizing
7963 stmts that use the defs of the current stmt. The example below
7964 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7965 need to create 4 vectorized stmts):
7967 before vectorization:
7968 RELATED_STMT VEC_STMT
7969 S1: x = memref - -
7970 S2: z = x + 1 - -
7972 step 1: vectorize stmt S1:
7973 We first create the vector stmt VS1_0, and, as usual, record a
7974 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7975 Next, we create the vector stmt VS1_1, and record a pointer to
7976 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7977 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7978 stmts and pointers:
7979 RELATED_STMT VEC_STMT
7980 VS1_0: vx0 = memref0 VS1_1 -
7981 VS1_1: vx1 = memref1 VS1_2 -
7982 VS1_2: vx2 = memref2 VS1_3 -
7983 VS1_3: vx3 = memref3 - -
7984 S1: x = load - VS1_0
7985 S2: z = x + 1 - -
7987 See the documentation of vect_get_vec_def_for_stmt_copy for how the
7988 information we recorded in the RELATED_STMT field is used to vectorize
7989 stmt S2. */
7991 /* In case of interleaving (non-unit grouped access):
7993 S1: x2 = &base + 2
7994 S2: x0 = &base
7995 S3: x1 = &base + 1
7996 S4: x3 = &base + 3
7998 Vectorized loads are created in the order of memory accesses
7999 starting from the access of the first stmt of the chain:
8001 VS1: vx0 = &base
8002 VS2: vx1 = &base + vec_size*1
8003 VS3: vx3 = &base + vec_size*2
8004 VS4: vx4 = &base + vec_size*3
8006 Then permutation statements are generated:
8008 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8009 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8012 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8013 (the order of the data-refs in the output of vect_permute_load_chain
8014 corresponds to the order of scalar stmts in the interleaving chain - see
8015 the documentation of vect_permute_load_chain()).
8016 The generation of permutation stmts and recording them in
8017 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8019 In case of both multiple types and interleaving, the vector loads and
8020 permutation stmts above are created for every copy. The result vector
8021 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8022 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
8024 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8025 on a target that supports unaligned accesses (dr_unaligned_supported)
8026 we generate the following code:
8027 p = initial_addr;
8028 indx = 0;
8029 loop {
8030 p = p + indx * vectype_size;
8031 vec_dest = *(p);
8032 indx = indx + 1;
8035 Otherwise, the data reference is potentially unaligned on a target that
8036 does not support unaligned accesses (dr_explicit_realign_optimized) -
8037 then generate the following code, in which the data in each iteration is
8038 obtained by two vector loads, one from the previous iteration, and one
8039 from the current iteration:
8040 p1 = initial_addr;
8041 msq_init = *(floor(p1))
8042 p2 = initial_addr + VS - 1;
8043 realignment_token = call target_builtin;
8044 indx = 0;
8045 loop {
8046 p2 = p2 + indx * vectype_size
8047 lsq = *(floor(p2))
8048 vec_dest = realign_load (msq, lsq, realignment_token)
8049 indx = indx + 1;
8050 msq = lsq;
8051 } */
8053 /* If the misalignment remains the same throughout the execution of the
8054 loop, we can create the init_addr and permutation mask at the loop
8055 preheader. Otherwise, they need to be created inside the loop.
8056 This can only occur when vectorizing memory accesses in the inner-loop
8057 nested within an outer-loop that is being vectorized. */
8059 if (nested_in_vect_loop
8060 && !multiple_p (DR_STEP_ALIGNMENT (dr),
8061 GET_MODE_SIZE (TYPE_MODE (vectype))))
8063 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8064 compute_in_loop = true;
8067 if ((alignment_support_scheme == dr_explicit_realign_optimized
8068 || alignment_support_scheme == dr_explicit_realign)
8069 && !compute_in_loop)
8071 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
8072 alignment_support_scheme, NULL_TREE,
8073 &at_loop);
8074 if (alignment_support_scheme == dr_explicit_realign_optimized)
8076 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
8077 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8078 size_one_node);
8081 else
8082 at_loop = loop;
8084 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8085 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8087 tree bump;
8088 tree vec_offset = NULL_TREE;
8089 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8091 aggr_type = NULL_TREE;
8092 bump = NULL_TREE;
8094 else if (memory_access_type == VMAT_GATHER_SCATTER)
8096 aggr_type = elem_type;
8097 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
8098 &bump, &vec_offset);
8100 else
8102 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8103 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8104 else
8105 aggr_type = vectype;
8106 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
8109 tree vec_mask = NULL_TREE;
8110 prev_stmt_info = NULL;
8111 poly_uint64 group_elt = 0;
8112 for (j = 0; j < ncopies; j++)
8114 /* 1. Create the vector or array pointer update chain. */
8115 if (j == 0)
8117 bool simd_lane_access_p
8118 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8119 if (simd_lane_access_p
8120 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8121 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8122 && integer_zerop (DR_OFFSET (first_dr))
8123 && integer_zerop (DR_INIT (first_dr))
8124 && alias_sets_conflict_p (get_alias_set (aggr_type),
8125 get_alias_set (TREE_TYPE (ref_type)))
8126 && (alignment_support_scheme == dr_aligned
8127 || alignment_support_scheme == dr_unaligned_supported))
8129 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
8130 dataref_offset = build_int_cst (ref_type, 0);
8131 inv_p = false;
8133 else if (first_stmt_for_drptr
8134 && first_stmt != first_stmt_for_drptr)
8136 dataref_ptr
8137 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
8138 at_loop, offset, &dummy, gsi,
8139 &ptr_incr, simd_lane_access_p,
8140 &inv_p, byte_offset, bump);
8141 /* Adjust the pointer by the difference to first_stmt. */
8142 data_reference_p ptrdr
8143 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
8144 tree diff = fold_convert (sizetype,
8145 size_binop (MINUS_EXPR,
8146 DR_INIT (first_dr),
8147 DR_INIT (ptrdr)));
8148 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8149 stmt, diff);
8151 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8153 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8154 &dataref_ptr, &vec_offset);
8155 inv_p = false;
8157 else
8158 dataref_ptr
8159 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
8160 offset, &dummy, gsi, &ptr_incr,
8161 simd_lane_access_p, &inv_p,
8162 byte_offset, bump);
8163 if (mask)
8164 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8165 mask_vectype);
8167 else
8169 if (dataref_offset)
8170 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8171 bump);
8172 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8173 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8174 vec_offset);
8175 else
8176 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8177 stmt, bump);
8178 if (mask)
8179 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
8182 if (grouped_load || slp_perm)
8183 dr_chain.create (vec_num);
8185 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8187 tree vec_array;
8189 vec_array = create_vector_array (vectype, vec_num);
8191 tree final_mask = NULL_TREE;
8192 if (loop_masks)
8193 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8194 vectype, j);
8195 if (vec_mask)
8196 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8197 vec_mask, gsi);
8199 gcall *call;
8200 if (final_mask)
8202 /* Emit:
8203 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8204 VEC_MASK). */
8205 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8206 tree alias_ptr = build_int_cst (ref_type, align);
8207 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8208 dataref_ptr, alias_ptr,
8209 final_mask);
8211 else
8213 /* Emit:
8214 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8215 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8216 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8218 gimple_call_set_lhs (call, vec_array);
8219 gimple_call_set_nothrow (call, true);
8220 new_stmt = call;
8221 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8223 /* Extract each vector into an SSA_NAME. */
8224 for (i = 0; i < vec_num; i++)
8226 new_temp = read_vector_array (stmt, gsi, scalar_dest,
8227 vec_array, i);
8228 dr_chain.quick_push (new_temp);
8231 /* Record the mapping between SSA_NAMEs and statements. */
8232 vect_record_grouped_load_vectors (stmt, dr_chain);
8234 /* Record that VEC_ARRAY is now dead. */
8235 vect_clobber_variable (stmt, gsi, vec_array);
8237 else
8239 for (i = 0; i < vec_num; i++)
8241 tree final_mask = NULL_TREE;
8242 if (loop_masks
8243 && memory_access_type != VMAT_INVARIANT)
8244 final_mask = vect_get_loop_mask (gsi, loop_masks,
8245 vec_num * ncopies,
8246 vectype, vec_num * j + i);
8247 if (vec_mask)
8248 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8249 vec_mask, gsi);
8251 if (i > 0)
8252 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8253 stmt, bump);
8255 /* 2. Create the vector-load in the loop. */
8256 switch (alignment_support_scheme)
8258 case dr_aligned:
8259 case dr_unaligned_supported:
8261 unsigned int align, misalign;
8263 if (memory_access_type == VMAT_GATHER_SCATTER)
8265 tree scale = size_int (gs_info.scale);
8266 gcall *call;
8267 if (loop_masks)
8268 call = gimple_build_call_internal
8269 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8270 vec_offset, scale, final_mask);
8271 else
8272 call = gimple_build_call_internal
8273 (IFN_GATHER_LOAD, 3, dataref_ptr,
8274 vec_offset, scale);
8275 gimple_call_set_nothrow (call, true);
8276 new_stmt = call;
8277 data_ref = NULL_TREE;
8278 break;
8281 align = DR_TARGET_ALIGNMENT (dr);
8282 if (alignment_support_scheme == dr_aligned)
8284 gcc_assert (aligned_access_p (first_dr));
8285 misalign = 0;
8287 else if (DR_MISALIGNMENT (first_dr) == -1)
8289 align = dr_alignment (vect_dr_behavior (first_dr));
8290 misalign = 0;
8292 else
8293 misalign = DR_MISALIGNMENT (first_dr);
8294 if (dataref_offset == NULL_TREE
8295 && TREE_CODE (dataref_ptr) == SSA_NAME)
8296 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8297 align, misalign);
8299 if (final_mask)
8301 align = least_bit_hwi (misalign | align);
8302 tree ptr = build_int_cst (ref_type, align);
8303 gcall *call
8304 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8305 dataref_ptr, ptr,
8306 final_mask);
8307 gimple_call_set_nothrow (call, true);
8308 new_stmt = call;
8309 data_ref = NULL_TREE;
8311 else
8313 data_ref
8314 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8315 dataref_offset
8316 ? dataref_offset
8317 : build_int_cst (ref_type, 0));
8318 if (alignment_support_scheme == dr_aligned)
8320 else if (DR_MISALIGNMENT (first_dr) == -1)
8321 TREE_TYPE (data_ref)
8322 = build_aligned_type (TREE_TYPE (data_ref),
8323 align * BITS_PER_UNIT);
8324 else
8325 TREE_TYPE (data_ref)
8326 = build_aligned_type (TREE_TYPE (data_ref),
8327 TYPE_ALIGN (elem_type));
8329 break;
8331 case dr_explicit_realign:
8333 tree ptr, bump;
8335 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8337 if (compute_in_loop)
8338 msq = vect_setup_realignment (first_stmt, gsi,
8339 &realignment_token,
8340 dr_explicit_realign,
8341 dataref_ptr, NULL);
8343 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8344 ptr = copy_ssa_name (dataref_ptr);
8345 else
8346 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8347 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8348 new_stmt = gimple_build_assign
8349 (ptr, BIT_AND_EXPR, dataref_ptr,
8350 build_int_cst
8351 (TREE_TYPE (dataref_ptr),
8352 -(HOST_WIDE_INT) align));
8353 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8354 data_ref
8355 = build2 (MEM_REF, vectype, ptr,
8356 build_int_cst (ref_type, 0));
8357 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8358 vec_dest = vect_create_destination_var (scalar_dest,
8359 vectype);
8360 new_stmt = gimple_build_assign (vec_dest, data_ref);
8361 new_temp = make_ssa_name (vec_dest, new_stmt);
8362 gimple_assign_set_lhs (new_stmt, new_temp);
8363 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8364 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8365 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8366 msq = new_temp;
8368 bump = size_binop (MULT_EXPR, vs,
8369 TYPE_SIZE_UNIT (elem_type));
8370 bump = size_binop (MINUS_EXPR, bump, size_one_node);
8371 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
8372 new_stmt = gimple_build_assign
8373 (NULL_TREE, BIT_AND_EXPR, ptr,
8374 build_int_cst
8375 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8376 ptr = copy_ssa_name (ptr, new_stmt);
8377 gimple_assign_set_lhs (new_stmt, ptr);
8378 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8379 data_ref
8380 = build2 (MEM_REF, vectype, ptr,
8381 build_int_cst (ref_type, 0));
8382 break;
8384 case dr_explicit_realign_optimized:
8386 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8387 new_temp = copy_ssa_name (dataref_ptr);
8388 else
8389 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8390 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8391 new_stmt = gimple_build_assign
8392 (new_temp, BIT_AND_EXPR, dataref_ptr,
8393 build_int_cst (TREE_TYPE (dataref_ptr),
8394 -(HOST_WIDE_INT) align));
8395 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8396 data_ref
8397 = build2 (MEM_REF, vectype, new_temp,
8398 build_int_cst (ref_type, 0));
8399 break;
8401 default:
8402 gcc_unreachable ();
8404 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8405 /* DATA_REF is null if we've already built the statement. */
8406 if (data_ref)
8408 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8409 new_stmt = gimple_build_assign (vec_dest, data_ref);
8411 new_temp = make_ssa_name (vec_dest, new_stmt);
8412 gimple_set_lhs (new_stmt, new_temp);
8413 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8415 /* 3. Handle explicit realignment if necessary/supported.
8416 Create in loop:
8417 vec_dest = realign_load (msq, lsq, realignment_token) */
8418 if (alignment_support_scheme == dr_explicit_realign_optimized
8419 || alignment_support_scheme == dr_explicit_realign)
8421 lsq = gimple_assign_lhs (new_stmt);
8422 if (!realignment_token)
8423 realignment_token = dataref_ptr;
8424 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8425 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8426 msq, lsq, realignment_token);
8427 new_temp = make_ssa_name (vec_dest, new_stmt);
8428 gimple_assign_set_lhs (new_stmt, new_temp);
8429 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8431 if (alignment_support_scheme == dr_explicit_realign_optimized)
8433 gcc_assert (phi);
8434 if (i == vec_num - 1 && j == ncopies - 1)
8435 add_phi_arg (phi, lsq,
8436 loop_latch_edge (containing_loop),
8437 UNKNOWN_LOCATION);
8438 msq = lsq;
8442 /* 4. Handle invariant-load. */
8443 if (inv_p && !bb_vinfo)
8445 gcc_assert (!grouped_load);
8446 /* If we have versioned for aliasing or the loop doesn't
8447 have any data dependencies that would preclude this,
8448 then we are sure this is a loop invariant load and
8449 thus we can insert it on the preheader edge. */
8450 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8451 && !nested_in_vect_loop
8452 && hoist_defs_of_uses (stmt, loop))
8454 if (dump_enabled_p ())
8456 dump_printf_loc (MSG_NOTE, vect_location,
8457 "hoisting out of the vectorized "
8458 "loop: ");
8459 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8461 tree tem = copy_ssa_name (scalar_dest);
8462 gsi_insert_on_edge_immediate
8463 (loop_preheader_edge (loop),
8464 gimple_build_assign (tem,
8465 unshare_expr
8466 (gimple_assign_rhs1 (stmt))));
8467 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
8468 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8469 set_vinfo_for_stmt (new_stmt,
8470 new_stmt_vec_info (new_stmt, vinfo));
8472 else
8474 gimple_stmt_iterator gsi2 = *gsi;
8475 gsi_next (&gsi2);
8476 new_temp = vect_init_vector (stmt, scalar_dest,
8477 vectype, &gsi2);
8478 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8482 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8484 tree perm_mask = perm_mask_for_reverse (vectype);
8485 new_temp = permute_vec_elements (new_temp, new_temp,
8486 perm_mask, stmt, gsi);
8487 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8490 /* Collect vector loads and later create their permutation in
8491 vect_transform_grouped_load (). */
8492 if (grouped_load || slp_perm)
8493 dr_chain.quick_push (new_temp);
8495 /* Store vector loads in the corresponding SLP_NODE. */
8496 if (slp && !slp_perm)
8497 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8499 /* With an SLP permutation we load the gaps as well; without one
8500 we need to skip the gaps after we manage to fully load
8501 all elements. group_gap_adj is DR_GROUP_SIZE here. */
8502 group_elt += nunits;
8503 if (maybe_ne (group_gap_adj, 0U)
8504 && !slp_perm
8505 && known_eq (group_elt, group_size - group_gap_adj))
8507 poly_wide_int bump_val
8508 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8509 * group_gap_adj);
8510 tree bump = wide_int_to_tree (sizetype, bump_val);
8511 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8512 stmt, bump);
8513 group_elt = 0;
8516 /* Bump the vector pointer to account for a gap or for excess
8517 elements loaded for a permuted SLP load. */
8518 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8520 poly_wide_int bump_val
8521 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8522 * group_gap_adj);
8523 tree bump = wide_int_to_tree (sizetype, bump_val);
8524 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8525 stmt, bump);
8529 if (slp && !slp_perm)
8530 continue;
8532 if (slp_perm)
8534 unsigned n_perms;
8535 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8536 slp_node_instance, false,
8537 &n_perms))
8539 dr_chain.release ();
8540 return false;
8543 else
8545 if (grouped_load)
8547 if (memory_access_type != VMAT_LOAD_STORE_LANES)
8548 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
8549 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8551 else
8553 if (j == 0)
8554 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8555 else
8556 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8557 prev_stmt_info = vinfo_for_stmt (new_stmt);
8560 dr_chain.release ();
8563 return true;
8566 /* Function vect_is_simple_cond.
8568 Input:
8569 LOOP - the loop that is being vectorized.
8570 COND - Condition that is checked for simple use.
8572 Output:
8573 *COMP_VECTYPE - the vector type for the comparison.
8574 *DTS - The def types for the arguments of the comparison
8576 Returns whether a COND can be vectorized. Checks whether
8577 condition operands are supportable using vect_is_simple_use. */
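/* For example (sketch): COND may be a full comparison such as  a_1 < b_2,
   whose operands are checked with vect_is_simple_use, or a scalar boolean
   SSA name such as  mask_3  that is used directly as a mask.  */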
8579 static bool
8580 vect_is_simple_cond (tree cond, vec_info *vinfo,
8581 tree *comp_vectype, enum vect_def_type *dts,
8582 tree vectype)
8584 tree lhs, rhs;
8585 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8587 /* Mask case. */
8588 if (TREE_CODE (cond) == SSA_NAME
8589 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8591 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
8592 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
8593 &dts[0], comp_vectype)
8594 || !*comp_vectype
8595 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8596 return false;
8597 return true;
8600 if (!COMPARISON_CLASS_P (cond))
8601 return false;
8603 lhs = TREE_OPERAND (cond, 0);
8604 rhs = TREE_OPERAND (cond, 1);
8606 if (TREE_CODE (lhs) == SSA_NAME)
8608 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
8609 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
8610 return false;
8612 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8613 || TREE_CODE (lhs) == FIXED_CST)
8614 dts[0] = vect_constant_def;
8615 else
8616 return false;
8618 if (TREE_CODE (rhs) == SSA_NAME)
8620 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
8621 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
8622 return false;
8624 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8625 || TREE_CODE (rhs) == FIXED_CST)
8626 dts[1] = vect_constant_def;
8627 else
8628 return false;
8630 if (vectype1 && vectype2
8631 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8632 TYPE_VECTOR_SUBPARTS (vectype2)))
8633 return false;
8635 *comp_vectype = vectype1 ? vectype1 : vectype2;
8636 /* Invariant comparison. */
8637 if (! *comp_vectype && vectype)
8639 tree scalar_type = TREE_TYPE (lhs);
8640 /* If we can widen the comparison to match vectype do so. */
8641 if (INTEGRAL_TYPE_P (scalar_type)
8642 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8643 TYPE_SIZE (TREE_TYPE (vectype))))
8644 scalar_type = build_nonstandard_integer_type
8645 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8646 TYPE_UNSIGNED (scalar_type));
8647 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8650 return true;
8653 /* vectorizable_condition.
8655 Check if STMT is a conditional modify expression that can be vectorized.
8656 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8657 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8658 at GSI.
8660 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
8661 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
8662 the else clause if it is 2).
8664 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
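/* A minimal example of the intended transformation (illustrative SSA
   names): the scalar statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   is vectorized into

     vect_x_1 = VEC_COND_EXPR <va_2 < vb_3, vc_4, vd_5>;

   where va_2, vb_3, vc_4 and vd_5 are the vector defs of the scalar
   operands.  */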
8666 bool
8667 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8668 gimple **vec_stmt, tree reduc_def, int reduc_index,
8669 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
8671 tree scalar_dest = NULL_TREE;
8672 tree vec_dest = NULL_TREE;
8673 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8674 tree then_clause, else_clause;
8675 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8676 tree comp_vectype = NULL_TREE;
8677 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8678 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8679 tree vec_compare;
8680 tree new_temp;
8681 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8682 enum vect_def_type dts[4]
8683 = {vect_unknown_def_type, vect_unknown_def_type,
8684 vect_unknown_def_type, vect_unknown_def_type};
8685 int ndts = 4;
8686 int ncopies;
8687 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8688 stmt_vec_info prev_stmt_info = NULL;
8689 int i, j;
8690 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8691 vec<tree> vec_oprnds0 = vNULL;
8692 vec<tree> vec_oprnds1 = vNULL;
8693 vec<tree> vec_oprnds2 = vNULL;
8694 vec<tree> vec_oprnds3 = vNULL;
8695 tree vec_cmp_type;
8696 bool masked = false;
8698 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8699 return false;
8701 vect_reduction_type reduction_type
8702 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8703 if (reduction_type == TREE_CODE_REDUCTION)
8705 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8706 return false;
8708 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8709 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8710 && reduc_def))
8711 return false;
8713 /* FORNOW: not yet supported. */
8714 if (STMT_VINFO_LIVE_P (stmt_info))
8716 if (dump_enabled_p ())
8717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8718 "value used after loop.\n");
8719 return false;
8723 /* Is this a vectorizable conditional operation? */
8724 if (!is_gimple_assign (stmt))
8725 return false;
8727 code = gimple_assign_rhs_code (stmt);
8729 if (code != COND_EXPR)
8730 return false;
8732 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8733 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8735 if (slp_node)
8736 ncopies = 1;
8737 else
8738 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8740 gcc_assert (ncopies >= 1);
8741 if (reduc_index && ncopies > 1)
8742 return false; /* FORNOW */
8744 cond_expr = gimple_assign_rhs1 (stmt);
8745 then_clause = gimple_assign_rhs2 (stmt);
8746 else_clause = gimple_assign_rhs3 (stmt);
8748 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8749 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8750 || !comp_vectype)
8751 return false;
8753 gimple *def_stmt;
8754 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
8755 &vectype1))
8756 return false;
8757 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
8758 &vectype2))
8759 return false;
8761 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8762 return false;
8764 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8765 return false;
8767 masked = !COMPARISON_CLASS_P (cond_expr);
8768 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8770 if (vec_cmp_type == NULL_TREE)
8771 return false;
8773 cond_code = TREE_CODE (cond_expr);
8774 if (!masked)
8776 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8777 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8780 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8782 /* Boolean values may have another representation in vectors
8783 and therefore we prefer bit operations over comparison for
8784 them (which also works for scalar masks). We store opcodes
8785 to use in bitop1 and bitop2. Statement is vectorized as
8786 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8787 depending on bitop1 and bitop2 arity. */
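/* For example (illustrative): with vector boolean operands A and B,
   A > B is computed as A & ~B and A != B as A ^ B.  */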
8788 switch (cond_code)
8790 case GT_EXPR:
8791 bitop1 = BIT_NOT_EXPR;
8792 bitop2 = BIT_AND_EXPR;
8793 break;
8794 case GE_EXPR:
8795 bitop1 = BIT_NOT_EXPR;
8796 bitop2 = BIT_IOR_EXPR;
8797 break;
8798 case LT_EXPR:
8799 bitop1 = BIT_NOT_EXPR;
8800 bitop2 = BIT_AND_EXPR;
8801 std::swap (cond_expr0, cond_expr1);
8802 break;
8803 case LE_EXPR:
8804 bitop1 = BIT_NOT_EXPR;
8805 bitop2 = BIT_IOR_EXPR;
8806 std::swap (cond_expr0, cond_expr1);
8807 break;
8808 case NE_EXPR:
8809 bitop1 = BIT_XOR_EXPR;
8810 break;
8811 case EQ_EXPR:
8812 bitop1 = BIT_XOR_EXPR;
8813 bitop2 = BIT_NOT_EXPR;
8814 break;
8815 default:
8816 return false;
8818 cond_code = SSA_NAME;
8821 if (!vec_stmt)
8823 if (bitop1 != NOP_EXPR)
8825 machine_mode mode = TYPE_MODE (comp_vectype);
8826 optab optab;
8828 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8829 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8830 return false;
8832 if (bitop2 != NOP_EXPR)
8834 optab = optab_for_tree_code (bitop2, comp_vectype,
8835 optab_default);
8836 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8837 return false;
8840 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8841 cond_code))
8843 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8844 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8845 cost_vec);
8846 return true;
8848 return false;
8851 /* Transform. */
8853 if (!slp_node)
8855 vec_oprnds0.create (1);
8856 vec_oprnds1.create (1);
8857 vec_oprnds2.create (1);
8858 vec_oprnds3.create (1);
8861 /* Handle def. */
8862 scalar_dest = gimple_assign_lhs (stmt);
8863 if (reduction_type != EXTRACT_LAST_REDUCTION)
8864 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8866 /* Handle cond expr. */
8867 for (j = 0; j < ncopies; j++)
8869 gimple *new_stmt = NULL;
8870 if (j == 0)
8872 if (slp_node)
8874 auto_vec<tree, 4> ops;
8875 auto_vec<vec<tree>, 4> vec_defs;
8877 if (masked)
8878 ops.safe_push (cond_expr);
8879 else
8881 ops.safe_push (cond_expr0);
8882 ops.safe_push (cond_expr1);
8884 ops.safe_push (then_clause);
8885 ops.safe_push (else_clause);
8886 vect_get_slp_defs (ops, slp_node, &vec_defs);
8887 vec_oprnds3 = vec_defs.pop ();
8888 vec_oprnds2 = vec_defs.pop ();
8889 if (!masked)
8890 vec_oprnds1 = vec_defs.pop ();
8891 vec_oprnds0 = vec_defs.pop ();
8893 else
8895 gimple *gtemp;
8896 if (masked)
8898 vec_cond_lhs
8899 = vect_get_vec_def_for_operand (cond_expr, stmt,
8900 comp_vectype);
8901 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8902 &gtemp, &dts[0]);
8904 else
8906 vec_cond_lhs
8907 = vect_get_vec_def_for_operand (cond_expr0,
8908 stmt, comp_vectype);
8909 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8911 vec_cond_rhs
8912 = vect_get_vec_def_for_operand (cond_expr1,
8913 stmt, comp_vectype);
8914 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8916 if (reduc_index == 1)
8917 vec_then_clause = reduc_def;
8918 else
8920 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8921 stmt);
8922 vect_is_simple_use (then_clause, loop_vinfo,
8923 &gtemp, &dts[2]);
8925 if (reduc_index == 2)
8926 vec_else_clause = reduc_def;
8927 else
8929 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8930 stmt);
8931 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8935 else
8937 vec_cond_lhs
8938 = vect_get_vec_def_for_stmt_copy (dts[0],
8939 vec_oprnds0.pop ());
8940 if (!masked)
8941 vec_cond_rhs
8942 = vect_get_vec_def_for_stmt_copy (dts[1],
8943 vec_oprnds1.pop ());
8945 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8946 vec_oprnds2.pop ());
8947 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8948 vec_oprnds3.pop ());
8951 if (!slp_node)
8953 vec_oprnds0.quick_push (vec_cond_lhs);
8954 if (!masked)
8955 vec_oprnds1.quick_push (vec_cond_rhs);
8956 vec_oprnds2.quick_push (vec_then_clause);
8957 vec_oprnds3.quick_push (vec_else_clause);
8960 /* Arguments are ready. Create the new vector stmt. */
8961 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8963 vec_then_clause = vec_oprnds2[i];
8964 vec_else_clause = vec_oprnds3[i];
8966 if (masked)
8967 vec_compare = vec_cond_lhs;
8968 else
8970 vec_cond_rhs = vec_oprnds1[i];
8971 if (bitop1 == NOP_EXPR)
8972 vec_compare = build2 (cond_code, vec_cmp_type,
8973 vec_cond_lhs, vec_cond_rhs);
8974 else
8976 new_temp = make_ssa_name (vec_cmp_type);
8977 if (bitop1 == BIT_NOT_EXPR)
8978 new_stmt = gimple_build_assign (new_temp, bitop1,
8979 vec_cond_rhs);
8980 else
8981 new_stmt
8982 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8983 vec_cond_rhs);
8984 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8985 if (bitop2 == NOP_EXPR)
8986 vec_compare = new_temp;
8987 else if (bitop2 == BIT_NOT_EXPR)
8989 /* Instead of doing ~x ? y : z do x ? z : y. */
8990 vec_compare = new_temp;
8991 std::swap (vec_then_clause, vec_else_clause);
8993 else
8995 vec_compare = make_ssa_name (vec_cmp_type);
8996 new_stmt
8997 = gimple_build_assign (vec_compare, bitop2,
8998 vec_cond_lhs, new_temp);
8999 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9003 if (reduction_type == EXTRACT_LAST_REDUCTION)
9005 if (!is_gimple_val (vec_compare))
9007 tree vec_compare_name = make_ssa_name (vec_cmp_type);
9008 new_stmt = gimple_build_assign (vec_compare_name,
9009 vec_compare);
9010 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9011 vec_compare = vec_compare_name;
9013 gcc_assert (reduc_index == 2);
9014 new_stmt = gimple_build_call_internal
9015 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9016 vec_then_clause);
9017 gimple_call_set_lhs (new_stmt, scalar_dest);
9018 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9019 if (stmt == gsi_stmt (*gsi))
9020 vect_finish_replace_stmt (stmt, new_stmt);
9021 else
9023 /* In this case we're moving the definition to later in the
9024 block. That doesn't matter because the only uses of the
9025 lhs are in phi statements. */
9026 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
9027 gsi_remove (&old_gsi, true);
9028 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9031 else
9033 new_temp = make_ssa_name (vec_dest);
9034 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
9035 vec_compare, vec_then_clause,
9036 vec_else_clause);
9037 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9039 if (slp_node)
9040 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9043 if (slp_node)
9044 continue;
9046 if (j == 0)
9047 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9048 else
9049 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9051 prev_stmt_info = vinfo_for_stmt (new_stmt);
9054 vec_oprnds0.release ();
9055 vec_oprnds1.release ();
9056 vec_oprnds2.release ();
9057 vec_oprnds3.release ();
9059 return true;
9062 /* vectorizable_comparison.
9064 Check if STMT is a comparison expression that can be vectorized.
9065 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
9066 comparison, put it in VEC_STMT, and insert it at GSI.
9068 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
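/* Sketch of the transformation (illustrative names): a scalar mask
   definition such as

     mask_1 = a_2 > b_3;

   becomes a vector comparison producing a vector boolean (mask) value,
   conceptually

     vect_mask_1 = va_2 > vb_3;

   unless the operands are themselves vector booleans, in which case the
   equivalent bit operations described below are used instead.  */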
9070 static bool
9071 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
9072 gimple **vec_stmt, tree reduc_def,
9073 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9075 tree lhs, rhs1, rhs2;
9076 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9077 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9078 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9079 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9080 tree new_temp;
9081 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9082 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
9083 int ndts = 2;
9084 poly_uint64 nunits;
9085 int ncopies;
9086 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9087 stmt_vec_info prev_stmt_info = NULL;
9088 int i, j;
9089 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9090 vec<tree> vec_oprnds0 = vNULL;
9091 vec<tree> vec_oprnds1 = vNULL;
9092 gimple *def_stmt;
9093 tree mask_type;
9094 tree mask;
9096 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9097 return false;
9099 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
9100 return false;
9102 mask_type = vectype;
9103 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9105 if (slp_node)
9106 ncopies = 1;
9107 else
9108 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9110 gcc_assert (ncopies >= 1);
9111 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9112 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9113 && reduc_def))
9114 return false;
9116 if (STMT_VINFO_LIVE_P (stmt_info))
9118 if (dump_enabled_p ())
9119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9120 "value used after loop.\n");
9121 return false;
9124 if (!is_gimple_assign (stmt))
9125 return false;
9127 code = gimple_assign_rhs_code (stmt);
9129 if (TREE_CODE_CLASS (code) != tcc_comparison)
9130 return false;
9132 rhs1 = gimple_assign_rhs1 (stmt);
9133 rhs2 = gimple_assign_rhs2 (stmt);
9135 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
9136 &dts[0], &vectype1))
9137 return false;
9139 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
9140 &dts[1], &vectype2))
9141 return false;
9143 if (vectype1 && vectype2
9144 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9145 TYPE_VECTOR_SUBPARTS (vectype2)))
9146 return false;
9148 vectype = vectype1 ? vectype1 : vectype2;
9150 /* Invariant comparison. */
9151 if (!vectype)
9153 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9154 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9155 return false;
9157 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9158 return false;
9160 /* Can't compare mask and non-mask types. */
9161 if (vectype1 && vectype2
9162 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9163 return false;
9165 /* Boolean values may have another representation in vectors
9166 and therefore we prefer bit operations over comparison for
9167 them (which also works for scalar masks). We store opcodes
9168 to use in bitop1 and bitop2. Statement is vectorized as
9169 BITOP2 (rhs1 BITOP1 rhs2) or
9170 rhs1 BITOP2 (BITOP1 rhs2)
9171 depending on bitop1 and bitop2 arity. */
9172 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9174 if (code == GT_EXPR)
9176 bitop1 = BIT_NOT_EXPR;
9177 bitop2 = BIT_AND_EXPR;
9179 else if (code == GE_EXPR)
9181 bitop1 = BIT_NOT_EXPR;
9182 bitop2 = BIT_IOR_EXPR;
9184 else if (code == LT_EXPR)
9186 bitop1 = BIT_NOT_EXPR;
9187 bitop2 = BIT_AND_EXPR;
9188 std::swap (rhs1, rhs2);
9189 std::swap (dts[0], dts[1]);
9191 else if (code == LE_EXPR)
9193 bitop1 = BIT_NOT_EXPR;
9194 bitop2 = BIT_IOR_EXPR;
9195 std::swap (rhs1, rhs2);
9196 std::swap (dts[0], dts[1]);
9198 else
9200 bitop1 = BIT_XOR_EXPR;
9201 if (code == EQ_EXPR)
9202 bitop2 = BIT_NOT_EXPR;
9206 if (!vec_stmt)
9208 if (bitop1 == NOP_EXPR)
9210 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9211 return false;
9213 else
9215 machine_mode mode = TYPE_MODE (vectype);
9216 optab optab;
9218 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9219 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9220 return false;
9222 if (bitop2 != NOP_EXPR)
9224 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9225 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9226 return false;
9230 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9231 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9232 dts, ndts, slp_node, cost_vec);
9233 return true;
9236 /* Transform. */
9237 if (!slp_node)
9239 vec_oprnds0.create (1);
9240 vec_oprnds1.create (1);
9243 /* Handle def. */
9244 lhs = gimple_assign_lhs (stmt);
9245 mask = vect_create_destination_var (lhs, mask_type);
9247 /* Handle cmp expr. */
9248 for (j = 0; j < ncopies; j++)
9250 gassign *new_stmt = NULL;
9251 if (j == 0)
9253 if (slp_node)
9255 auto_vec<tree, 2> ops;
9256 auto_vec<vec<tree>, 2> vec_defs;
9258 ops.safe_push (rhs1);
9259 ops.safe_push (rhs2);
9260 vect_get_slp_defs (ops, slp_node, &vec_defs);
9261 vec_oprnds1 = vec_defs.pop ();
9262 vec_oprnds0 = vec_defs.pop ();
9264 else
9266 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9267 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
9270 else
9272 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9273 vec_oprnds0.pop ());
9274 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9275 vec_oprnds1.pop ());
9278 if (!slp_node)
9280 vec_oprnds0.quick_push (vec_rhs1);
9281 vec_oprnds1.quick_push (vec_rhs2);
9284 /* Arguments are ready. Create the new vector stmt. */
9285 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9287 vec_rhs2 = vec_oprnds1[i];
9289 new_temp = make_ssa_name (mask);
9290 if (bitop1 == NOP_EXPR)
9292 new_stmt = gimple_build_assign (new_temp, code,
9293 vec_rhs1, vec_rhs2);
9294 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9296 else
9298 if (bitop1 == BIT_NOT_EXPR)
9299 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9300 else
9301 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9302 vec_rhs2);
9303 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9304 if (bitop2 != NOP_EXPR)
9306 tree res = make_ssa_name (mask);
9307 if (bitop2 == BIT_NOT_EXPR)
9308 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9309 else
9310 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9311 new_temp);
9312 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9315 if (slp_node)
9316 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9319 if (slp_node)
9320 continue;
9322 if (j == 0)
9323 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9324 else
9325 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9327 prev_stmt_info = vinfo_for_stmt (new_stmt);
9330 vec_oprnds0.release ();
9331 vec_oprnds1.release ();
9333 return true;
9336 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9337 can handle all live statements in the node. Otherwise return true
9338 if STMT is not live or if vectorizable_live_operation can handle it.
9339 GSI and VEC_STMT are as for vectorizable_live_operation. */
9341 static bool
9342 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
9343 slp_tree slp_node, gimple **vec_stmt,
9344 stmt_vector_for_cost *cost_vec)
9346 if (slp_node)
9348 gimple *slp_stmt;
9349 unsigned int i;
9350 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
9352 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
9353 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9354 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
9355 vec_stmt, cost_vec))
9356 return false;
9359 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
9360 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt,
9361 cost_vec))
9362 return false;
9364 return true;
9367 /* Make sure the statement is vectorizable. */
9369 bool
9370 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
9371 slp_instance node_instance, stmt_vector_for_cost *cost_vec)
9373 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9374 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9375 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9376 bool ok;
9377 gimple *pattern_stmt;
9378 gimple_seq pattern_def_seq;
9380 if (dump_enabled_p ())
9382 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9383 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9386 if (gimple_has_volatile_ops (stmt))
9388 if (dump_enabled_p ())
9389 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9390 "not vectorized: stmt has volatile operands\n");
9392 return false;
9395 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9396 && node == NULL
9397 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9399 gimple_stmt_iterator si;
9401 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9403 gimple *pattern_def_stmt = gsi_stmt (si);
9404 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
9405 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
9407 /* Analyze def stmt of STMT if it's a pattern stmt. */
9408 if (dump_enabled_p ())
9410 dump_printf_loc (MSG_NOTE, vect_location,
9411 "==> examining pattern def statement: ");
9412 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
9415 if (!vect_analyze_stmt (pattern_def_stmt,
9416 need_to_vectorize, node, node_instance,
9417 cost_vec))
9418 return false;
9423 /* Skip stmts that do not need to be vectorized. In loops this is expected
9424 to include:
9425 - the COND_EXPR which is the loop exit condition
9426 - any LABEL_EXPRs in the loop
9427 - computations that are used only for array indexing or loop control.
9428 In basic blocks we only analyze statements that are a part of some SLP
9429 instance, therefore, all the statements are relevant.
9431 A pattern statement needs to be analyzed instead of the original statement
9432 if the original statement is not relevant. Otherwise, we analyze both
9433 statements. In basic blocks we are called from some SLP instance
9434 traversal, so don't analyze pattern stmts here; the pattern stmts
9435 are already part of an SLP instance. */
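/* For example, in a loop such as
       for (i = 0; i < n; i++)
         a[i] = b[i] + 1;
   the increment of i and the comparison i < n are used only for loop
   control, so they are marked irrelevant and skipped here.  */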
9437 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
9438 if (!STMT_VINFO_RELEVANT_P (stmt_info)
9439 && !STMT_VINFO_LIVE_P (stmt_info))
9441 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9442 && pattern_stmt
9443 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9444 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9446 /* Analyze PATTERN_STMT instead of the original stmt. */
9447 stmt = pattern_stmt;
9448 stmt_info = vinfo_for_stmt (pattern_stmt);
9449 if (dump_enabled_p ())
9451 dump_printf_loc (MSG_NOTE, vect_location,
9452 "==> examining pattern statement: ");
9453 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9456 else
9458 if (dump_enabled_p ())
9459 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9461 return true;
9464 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9465 && node == NULL
9466 && pattern_stmt
9467 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9468 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9470 /* Analyze PATTERN_STMT too. */
9471 if (dump_enabled_p ())
9473 dump_printf_loc (MSG_NOTE, vect_location,
9474 "==> examining pattern statement: ");
9475 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9478 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
9479 node_instance, cost_vec))
9480 return false;
9483 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9485 case vect_internal_def:
9486 break;
9488 case vect_reduction_def:
9489 case vect_nested_cycle:
9490 gcc_assert (!bb_vinfo
9491 && (relevance == vect_used_in_outer
9492 || relevance == vect_used_in_outer_by_reduction
9493 || relevance == vect_used_by_reduction
9494 || relevance == vect_unused_in_scope
9495 || relevance == vect_used_only_live));
9496 break;
9498 case vect_induction_def:
9499 gcc_assert (!bb_vinfo);
9500 break;
9502 case vect_constant_def:
9503 case vect_external_def:
9504 case vect_unknown_def_type:
9505 default:
9506 gcc_unreachable ();
9509 if (STMT_VINFO_RELEVANT_P (stmt_info))
9511 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
9512 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9513 || (is_gimple_call (stmt)
9514 && gimple_call_lhs (stmt) == NULL_TREE));
9515 *need_to_vectorize = true;
9518 if (PURE_SLP_STMT (stmt_info) && !node)
9520 dump_printf_loc (MSG_NOTE, vect_location,
9521 "handled only by SLP analysis\n");
9522 return true;
9525 ok = true;
9526 if (!bb_vinfo
9527 && (STMT_VINFO_RELEVANT_P (stmt_info)
9528 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9529 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9530 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9531 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9532 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9533 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9534 || vectorizable_load (stmt, NULL, NULL, node, node_instance, cost_vec)
9535 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9536 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9537 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance,
9538 cost_vec)
9539 || vectorizable_induction (stmt, NULL, NULL, node, cost_vec)
9540 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node, cost_vec)
9541 || vectorizable_comparison (stmt, NULL, NULL, NULL, node, cost_vec));
9542 else
9544 if (bb_vinfo)
9545 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9546 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9547 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9548 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9549 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9550 || vectorizable_load (stmt, NULL, NULL, node, node_instance,
9551 cost_vec)
9552 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9553 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9554 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node,
9555 cost_vec)
9556 || vectorizable_comparison (stmt, NULL, NULL, NULL, node,
9557 cost_vec));
9560 if (!ok)
9562 if (dump_enabled_p ())
9564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9565 "not vectorized: relevant stmt not ");
9566 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9567 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9570 return false;
9573 /* Stmts that are (also) "live" (i.e., used outside the loop)
9574 need extra handling, except for vectorizable reductions. */
9575 if (!bb_vinfo
9576 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9577 && !can_vectorize_live_stmts (stmt, NULL, node, NULL, cost_vec))
9579 if (dump_enabled_p ())
9581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9582 "not vectorized: live stmt not supported: ");
9583 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9586 return false;
9589 return true;
9593 /* Function vect_transform_stmt.
9595 Create a vectorized stmt to replace STMT, and insert it at GSI. */
9597 bool
9598 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
9599 bool *grouped_store, slp_tree slp_node,
9600 slp_instance slp_node_instance)
9602 bool is_store = false;
9603 gimple *vec_stmt = NULL;
9604 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9605 bool done;
9607 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9608 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9610 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9611 && nested_in_vect_loop_p
9612 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9613 stmt));
9615 switch (STMT_VINFO_TYPE (stmt_info))
9617 case type_demotion_vec_info_type:
9618 case type_promotion_vec_info_type:
9619 case type_conversion_vec_info_type:
9620 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node, NULL);
9621 gcc_assert (done);
9622 break;
9624 case induc_vec_info_type:
9625 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node, NULL);
9626 gcc_assert (done);
9627 break;
9629 case shift_vec_info_type:
9630 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node, NULL);
9631 gcc_assert (done);
9632 break;
9634 case op_vec_info_type:
9635 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node, NULL);
9636 gcc_assert (done);
9637 break;
9639 case assignment_vec_info_type:
9640 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node, NULL);
9641 gcc_assert (done);
9642 break;
9644 case load_vec_info_type:
9645 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
9646 slp_node_instance, NULL);
9647 gcc_assert (done);
9648 break;
9650 case store_vec_info_type:
9651 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node, NULL);
9652 gcc_assert (done);
9653 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9655 /* In case of interleaving, the whole chain is vectorized when the
9656 last store in the chain is reached. Store stmts before the last
9657 one are skipped, and their vec_stmt_info shouldn't be freed
9658 meanwhile. */
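/* For instance (illustrative), for an interleaved pair of stores such as
       a[2*i] = x;  a[2*i+1] = y;
   the chain is vectorized only once the second (last) store of the group
   is reached; the first store is skipped until then.  */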
9659 *grouped_store = true;
9660 stmt_vec_info group_info
9661 = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info));
9662 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
9663 is_store = true;
9665 else
9666 is_store = true;
9667 break;
9669 case condition_vec_info_type:
9670 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node, NULL);
9671 gcc_assert (done);
9672 break;
9674 case comparison_vec_info_type:
9675 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node, NULL);
9676 gcc_assert (done);
9677 break;
9679 case call_vec_info_type:
9680 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node, NULL);
9681 stmt = gsi_stmt (*gsi);
9682 break;
9684 case call_simd_clone_vec_info_type:
9685 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node, NULL);
9686 stmt = gsi_stmt (*gsi);
9687 break;
9689 case reduc_vec_info_type:
9690 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
9691 slp_node_instance, NULL);
9692 gcc_assert (done);
9693 break;
9695 default:
9696 if (!STMT_VINFO_LIVE_P (stmt_info))
9698 if (dump_enabled_p ())
9699 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9700 "stmt not supported.\n");
9701 gcc_unreachable ();
9705 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9706 This would break hybrid SLP vectorization. */
9707 if (slp_node)
9708 gcc_assert (!vec_stmt
9709 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
9711 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9712 is being vectorized, but outside the immediately enclosing loop. */
9713 if (vec_stmt
9714 && nested_p
9715 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9716 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9717 || STMT_VINFO_RELEVANT (stmt_info) ==
9718 vect_used_in_outer_by_reduction))
9720 struct loop *innerloop = LOOP_VINFO_LOOP (
9721 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9722 imm_use_iterator imm_iter;
9723 use_operand_p use_p;
9724 tree scalar_dest;
9725 gimple *exit_phi;
9727 if (dump_enabled_p ())
9728 dump_printf_loc (MSG_NOTE, vect_location,
9729 "Record the vdef for outer-loop vectorization.\n");
9731 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9732 (to be used when vectorizing outer-loop stmts that use the DEF of
9733 STMT). */
9734 if (gimple_code (stmt) == GIMPLE_PHI)
9735 scalar_dest = PHI_RESULT (stmt);
9736 else
9737 scalar_dest = gimple_assign_lhs (stmt);
9739 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9741 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9743 exit_phi = USE_STMT (use_p);
9744 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9749 /* Handle stmts whose DEF is used outside the loop-nest that is
9750 being vectorized. */
9751 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9753 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt, NULL);
9754 gcc_assert (done);
9757 if (vec_stmt)
9758 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9760 return is_store;
9764 /* Remove a group of stores (for SLP or interleaving), free their
9765 stmt_vec_info. */
9767 void
9768 vect_remove_stores (gimple *first_stmt)
9770 gimple *next = first_stmt;
9771 gimple *tmp;
9772 gimple_stmt_iterator next_si;
9774 while (next)
9776 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9778 tmp = DR_GROUP_NEXT_ELEMENT (stmt_info);
9779 if (is_pattern_stmt_p (stmt_info))
9780 next = STMT_VINFO_RELATED_STMT (stmt_info);
9781 /* Free the attached stmt_vec_info and remove the stmt. */
9782 next_si = gsi_for_stmt (next);
9783 unlink_stmt_vdef (next);
9784 gsi_remove (&next_si, true);
9785 release_defs (next);
9786 free_stmt_vec_info (next);
9787 next = tmp;
9792 /* Function new_stmt_vec_info.
9794 Create and initialize a new stmt_vec_info struct for STMT. */
9796 stmt_vec_info
9797 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
9799 stmt_vec_info res;
9800 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9802 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9803 STMT_VINFO_STMT (res) = stmt;
9804 res->vinfo = vinfo;
9805 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
9806 STMT_VINFO_LIVE_P (res) = false;
9807 STMT_VINFO_VECTYPE (res) = NULL;
9808 STMT_VINFO_VEC_STMT (res) = NULL;
9809 STMT_VINFO_VECTORIZABLE (res) = true;
9810 STMT_VINFO_IN_PATTERN_P (res) = false;
9811 STMT_VINFO_RELATED_STMT (res) = NULL;
9812 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
9813 STMT_VINFO_DATA_REF (res) = NULL;
9814 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
9815 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
9817 if (gimple_code (stmt) == GIMPLE_PHI
9818 && is_loop_header_bb_p (gimple_bb (stmt)))
9819 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9820 else
9821 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9823 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9824 STMT_SLP_TYPE (res) = loop_vect;
9825 STMT_VINFO_NUM_SLP_USES (res) = 0;
9827 res->first_element = NULL; /* GROUP_FIRST_ELEMENT */
9828 res->next_element = NULL; /* GROUP_NEXT_ELEMENT */
9829 res->size = 0; /* GROUP_SIZE */
9830 res->store_count = 0; /* GROUP_STORE_COUNT */
9831 res->gap = 0; /* GROUP_GAP */
9832 res->same_dr_stmt = NULL; /* GROUP_SAME_DR_STMT */
9834 /* This is really "uninitialized" until vect_compute_data_ref_alignment. */
9835 res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;
9837 return res;
9841 /* Set the current stmt_vec_info vector to V. */
9843 void
9844 set_stmt_vec_info_vec (vec<stmt_vec_info> *v)
9846 stmt_vec_info_vec = v;
9849 /* Free the stmt_vec_info entries in V and release V. */
9851 void
9852 free_stmt_vec_infos (vec<stmt_vec_info> *v)
9854 unsigned int i;
9855 stmt_vec_info info;
9856 FOR_EACH_VEC_ELT (*v, i, info)
9857 if (info != NULL)
9858 free_stmt_vec_info (STMT_VINFO_STMT (info));
9859 if (v == stmt_vec_info_vec)
9860 stmt_vec_info_vec = NULL;
9861 v->release ();
9865 /* Free stmt vectorization related info. */
9867 void
9868 free_stmt_vec_info (gimple *stmt)
9870 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9872 if (!stmt_info)
9873 return;
9875 /* Check if this statement has a related "pattern stmt"
9876 (introduced by the vectorizer during the pattern recognition
9877 pass). Free the pattern's stmt_vec_info and the def stmts'
9878 stmt_vec_infos too. */
9879 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9881 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
9882 for (gimple_stmt_iterator si = gsi_start (seq);
9883 !gsi_end_p (si); gsi_next (&si))
9885 gimple *seq_stmt = gsi_stmt (si);
9886 gimple_set_bb (seq_stmt, NULL);
9887 tree lhs = gimple_get_lhs (seq_stmt);
9888 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9889 release_ssa_name (lhs);
9890 free_stmt_vec_info (seq_stmt);
9892 stmt_vec_info patt_info
9893 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9894 if (patt_info)
9896 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9897 gimple_set_bb (patt_stmt, NULL);
9898 tree lhs = gimple_get_lhs (patt_stmt);
9899 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9900 release_ssa_name (lhs);
9901 free_stmt_vec_info (patt_stmt);
9905 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9906 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9907 set_vinfo_for_stmt (stmt, NULL);
9908 free (stmt_info);
9912 /* Function get_vectype_for_scalar_type_and_size.
9914 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9915 by the target. */
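/* As an illustration: for SCALAR_TYPE int (4 bytes) and SIZE 16 bytes this
   would return a 4-element integer vector type, provided the target
   supports such a vector mode; with SIZE equal to 0 the target's preferred
   SIMD mode is used instead.  */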
9917 tree
9918 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9920 tree orig_scalar_type = scalar_type;
9921 scalar_mode inner_mode;
9922 machine_mode simd_mode;
9923 poly_uint64 nunits;
9924 tree vectype;
9926 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9927 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9928 return NULL_TREE;
9930 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9932 /* For vector types of elements whose mode precision doesn't
9933 match their type's precision we use an element type of mode
9934 precision. The vectorization routines will have to make sure
9935 they support the proper result truncation/extension.
9936 We also make sure to build vector types with INTEGER_TYPE
9937 component type only. */
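/* For instance, an integral type whose TYPE_PRECISION (say 1 bit) is
   smaller than the bitsize of its mode (QImode, say) is replaced here by
   an 8-bit integer type of the same signedness before the vector type is
   built.  */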
9938 if (INTEGRAL_TYPE_P (scalar_type)
9939 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9940 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9941 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9942 TYPE_UNSIGNED (scalar_type));
9944 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9945 When the component mode passes the above test, simply use a type
9946 corresponding to that mode. The theory is that any use that
9947 would cause problems with this will disable vectorization anyway. */
9948 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9949 && !INTEGRAL_TYPE_P (scalar_type))
9950 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9952 /* We can't build a vector type of elements with alignment bigger than
9953 their size. */
9954 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9955 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9956 TYPE_UNSIGNED (scalar_type));
9958 /* If we fell back to using the mode, fail if there was
9959 no scalar type for it. */
9960 if (scalar_type == NULL_TREE)
9961 return NULL_TREE;
9963 /* If no size was supplied use the mode the target prefers. Otherwise
9964 look up a vector mode of the specified size. */
9965 if (known_eq (size, 0U))
9966 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9967 else if (!multiple_p (size, nbytes, &nunits)
9968 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9969 return NULL_TREE;
9970 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9971 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9972 return NULL_TREE;
9974 vectype = build_vector_type (scalar_type, nunits);
9976 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9977 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9978 return NULL_TREE;
9980 /* Re-attach the address-space qualifier if we canonicalized the scalar
9981 type. */
9982 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9983 return build_qualified_type
9984 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9986 return vectype;
9989 poly_uint64 current_vector_size;
9991 /* Function get_vectype_for_scalar_type.
9993 Returns the vector type corresponding to SCALAR_TYPE as supported
9994 by the target. */
9996 tree
9997 get_vectype_for_scalar_type (tree scalar_type)
9999 tree vectype;
10000 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
10001 current_vector_size);
10002 if (vectype
10003 && known_eq (current_vector_size, 0U))
10004 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
10005 return vectype;
10008 /* Function get_mask_type_for_scalar_type.
10010 Returns the mask type corresponding to the result of a comparison
10011 of vectors of the specified SCALAR_TYPE, as supported by the target. */
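/* Roughly speaking, the returned mask type has the same number of elements
   as the data vector type for SCALAR_TYPE; whether its elements are single
   mask bits or integer 0/-1 values depends on the target, via
   build_truth_vector_type.  */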
10013 tree
10014 get_mask_type_for_scalar_type (tree scalar_type)
10016 tree vectype = get_vectype_for_scalar_type (scalar_type);
10018 if (!vectype)
10019 return NULL;
10021 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
10022 current_vector_size);
10025 /* Function get_same_sized_vectype
10027 Returns a vector type corresponding to SCALAR_TYPE with the same
10028 size as VECTOR_TYPE, if supported by the target. */
10030 tree
10031 get_same_sized_vectype (tree scalar_type, tree vector_type)
10033 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10034 return build_same_sized_truth_vector_type (vector_type);
10036 return get_vectype_for_scalar_type_and_size
10037 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
10040 /* Function vect_is_simple_use.
10042 Input:
10043 VINFO - the vect info of the loop or basic block that is being vectorized.
10044 OPERAND - operand in the loop or bb.
10045 Output:
10046 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
10047 DT - the type of definition
10049 Returns whether a stmt with OPERAND can be vectorized.
10050 For loops, supportable operands are constants, loop invariants, and operands
10051 that are defined by the current iteration of the loop. Unsupportable
10052 operands are those that are defined by a previous iteration of the loop (as
10053 is the case in reduction/induction computations).
10054 For basic blocks, supportable operands are constants and bb invariants.
10055 For now, operands defined outside the basic block are not supported. */
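/* For example (illustrative), in
       for (i = 0; i < n; i++)
         a[i] = b[i] * c + 4;
   the use of the value loaded from b[i] is a vect_internal_def, the use of
   the loop-invariant c is a vect_external_def, and the constant 4 is a
   vect_constant_def.  */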
10057 bool
10058 vect_is_simple_use (tree operand, vec_info *vinfo,
10059 gimple **def_stmt, enum vect_def_type *dt)
10061 *def_stmt = NULL;
10062 *dt = vect_unknown_def_type;
10064 if (dump_enabled_p ())
10066 dump_printf_loc (MSG_NOTE, vect_location,
10067 "vect_is_simple_use: operand ");
10068 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
10069 dump_printf (MSG_NOTE, "\n");
10072 if (CONSTANT_CLASS_P (operand))
10074 *dt = vect_constant_def;
10075 return true;
10078 if (is_gimple_min_invariant (operand))
10080 *dt = vect_external_def;
10081 return true;
10084 if (TREE_CODE (operand) != SSA_NAME)
10086 if (dump_enabled_p ())
10087 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10088 "not ssa-name.\n");
10089 return false;
10092 if (SSA_NAME_IS_DEFAULT_DEF (operand))
10094 *dt = vect_external_def;
10095 return true;
10098 *def_stmt = SSA_NAME_DEF_STMT (operand);
10099 if (dump_enabled_p ())
10101 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
10102 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
10105 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
10106 *dt = vect_external_def;
10107 else
10109 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
10110 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
10113 if (dump_enabled_p ())
10115 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
10116 switch (*dt)
10118 case vect_uninitialized_def:
10119 dump_printf (MSG_NOTE, "uninitialized\n");
10120 break;
10121 case vect_constant_def:
10122 dump_printf (MSG_NOTE, "constant\n");
10123 break;
10124 case vect_external_def:
10125 dump_printf (MSG_NOTE, "external\n");
10126 break;
10127 case vect_internal_def:
10128 dump_printf (MSG_NOTE, "internal\n");
10129 break;
10130 case vect_induction_def:
10131 dump_printf (MSG_NOTE, "induction\n");
10132 break;
10133 case vect_reduction_def:
10134 dump_printf (MSG_NOTE, "reduction\n");
10135 break;
10136 case vect_double_reduction_def:
10137 dump_printf (MSG_NOTE, "double reduction\n");
10138 break;
10139 case vect_nested_cycle:
10140 dump_printf (MSG_NOTE, "nested cycle\n");
10141 break;
10142 case vect_unknown_def_type:
10143 dump_printf (MSG_NOTE, "unknown\n");
10144 break;
10148 if (*dt == vect_unknown_def_type)
10150 if (dump_enabled_p ())
10151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10152 "Unsupported pattern.\n");
10153 return false;
10156 switch (gimple_code (*def_stmt))
10158 case GIMPLE_PHI:
10159 case GIMPLE_ASSIGN:
10160 case GIMPLE_CALL:
10161 break;
10162 default:
10163 if (dump_enabled_p ())
10164 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10165 "unsupported defining stmt:\n");
10166 return false;
10169 return true;
10172 /* Function vect_is_simple_use.
10174 Same as vect_is_simple_use but also determines the vector operand
10175 type of OPERAND and stores it to *VECTYPE. If the definition of
10176 OPERAND is vect_uninitialized_def, vect_constant_def or
10177 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
10178 is responsible for computing the best suited vector type for the
10179 scalar operand. */
10181 bool
10182 vect_is_simple_use (tree operand, vec_info *vinfo,
10183 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
10185 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
10186 return false;
10188 /* Now get a vector type if the def is internal, otherwise supply
10189 NULL_TREE and leave it up to the caller to figure out a proper
10190 type for the use stmt. */
10191 if (*dt == vect_internal_def
10192 || *dt == vect_induction_def
10193 || *dt == vect_reduction_def
10194 || *dt == vect_double_reduction_def
10195 || *dt == vect_nested_cycle)
10197 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
10199 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10200 && !STMT_VINFO_RELEVANT (stmt_info)
10201 && !STMT_VINFO_LIVE_P (stmt_info))
10202 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
10204 *vectype = STMT_VINFO_VECTYPE (stmt_info);
10205 gcc_assert (*vectype != NULL_TREE);
10207 else if (*dt == vect_uninitialized_def
10208 || *dt == vect_constant_def
10209 || *dt == vect_external_def)
10210 *vectype = NULL_TREE;
10211 else
10212 gcc_unreachable ();
10214 return true;
10218 /* Function supportable_widening_operation
10220 Check whether an operation represented by the code CODE is a
10221 widening operation that is supported by the target platform in
10222 vector form (i.e., when operating on arguments of type VECTYPE_IN
10223 producing a result of type VECTYPE_OUT).
10225 Widening operations we currently support are NOP (CONVERT), FLOAT,
10226 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10227 are supported by the target platform either directly (via vector
10228 tree-codes), or via target builtins.
10230 Output:
10231 - CODE1 and CODE2 are codes of vector operations to be used when
10232 vectorizing the operation, if available.
10233 - MULTI_STEP_CVT determines the number of required intermediate steps in
10234 case of multi-step conversion (like char->short->int - in that case
10235 MULTI_STEP_CVT will be 1).
10236 - INTERM_TYPES contains the intermediate type required to perform the
10237 widening operation (short in the above example). */
10239 bool
10240 supportable_widening_operation (enum tree_code code, gimple *stmt,
10241 tree vectype_out, tree vectype_in,
10242 enum tree_code *code1, enum tree_code *code2,
10243 int *multi_step_cvt,
10244 vec<tree> *interm_types)
10246 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
10247 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10248 struct loop *vect_loop = NULL;
10249 machine_mode vec_mode;
10250 enum insn_code icode1, icode2;
10251 optab optab1, optab2;
10252 tree vectype = vectype_in;
10253 tree wide_vectype = vectype_out;
10254 enum tree_code c1, c2;
10255 int i;
10256 tree prev_type, intermediate_type;
10257 machine_mode intermediate_mode, prev_mode;
10258 optab optab3, optab4;
10260 *multi_step_cvt = 0;
10261 if (loop_info)
10262 vect_loop = LOOP_VINFO_LOOP (loop_info);
10264 switch (code)
10266 case WIDEN_MULT_EXPR:
10267 /* The result of a vectorized widening operation usually requires
10268 two vectors (because the widened results do not fit into one vector).
10269 The generated vector results would normally be expected to be
10270 generated in the same order as in the original scalar computation,
10271 i.e. if 8 results are generated in each vector iteration, they are
10272 to be organized as follows:
10273 vect1: [res1,res2,res3,res4],
10274 vect2: [res5,res6,res7,res8].
10276 However, in the special case that the result of the widening
10277 operation is used in a reduction computation only, the order doesn't
10278 matter (because when vectorizing a reduction we change the order of
10279 the computation). Some targets can take advantage of this and
10280 generate more efficient code. For example, targets like Altivec,
10281 that support widen_mult using a sequence of {mult_even,mult_odd}
10282 generate the following vectors:
10283 vect1: [res1,res3,res5,res7],
10284 vect2: [res2,res4,res6,res8].
10286 When vectorizing outer-loops, we execute the inner-loop sequentially
10287 (each vectorized inner-loop iteration contributes to VF outer-loop
10288 iterations in parallel). We therefore don't allow changing the
10289 order of the computation in the inner-loop during outer-loop
10290 vectorization. */
10291 /* TODO: Another case in which order doesn't *really* matter is when we
10292 widen and then contract again, e.g. (short)((int)x * y >> 8).
10293 Normally, pack_trunc performs an even/odd permute, whereas the
10294 repack from an even/odd expansion would be an interleave, which
10295 would be significantly simpler for e.g. AVX2. */
10296 /* In any case, in order to avoid duplicating the code below, recurse
10297 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10298 are properly set up for the caller. If we fail, we'll continue with
10299 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10300 if (vect_loop
10301 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10302 && !nested_in_vect_loop_p (vect_loop, stmt)
10303 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10304 stmt, vectype_out, vectype_in,
10305 code1, code2, multi_step_cvt,
10306 interm_types))
10308 /* Elements in a vector with the vect_used_by_reduction property cannot
10309 be reordered if the use chain with this property does not have the
10310 same operation. One such example is s += a * b, where elements
10311 in a and b cannot be reordered. Here we check if the vector defined
10312 by STMT is only directly used in the reduction statement. */
10313 tree lhs = gimple_assign_lhs (stmt);
10314 use_operand_p dummy;
10315 gimple *use_stmt;
10316 stmt_vec_info use_stmt_info = NULL;
10317 if (single_imm_use (lhs, &dummy, &use_stmt)
10318 && (use_stmt_info = vinfo_for_stmt (use_stmt))
10319 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10320 return true;
10322 c1 = VEC_WIDEN_MULT_LO_EXPR;
10323 c2 = VEC_WIDEN_MULT_HI_EXPR;
10324 break;
10326 case DOT_PROD_EXPR:
10327 c1 = DOT_PROD_EXPR;
10328 c2 = DOT_PROD_EXPR;
10329 break;
10331 case SAD_EXPR:
10332 c1 = SAD_EXPR;
10333 c2 = SAD_EXPR;
10334 break;
10336 case VEC_WIDEN_MULT_EVEN_EXPR:
10337 /* Support the recursion induced just above. */
10338 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10339 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10340 break;
10342 case WIDEN_LSHIFT_EXPR:
10343 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10344 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10345 break;
10347 CASE_CONVERT:
10348 c1 = VEC_UNPACK_LO_EXPR;
10349 c2 = VEC_UNPACK_HI_EXPR;
10350 break;
10352 case FLOAT_EXPR:
10353 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10354 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10355 break;
10357 case FIX_TRUNC_EXPR:
10358 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10359 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10360 break;
10362 default:
10363 gcc_unreachable ();
10366 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10367 std::swap (c1, c2);
10369 if (code == FIX_TRUNC_EXPR)
10371 /* The signedness is determined from the output operand. */
10372 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10373 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10375 else
10377 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10378 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10381 if (!optab1 || !optab2)
10382 return false;
10384 vec_mode = TYPE_MODE (vectype);
10385 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10386 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10387 return false;
10389 *code1 = c1;
10390 *code2 = c2;
10392 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10393 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10394 /* For scalar masks we may have different boolean
10395 vector types having the same QImode. Thus we
10396 add an additional check on the number of elements. */
10397 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10398 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10399 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10401 /* Check if it's a multi-step conversion that can be done using intermediate
10402 types. */
10404 prev_type = vectype;
10405 prev_mode = vec_mode;
10407 if (!CONVERT_EXPR_CODE_P (code))
10408 return false;
10410 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10411 intermediate steps in the promotion sequence. We try
10412 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
10413 not. */
10414 interm_types->create (MAX_INTERM_CVT_STEPS);
10415 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10417 intermediate_mode = insn_data[icode1].operand[0].mode;
10418 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10420 intermediate_type = vect_halve_mask_nunits (prev_type);
10421 if (intermediate_mode != TYPE_MODE (intermediate_type))
10422 return false;
10424 else
10425 intermediate_type
10426 = lang_hooks.types.type_for_mode (intermediate_mode,
10427 TYPE_UNSIGNED (prev_type));
10429 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10430 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10432 if (!optab3 || !optab4
10433 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10434 || insn_data[icode1].operand[0].mode != intermediate_mode
10435 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10436 || insn_data[icode2].operand[0].mode != intermediate_mode
10437 || ((icode1 = optab_handler (optab3, intermediate_mode))
10438 == CODE_FOR_nothing)
10439 || ((icode2 = optab_handler (optab4, intermediate_mode))
10440 == CODE_FOR_nothing))
10441 break;
10443 interm_types->quick_push (intermediate_type);
10444 (*multi_step_cvt)++;
10446 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10447 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10448 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10449 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10450 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10452 prev_type = intermediate_type;
10453 prev_mode = intermediate_mode;
10456 interm_types->release ();
10457 return false;
10461 /* Function supportable_narrowing_operation
10463 Check whether an operation represented by the code CODE is a
10464 narrowing operation that is supported by the target platform in
10465 vector form (i.e., when operating on arguments of type VECTYPE_IN
10466 and producing a result of type VECTYPE_OUT).
10468 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10469 and FLOAT. This function checks if these operations are supported by
10470 the target platform directly via vector tree-codes.
10472 Output:
10473 - CODE1 is the code of a vector operation to be used when
10474 vectorizing the operation, if available.
10475 - MULTI_STEP_CVT determines the number of required intermediate steps in
10476 case of multi-step conversion (like int->short->char - in that case
10477 MULTI_STEP_CVT will be 1).
10478 - INTERM_TYPES contains the intermediate type required to perform the
10479 narrowing operation (short in the above example). */
10481 bool
10482 supportable_narrowing_operation (enum tree_code code,
10483 tree vectype_out, tree vectype_in,
10484 enum tree_code *code1, int *multi_step_cvt,
10485 vec<tree> *interm_types)
10487 machine_mode vec_mode;
10488 enum insn_code icode1;
10489 optab optab1, interm_optab;
10490 tree vectype = vectype_in;
10491 tree narrow_vectype = vectype_out;
10492 enum tree_code c1;
10493 tree intermediate_type, prev_type;
10494 machine_mode intermediate_mode, prev_mode;
10495 int i;
10496 bool uns;
10498 *multi_step_cvt = 0;
10499 switch (code)
10501 CASE_CONVERT:
10502 c1 = VEC_PACK_TRUNC_EXPR;
10503 break;
10505 case FIX_TRUNC_EXPR:
10506 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10507 break;
10509 case FLOAT_EXPR:
10510 c1 = VEC_PACK_FLOAT_EXPR;
10511 break;
10513 default:
10514 gcc_unreachable ();
10517 if (code == FIX_TRUNC_EXPR)
10518 /* The signedness is determined from the output operand. */
10519 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10520 else
10521 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10523 if (!optab1)
10524 return false;
10526 vec_mode = TYPE_MODE (vectype);
10527 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10528 return false;
10530 *code1 = c1;
10532 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10533 /* For scalar masks we may have different boolean
10534 vector types having the same QImode. Thus we
10535 add an additional check on the number of elements. */
10536 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10537 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10538 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10540 if (code == FLOAT_EXPR)
10541 return false;
10543 /* Check if it's a multi-step conversion that can be done using intermediate
10544 types. */
10545 prev_mode = vec_mode;
10546 prev_type = vectype;
10547 if (code == FIX_TRUNC_EXPR)
10548 uns = TYPE_UNSIGNED (vectype_out);
10549 else
10550 uns = TYPE_UNSIGNED (vectype);
10552 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10553 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10554 costly than signed. */
10555 if (code == FIX_TRUNC_EXPR && uns)
10557 enum insn_code icode2;
10559 intermediate_type
10560 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10561 interm_optab
10562 = optab_for_tree_code (c1, intermediate_type, optab_default);
10563 if (interm_optab != unknown_optab
10564 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10565 && insn_data[icode1].operand[0].mode
10566 == insn_data[icode2].operand[0].mode)
10568 uns = false;
10569 optab1 = interm_optab;
10570 icode1 = icode2;
10574 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10575 intermediate steps in the narrowing sequence. We try
10576 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
10577 interm_types->create (MAX_INTERM_CVT_STEPS);
10578 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10580 intermediate_mode = insn_data[icode1].operand[0].mode;
10581 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10583 intermediate_type = vect_double_mask_nunits (prev_type);
10584 if (intermediate_mode != TYPE_MODE (intermediate_type))
10585 return false;
10587 else
10588 intermediate_type
10589 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10590 interm_optab
10591 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10592 optab_default);
10593 if (!interm_optab
10594 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10595 || insn_data[icode1].operand[0].mode != intermediate_mode
10596 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10597 == CODE_FOR_nothing))
10598 break;
10600 interm_types->quick_push (intermediate_type);
10601 (*multi_step_cvt)++;
10603 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10604 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10605 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10606 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10608 prev_mode = intermediate_mode;
10609 prev_type = intermediate_type;
10610 optab1 = interm_optab;
10613 interm_types->release ();
10614 return false;
10617 /* Generate and return a statement that sets vector mask MASK such that
10618 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
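/* Worked example: for a 4-lane mask with START_INDEX 5 and END_INDEX 7 the
   lanes are { 5 < 7, 6 < 7, 7 < 7, 8 < 7 }, i.e. the first two lanes are
   true and the remaining ones false.  */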
10620 gcall *
10621 vect_gen_while (tree mask, tree start_index, tree end_index)
10623 tree cmp_type = TREE_TYPE (start_index);
10624 tree mask_type = TREE_TYPE (mask);
10625 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10626 cmp_type, mask_type,
10627 OPTIMIZE_FOR_SPEED));
10628 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10629 start_index, end_index,
10630 build_zero_cst (mask_type));
10631 gimple_call_set_lhs (call, mask);
10632 return call;
10635 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10636 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10638 tree
10639 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10640 tree end_index)
10642 tree tmp = make_ssa_name (mask_type);
10643 gcall *call = vect_gen_while (tmp, start_index, end_index);
10644 gimple_seq_add_stmt (seq, call);
10645 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10648 /* Try to compute the vector types required to vectorize STMT_INFO,
10649 returning true on success and false if vectorization isn't possible.
10651 On success:
10653 - Set *STMT_VECTYPE_OUT to:
10654 - NULL_TREE if the statement doesn't need to be vectorized;
10655 - boolean_type_node if the statement is a boolean operation whose
10656 vector type can only be determined once all the other vector types
10657 are known; and
10658 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10660 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10661 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10662 statement does not help to determine the overall number of units. */
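/* As an illustrative example, for a widening conversion such as
       int_res = (int) short_val;
   *STMT_VECTYPE_OUT would typically be a vector of ints, while
   *NUNITS_VECTYPE_OUT is derived from the smallest scalar type involved
   (short here), since that type determines the maximum number of units
   per vector.  */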
10664 bool
10665 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10666 tree *stmt_vectype_out,
10667 tree *nunits_vectype_out)
10669 gimple *stmt = stmt_info->stmt;
10671 *stmt_vectype_out = NULL_TREE;
10672 *nunits_vectype_out = NULL_TREE;
10674 if (gimple_get_lhs (stmt) == NULL_TREE
10675 /* MASK_STORE has no lhs, but is ok. */
10676 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10678 if (is_a <gcall *> (stmt))
10680 /* Ignore calls with no lhs. These must be calls to
10681 #pragma omp simd functions, and what vectorization factor
10682 they really need can't be determined until
10683 vectorizable_simd_clone_call. */
10684 if (dump_enabled_p ())
10685 dump_printf_loc (MSG_NOTE, vect_location,
10686 "defer to SIMD clone analysis.\n");
10687 return true;
10690 if (dump_enabled_p ())
10692 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10693 "not vectorized: irregular stmt.");
10694 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10696 return false;
10699 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10701 if (dump_enabled_p ())
10703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10704 "not vectorized: vector stmt in loop:");
10705 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10707 return false;
10710 tree vectype;
10711 tree scalar_type = NULL_TREE;
10712 if (STMT_VINFO_VECTYPE (stmt_info))
10713 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10714 else
10716 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10717 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10718 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10719 else
10720 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10722 /* Pure bool ops don't participate in number-of-units computation.
10723 For comparisons use the types being compared. */
10724 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10725 && is_gimple_assign (stmt)
10726 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10728 *stmt_vectype_out = boolean_type_node;
10730 tree rhs1 = gimple_assign_rhs1 (stmt);
10731 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10732 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10733 scalar_type = TREE_TYPE (rhs1);
10734 else
10736 if (dump_enabled_p ())
10737 dump_printf_loc (MSG_NOTE, vect_location,
10738 "pure bool operation.\n");
10739 return true;
10743 if (dump_enabled_p ())
10745 dump_printf_loc (MSG_NOTE, vect_location,
10746 "get vectype for scalar type: ");
10747 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10748 dump_printf (MSG_NOTE, "\n");
10750 vectype = get_vectype_for_scalar_type (scalar_type);
10751 if (!vectype)
10753 if (dump_enabled_p ())
10755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10756 "not vectorized: unsupported data-type ");
10757 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10758 scalar_type);
10759 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10761 return false;
10764 if (!*stmt_vectype_out)
10765 *stmt_vectype_out = vectype;
10767 if (dump_enabled_p ())
10769 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10770 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
10771 dump_printf (MSG_NOTE, "\n");
10775 /* Don't try to compute scalar types if the stmt produces a boolean
10776 vector; use the existing vector type instead. */
10777 tree nunits_vectype;
10778 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10779 nunits_vectype = vectype;
10780 else
10782 /* The number of units is set according to the smallest scalar
10783 type (or the largest vector size, but we only support one
10784 vector size per vectorization). */
10785 if (*stmt_vectype_out != boolean_type_node)
10787 HOST_WIDE_INT dummy;
10788 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
10790 if (dump_enabled_p ())
10792 dump_printf_loc (MSG_NOTE, vect_location,
10793 "get vectype for scalar type: ");
10794 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10795 dump_printf (MSG_NOTE, "\n");
10797 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10799 if (!nunits_vectype)
10801 if (dump_enabled_p ())
10803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10804 "not vectorized: unsupported data-type ");
10805 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
10806 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10808 return false;
10811 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10812 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10814 if (dump_enabled_p ())
10816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10817 "not vectorized: different sized vector "
10818 "types in statement, ");
10819 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
10820 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10821 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, nunits_vectype);
10822 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10824 return false;
10827 if (dump_enabled_p ())
10829 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10830 dump_generic_expr (MSG_NOTE, TDF_SLIM, nunits_vectype);
10831 dump_printf (MSG_NOTE, "\n");
10833 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10834 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10835 dump_printf (MSG_NOTE, "\n");
10838 *nunits_vectype_out = nunits_vectype;
10839 return true;
10842 /* Try to determine the correct vector type for STMT_INFO, which is a
10843 statement that produces a scalar boolean result. Return the vector
10844 type on success, otherwise return NULL_TREE. */
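/* For example (illustrative), for a comparison producing a scalar boolean
       flag_1 = x_2 < y_3;
   with x and y of type int, the mask type is derived from the compared
   type (int) rather than from the boolean result, so it has as many
   elements as a vector of ints.  */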
10846 tree
10847 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10849 gimple *stmt = stmt_info->stmt;
10850 tree mask_type = NULL;
10851 tree vectype, scalar_type;
10853 if (is_gimple_assign (stmt)
10854 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10855 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10857 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10858 mask_type = get_mask_type_for_scalar_type (scalar_type);
10860 if (!mask_type)
10862 if (dump_enabled_p ())
10863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10864 "not vectorized: unsupported mask\n");
10865 return NULL_TREE;
10868 else
10870 tree rhs;
10871 ssa_op_iter iter;
10872 gimple *def_stmt;
10873 enum vect_def_type dt;
10875 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10877 if (!vect_is_simple_use (rhs, stmt_info->vinfo,
10878 &def_stmt, &dt, &vectype))
10880 if (dump_enabled_p ())
10882 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10883 "not vectorized: can't compute mask type "
10884 "for statement, ");
10885 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
10888 return NULL_TREE;
10891 /* No vectype probably means external definition.
10892 Allow it in case there is another operand which
10893 allows us to determine the mask type. */
10894 if (!vectype)
10895 continue;
10897 if (!mask_type)
10898 mask_type = vectype;
10899 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10900 TYPE_VECTOR_SUBPARTS (vectype)))
10902 if (dump_enabled_p ())
10904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10905 "not vectorized: different sized masks "
10906 "types in statement, ");
10907 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10908 mask_type);
10909 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10910 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10911 vectype);
10912 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10914 return NULL_TREE;
10916 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10917 != VECTOR_BOOLEAN_TYPE_P (vectype))
10919 if (dump_enabled_p ())
10921 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10922 "not vectorized: mixed mask and "
10923 "nonmask vector types in statement, ");
10924 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10925 mask_type);
10926 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10927 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10928 vectype);
10929 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10931 return NULL_TREE;
10935 /* We may compare boolean values loaded as a vector of integers.
10936 Fix mask_type in such a case. */
10937 if (mask_type
10938 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10939 && gimple_code (stmt) == GIMPLE_ASSIGN
10940 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10941 mask_type = build_same_sized_truth_vector_type (mask_type);
10944 /* A missing mask_type should mean a loop-invariant predicate.
10945 This is probably a subject for optimization in if-conversion. */
10946 if (!mask_type && dump_enabled_p ())
10948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10949 "not vectorized: can't compute mask type "
10950 "for statement, ");
10951 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10953 return mask_type;