gcc/tree-vect-stmts.c  (official-gcc.git, blob 9f365e31e4954cfef25c814220fb0fda1c7e3d11)
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
64 return STMT_VINFO_VECTYPE (stmt_info);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
77 if (!loop_vinfo)
78 return false;
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
82 return (bb->loop_father == loop->inner);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 int misalign, enum vect_cost_model_location where)
94 if ((kind == vector_load || kind == unaligned_load)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96 kind = vector_gather_load;
97 if ((kind == vector_store || kind == unaligned_store)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_scatter_store;
101 stmt_info_for_cost si = { count, kind, where,
102 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
103 misalign };
104 body_cost_vec->safe_push (si);
106 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
107 return (unsigned)
108 (builtin_vectorization_cost (kind, vectype, misalign) * count);
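/* A hypothetical caller-side sketch (illustration only, mirroring the
   calls made further down in this file): costing one broadcast of an
   invariant operand into the loop prologue would be written as

     prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                        stmt_info, 0, vect_prologue);

   the cost is pushed onto COST_VEC for later processing by the target
   cost model and also returned as a preliminary estimate.  */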
111 /* Return a variable of type ELEM_TYPE[NELEMS]. */
113 static tree
114 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
116 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
117 "vect_array");
120 /* ARRAY is an array of vectors created by create_vector_array.
121 Return an SSA_NAME for the vector in index N. The reference
122 is part of the vectorization of STMT and the vector is associated
123 with scalar destination SCALAR_DEST. */
125 static tree
126 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
127 tree array, unsigned HOST_WIDE_INT n)
129 tree vect_type, vect, vect_name, array_ref;
130 gimple *new_stmt;
132 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
133 vect_type = TREE_TYPE (TREE_TYPE (array));
134 vect = vect_create_destination_var (scalar_dest, vect_type);
135 array_ref = build4 (ARRAY_REF, vect_type, array,
136 build_int_cst (size_type_node, n),
137 NULL_TREE, NULL_TREE);
139 new_stmt = gimple_build_assign (vect, array_ref);
140 vect_name = make_ssa_name (vect, new_stmt);
141 gimple_assign_set_lhs (new_stmt, vect_name);
142 vect_finish_stmt_generation (stmt, new_stmt, gsi);
144 return vect_name;
147 /* ARRAY is an array of vectors created by create_vector_array.
148 Emit code to store SSA_NAME VECT in index N of the array.
149 The store is part of the vectorization of STMT. */
151 static void
152 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
153 tree array, unsigned HOST_WIDE_INT n)
155 tree array_ref;
156 gimple *new_stmt;
158 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
159 build_int_cst (size_type_node, n),
160 NULL_TREE, NULL_TREE);
162 new_stmt = gimple_build_assign (array_ref, vect);
163 vect_finish_stmt_generation (stmt, new_stmt, gsi);
166 /* PTR is a pointer to an array of type TYPE. Return a representation
167 of *PTR. The memory reference replaces those in FIRST_DR
168 (and its group). */
170 static tree
171 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
173 tree mem_ref;
175 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
176 /* Arrays have the same alignment as their type. */
177 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
178 return mem_ref;
181 /* Add a clobber of variable VAR to the vectorization of STMT.
182 Emit the clobber before *GSI. */
184 static void
185 vect_clobber_variable (gimple *stmt, gimple_stmt_iterator *gsi, tree var)
187 tree clobber = build_clobber (TREE_TYPE (var));
188 gimple *new_stmt = gimple_build_assign (var, clobber);
189 vect_finish_stmt_generation (stmt, new_stmt, gsi);
192 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
194 /* Function vect_mark_relevant.
196 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
198 static void
199 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
200 enum vect_relevant relevant, bool live_p)
202 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
203 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
204 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
205 gimple *pattern_stmt;
207 if (dump_enabled_p ())
209 dump_printf_loc (MSG_NOTE, vect_location,
210 "mark relevant %d, live %d: ", relevant, live_p);
211 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
227 if (dump_enabled_p ())
228 dump_printf_loc (MSG_NOTE, vect_location,
229 "last stmt in pattern. don't mark"
230 " relevant/live.\n");
231 stmt_info = vinfo_for_stmt (pattern_stmt);
232 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
233 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
234 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
235 stmt = pattern_stmt;
238 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
239 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
240 STMT_VINFO_RELEVANT (stmt_info) = relevant;
242 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
243 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
245 if (dump_enabled_p ())
246 dump_printf_loc (MSG_NOTE, vect_location,
247 "already marked relevant/live.\n");
248 return;
251 worklist->safe_push (stmt);
255 /* Function is_simple_and_all_uses_invariant
257 Return true if STMT is simple and all uses of it are invariant. */
259 bool
260 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
262 tree op;
263 gimple *def_stmt;
264 ssa_op_iter iter;
266 if (!is_gimple_assign (stmt))
267 return false;
269 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
271 enum vect_def_type dt = vect_uninitialized_def;
273 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
275 if (dump_enabled_p ())
276 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
277 "use not simple.\n");
278 return false;
281 if (dt != vect_external_def && dt != vect_constant_def)
282 return false;
284 return true;
287 /* Function vect_stmt_relevant_p.
289 Return true if STMT in loop that is represented by LOOP_VINFO is
290 "relevant for vectorization".
292 A stmt is considered "relevant for vectorization" if:
293 - it has uses outside the loop.
294 - it has vdefs (it alters memory).
295 - control stmts in the loop (except for the exit condition).
297 CHECKME: what other side effects would the vectorizer allow? */
299 static bool
300 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
301 enum vect_relevant *relevant, bool *live_p)
303 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
304 ssa_op_iter op_iter;
305 imm_use_iterator imm_iter;
306 use_operand_p use_p;
307 def_operand_p def_p;
309 *relevant = vect_unused_in_scope;
310 *live_p = false;
312 /* cond stmt other than loop exit cond. */
313 if (is_ctrl_stmt (stmt)
314 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
315 != loop_exit_ctrl_vec_info_type)
316 *relevant = vect_used_in_scope;
318 /* changing memory. */
319 if (gimple_code (stmt) != GIMPLE_PHI)
320 if (gimple_vdef (stmt)
321 && !gimple_clobber_p (stmt))
323 if (dump_enabled_p ())
324 dump_printf_loc (MSG_NOTE, vect_location,
325 "vec_stmt_relevant_p: stmt has vdefs.\n");
326 *relevant = vect_used_in_scope;
329 /* uses outside the loop. */
330 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
332 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
334 basic_block bb = gimple_bb (USE_STMT (use_p));
335 if (!flow_bb_inside_loop_p (loop, bb))
337 if (dump_enabled_p ())
338 dump_printf_loc (MSG_NOTE, vect_location,
339 "vec_stmt_relevant_p: used out of loop.\n");
341 if (is_gimple_debug (USE_STMT (use_p)))
342 continue;
344 /* We expect all such uses to be in the loop exit phis
345 (because of loop closed form) */
346 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
347 gcc_assert (bb == single_exit (loop)->dest);
349 *live_p = true;
354 if (*live_p && *relevant == vect_unused_in_scope
355 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
357 if (dump_enabled_p ())
358 dump_printf_loc (MSG_NOTE, vect_location,
359 "vec_stmt_relevant_p: stmt live but not relevant.\n");
360 *relevant = vect_used_only_live;
363 return (*live_p || *relevant);
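/* For illustration, a hypothetical scalar loop (not from a testcase):

     s = 0;
     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;    <-- has a vdef (alters memory): relevant
         s = s + b[i];       <-- result used after the loop: live
       }
     use (s);

   The store gets *relevant = vect_used_in_scope because of its vdef,
   while the summation is found live through its use in the loop-exit
   PHI and, not being otherwise relevant, ends up vect_used_only_live.  */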
367 /* Function exist_non_indexing_operands_for_use_p
369 USE is one of the uses attached to STMT. Check if USE is
370 used in STMT for anything other than indexing an array. */
372 static bool
373 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
375 tree operand;
376 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
378 /* USE corresponds to some operand in STMT. If there is no data
379 reference in STMT, then any operand that corresponds to USE
380 is not indexing an array. */
381 if (!STMT_VINFO_DATA_REF (stmt_info))
382 return true;
 384   /* STMT has a data_ref. FORNOW this means that it's of one of
385 the following forms:
386 -1- ARRAY_REF = var
387 -2- var = ARRAY_REF
388 (This should have been verified in analyze_data_refs).
390 'var' in the second case corresponds to a def, not a use,
391 so USE cannot correspond to any operands that are not used
392 for array indexing.
394 Therefore, all we need to check is if STMT falls into the
395 first case, and whether var corresponds to USE. */
397 if (!gimple_assign_copy_p (stmt))
399 if (is_gimple_call (stmt)
400 && gimple_call_internal_p (stmt))
402 internal_fn ifn = gimple_call_internal_fn (stmt);
403 int mask_index = internal_fn_mask_index (ifn);
404 if (mask_index >= 0
405 && use == gimple_call_arg (stmt, mask_index))
406 return true;
407 int stored_value_index = internal_fn_stored_value_index (ifn);
408 if (stored_value_index >= 0
409 && use == gimple_call_arg (stmt, stored_value_index))
410 return true;
411 if (internal_gather_scatter_fn_p (ifn)
412 && use == gimple_call_arg (stmt, 1))
413 return true;
415 return false;
418 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
419 return false;
420 operand = gimple_assign_rhs1 (stmt);
421 if (TREE_CODE (operand) != SSA_NAME)
422 return false;
424 if (operand == use)
425 return true;
427 return false;
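/* Hypothetical example for the check above: in the store

     a[i_1] = x_2;

   the use of i_1 only feeds the address computation of the ARRAY_REF,
   so the function returns false for it, whereas the stored value x_2 is
   the RHS of the copy and makes the function return true.  */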
 432 /* Function process_use.
434 Inputs:
435 - a USE in STMT in a loop represented by LOOP_VINFO
436 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
437 that defined USE. This is done by calling mark_relevant and passing it
438 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
439 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
440 be performed.
442 Outputs:
443 Generally, LIVE_P and RELEVANT are used to define the liveness and
444 relevance info of the DEF_STMT of this USE:
445 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
446 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
447 Exceptions:
448 - case 1: If USE is used only for address computations (e.g. array indexing),
449 which does not need to be directly vectorized, then the liveness/relevance
450 of the respective DEF_STMT is left unchanged.
451 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
 452    - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
        skip DEF_STMT because it has already been processed.
453 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
454 be modified accordingly.
456 Return true if everything is as expected. Return false otherwise. */
458 static bool
459 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
460 enum vect_relevant relevant, vec<gimple *> *worklist,
461 bool force)
463 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
464 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
465 stmt_vec_info dstmt_vinfo;
466 basic_block bb, def_bb;
467 gimple *def_stmt;
468 enum vect_def_type dt;
470 /* case 1: we are only interested in uses that need to be vectorized. Uses
471 that are used for address computation are not considered relevant. */
472 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
473 return true;
475 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
477 if (dump_enabled_p ())
478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
479 "not vectorized: unsupported use in stmt.\n");
480 return false;
483 if (!def_stmt || gimple_nop_p (def_stmt))
484 return true;
486 def_bb = gimple_bb (def_stmt);
487 if (!flow_bb_inside_loop_p (loop, def_bb))
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
491 return true;
494 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
495 DEF_STMT must have already been processed, because this should be the
496 only way that STMT, which is a reduction-phi, was put in the worklist,
497 as there should be no other uses for DEF_STMT in the loop. So we just
498 check that everything is as expected, and we are done. */
499 dstmt_vinfo = vinfo_for_stmt (def_stmt);
500 bb = gimple_bb (stmt);
501 if (gimple_code (stmt) == GIMPLE_PHI
502 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
503 && gimple_code (def_stmt) != GIMPLE_PHI
504 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
505 && bb->loop_father == def_bb->loop_father)
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_NOTE, vect_location,
509 "reduc-stmt defining reduc-phi in the same nest.\n");
510 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
511 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
512 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
513 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
514 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
515 return true;
518 /* case 3a: outer-loop stmt defining an inner-loop stmt:
519 outer-loop-header-bb:
520 d = def_stmt
521 inner-loop:
522 stmt # use (d)
523 outer-loop-tail-bb:
524 ... */
525 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
527 if (dump_enabled_p ())
528 dump_printf_loc (MSG_NOTE, vect_location,
529 "outer-loop def-stmt defining inner-loop stmt.\n");
531 switch (relevant)
533 case vect_unused_in_scope:
534 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
535 vect_used_in_scope : vect_unused_in_scope;
536 break;
538 case vect_used_in_outer_by_reduction:
539 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
540 relevant = vect_used_by_reduction;
541 break;
543 case vect_used_in_outer:
544 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
545 relevant = vect_used_in_scope;
546 break;
548 case vect_used_in_scope:
549 break;
551 default:
552 gcc_unreachable ();
556 /* case 3b: inner-loop stmt defining an outer-loop stmt:
557 outer-loop-header-bb:
559 inner-loop:
560 d = def_stmt
561 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
562 stmt # use (d) */
563 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
565 if (dump_enabled_p ())
566 dump_printf_loc (MSG_NOTE, vect_location,
567 "inner-loop def-stmt defining outer-loop stmt.\n");
569 switch (relevant)
571 case vect_unused_in_scope:
572 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
573 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
574 vect_used_in_outer_by_reduction : vect_unused_in_scope;
575 break;
577 case vect_used_by_reduction:
578 case vect_used_only_live:
579 relevant = vect_used_in_outer_by_reduction;
580 break;
582 case vect_used_in_scope:
583 relevant = vect_used_in_outer;
584 break;
586 default:
587 gcc_unreachable ();
590 /* We are also not interested in uses on loop PHI backedges that are
591 inductions. Otherwise we'll needlessly vectorize the IV increment
592 and cause hybrid SLP for SLP inductions. Unless the PHI is live
593 of course. */
594 else if (gimple_code (stmt) == GIMPLE_PHI
595 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
596 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
597 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
598 == use))
600 if (dump_enabled_p ())
601 dump_printf_loc (MSG_NOTE, vect_location,
602 "induction value on backedge.\n");
603 return true;
607 vect_mark_relevant (worklist, def_stmt, relevant, false);
608 return true;
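/* Worked example of the case 3 adjustments above (illustrative only):
   if STMT is relevant as vect_used_in_scope and its DEF_STMT sits in the
   enclosing outer loop (case 3a), DEF_STMT is marked vect_used_in_scope
   as well; if instead DEF_STMT sits in the inner loop and STMT in the
   outer loop (case 3b), DEF_STMT becomes vect_used_in_outer, and a
   vect_used_by_reduction or vect_used_only_live use becomes
   vect_used_in_outer_by_reduction.  */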
612 /* Function vect_mark_stmts_to_be_vectorized.
614 Not all stmts in the loop need to be vectorized. For example:
616 for i...
617 for j...
618 1. T0 = i + j
619 2. T1 = a[T0]
621 3. j = j + 1
623 Stmt 1 and 3 do not need to be vectorized, because loop control and
624 addressing of vectorized data-refs are handled differently.
626 This pass detects such stmts. */
628 bool
629 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
631 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
632 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
633 unsigned int nbbs = loop->num_nodes;
634 gimple_stmt_iterator si;
635 gimple *stmt;
636 unsigned int i;
637 stmt_vec_info stmt_vinfo;
638 basic_block bb;
639 gimple *phi;
640 bool live_p;
641 enum vect_relevant relevant;
643 if (dump_enabled_p ())
644 dump_printf_loc (MSG_NOTE, vect_location,
645 "=== vect_mark_stmts_to_be_vectorized ===\n");
647 auto_vec<gimple *, 64> worklist;
649 /* 1. Init worklist. */
650 for (i = 0; i < nbbs; i++)
652 bb = bbs[i];
653 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
655 phi = gsi_stmt (si);
656 if (dump_enabled_p ())
658 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
659 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
662 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
663 vect_mark_relevant (&worklist, phi, relevant, live_p);
665 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
667 stmt = gsi_stmt (si);
668 if (dump_enabled_p ())
670 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
671 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
674 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
675 vect_mark_relevant (&worklist, stmt, relevant, live_p);
679 /* 2. Process_worklist */
680 while (worklist.length () > 0)
682 use_operand_p use_p;
683 ssa_op_iter iter;
685 stmt = worklist.pop ();
686 if (dump_enabled_p ())
688 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
689 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
692 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
693 (DEF_STMT) as relevant/irrelevant according to the relevance property
694 of STMT. */
695 stmt_vinfo = vinfo_for_stmt (stmt);
696 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
698 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
699 propagated as is to the DEF_STMTs of its USEs.
701 One exception is when STMT has been identified as defining a reduction
702 variable; in this case we set the relevance to vect_used_by_reduction.
703 This is because we distinguish between two kinds of relevant stmts -
704 those that are used by a reduction computation, and those that are
705 (also) used by a regular computation. This allows us later on to
706 identify stmts that are used solely by a reduction, and therefore the
707 order of the results that they produce does not have to be kept. */
709 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
711 case vect_reduction_def:
712 gcc_assert (relevant != vect_unused_in_scope);
713 if (relevant != vect_unused_in_scope
714 && relevant != vect_used_in_scope
715 && relevant != vect_used_by_reduction
716 && relevant != vect_used_only_live)
718 if (dump_enabled_p ())
719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
720 "unsupported use of reduction.\n");
721 return false;
723 break;
725 case vect_nested_cycle:
726 if (relevant != vect_unused_in_scope
727 && relevant != vect_used_in_outer_by_reduction
728 && relevant != vect_used_in_outer)
730 if (dump_enabled_p ())
731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
732 "unsupported use of nested cycle.\n");
734 return false;
736 break;
738 case vect_double_reduction_def:
739 if (relevant != vect_unused_in_scope
740 && relevant != vect_used_by_reduction
741 && relevant != vect_used_only_live)
743 if (dump_enabled_p ())
744 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
745 "unsupported use of double reduction.\n");
747 return false;
749 break;
751 default:
752 break;
755 if (is_pattern_stmt_p (stmt_vinfo))
757 /* Pattern statements are not inserted into the code, so
758 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
759 have to scan the RHS or function arguments instead. */
760 if (is_gimple_assign (stmt))
762 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
763 tree op = gimple_assign_rhs1 (stmt);
765 i = 1;
766 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
768 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
769 relevant, &worklist, false)
770 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
771 relevant, &worklist, false))
772 return false;
773 i = 2;
775 for (; i < gimple_num_ops (stmt); i++)
777 op = gimple_op (stmt, i);
778 if (TREE_CODE (op) == SSA_NAME
779 && !process_use (stmt, op, loop_vinfo, relevant,
780 &worklist, false))
781 return false;
784 else if (is_gimple_call (stmt))
786 for (i = 0; i < gimple_call_num_args (stmt); i++)
788 tree arg = gimple_call_arg (stmt, i);
789 if (!process_use (stmt, arg, loop_vinfo, relevant,
790 &worklist, false))
791 return false;
795 else
796 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
798 tree op = USE_FROM_PTR (use_p);
799 if (!process_use (stmt, op, loop_vinfo, relevant,
800 &worklist, false))
801 return false;
804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
806 gather_scatter_info gs_info;
807 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
808 gcc_unreachable ();
809 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
810 &worklist, true))
811 return false;
813 } /* while worklist */
815 return true;
818 /* Compute the prologue cost for invariant or constant operands. */
820 static unsigned
821 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
822 unsigned opno, enum vect_def_type dt,
823 stmt_vector_for_cost *cost_vec)
825 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
826 tree op = gimple_op (stmt, opno);
827 unsigned prologue_cost = 0;
829 /* Without looking at the actual initializer a vector of
830 constants can be implemented as load from the constant pool.
831 When all elements are the same we can use a splat. */
832 tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
833 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
834 unsigned num_vects_to_check;
835 unsigned HOST_WIDE_INT const_nunits;
836 unsigned nelt_limit;
837 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
838 && ! multiple_p (const_nunits, group_size))
840 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
841 nelt_limit = const_nunits;
843 else
845 /* If either the vector has variable length or the vectors
846 are composed of repeated whole groups we only need to
847 cost construction once. All vectors will be the same. */
848 num_vects_to_check = 1;
849 nelt_limit = group_size;
851 tree elt = NULL_TREE;
852 unsigned nelt = 0;
853 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
855 unsigned si = j % group_size;
856 if (nelt == 0)
857 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], opno);
858 /* ??? We're just tracking whether all operands of a single
859 vector initializer are the same, ideally we'd check if
860 we emitted the same one already. */
861 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si],
862 opno))
863 elt = NULL_TREE;
864 nelt++;
865 if (nelt == nelt_limit)
867 /* ??? We need to pass down stmt_info for a vector type
868 even if it points to the wrong stmt. */
869 prologue_cost += record_stmt_cost
870 (cost_vec, 1,
871 dt == vect_external_def
872 ? (elt ? scalar_to_vec : vec_construct)
873 : vector_load,
874 stmt_info, 0, vect_prologue);
875 nelt = 0;
879 return prologue_cost;
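/* Illustrative example (hypothetical SLP group): for an external operand
   that is identical in every lane, e.g. { x_1, x_1, x_1, x_1 }, ELT stays
   non-NULL and one scalar_to_vec (splat) is recorded per vector; for
   distinct lanes such as { x_1, y_2, x_1, y_2 } ELT is reset to NULL_TREE
   and a vec_construct is recorded instead.  Constant operands are costed
   as a vector_load from the constant pool.  */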
882 /* Function vect_model_simple_cost.
884 Models cost for simple operations, i.e. those that only emit ncopies of a
885 single op. Right now, this does not account for multiple insns that could
886 be generated for the single vector op. We will handle that shortly. */
888 static void
889 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
890 enum vect_def_type *dt,
891 int ndts,
892 slp_tree node,
893 stmt_vector_for_cost *cost_vec)
895 int inside_cost = 0, prologue_cost = 0;
897 gcc_assert (cost_vec != NULL);
899 /* ??? Somehow we need to fix this at the callers. */
900 if (node)
901 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
903 if (node)
905 /* Scan operands and account for prologue cost of constants/externals.
906 ??? This over-estimates cost for multiple uses and should be
907 re-engineered. */
908 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
909 tree lhs = gimple_get_lhs (stmt);
910 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
912 tree op = gimple_op (stmt, i);
913 gimple *def_stmt;
914 enum vect_def_type dt;
915 if (!op || op == lhs)
916 continue;
917 if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt)
918 && (dt == vect_constant_def || dt == vect_external_def))
919 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
920 i, dt, cost_vec);
923 else
924 /* Cost the "broadcast" of a scalar operand in to a vector operand.
925 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
926 cost model. */
927 for (int i = 0; i < ndts; i++)
928 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
929 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
930 stmt_info, 0, vect_prologue);
932 /* Adjust for two-operator SLP nodes. */
933 if (node && SLP_TREE_TWO_OPERATORS (node))
935 ncopies *= 2;
936 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
937 stmt_info, 0, vect_body);
940 /* Pass the inside-of-loop statements to the target-specific cost model. */
941 inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
942 stmt_info, 0, vect_body);
944 if (dump_enabled_p ())
945 dump_printf_loc (MSG_NOTE, vect_location,
946 "vect_model_simple_cost: inside_cost = %d, "
947 "prologue_cost = %d .\n", inside_cost, prologue_cost);
951 /* Model cost for type demotion and promotion operations. PWR is normally
952 zero for single-step promotions and demotions. It will be one if
953 two-step promotion/demotion is required, and so on. Each additional
954 step doubles the number of instructions required. */
956 static void
957 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
958 enum vect_def_type *dt, int pwr,
959 stmt_vector_for_cost *cost_vec)
961 int i, tmp;
962 int inside_cost = 0, prologue_cost = 0;
964 for (i = 0; i < pwr + 1; i++)
966 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
967 (i + 1) : i;
968 inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
969 vec_promote_demote, stmt_info, 0,
970 vect_body);
973 /* FORNOW: Assuming maximum 2 args per stmts. */
974 for (i = 0; i < 2; i++)
975 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
976 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
977 stmt_info, 0, vect_prologue);
979 if (dump_enabled_p ())
980 dump_printf_loc (MSG_NOTE, vect_location,
981 "vect_model_promotion_demotion_cost: inside_cost = %d, "
982 "prologue_cost = %d .\n", inside_cost, prologue_cost);
985 /* Function vect_model_store_cost
987 Models cost for stores. In the case of grouped accesses, one access
988 has the overhead of the grouped access attributed to it. */
990 static void
991 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
992 enum vect_def_type dt,
993 vect_memory_access_type memory_access_type,
994 vec_load_store_type vls_type, slp_tree slp_node,
995 stmt_vector_for_cost *cost_vec)
997 unsigned int inside_cost = 0, prologue_cost = 0;
998 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
999 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1001 /* ??? Somehow we need to fix this at the callers. */
1002 if (slp_node)
1003 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1005 if (vls_type == VLS_STORE_INVARIANT)
1007 if (slp_node)
1008 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
1009 1, dt, cost_vec);
1010 else
1011 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
1012 stmt_info, 0, vect_prologue);
1015 /* Grouped stores update all elements in the group at once,
1016 so we want the DR for the first statement. */
1017 if (!slp_node && grouped_access_p)
1018 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
1020 /* True if we should include any once-per-group costs as well as
1021 the cost of the statement itself. For SLP we only get called
1022 once per group anyhow. */
1023 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1025 /* We assume that the cost of a single store-lanes instruction is
1026 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1027 access is instead being provided by a permute-and-store operation,
1028 include the cost of the permutes. */
1029 if (first_stmt_p
1030 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
 1032       /* Uses high and low interleave or shuffle operations for each
1033 needed permute. */
1034 int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
1035 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1036 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1037 stmt_info, 0, vect_body);
1039 if (dump_enabled_p ())
1040 dump_printf_loc (MSG_NOTE, vect_location,
1041 "vect_model_store_cost: strided group_size = %d .\n",
1042 group_size);
1045 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1046 /* Costs of the stores. */
1047 if (memory_access_type == VMAT_ELEMENTWISE
1048 || memory_access_type == VMAT_GATHER_SCATTER)
1050 /* N scalar stores plus extracting the elements. */
1051 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1052 inside_cost += record_stmt_cost (cost_vec,
1053 ncopies * assumed_nunits,
1054 scalar_store, stmt_info, 0, vect_body);
1056 else
1057 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1059 if (memory_access_type == VMAT_ELEMENTWISE
1060 || memory_access_type == VMAT_STRIDED_SLP)
1062 /* N scalar stores plus extracting the elements. */
1063 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1064 inside_cost += record_stmt_cost (cost_vec,
1065 ncopies * assumed_nunits,
1066 vec_to_scalar, stmt_info, 0, vect_body);
1069 if (dump_enabled_p ())
1070 dump_printf_loc (MSG_NOTE, vect_location,
1071 "vect_model_store_cost: inside_cost = %d, "
1072 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1076 /* Calculate cost of DR's memory access. */
1077 void
1078 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1079 unsigned int *inside_cost,
1080 stmt_vector_for_cost *body_cost_vec)
1082 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1083 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1085 switch (alignment_support_scheme)
1087 case dr_aligned:
1089 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1090 vector_store, stmt_info, 0,
1091 vect_body);
1093 if (dump_enabled_p ())
1094 dump_printf_loc (MSG_NOTE, vect_location,
1095 "vect_model_store_cost: aligned.\n");
1096 break;
1099 case dr_unaligned_supported:
1101 /* Here, we assign an additional cost for the unaligned store. */
1102 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1103 unaligned_store, stmt_info,
1104 DR_MISALIGNMENT (dr), vect_body);
1105 if (dump_enabled_p ())
1106 dump_printf_loc (MSG_NOTE, vect_location,
1107 "vect_model_store_cost: unaligned supported by "
1108 "hardware.\n");
1109 break;
1112 case dr_unaligned_unsupported:
1114 *inside_cost = VECT_MAX_COST;
1116 if (dump_enabled_p ())
1117 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1118 "vect_model_store_cost: unsupported access.\n");
1119 break;
1122 default:
1123 gcc_unreachable ();
1128 /* Function vect_model_load_cost
1130 Models cost for loads. In the case of grouped accesses, one access has
1131 the overhead of the grouped access attributed to it. Since unaligned
1132 accesses are supported for loads, we also account for the costs of the
1133 access scheme chosen. */
1135 static void
1136 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1137 vect_memory_access_type memory_access_type,
1138 slp_instance instance,
1139 slp_tree slp_node,
1140 stmt_vector_for_cost *cost_vec)
1142 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1143 unsigned int inside_cost = 0, prologue_cost = 0;
1144 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1146 gcc_assert (cost_vec);
1148 /* ??? Somehow we need to fix this at the callers. */
1149 if (slp_node)
1150 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1152 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1154 /* If the load is permuted then the alignment is determined by
1155 the first group element not by the first scalar stmt DR. */
1156 gimple *stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
1157 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1158 /* Record the cost for the permutation. */
1159 unsigned n_perms;
1160 unsigned assumed_nunits
1161 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
1162 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1163 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1164 slp_vf, instance, true,
1165 &n_perms);
1166 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1167 stmt_info, 0, vect_body);
1168 /* And adjust the number of loads performed. This handles
1169 redundancies as well as loads that are later dead. */
1170 auto_sbitmap perm (DR_GROUP_SIZE (stmt_info));
1171 bitmap_clear (perm);
1172 for (unsigned i = 0;
1173 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1174 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1175 ncopies = 0;
1176 bool load_seen = false;
1177 for (unsigned i = 0; i < DR_GROUP_SIZE (stmt_info); ++i)
1179 if (i % assumed_nunits == 0)
1181 if (load_seen)
1182 ncopies++;
1183 load_seen = false;
1185 if (bitmap_bit_p (perm, i))
1186 load_seen = true;
1188 if (load_seen)
1189 ncopies++;
1190 gcc_assert (ncopies
1191 <= (DR_GROUP_SIZE (stmt_info) - DR_GROUP_GAP (stmt_info)
1192 + assumed_nunits - 1) / assumed_nunits);
1195 /* Grouped loads read all elements in the group at once,
1196 so we want the DR for the first statement. */
1197 if (!slp_node && grouped_access_p)
1198 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
1200 /* True if we should include any once-per-group costs as well as
1201 the cost of the statement itself. For SLP we only get called
1202 once per group anyhow. */
1203 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1205 /* We assume that the cost of a single load-lanes instruction is
1206 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1207 access is instead being provided by a load-and-permute operation,
1208 include the cost of the permutes. */
1209 if (first_stmt_p
1210 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
 1212       /* Uses even and odd extract operations or shuffle operations
1213 for each needed permute. */
1214 int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
1215 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1216 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1217 stmt_info, 0, vect_body);
1219 if (dump_enabled_p ())
1220 dump_printf_loc (MSG_NOTE, vect_location,
1221 "vect_model_load_cost: strided group_size = %d .\n",
1222 group_size);
1225 /* The loads themselves. */
1226 if (memory_access_type == VMAT_ELEMENTWISE
1227 || memory_access_type == VMAT_GATHER_SCATTER)
1229 /* N scalar loads plus gathering them into a vector. */
1230 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1231 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1232 inside_cost += record_stmt_cost (cost_vec,
1233 ncopies * assumed_nunits,
1234 scalar_load, stmt_info, 0, vect_body);
1236 else
1237 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1238 &inside_cost, &prologue_cost,
1239 cost_vec, cost_vec, true);
1240 if (memory_access_type == VMAT_ELEMENTWISE
1241 || memory_access_type == VMAT_STRIDED_SLP)
1242 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1243 stmt_info, 0, vect_body);
1245 if (dump_enabled_p ())
1246 dump_printf_loc (MSG_NOTE, vect_location,
1247 "vect_model_load_cost: inside_cost = %d, "
1248 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1252 /* Calculate cost of DR's memory access. */
1253 void
1254 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1255 bool add_realign_cost, unsigned int *inside_cost,
1256 unsigned int *prologue_cost,
1257 stmt_vector_for_cost *prologue_cost_vec,
1258 stmt_vector_for_cost *body_cost_vec,
1259 bool record_prologue_costs)
1261 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1262 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1264 switch (alignment_support_scheme)
1266 case dr_aligned:
1268 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1269 stmt_info, 0, vect_body);
1271 if (dump_enabled_p ())
1272 dump_printf_loc (MSG_NOTE, vect_location,
1273 "vect_model_load_cost: aligned.\n");
1275 break;
1277 case dr_unaligned_supported:
1279 /* Here, we assign an additional cost for the unaligned load. */
1280 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1281 unaligned_load, stmt_info,
1282 DR_MISALIGNMENT (dr), vect_body);
1284 if (dump_enabled_p ())
1285 dump_printf_loc (MSG_NOTE, vect_location,
1286 "vect_model_load_cost: unaligned supported by "
1287 "hardware.\n");
1289 break;
1291 case dr_explicit_realign:
1293 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1294 vector_load, stmt_info, 0, vect_body);
1295 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1296 vec_perm, stmt_info, 0, vect_body);
1298 /* FIXME: If the misalignment remains fixed across the iterations of
1299 the containing loop, the following cost should be added to the
1300 prologue costs. */
1301 if (targetm.vectorize.builtin_mask_for_load)
1302 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1303 stmt_info, 0, vect_body);
1305 if (dump_enabled_p ())
1306 dump_printf_loc (MSG_NOTE, vect_location,
1307 "vect_model_load_cost: explicit realign\n");
1309 break;
1311 case dr_explicit_realign_optimized:
1313 if (dump_enabled_p ())
1314 dump_printf_loc (MSG_NOTE, vect_location,
1315 "vect_model_load_cost: unaligned software "
1316 "pipelined.\n");
1318 /* Unaligned software pipeline has a load of an address, an initial
1319 load, and possibly a mask operation to "prime" the loop. However,
1320 if this is an access in a group of loads, which provide grouped
1321 access, then the above cost should only be considered for one
1322 access in the group. Inside the loop, there is a load op
1323 and a realignment op. */
1325 if (add_realign_cost && record_prologue_costs)
1327 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1328 vector_stmt, stmt_info,
1329 0, vect_prologue);
1330 if (targetm.vectorize.builtin_mask_for_load)
1331 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1332 vector_stmt, stmt_info,
1333 0, vect_prologue);
1336 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1337 stmt_info, 0, vect_body);
1338 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1339 stmt_info, 0, vect_body);
1341 if (dump_enabled_p ())
1342 dump_printf_loc (MSG_NOTE, vect_location,
1343 "vect_model_load_cost: explicit realign optimized"
1344 "\n");
1346 break;
1349 case dr_unaligned_unsupported:
1351 *inside_cost = VECT_MAX_COST;
1353 if (dump_enabled_p ())
1354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1355 "vect_model_load_cost: unsupported access.\n");
1356 break;
1359 default:
1360 gcc_unreachable ();
1364 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1365 the loop preheader for the vectorized stmt STMT. */
1367 static void
1368 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1370 if (gsi)
1371 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1372 else
1374 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1375 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1377 if (loop_vinfo)
1379 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1380 basic_block new_bb;
1381 edge pe;
1383 if (nested_in_vect_loop_p (loop, stmt))
1384 loop = loop->inner;
1386 pe = loop_preheader_edge (loop);
1387 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1388 gcc_assert (!new_bb);
1390 else
1392 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1393 basic_block bb;
1394 gimple_stmt_iterator gsi_bb_start;
1396 gcc_assert (bb_vinfo);
1397 bb = BB_VINFO_BB (bb_vinfo);
1398 gsi_bb_start = gsi_after_labels (bb);
1399 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1403 if (dump_enabled_p ())
1405 dump_printf_loc (MSG_NOTE, vect_location,
1406 "created new init_stmt: ");
1407 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1411 /* Function vect_init_vector.
1413 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1414 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1415 vector type a vector with all elements equal to VAL is created first.
1416 Place the initialization at BSI if it is not NULL. Otherwise, place the
1417 initialization at the loop preheader.
1418 Return the DEF of INIT_STMT.
1419 It will be used in the vectorization of STMT. */
1421 tree
1422 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1424 gimple *init_stmt;
1425 tree new_temp;
 1427   /* We abuse this function to push something to an SSA name with initial 'val'.  */
1428 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1430 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1431 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1433 /* Scalar boolean value should be transformed into
1434 all zeros or all ones value before building a vector. */
1435 if (VECTOR_BOOLEAN_TYPE_P (type))
1437 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1438 tree false_val = build_zero_cst (TREE_TYPE (type));
1440 if (CONSTANT_CLASS_P (val))
1441 val = integer_zerop (val) ? false_val : true_val;
1442 else
1444 new_temp = make_ssa_name (TREE_TYPE (type));
1445 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1446 val, true_val, false_val);
1447 vect_init_vector_1 (stmt, init_stmt, gsi);
1448 val = new_temp;
1451 else if (CONSTANT_CLASS_P (val))
1452 val = fold_convert (TREE_TYPE (type), val);
1453 else
1455 new_temp = make_ssa_name (TREE_TYPE (type));
1456 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1457 init_stmt = gimple_build_assign (new_temp,
1458 fold_build1 (VIEW_CONVERT_EXPR,
1459 TREE_TYPE (type),
1460 val));
1461 else
1462 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1463 vect_init_vector_1 (stmt, init_stmt, gsi);
1464 val = new_temp;
1467 val = build_vector_from_val (type, val);
1470 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1471 init_stmt = gimple_build_assign (new_temp, val);
1472 vect_init_vector_1 (stmt, init_stmt, gsi);
1473 return new_temp;
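/* For illustration (hypothetical): called with an SSA name X_1, a
   four-element vector type and GSI == NULL, the function emits in the
   loop preheader something like

     cst_2 = {x_1, x_1, x_1, x_1};

   and returns cst_2, after first converting X_1 to the vector element
   type as described above if the types differ.  */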
1476 /* Function vect_get_vec_def_for_operand_1.
1478 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1479 DT that will be used in the vectorized stmt. */
1481 tree
1482 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1484 tree vec_oprnd;
1485 gimple *vec_stmt;
1486 stmt_vec_info def_stmt_info = NULL;
1488 switch (dt)
1490 /* operand is a constant or a loop invariant. */
1491 case vect_constant_def:
1492 case vect_external_def:
1493 /* Code should use vect_get_vec_def_for_operand. */
1494 gcc_unreachable ();
1496 /* operand is defined inside the loop. */
1497 case vect_internal_def:
1499 /* Get the def from the vectorized stmt. */
1500 def_stmt_info = vinfo_for_stmt (def_stmt);
1502 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1503 /* Get vectorized pattern statement. */
1504 if (!vec_stmt
1505 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1506 && !STMT_VINFO_RELEVANT (def_stmt_info))
1507 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1508 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1509 gcc_assert (vec_stmt);
1510 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1511 vec_oprnd = PHI_RESULT (vec_stmt);
1512 else if (is_gimple_call (vec_stmt))
1513 vec_oprnd = gimple_call_lhs (vec_stmt);
1514 else
1515 vec_oprnd = gimple_assign_lhs (vec_stmt);
1516 return vec_oprnd;
1519 /* operand is defined by a loop header phi. */
1520 case vect_reduction_def:
1521 case vect_double_reduction_def:
1522 case vect_nested_cycle:
1523 case vect_induction_def:
1525 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1527 /* Get the def from the vectorized stmt. */
1528 def_stmt_info = vinfo_for_stmt (def_stmt);
1529 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1530 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1531 vec_oprnd = PHI_RESULT (vec_stmt);
1532 else
1533 vec_oprnd = gimple_get_lhs (vec_stmt);
1534 return vec_oprnd;
1537 default:
1538 gcc_unreachable ();
1543 /* Function vect_get_vec_def_for_operand.
1545 OP is an operand in STMT. This function returns a (vector) def that will be
1546 used in the vectorized stmt for STMT.
1548 In the case that OP is an SSA_NAME which is defined in the loop, then
1549 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1551 In case OP is an invariant or constant, a new stmt that creates a vector def
1552 needs to be introduced. VECTYPE may be used to specify a required type for
1553 vector invariant. */
1555 tree
1556 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1558 gimple *def_stmt;
1559 enum vect_def_type dt;
1560 bool is_simple_use;
1561 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1562 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1564 if (dump_enabled_p ())
1566 dump_printf_loc (MSG_NOTE, vect_location,
1567 "vect_get_vec_def_for_operand: ");
1568 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1569 dump_printf (MSG_NOTE, "\n");
1572 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1573 gcc_assert (is_simple_use);
1574 if (def_stmt && dump_enabled_p ())
1576 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1577 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1580 if (dt == vect_constant_def || dt == vect_external_def)
1582 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1583 tree vector_type;
1585 if (vectype)
1586 vector_type = vectype;
1587 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1588 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1589 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1590 else
1591 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1593 gcc_assert (vector_type);
1594 return vect_init_vector (stmt, op, vector_type, NULL);
1596 else
1597 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1601 /* Function vect_get_vec_def_for_stmt_copy
1603 Return a vector-def for an operand. This function is used when the
1604 vectorized stmt to be created (by the caller to this function) is a "copy"
1605 created in case the vectorized result cannot fit in one vector, and several
1606 copies of the vector-stmt are required. In this case the vector-def is
1607 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1608 of the stmt that defines VEC_OPRND.
1609 DT is the type of the vector def VEC_OPRND.
1611 Context:
1612 In case the vectorization factor (VF) is bigger than the number
1613 of elements that can fit in a vectype (nunits), we have to generate
1614 more than one vector stmt to vectorize the scalar stmt. This situation
1615 arises when there are multiple data-types operated upon in the loop; the
1616 smallest data-type determines the VF, and as a result, when vectorizing
1617 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1618 vector stmt (each computing a vector of 'nunits' results, and together
1619 computing 'VF' results in each iteration). This function is called when
1620 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1621 which VF=16 and nunits=4, so the number of copies required is 4):
1623 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1625 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1626 VS1.1: vx.1 = memref1 VS1.2
1627 VS1.2: vx.2 = memref2 VS1.3
1628 VS1.3: vx.3 = memref3
1630 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1631 VSnew.1: vz1 = vx.1 + ... VSnew.2
1632 VSnew.2: vz2 = vx.2 + ... VSnew.3
1633 VSnew.3: vz3 = vx.3 + ...
1635 The vectorization of S1 is explained in vectorizable_load.
1636 The vectorization of S2:
1637 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1638 the function 'vect_get_vec_def_for_operand' is called to
1639 get the relevant vector-def for each operand of S2. For operand x it
1640 returns the vector-def 'vx.0'.
1642 To create the remaining copies of the vector-stmt (VSnew.j), this
1643 function is called to get the relevant vector-def for each operand. It is
1644 obtained from the respective VS1.j stmt, which is recorded in the
1645 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1647 For example, to obtain the vector-def 'vx.1' in order to create the
1648 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1649 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1650 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1651 and return its def ('vx.1').
1652 Overall, to create the above sequence this function will be called 3 times:
1653 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1654 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1655 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1657 tree
1658 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1660 gimple *vec_stmt_for_operand;
1661 stmt_vec_info def_stmt_info;
1663 /* Do nothing; can reuse same def. */
1664 if (dt == vect_external_def || dt == vect_constant_def )
1665 return vec_oprnd;
1667 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1668 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1669 gcc_assert (def_stmt_info);
1670 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1671 gcc_assert (vec_stmt_for_operand);
1672 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1673 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1674 else
1675 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1676 return vec_oprnd;
1680 /* Get vectorized definitions for the operands to create a copy of an original
1681 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1683 void
1684 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1685 vec<tree> *vec_oprnds0,
1686 vec<tree> *vec_oprnds1)
1688 tree vec_oprnd = vec_oprnds0->pop ();
1690 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1691 vec_oprnds0->quick_push (vec_oprnd);
1693 if (vec_oprnds1 && vec_oprnds1->length ())
1695 vec_oprnd = vec_oprnds1->pop ();
1696 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1697 vec_oprnds1->quick_push (vec_oprnd);
1702 /* Get vectorized definitions for OP0 and OP1. */
1704 void
1705 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1706 vec<tree> *vec_oprnds0,
1707 vec<tree> *vec_oprnds1,
1708 slp_tree slp_node)
1710 if (slp_node)
1712 int nops = (op1 == NULL_TREE) ? 1 : 2;
1713 auto_vec<tree> ops (nops);
1714 auto_vec<vec<tree> > vec_defs (nops);
1716 ops.quick_push (op0);
1717 if (op1)
1718 ops.quick_push (op1);
1720 vect_get_slp_defs (ops, slp_node, &vec_defs);
1722 *vec_oprnds0 = vec_defs[0];
1723 if (op1)
1724 *vec_oprnds1 = vec_defs[1];
1726 else
1728 tree vec_oprnd;
1730 vec_oprnds0->create (1);
1731 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1732 vec_oprnds0->quick_push (vec_oprnd);
1734 if (op1)
1736 vec_oprnds1->create (1);
1737 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1738 vec_oprnds1->quick_push (vec_oprnd);
1743 /* Helper function called by vect_finish_replace_stmt and
1744 vect_finish_stmt_generation. Set the location of the new
1745 statement and create a stmt_vec_info for it. */
1747 static void
1748 vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1750 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1751 vec_info *vinfo = stmt_info->vinfo;
1753 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1755 if (dump_enabled_p ())
1757 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1758 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1761 gimple_set_location (vec_stmt, gimple_location (stmt));
1763 /* While EH edges will generally prevent vectorization, stmt might
1764 e.g. be in a must-not-throw region. Ensure newly created stmts
1765 that could throw are part of the same region. */
1766 int lp_nr = lookup_stmt_eh_lp (stmt);
1767 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1768 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1771 /* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1772 which sets the same scalar result as STMT did. */
1774 void
1775 vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1777 gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1779 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1780 gsi_replace (&gsi, vec_stmt, false);
1782 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1785 /* Function vect_finish_stmt_generation.
1787 Insert a new stmt. */
1789 void
1790 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1791 gimple_stmt_iterator *gsi)
1793 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1795 if (!gsi_end_p (*gsi)
1796 && gimple_has_mem_ops (vec_stmt))
1798 gimple *at_stmt = gsi_stmt (*gsi);
1799 tree vuse = gimple_vuse (at_stmt);
1800 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1802 tree vdef = gimple_vdef (at_stmt);
1803 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1804 /* If we have an SSA vuse and insert a store, update virtual
1805 SSA form to avoid triggering the renamer. Do so only
1806 if we can easily see all uses - which is what almost always
1807 happens with the way vectorized stmts are inserted. */
1808 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1809 && ((is_gimple_assign (vec_stmt)
1810 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1811 || (is_gimple_call (vec_stmt)
1812 && !(gimple_call_flags (vec_stmt)
1813 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1815 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1816 gimple_set_vdef (vec_stmt, new_vdef);
1817 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1821 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1822 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1825 /* We want to vectorize a call to combined function CFN with function
1826 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1827 as the types of all inputs. Check whether this is possible using
1828 an internal function, returning its code if so or IFN_LAST if not. */
1830 static internal_fn
1831 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1832 tree vectype_out, tree vectype_in)
1834 internal_fn ifn;
1835 if (internal_fn_p (cfn))
1836 ifn = as_internal_fn (cfn);
1837 else
1838 ifn = associated_internal_fn (fndecl);
1839 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1841 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1842 if (info.vectorizable)
1844 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1845 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1846 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1847 OPTIMIZE_FOR_SPEED))
1848 return ifn;
1851 return IFN_LAST;
1855 static tree permute_vec_elements (tree, tree, tree, gimple *,
1856 gimple_stmt_iterator *);
1858 /* Check whether a load or store statement in the loop described by
1859 LOOP_VINFO is possible in a fully-masked loop. This is testing
1860 whether the vectorizer pass has the appropriate support, as well as
1861 whether the target does.
1863 VLS_TYPE says whether the statement is a load or store and VECTYPE
1864 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1865 says how the load or store is going to be implemented and GROUP_SIZE
1866 is the number of load or store statements in the containing group.
1867 If the access is a gather load or scatter store, GS_INFO describes
1868 its arguments.
1870 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1871 supported, otherwise record the required mask types. */
1873 static void
1874 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1875 vec_load_store_type vls_type, int group_size,
1876 vect_memory_access_type memory_access_type,
1877 gather_scatter_info *gs_info)
1879 /* Invariant loads need no special support. */
1880 if (memory_access_type == VMAT_INVARIANT)
1881 return;
1883 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1884 machine_mode vecmode = TYPE_MODE (vectype);
1885 bool is_load = (vls_type == VLS_LOAD);
1886 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1888 if (is_load
1889 ? !vect_load_lanes_supported (vectype, group_size, true)
1890 : !vect_store_lanes_supported (vectype, group_size, true))
1892 if (dump_enabled_p ())
1893 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1894 "can't use a fully-masked loop because the"
1895 " target doesn't have an appropriate masked"
1896 " load/store-lanes instruction.\n");
1897 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1898 return;
1900 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1901 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1902 return;
1905 if (memory_access_type == VMAT_GATHER_SCATTER)
1907 internal_fn ifn = (is_load
1908 ? IFN_MASK_GATHER_LOAD
1909 : IFN_MASK_SCATTER_STORE);
1910 tree offset_type = TREE_TYPE (gs_info->offset);
1911 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1912 gs_info->memory_type,
1913 TYPE_SIGN (offset_type),
1914 gs_info->scale))
1916 if (dump_enabled_p ())
1917 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1918 "can't use a fully-masked loop because the"
1919 " target doesn't have an appropriate masked"
1920 " gather load or scatter store instruction.\n");
1921 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1922 return;
1924 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1925 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1926 return;
1929 if (memory_access_type != VMAT_CONTIGUOUS
1930 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1932 /* Element X of the data must come from iteration i * VF + X of the
1933 scalar loop. We need more work to support other mappings. */
1934 if (dump_enabled_p ())
1935 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1936 "can't use a fully-masked loop because an access"
1937 " isn't contiguous.\n");
1938 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1939 return;
1942 machine_mode mask_mode;
1943 if (!(targetm.vectorize.get_mask_mode
1944 (GET_MODE_NUNITS (vecmode),
1945 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1946 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1948 if (dump_enabled_p ())
1949 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1950 "can't use a fully-masked loop because the target"
1951 " doesn't have the appropriate masked load or"
1952 " store.\n");
1953 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1954 return;
1956 /* We might load more scalars than we need for permuting SLP loads.
1957 We checked in get_group_load_store_type that the extra elements
1958 don't leak into a new vector. */
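/* The number of masks needed is the number of vectors that cover
   GROUP_SIZE * VF scalar elements, i.e. (GROUP_SIZE * VF) / NUNITS rounded
   away from zero; e.g. (purely illustrative) GROUP_SIZE == 3, VF == 4 and
   NUNITS == 8 would record 2 masks. */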
1959 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1960 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1961 unsigned int nvectors;
1962 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1963 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1964 else
1965 gcc_unreachable ();
1968 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1969 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1970 that needs to be applied to all loads and stores in a vectorized loop.
1971 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1973 MASK_TYPE is the type of both masks. If new statements are needed,
1974 insert them before GSI. */
1976 static tree
1977 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1978 gimple_stmt_iterator *gsi)
1980 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1981 if (!loop_mask)
1982 return vec_mask;
1984 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1985 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1986 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1987 vec_mask, loop_mask);
1988 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1989 return and_res;
1992 /* Determine whether we can use a gather load or scatter store to vectorize
1993 strided load or store STMT by truncating the current offset to a smaller
1994 width. We need to be able to construct an offset vector:
1996 { 0, X, X*2, X*3, ... }
1998 without loss of precision, where X is STMT's DR_STEP.
2000 Return true if this is possible, describing the gather load or scatter
2001 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
2003 static bool
2004 vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
2005 bool masked_p,
2006 gather_scatter_info *gs_info)
2008 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2009 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2010 tree step = DR_STEP (dr);
2011 if (TREE_CODE (step) != INTEGER_CST)
2013 /* ??? Perhaps we could use range information here? */
2014 if (dump_enabled_p ())
2015 dump_printf_loc (MSG_NOTE, vect_location,
2016 "cannot truncate variable step.\n");
2017 return false;
2020 /* Get the number of bits in an element. */
2021 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2022 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2023 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2025 /* Set COUNT to the upper limit on the number of elements - 1.
2026 Start with the maximum vectorization factor. */
2027 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2029 /* Try lowering COUNT to the number of scalar latch iterations. */
2030 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2031 widest_int max_iters;
2032 if (max_loop_iterations (loop, &max_iters)
2033 && max_iters < count)
2034 count = max_iters.to_shwi ();
2036 /* Try scales of 1 and the element size. */
2037 int scales[] = { 1, vect_get_scalar_dr_size (dr) };
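/* For example (illustrative numbers): with DR_STEP == 24 and an 8-byte
   scalar, the candidate scales are 1 and 8, giving factors of 24 and 3;
   the range check below then asks whether COUNT * 24, respectively
   COUNT * 3, fits in ELEMENT_BITS bits. */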
2038 bool overflow_p = false;
2039 for (int i = 0; i < 2; ++i)
2041 int scale = scales[i];
2042 widest_int factor;
2043 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2044 continue;
2046 /* See whether we can calculate COUNT * STEP / SCALE
2047 in ELEMENT_BITS bits. */
2048 widest_int range = wi::mul (count, factor, SIGNED, &overflow_p);
2049 if (overflow_p)
2050 continue;
2051 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2052 if (wi::min_precision (range, sign) > element_bits)
2054 overflow_p = true;
2055 continue;
2058 /* See whether the target supports the operation. */
2059 tree memory_type = TREE_TYPE (DR_REF (dr));
2060 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2061 memory_type, element_bits, sign, scale,
2062 &gs_info->ifn, &gs_info->element_type))
2063 continue;
2065 tree offset_type = build_nonstandard_integer_type (element_bits,
2066 sign == UNSIGNED);
2068 gs_info->decl = NULL_TREE;
2069 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2070 but we don't need to store that here. */
2071 gs_info->base = NULL_TREE;
2072 gs_info->offset = fold_convert (offset_type, step);
2073 gs_info->offset_dt = vect_constant_def;
2074 gs_info->offset_vectype = NULL_TREE;
2075 gs_info->scale = scale;
2076 gs_info->memory_type = memory_type;
2077 return true;
2080 if (overflow_p && dump_enabled_p ())
2081 dump_printf_loc (MSG_NOTE, vect_location,
2082 "truncating gather/scatter offset to %d bits"
2083 " might change its value.\n", element_bits);
2085 return false;
2088 /* Return true if we can use gather/scatter internal functions to
2089 vectorize STMT, which is a grouped or strided load or store.
2090 MASKED_P is true if the load or store is conditional. When returning
2091 true, fill in GS_INFO with the information required to perform the
2092 operation. */
2094 static bool
2095 vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
2096 bool masked_p,
2097 gather_scatter_info *gs_info)
2099 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
2100 || gs_info->decl)
2101 return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
2102 masked_p, gs_info);
2104 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2105 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2106 tree offset_type = TREE_TYPE (gs_info->offset);
2107 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2109 /* Enforced by vect_check_gather_scatter. */
2110 gcc_assert (element_bits >= offset_bits);
2112 /* If the elements are wider than the offset, convert the offset to the
2113 same width, without changing its sign. */
2114 if (element_bits > offset_bits)
2116 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2117 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2118 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2121 if (dump_enabled_p ())
2122 dump_printf_loc (MSG_NOTE, vect_location,
2123 "using gather/scatter for strided/grouped access,"
2124 " scale = %d\n", gs_info->scale);
2126 return true;
2129 /* STMT is a non-strided load or store, meaning that it accesses
2130 elements with a known constant step. Return -1 if that step
2131 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2133 static int
2134 compare_step_with_zero (gimple *stmt)
2136 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2137 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2138 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
2139 size_zero_node);
2142 /* If the target supports a permute mask that reverses the elements in
2143 a vector of type VECTYPE, return that mask, otherwise return null. */
2145 static tree
2146 perm_mask_for_reverse (tree vectype)
2148 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2150 /* The encoding has a single stepped pattern. */
2151 vec_perm_builder sel (nunits, 1, 3);
2152 for (int i = 0; i < 3; ++i)
2153 sel.quick_push (nunits - 1 - i);
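/* With NUNITS == 8, for example, the three encoded elements are
   { 7, 6, 5 }, which the single stepped pattern extends to the full
   reversal { 7, 6, 5, 4, 3, 2, 1, 0 }. */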
2155 vec_perm_indices indices (sel, 1, nunits);
2156 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2157 return NULL_TREE;
2158 return vect_gen_perm_mask_checked (vectype, indices);
2161 /* STMT is either a masked or unconditional store. Return the value
2162 being stored. */
2164 tree
2165 vect_get_store_rhs (gimple *stmt)
2167 if (gassign *assign = dyn_cast <gassign *> (stmt))
2169 gcc_assert (gimple_assign_single_p (assign));
2170 return gimple_assign_rhs1 (assign);
2172 if (gcall *call = dyn_cast <gcall *> (stmt))
2174 internal_fn ifn = gimple_call_internal_fn (call);
2175 int index = internal_fn_stored_value_index (ifn);
2176 gcc_assert (index >= 0);
2177 return gimple_call_arg (stmt, index);
2179 gcc_unreachable ();
2182 /* A subroutine of get_load_store_type, with a subset of the same
2183 arguments. Handle the case where STMT is part of a grouped load
2184 or store.
2186 For stores, the statements in the group are all consecutive
2187 and there is no gap at the end. For loads, the statements in the
2188 group might not be consecutive; there can be gaps between statements
2189 as well as at the end. */
2191 static bool
2192 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
2193 bool masked_p, vec_load_store_type vls_type,
2194 vect_memory_access_type *memory_access_type,
2195 gather_scatter_info *gs_info)
2197 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2198 vec_info *vinfo = stmt_info->vinfo;
2199 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2200 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2201 gimple *first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
2202 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2203 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
2204 bool single_element_p = (stmt == first_stmt
2205 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2206 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (vinfo_for_stmt (first_stmt));
2207 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2209 /* True if the vectorized statements would access beyond the last
2210 statement in the group. */
2211 bool overrun_p = false;
2213 /* True if we can cope with such overrun by peeling for gaps, so that
2214 there is at least one final scalar iteration after the vector loop. */
2215 bool can_overrun_p = (!masked_p
2216 && vls_type == VLS_LOAD
2217 && loop_vinfo
2218 && !loop->inner);
2220 /* There can only be a gap at the end of the group if the stride is
2221 known at compile time. */
2222 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2224 /* Stores can't yet have gaps. */
2225 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2227 if (slp)
2229 if (STMT_VINFO_STRIDED_P (stmt_info))
2231 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2232 separated by the stride, until we have a complete vector.
2233 Fall back to scalar accesses if that isn't possible. */
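/* For instance (illustrative), NUNITS == 8 with GROUP_SIZE == 4 can use
   VMAT_STRIDED_SLP (two whole groups per vector), whereas GROUP_SIZE == 3
   does not divide NUNITS evenly and falls back to VMAT_ELEMENTWISE. */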
2234 if (multiple_p (nunits, group_size))
2235 *memory_access_type = VMAT_STRIDED_SLP;
2236 else
2237 *memory_access_type = VMAT_ELEMENTWISE;
2239 else
2241 overrun_p = loop_vinfo && gap != 0;
2242 if (overrun_p && vls_type != VLS_LOAD)
2244 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2245 "Grouped store with gaps requires"
2246 " non-consecutive accesses\n");
2247 return false;
2249 /* An overrun is fine if the trailing elements are smaller
2250 than the alignment boundary B. Every vector access will
2251 be a multiple of B and so we are guaranteed to access a
2252 non-gap element in the same B-sized block. */
2253 if (overrun_p
2254 && gap < (vect_known_alignment_in_bytes (first_dr)
2255 / vect_get_scalar_dr_size (first_dr)))
2256 overrun_p = false;
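/* E.g. (illustrative) 4-byte elements known to be 16-byte aligned can
   tolerate a trailing gap of up to 3 elements without stepping outside
   the last aligned block that holds real group members. */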
2257 if (overrun_p && !can_overrun_p)
2259 if (dump_enabled_p ())
2260 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2261 "Peeling for outer loop is not supported\n");
2262 return false;
2264 *memory_access_type = VMAT_CONTIGUOUS;
2267 else
2269 /* We can always handle this case using elementwise accesses,
2270 but see if something more efficient is available. */
2271 *memory_access_type = VMAT_ELEMENTWISE;
2273 /* If there is a gap at the end of the group then these optimizations
2274 would access excess elements in the last iteration. */
2275 bool would_overrun_p = (gap != 0);
2276 /* An overrun is fine if the trailing elements are smaller than the
2277 alignment boundary B. Every vector access will be a multiple of B
2278 and so we are guaranteed to access a non-gap element in the
2279 same B-sized block. */
2280 if (would_overrun_p
2281 && !masked_p
2282 && gap < (vect_known_alignment_in_bytes (first_dr)
2283 / vect_get_scalar_dr_size (first_dr)))
2284 would_overrun_p = false;
2286 if (!STMT_VINFO_STRIDED_P (stmt_info)
2287 && (can_overrun_p || !would_overrun_p)
2288 && compare_step_with_zero (stmt) > 0)
2290 /* First cope with the degenerate case of a single-element
2291 vector. */
2292 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2293 *memory_access_type = VMAT_CONTIGUOUS;
2295 /* Otherwise try using LOAD/STORE_LANES. */
2296 if (*memory_access_type == VMAT_ELEMENTWISE
2297 && (vls_type == VLS_LOAD
2298 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2299 : vect_store_lanes_supported (vectype, group_size,
2300 masked_p)))
2302 *memory_access_type = VMAT_LOAD_STORE_LANES;
2303 overrun_p = would_overrun_p;
2306 /* If that fails, try using permuting loads. */
2307 if (*memory_access_type == VMAT_ELEMENTWISE
2308 && (vls_type == VLS_LOAD
2309 ? vect_grouped_load_supported (vectype, single_element_p,
2310 group_size)
2311 : vect_grouped_store_supported (vectype, group_size)))
2313 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2314 overrun_p = would_overrun_p;
2318 /* As a last resort, try using a gather load or scatter store.
2320 ??? Although the code can handle all group sizes correctly,
2321 it probably isn't a win to use separate strided accesses based
2322 on nearby locations. Or, even if it's a win over scalar code,
2323 it might not be a win over vectorizing at a lower VF, if that
2324 allows us to use contiguous accesses. */
2325 if (*memory_access_type == VMAT_ELEMENTWISE
2326 && single_element_p
2327 && loop_vinfo
2328 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2329 masked_p, gs_info))
2330 *memory_access_type = VMAT_GATHER_SCATTER;
2333 if (vls_type != VLS_LOAD && first_stmt == stmt)
2335 /* STMT is the leader of the group. Check the operands of all the
2336 stmts of the group. */
2337 gimple *next_stmt = DR_GROUP_NEXT_ELEMENT (stmt_info);
2338 while (next_stmt)
2340 tree op = vect_get_store_rhs (next_stmt);
2341 gimple *def_stmt;
2342 enum vect_def_type dt;
2343 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
2345 if (dump_enabled_p ())
2346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2347 "use not simple.\n");
2348 return false;
2350 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2354 if (overrun_p)
2356 gcc_assert (can_overrun_p);
2357 if (dump_enabled_p ())
2358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2359 "Data access with gaps requires scalar "
2360 "epilogue loop\n");
2361 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2364 return true;
2367 /* A subroutine of get_load_store_type, with a subset of the same
2368 arguments. Handle the case where STMT is a load or store that
2369 accesses consecutive elements with a negative step. */
2371 static vect_memory_access_type
2372 get_negative_load_store_type (gimple *stmt, tree vectype,
2373 vec_load_store_type vls_type,
2374 unsigned int ncopies)
2376 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2377 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2378 dr_alignment_support alignment_support_scheme;
2380 if (ncopies > 1)
2382 if (dump_enabled_p ())
2383 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2384 "multiple types with negative step.\n");
2385 return VMAT_ELEMENTWISE;
2388 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2389 if (alignment_support_scheme != dr_aligned
2390 && alignment_support_scheme != dr_unaligned_supported)
2392 if (dump_enabled_p ())
2393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2394 "negative step but alignment required.\n");
2395 return VMAT_ELEMENTWISE;
2398 if (vls_type == VLS_STORE_INVARIANT)
2400 if (dump_enabled_p ())
2401 dump_printf_loc (MSG_NOTE, vect_location,
2402 "negative step with invariant source;"
2403 " no permute needed.\n");
2404 return VMAT_CONTIGUOUS_DOWN;
2407 if (!perm_mask_for_reverse (vectype))
2409 if (dump_enabled_p ())
2410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2411 "negative step and reversing not supported.\n");
2412 return VMAT_ELEMENTWISE;
2415 return VMAT_CONTIGUOUS_REVERSE;
2418 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
2419 if there is a memory access type that the vectorized form can use,
2420 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2421 or scatters, fill in GS_INFO accordingly.
2423 SLP says whether we're performing SLP rather than loop vectorization.
2424 MASKED_P is true if the statement is conditional on a vectorized mask.
2425 VECTYPE is the vector type that the vectorized statements will use.
2426 NCOPIES is the number of vector statements that will be needed. */
2428 static bool
2429 get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
2430 vec_load_store_type vls_type, unsigned int ncopies,
2431 vect_memory_access_type *memory_access_type,
2432 gather_scatter_info *gs_info)
2434 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2435 vec_info *vinfo = stmt_info->vinfo;
2436 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2437 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2438 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2440 *memory_access_type = VMAT_GATHER_SCATTER;
2441 gimple *def_stmt;
2442 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2443 gcc_unreachable ();
2444 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
2445 &gs_info->offset_dt,
2446 &gs_info->offset_vectype))
2448 if (dump_enabled_p ())
2449 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2450 "%s index use not simple.\n",
2451 vls_type == VLS_LOAD ? "gather" : "scatter");
2452 return false;
2455 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2457 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
2458 memory_access_type, gs_info))
2459 return false;
2461 else if (STMT_VINFO_STRIDED_P (stmt_info))
2463 gcc_assert (!slp);
2464 if (loop_vinfo
2465 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2466 masked_p, gs_info))
2467 *memory_access_type = VMAT_GATHER_SCATTER;
2468 else
2469 *memory_access_type = VMAT_ELEMENTWISE;
2471 else
2473 int cmp = compare_step_with_zero (stmt);
2474 if (cmp < 0)
2475 *memory_access_type = get_negative_load_store_type
2476 (stmt, vectype, vls_type, ncopies);
2477 else if (cmp == 0)
2479 gcc_assert (vls_type == VLS_LOAD);
2480 *memory_access_type = VMAT_INVARIANT;
2482 else
2483 *memory_access_type = VMAT_CONTIGUOUS;
2486 if ((*memory_access_type == VMAT_ELEMENTWISE
2487 || *memory_access_type == VMAT_STRIDED_SLP)
2488 && !nunits.is_constant ())
2490 if (dump_enabled_p ())
2491 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2492 "Not using elementwise accesses due to variable "
2493 "vectorization factor.\n");
2494 return false;
2497 /* FIXME: At the moment the cost model seems to underestimate the
2498 cost of using elementwise accesses. This check preserves the
2499 traditional behavior until that can be fixed. */
2500 if (*memory_access_type == VMAT_ELEMENTWISE
2501 && !STMT_VINFO_STRIDED_P (stmt_info)
2502 && !(stmt == DR_GROUP_FIRST_ELEMENT (stmt_info)
2503 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2504 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2506 if (dump_enabled_p ())
2507 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2508 "not falling back to elementwise accesses\n");
2509 return false;
2511 return true;
2514 /* Return true if boolean argument MASK is suitable for vectorizing
2515 conditional load or store STMT. When returning true, store the type
2516 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2517 in *MASK_VECTYPE_OUT. */
2519 static bool
2520 vect_check_load_store_mask (gimple *stmt, tree mask,
2521 vect_def_type *mask_dt_out,
2522 tree *mask_vectype_out)
2524 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2526 if (dump_enabled_p ())
2527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2528 "mask argument is not a boolean.\n");
2529 return false;
2532 if (TREE_CODE (mask) != SSA_NAME)
2534 if (dump_enabled_p ())
2535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2536 "mask argument is not an SSA name.\n");
2537 return false;
2540 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2541 gimple *def_stmt;
2542 enum vect_def_type mask_dt;
2543 tree mask_vectype;
2544 if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &mask_dt,
2545 &mask_vectype))
2547 if (dump_enabled_p ())
2548 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2549 "mask use not simple.\n");
2550 return false;
2553 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2554 if (!mask_vectype)
2555 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2557 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2559 if (dump_enabled_p ())
2560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2561 "could not find an appropriate vector mask type.\n");
2562 return false;
2565 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2566 TYPE_VECTOR_SUBPARTS (vectype)))
2568 if (dump_enabled_p ())
2570 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2571 "vector mask type ");
2572 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2573 dump_printf (MSG_MISSED_OPTIMIZATION,
2574 " does not match vector data type ");
2575 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2576 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2578 return false;
2581 *mask_dt_out = mask_dt;
2582 *mask_vectype_out = mask_vectype;
2583 return true;
2586 /* Return true if stored value RHS is suitable for vectorizing store
2587 statement STMT. When returning true, store the type of the
2588 definition in *RHS_DT_OUT, the type of the vectorized store value in
2589 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2591 static bool
2592 vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2593 tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
2595 /* If this is a store from a constant, make sure
2596 native_encode_expr can handle it. */
2597 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2599 if (dump_enabled_p ())
2600 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2601 "cannot encode constant as a byte sequence.\n");
2602 return false;
2605 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2606 gimple *def_stmt;
2607 enum vect_def_type rhs_dt;
2608 tree rhs_vectype;
2609 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &rhs_dt,
2610 &rhs_vectype))
2612 if (dump_enabled_p ())
2613 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2614 "use not simple.\n");
2615 return false;
2618 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2619 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2621 if (dump_enabled_p ())
2622 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2623 "incompatible vector types.\n");
2624 return false;
2627 *rhs_dt_out = rhs_dt;
2628 *rhs_vectype_out = rhs_vectype;
2629 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2630 *vls_type_out = VLS_STORE_INVARIANT;
2631 else
2632 *vls_type_out = VLS_STORE;
2633 return true;
2636 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2637 Note that we support masks with floating-point type, in which case the
2638 floats are interpreted as a bitmask. */
2640 static tree
2641 vect_build_all_ones_mask (gimple *stmt, tree masktype)
2643 if (TREE_CODE (masktype) == INTEGER_TYPE)
2644 return build_int_cst (masktype, -1);
2645 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2647 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2648 mask = build_vector_from_val (masktype, mask);
2649 return vect_init_vector (stmt, mask, masktype, NULL);
2651 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2653 REAL_VALUE_TYPE r;
2654 long tmp[6];
2655 for (int j = 0; j < 6; ++j)
2656 tmp[j] = -1;
2657 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2658 tree mask = build_real (TREE_TYPE (masktype), r);
2659 mask = build_vector_from_val (masktype, mask);
2660 return vect_init_vector (stmt, mask, masktype, NULL);
2662 gcc_unreachable ();
2665 /* Build an all-zero merge value of type VECTYPE while vectorizing
2666 STMT as a gather load. */
2668 static tree
2669 vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2671 tree merge;
2672 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2673 merge = build_int_cst (TREE_TYPE (vectype), 0);
2674 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2676 REAL_VALUE_TYPE r;
2677 long tmp[6];
2678 for (int j = 0; j < 6; ++j)
2679 tmp[j] = 0;
2680 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2681 merge = build_real (TREE_TYPE (vectype), r);
2683 else
2684 gcc_unreachable ();
2685 merge = build_vector_from_val (vectype, merge);
2686 return vect_init_vector (stmt, merge, vectype, NULL);
2689 /* Build a gather load call while vectorizing STMT. Insert new instructions
2690 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2691 operation. If the load is conditional, MASK is the unvectorized
2692 condition and MASK_DT is its definition type, otherwise MASK is null. */
2694 static void
2695 vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2696 gimple **vec_stmt, gather_scatter_info *gs_info,
2697 tree mask, vect_def_type mask_dt)
2699 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2700 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2701 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2702 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2703 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2704 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2705 edge pe = loop_preheader_edge (loop);
2706 enum { NARROW, NONE, WIDEN } modifier;
2707 poly_uint64 gather_off_nunits
2708 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2710 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2711 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2712 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2713 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2714 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2715 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2716 tree scaletype = TREE_VALUE (arglist);
2717 gcc_checking_assert (types_compatible_p (srctype, rettype)
2718 && (!mask || types_compatible_p (srctype, masktype)));
2720 tree perm_mask = NULL_TREE;
2721 tree mask_perm_mask = NULL_TREE;
2722 if (known_eq (nunits, gather_off_nunits))
2723 modifier = NONE;
2724 else if (known_eq (nunits * 2, gather_off_nunits))
2726 modifier = WIDEN;
2728 /* Currently widening gathers and scatters are only supported for
2729 fixed-length vectors. */
2730 int count = gather_off_nunits.to_constant ();
2731 vec_perm_builder sel (count, count, 1);
2732 for (int i = 0; i < count; ++i)
2733 sel.quick_push (i | (count / 2));
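/* With COUNT == 8, for example, the selector is { 4, 5, 6, 7, 4, 5, 6, 7 }:
   odd-numbered copies use it to move the high half of the offset vector
   into the low element positions. */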
2735 vec_perm_indices indices (sel, 1, count);
2736 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2737 indices);
2739 else if (known_eq (nunits, gather_off_nunits * 2))
2741 modifier = NARROW;
2743 /* Currently narrowing gathers and scatters are only supported for
2744 fixed-length vectors. */
2745 int count = nunits.to_constant ();
2746 vec_perm_builder sel (count, count, 1);
2747 sel.quick_grow (count);
2748 for (int i = 0; i < count; ++i)
2749 sel[i] = i < count / 2 ? i : i + count / 2;
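/* With COUNT == 8, for example, the selector is
   { 0, 1, 2, 3, 8, 9, 10, 11 }, concatenating the low halves of two
   gather results into a single full-width vector. */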
2750 vec_perm_indices indices (sel, 2, count);
2751 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2753 ncopies *= 2;
2755 if (mask)
2757 for (int i = 0; i < count; ++i)
2758 sel[i] = i | (count / 2);
2759 indices.new_vector (sel, 2, count);
2760 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2763 else
2764 gcc_unreachable ();
2766 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2767 vectype);
2769 tree ptr = fold_convert (ptrtype, gs_info->base);
2770 if (!is_gimple_min_invariant (ptr))
2772 gimple_seq seq;
2773 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2774 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2775 gcc_assert (!new_bb);
2778 tree scale = build_int_cst (scaletype, gs_info->scale);
2780 tree vec_oprnd0 = NULL_TREE;
2781 tree vec_mask = NULL_TREE;
2782 tree src_op = NULL_TREE;
2783 tree mask_op = NULL_TREE;
2784 tree prev_res = NULL_TREE;
2785 stmt_vec_info prev_stmt_info = NULL;
2787 if (!mask)
2789 src_op = vect_build_zero_merge_argument (stmt, rettype);
2790 mask_op = vect_build_all_ones_mask (stmt, masktype);
2793 for (int j = 0; j < ncopies; ++j)
2795 tree op, var;
2796 gimple *new_stmt;
2797 if (modifier == WIDEN && (j & 1))
2798 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2799 perm_mask, stmt, gsi);
2800 else if (j == 0)
2801 op = vec_oprnd0
2802 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2803 else
2804 op = vec_oprnd0
2805 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2807 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2809 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2810 TYPE_VECTOR_SUBPARTS (idxtype)));
2811 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2812 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2813 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2814 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2815 op = var;
2818 if (mask)
2820 if (mask_perm_mask && (j & 1))
2821 mask_op = permute_vec_elements (mask_op, mask_op,
2822 mask_perm_mask, stmt, gsi);
2823 else
2825 if (j == 0)
2826 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2827 else
2828 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
2830 mask_op = vec_mask;
2831 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2833 gcc_assert
2834 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2835 TYPE_VECTOR_SUBPARTS (masktype)));
2836 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2837 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2838 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2839 mask_op);
2840 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2841 mask_op = var;
2844 src_op = mask_op;
2847 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2848 mask_op, scale);
2850 if (!useless_type_conversion_p (vectype, rettype))
2852 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2853 TYPE_VECTOR_SUBPARTS (rettype)));
2854 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2855 gimple_call_set_lhs (new_stmt, op);
2856 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2857 var = make_ssa_name (vec_dest);
2858 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2859 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2861 else
2863 var = make_ssa_name (vec_dest, new_stmt);
2864 gimple_call_set_lhs (new_stmt, var);
2867 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2869 if (modifier == NARROW)
2871 if ((j & 1) == 0)
2873 prev_res = var;
2874 continue;
2876 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2877 new_stmt = SSA_NAME_DEF_STMT (var);
2880 if (prev_stmt_info == NULL)
2881 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2882 else
2883 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2884 prev_stmt_info = vinfo_for_stmt (new_stmt);
2888 /* Prepare the base and offset in GS_INFO for vectorization.
2889 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2890 to the vectorized offset argument for the first copy of STMT. STMT
2891 is the statement described by GS_INFO and LOOP is the containing loop. */
2893 static void
2894 vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2895 gather_scatter_info *gs_info,
2896 tree *dataref_ptr, tree *vec_offset)
2898 gimple_seq stmts = NULL;
2899 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2900 if (stmts != NULL)
2902 basic_block new_bb;
2903 edge pe = loop_preheader_edge (loop);
2904 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2905 gcc_assert (!new_bb);
2907 tree offset_type = TREE_TYPE (gs_info->offset);
2908 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2909 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2910 offset_vectype);
2913 /* Prepare to implement a grouped or strided load or store using
2914 the gather load or scatter store operation described by GS_INFO.
2915 STMT is the load or store statement.
2917 Set *DATAREF_BUMP to the amount that should be added to the base
2918 address after each copy of the vectorized statement. Set *VEC_OFFSET
2919 to an invariant offset vector in which element I has the value
2920 I * DR_STEP / SCALE. */
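/* For example (illustrative values): DR_STEP == 12, SCALE == 4 and four
   elements per vector give *DATAREF_BUMP == 48 and
   *VEC_OFFSET == { 0, 3, 6, 9 }. */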
2922 static void
2923 vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2924 gather_scatter_info *gs_info,
2925 tree *dataref_bump, tree *vec_offset)
2927 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2928 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2929 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2930 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2931 gimple_seq stmts;
2933 tree bump = size_binop (MULT_EXPR,
2934 fold_convert (sizetype, DR_STEP (dr)),
2935 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2936 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2937 if (stmts)
2938 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2940 /* The offset given in GS_INFO can have pointer type, so use the element
2941 type of the vector instead. */
2942 tree offset_type = TREE_TYPE (gs_info->offset);
2943 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2944 offset_type = TREE_TYPE (offset_vectype);
2946 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2947 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2948 ssize_int (gs_info->scale));
2949 step = fold_convert (offset_type, step);
2950 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2952 /* Create {0, X, X*2, X*3, ...}. */
2953 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2954 build_zero_cst (offset_type), step);
2955 if (stmts)
2956 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2959 /* Return the amount that should be added to a vector pointer to move
2960 to the next or previous copy of AGGR_TYPE. DR is the data reference
2961 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2962 vectorization. */
2964 static tree
2965 vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2966 vect_memory_access_type memory_access_type)
2968 if (memory_access_type == VMAT_INVARIANT)
2969 return size_zero_node;
2971 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2972 tree step = vect_dr_behavior (dr)->step;
2973 if (tree_int_cst_sgn (step) == -1)
2974 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2975 return iv_step;
2978 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2980 static bool
2981 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2982 gimple **vec_stmt, slp_tree slp_node,
2983 tree vectype_in, enum vect_def_type *dt,
2984 stmt_vector_for_cost *cost_vec)
2986 tree op, vectype;
2987 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2988 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2989 unsigned ncopies;
2990 unsigned HOST_WIDE_INT nunits, num_bytes;
2992 op = gimple_call_arg (stmt, 0);
2993 vectype = STMT_VINFO_VECTYPE (stmt_info);
2995 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2996 return false;
2998 /* Multiple types in SLP are handled by creating the appropriate number of
2999 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3000 case of SLP. */
3001 if (slp_node)
3002 ncopies = 1;
3003 else
3004 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3006 gcc_assert (ncopies >= 1);
3008 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3009 if (! char_vectype)
3010 return false;
3012 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
3013 return false;
3015 unsigned word_bytes = num_bytes / nunits;
3017 /* The encoding uses one stepped pattern for each byte in the word. */
3018 vec_perm_builder elts (num_bytes, word_bytes, 3);
3019 for (unsigned i = 0; i < 3; ++i)
3020 for (unsigned j = 0; j < word_bytes; ++j)
3021 elts.quick_push ((i + 1) * word_bytes - j - 1);
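/* For a bswap32 on a 16-byte vector (illustrative), NUM_BYTES == 16 and
   WORD_BYTES == 4; the encoded elements are
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 }, which the stepped patterns
   extend to reverse the bytes within every 4-byte word. */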
3023 vec_perm_indices indices (elts, 1, num_bytes);
3024 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3025 return false;
3027 if (! vec_stmt)
3029 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3030 if (dump_enabled_p ())
3031 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
3032 "\n");
3033 if (! slp_node)
3035 record_stmt_cost (cost_vec,
3036 1, vector_stmt, stmt_info, 0, vect_prologue);
3037 record_stmt_cost (cost_vec,
3038 ncopies, vec_perm, stmt_info, 0, vect_body);
3040 return true;
3043 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3045 /* Transform. */
3046 vec<tree> vec_oprnds = vNULL;
3047 gimple *new_stmt = NULL;
3048 stmt_vec_info prev_stmt_info = NULL;
3049 for (unsigned j = 0; j < ncopies; j++)
3051 /* Handle uses. */
3052 if (j == 0)
3053 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
3054 else
3055 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
3057 /* Arguments are ready. Create the new vector stmt. */
3058 unsigned i;
3059 tree vop;
3060 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3062 tree tem = make_ssa_name (char_vectype);
3063 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3064 char_vectype, vop));
3065 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3066 tree tem2 = make_ssa_name (char_vectype);
3067 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3068 tem, tem, bswap_vconst);
3069 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3070 tem = make_ssa_name (vectype);
3071 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3072 vectype, tem2));
3073 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3074 if (slp_node)
3075 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3078 if (slp_node)
3079 continue;
3081 if (j == 0)
3082 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3083 else
3084 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3086 prev_stmt_info = vinfo_for_stmt (new_stmt);
3089 vec_oprnds.release ();
3090 return true;
3093 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3094 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3095 in a single step. On success, store the binary pack code in
3096 *CONVERT_CODE. */
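/* A minimal example (illustrative): V4SI call results can be narrowed to
   V8HI elements only if the target supports the conversion as a single
   pack operation, i.e. MULTI_STEP_CVT must remain zero. */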
3098 static bool
3099 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3100 tree_code *convert_code)
3102 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3103 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3104 return false;
3106 tree_code code;
3107 int multi_step_cvt = 0;
3108 auto_vec <tree, 8> interm_types;
3109 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3110 &code, &multi_step_cvt,
3111 &interm_types)
3112 || multi_step_cvt)
3113 return false;
3115 *convert_code = code;
3116 return true;
3119 /* Function vectorizable_call.
3121 Check if GS performs a function call that can be vectorized.
3122 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3123 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3124 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3126 static bool
3127 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
3128 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
3130 gcall *stmt;
3131 tree vec_dest;
3132 tree scalar_dest;
3133 tree op, type;
3134 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3135 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
3136 tree vectype_out, vectype_in;
3137 poly_uint64 nunits_in;
3138 poly_uint64 nunits_out;
3139 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3140 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3141 vec_info *vinfo = stmt_info->vinfo;
3142 tree fndecl, new_temp, rhs_type;
3143 gimple *def_stmt;
3144 enum vect_def_type dt[3]
3145 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3146 int ndts = 3;
3147 gimple *new_stmt = NULL;
3148 int ncopies, j;
3149 vec<tree> vargs = vNULL;
3150 enum { NARROW, NONE, WIDEN } modifier;
3151 size_t i, nargs;
3152 tree lhs;
3154 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3155 return false;
3157 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3158 && ! vec_stmt)
3159 return false;
3161 /* Is GS a vectorizable call? */
3162 stmt = dyn_cast <gcall *> (gs);
3163 if (!stmt)
3164 return false;
3166 if (gimple_call_internal_p (stmt)
3167 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3168 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3169 /* Handled by vectorizable_load and vectorizable_store. */
3170 return false;
3172 if (gimple_call_lhs (stmt) == NULL_TREE
3173 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3174 return false;
3176 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3178 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3180 /* Process function arguments. */
3181 rhs_type = NULL_TREE;
3182 vectype_in = NULL_TREE;
3183 nargs = gimple_call_num_args (stmt);
3185 /* Bail out if the function has more than three arguments; we do not have
3186 interesting builtin functions to vectorize with more than two arguments
3187 except for fma. Calls with no arguments are not handled either. */
3188 if (nargs == 0 || nargs > 3)
3189 return false;
3191 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
3192 if (gimple_call_internal_p (stmt)
3193 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3195 nargs = 0;
3196 rhs_type = unsigned_type_node;
3199 for (i = 0; i < nargs; i++)
3201 tree opvectype;
3203 op = gimple_call_arg (stmt, i);
3205 /* We can only handle calls with arguments of the same type. */
3206 if (rhs_type
3207 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3209 if (dump_enabled_p ())
3210 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3211 "argument types differ.\n");
3212 return false;
3214 if (!rhs_type)
3215 rhs_type = TREE_TYPE (op);
3217 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
3219 if (dump_enabled_p ())
3220 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3221 "use not simple.\n");
3222 return false;
3225 if (!vectype_in)
3226 vectype_in = opvectype;
3227 else if (opvectype
3228 && opvectype != vectype_in)
3230 if (dump_enabled_p ())
3231 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3232 "argument vector types differ.\n");
3233 return false;
3236 /* If all arguments are external or constant defs, use a vector type with
3237 the same size as the output vector type. */
3238 if (!vectype_in)
3239 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3240 if (vec_stmt)
3241 gcc_assert (vectype_in);
3242 if (!vectype_in)
3244 if (dump_enabled_p ())
3246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3247 "no vectype for scalar type ");
3248 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3249 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3252 return false;
3255 /* FORNOW */
3256 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3257 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
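/* For example (illustrative types): V4SI inputs producing a V8HI result
   give NARROW, matching element counts give NONE, and V8HI inputs
   producing a V4SI result give WIDEN. */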
3258 if (known_eq (nunits_in * 2, nunits_out))
3259 modifier = NARROW;
3260 else if (known_eq (nunits_out, nunits_in))
3261 modifier = NONE;
3262 else if (known_eq (nunits_out * 2, nunits_in))
3263 modifier = WIDEN;
3264 else
3265 return false;
3267 /* We only handle functions that do not read or clobber memory. */
3268 if (gimple_vuse (stmt))
3270 if (dump_enabled_p ())
3271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3272 "function reads from or writes to memory.\n");
3273 return false;
3276 /* For now, we only vectorize functions if a target-specific builtin
3277 is available. TODO -- in some cases, it might be profitable to
3278 insert the calls for pieces of the vector, in order to be able
3279 to vectorize other operations in the loop. */
3280 fndecl = NULL_TREE;
3281 internal_fn ifn = IFN_LAST;
3282 combined_fn cfn = gimple_call_combined_fn (stmt);
3283 tree callee = gimple_call_fndecl (stmt);
3285 /* First try using an internal function. */
3286 tree_code convert_code = ERROR_MARK;
3287 if (cfn != CFN_LAST
3288 && (modifier == NONE
3289 || (modifier == NARROW
3290 && simple_integer_narrowing (vectype_out, vectype_in,
3291 &convert_code))))
3292 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3293 vectype_in);
3295 /* If that fails, try asking for a target-specific built-in function. */
3296 if (ifn == IFN_LAST)
3298 if (cfn != CFN_LAST)
3299 fndecl = targetm.vectorize.builtin_vectorized_function
3300 (cfn, vectype_out, vectype_in);
3301 else if (callee)
3302 fndecl = targetm.vectorize.builtin_md_vectorized_function
3303 (callee, vectype_out, vectype_in);
3306 if (ifn == IFN_LAST && !fndecl)
3308 if (cfn == CFN_GOMP_SIMD_LANE
3309 && !slp_node
3310 && loop_vinfo
3311 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3312 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3313 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3314 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3316 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3317 { 0, 1, 2, ... vf - 1 } vector. */
3318 gcc_assert (nargs == 0);
3320 else if (modifier == NONE
3321 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3322 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3323 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3324 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
3325 vectype_in, dt, cost_vec);
3326 else
3328 if (dump_enabled_p ())
3329 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3330 "function is not vectorizable.\n");
3331 return false;
3335 if (slp_node)
3336 ncopies = 1;
3337 else if (modifier == NARROW && ifn == IFN_LAST)
3338 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3339 else
3340 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3342 /* Sanity check: make sure that at least one copy of the vectorized stmt
3343 needs to be generated. */
3344 gcc_assert (ncopies >= 1);
3346 if (!vec_stmt) /* transformation not required. */
3348 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3349 if (dump_enabled_p ())
3350 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
3351 "\n");
3352 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3353 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3354 record_stmt_cost (cost_vec, ncopies / 2,
3355 vec_promote_demote, stmt_info, 0, vect_body);
3357 return true;
3360 /* Transform. */
3362 if (dump_enabled_p ())
3363 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3365 /* Handle def. */
3366 scalar_dest = gimple_call_lhs (stmt);
3367 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3369 prev_stmt_info = NULL;
3370 if (modifier == NONE || ifn != IFN_LAST)
3372 tree prev_res = NULL_TREE;
3373 for (j = 0; j < ncopies; ++j)
3375 /* Build argument list for the vectorized call. */
3376 if (j == 0)
3377 vargs.create (nargs);
3378 else
3379 vargs.truncate (0);
3381 if (slp_node)
3383 auto_vec<vec<tree> > vec_defs (nargs);
3384 vec<tree> vec_oprnds0;
3386 for (i = 0; i < nargs; i++)
3387 vargs.quick_push (gimple_call_arg (stmt, i));
3388 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3389 vec_oprnds0 = vec_defs[0];
3391 /* Arguments are ready. Create the new vector stmt. */
3392 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3394 size_t k;
3395 for (k = 0; k < nargs; k++)
3397 vec<tree> vec_oprndsk = vec_defs[k];
3398 vargs[k] = vec_oprndsk[i];
3400 if (modifier == NARROW)
3402 tree half_res = make_ssa_name (vectype_in);
3403 gcall *call
3404 = gimple_build_call_internal_vec (ifn, vargs);
3405 gimple_call_set_lhs (call, half_res);
3406 gimple_call_set_nothrow (call, true);
3407 new_stmt = call;
3408 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3409 if ((i & 1) == 0)
3411 prev_res = half_res;
3412 continue;
3414 new_temp = make_ssa_name (vec_dest);
3415 new_stmt = gimple_build_assign (new_temp, convert_code,
3416 prev_res, half_res);
3418 else
3420 gcall *call;
3421 if (ifn != IFN_LAST)
3422 call = gimple_build_call_internal_vec (ifn, vargs);
3423 else
3424 call = gimple_build_call_vec (fndecl, vargs);
3425 new_temp = make_ssa_name (vec_dest, call);
3426 gimple_call_set_lhs (call, new_temp);
3427 gimple_call_set_nothrow (call, true);
3428 new_stmt = call;
3430 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3431 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3434 for (i = 0; i < nargs; i++)
3436 vec<tree> vec_oprndsi = vec_defs[i];
3437 vec_oprndsi.release ();
3439 continue;
3442 for (i = 0; i < nargs; i++)
3444 op = gimple_call_arg (stmt, i);
3445 if (j == 0)
3446 vec_oprnd0
3447 = vect_get_vec_def_for_operand (op, stmt);
3448 else
3450 vec_oprnd0 = gimple_call_arg (new_stmt, i);
3451 vec_oprnd0
3452 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3455 vargs.quick_push (vec_oprnd0);
3458 if (gimple_call_internal_p (stmt)
3459 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3461 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
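/* Copy J of the vectorized stmt supplies lanes J * NUNITS_OUT,
   J * NUNITS_OUT + 1, ... of the { 0, 1, ..., VF - 1 } series described
   above. */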
3462 tree new_var
3463 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3464 gimple *init_stmt = gimple_build_assign (new_var, cst);
3465 vect_init_vector_1 (stmt, init_stmt, NULL);
3466 new_temp = make_ssa_name (vec_dest);
3467 new_stmt = gimple_build_assign (new_temp, new_var);
3469 else if (modifier == NARROW)
3471 tree half_res = make_ssa_name (vectype_in);
3472 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3473 gimple_call_set_lhs (call, half_res);
3474 gimple_call_set_nothrow (call, true);
3475 new_stmt = call;
3476 vect_finish_stmt_generation (stmt, new_stmt, gsi);
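/* Even-numbered copies only save the half-width result in PREV_RES;
   each odd copy then packs PREV_RES and the new half result into one
   narrowed vector using CONVERT_CODE. */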
3477 if ((j & 1) == 0)
3479 prev_res = half_res;
3480 continue;
3482 new_temp = make_ssa_name (vec_dest);
3483 new_stmt = gimple_build_assign (new_temp, convert_code,
3484 prev_res, half_res);
3486 else
3488 gcall *call;
3489 if (ifn != IFN_LAST)
3490 call = gimple_build_call_internal_vec (ifn, vargs);
3491 else
3492 call = gimple_build_call_vec (fndecl, vargs);
3493 new_temp = make_ssa_name (vec_dest, new_stmt);
3494 gimple_call_set_lhs (call, new_temp);
3495 gimple_call_set_nothrow (call, true);
3496 new_stmt = call;
3498 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3500 if (j == (modifier == NARROW ? 1 : 0))
3501 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3502 else
3503 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3505 prev_stmt_info = vinfo_for_stmt (new_stmt);
3508 else if (modifier == NARROW)
3510 for (j = 0; j < ncopies; ++j)
3512 /* Build argument list for the vectorized call. */
3513 if (j == 0)
3514 vargs.create (nargs * 2);
3515 else
3516 vargs.truncate (0);
3518 if (slp_node)
3520 auto_vec<vec<tree> > vec_defs (nargs);
3521 vec<tree> vec_oprnds0;
3523 for (i = 0; i < nargs; i++)
3524 vargs.quick_push (gimple_call_arg (stmt, i));
3525 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3526 vec_oprnds0 = vec_defs[0];
3528 /* Arguments are ready. Create the new vector stmt. */
3529 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3531 size_t k;
3532 vargs.truncate (0);
3533 for (k = 0; k < nargs; k++)
3535 vec<tree> vec_oprndsk = vec_defs[k];
3536 vargs.quick_push (vec_oprndsk[i]);
3537 vargs.quick_push (vec_oprndsk[i + 1]);
3539 gcall *call;
3540 if (ifn != IFN_LAST)
3541 call = gimple_build_call_internal_vec (ifn, vargs);
3542 else
3543 call = gimple_build_call_vec (fndecl, vargs);
3544 new_temp = make_ssa_name (vec_dest, call);
3545 gimple_call_set_lhs (call, new_temp);
3546 gimple_call_set_nothrow (call, true);
3547 new_stmt = call;
3548 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3549 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3552 for (i = 0; i < nargs; i++)
3554 vec<tree> vec_oprndsi = vec_defs[i];
3555 vec_oprndsi.release ();
3557 continue;
3560 for (i = 0; i < nargs; i++)
3562 op = gimple_call_arg (stmt, i);
3563 if (j == 0)
3565 vec_oprnd0
3566 = vect_get_vec_def_for_operand (op, stmt);
3567 vec_oprnd1
3568 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3570 else
3572 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3573 vec_oprnd0
3574 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3575 vec_oprnd1
3576 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3579 vargs.quick_push (vec_oprnd0);
3580 vargs.quick_push (vec_oprnd1);
3583 new_stmt = gimple_build_call_vec (fndecl, vargs);
3584 new_temp = make_ssa_name (vec_dest, new_stmt);
3585 gimple_call_set_lhs (new_stmt, new_temp);
3586 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3588 if (j == 0)
3589 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3590 else
3591 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3593 prev_stmt_info = vinfo_for_stmt (new_stmt);
3596 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3598 else
3599 /* No current target implements this case. */
3600 return false;
3602 vargs.release ();
3604 /* The call in STMT might prevent it from being removed in dce.
3605 We however cannot remove it here, due to the way the ssa name
3606 it defines is mapped to the new definition. So just replace
3607 rhs of the statement with something harmless. */
3609 if (slp_node)
3610 return true;
3612 type = TREE_TYPE (scalar_dest);
3613 if (is_pattern_stmt_p (stmt_info))
3614 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3615 else
3616 lhs = gimple_call_lhs (stmt);
3618 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3619 set_vinfo_for_stmt (new_stmt, stmt_info);
3620 set_vinfo_for_stmt (stmt, NULL);
3621 STMT_VINFO_STMT (stmt_info) = new_stmt;
3622 gsi_replace (gsi, new_stmt, false);
3624 return true;
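
/* A minimal standalone sketch (not GCC internals) of the pairing scheme the
   NARROW paths above use: even iterations only stash the half-width call
   result (prev_res = half_res), odd iterations combine the stashed half with
   the current one, which is what the (i & 1) and (j & 1) tests implement.
   The struct and combine_halves () below are hypothetical stand-ins.  */

struct full_result		/* hypothetical stand-in for a full-width vector  */
{
  int lo;
  int hi;
};

static struct full_result
combine_halves (int prev_half, int cur_half)	/* stand-in for convert_code  */
{
  struct full_result r = { prev_half, cur_half };
  return r;
}

static unsigned int
pair_narrow_results (const int *half_results, unsigned int n,
		     struct full_result *out)
{
  unsigned int nout = 0;
  int prev = 0;
  for (unsigned int i = 0; i < n; i++)
    {
      if ((i & 1) == 0)
	{
	  /* Like "prev_res = half_res; continue;" above.  */
	  prev = half_results[i];
	  continue;
	}
      out[nout++] = combine_halves (prev, half_results[i]);
    }
  return nout;
}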
3628 struct simd_call_arg_info
3630 tree vectype;
3631 tree op;
3632 HOST_WIDE_INT linear_step;
3633 enum vect_def_type dt;
3634 unsigned int align;
3635 bool simd_lane_linear;
3638 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3639 is linear within simd lane (but not within whole loop), note it in
3640 *ARGINFO. */
3642 static void
3643 vect_simd_lane_linear (tree op, struct loop *loop,
3644 struct simd_call_arg_info *arginfo)
3646 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3648 if (!is_gimple_assign (def_stmt)
3649 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3650 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3651 return;
3653 tree base = gimple_assign_rhs1 (def_stmt);
3654 HOST_WIDE_INT linear_step = 0;
3655 tree v = gimple_assign_rhs2 (def_stmt);
3656 while (TREE_CODE (v) == SSA_NAME)
3658 tree t;
3659 def_stmt = SSA_NAME_DEF_STMT (v);
3660 if (is_gimple_assign (def_stmt))
3661 switch (gimple_assign_rhs_code (def_stmt))
3663 case PLUS_EXPR:
3664 t = gimple_assign_rhs2 (def_stmt);
3665 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3666 return;
3667 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3668 v = gimple_assign_rhs1 (def_stmt);
3669 continue;
3670 case MULT_EXPR:
3671 t = gimple_assign_rhs2 (def_stmt);
3672 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3673 return;
3674 linear_step = tree_to_shwi (t);
3675 v = gimple_assign_rhs1 (def_stmt);
3676 continue;
3677 CASE_CONVERT:
3678 t = gimple_assign_rhs1 (def_stmt);
3679 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3680 || (TYPE_PRECISION (TREE_TYPE (v))
3681 < TYPE_PRECISION (TREE_TYPE (t))))
3682 return;
3683 if (!linear_step)
3684 linear_step = 1;
3685 v = t;
3686 continue;
3687 default:
3688 return;
3690 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3691 && loop->simduid
3692 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3693 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3694 == loop->simduid))
3696 if (!linear_step)
3697 linear_step = 1;
3698 arginfo->linear_step = linear_step;
3699 arginfo->op = base;
3700 arginfo->simd_lane_linear = true;
3701 return;
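
/* A made-up C-level example (not from the GCC sources) of the pattern
   vect_simd_lane_linear recognises.  An address-taken private variable in a
   simd region is, roughly speaking, lowered to an element of a per-lane
   array, so its address is base plus lane * sizeof (double): linear within
   the simd lane even though it is not an affine IV of the loop.  consume ()
   and the surrounding names are hypothetical.  */

extern void consume (double *);

static void
simd_lane_linear_example (double *out, int n)
{
  double tmp;
#pragma omp simd private (tmp)
  for (int i = 0; i < n; i++)
    {
      tmp = out[i] * 2.0;
      /* After OpenMP lowering, &tmp is roughly
	 &omp_simd_array[.GOMP_SIMD_LANE ()], which this function classifies
	 as simd-lane linear with step sizeof (double).  */
      consume (&tmp);
      out[i] = tmp;
    }
}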
3706 /* Return the number of elements in vector type VECTYPE, which is associated
3707 with a SIMD clone. At present these vectors always have a constant
3708 length. */
3710 static unsigned HOST_WIDE_INT
3711 simd_clone_subparts (tree vectype)
3713 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3716 /* Function vectorizable_simd_clone_call.
3718 Check if STMT performs a function call that can be vectorized
3719 by calling a simd clone of the function.
3720 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3721 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3722 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3724 static bool
3725 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3726 gimple **vec_stmt, slp_tree slp_node,
3727 stmt_vector_for_cost *)
3729 tree vec_dest;
3730 tree scalar_dest;
3731 tree op, type;
3732 tree vec_oprnd0 = NULL_TREE;
3733 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3734 tree vectype;
3735 unsigned int nunits;
3736 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3737 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3738 vec_info *vinfo = stmt_info->vinfo;
3739 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3740 tree fndecl, new_temp;
3741 gimple *def_stmt;
3742 gimple *new_stmt = NULL;
3743 int ncopies, j;
3744 auto_vec<simd_call_arg_info> arginfo;
3745 vec<tree> vargs = vNULL;
3746 size_t i, nargs;
3747 tree lhs, rtype, ratype;
3748 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3750 /* Is STMT a vectorizable call? */
3751 if (!is_gimple_call (stmt))
3752 return false;
3754 fndecl = gimple_call_fndecl (stmt);
3755 if (fndecl == NULL_TREE)
3756 return false;
3758 struct cgraph_node *node = cgraph_node::get (fndecl);
3759 if (node == NULL || node->simd_clones == NULL)
3760 return false;
3762 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3763 return false;
3765 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3766 && ! vec_stmt)
3767 return false;
3769 if (gimple_call_lhs (stmt)
3770 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3771 return false;
3773 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3775 vectype = STMT_VINFO_VECTYPE (stmt_info);
3777 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3778 return false;
3780 /* FORNOW */
3781 if (slp_node)
3782 return false;
3784 /* Process function arguments. */
3785 nargs = gimple_call_num_args (stmt);
3787 /* Bail out if the function has zero arguments. */
3788 if (nargs == 0)
3789 return false;
3791 arginfo.reserve (nargs, true);
3793 for (i = 0; i < nargs; i++)
3795 simd_call_arg_info thisarginfo;
3796 affine_iv iv;
3798 thisarginfo.linear_step = 0;
3799 thisarginfo.align = 0;
3800 thisarginfo.op = NULL_TREE;
3801 thisarginfo.simd_lane_linear = false;
3803 op = gimple_call_arg (stmt, i);
3804 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3805 &thisarginfo.vectype)
3806 || thisarginfo.dt == vect_uninitialized_def)
3808 if (dump_enabled_p ())
3809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3810 "use not simple.\n");
3811 return false;
3814 if (thisarginfo.dt == vect_constant_def
3815 || thisarginfo.dt == vect_external_def)
3816 gcc_assert (thisarginfo.vectype == NULL_TREE);
3817 else
3818 gcc_assert (thisarginfo.vectype != NULL_TREE);
3820 /* For linear arguments, the analyze phase should have saved
3821 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3822 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3823 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3825 gcc_assert (vec_stmt);
3826 thisarginfo.linear_step
3827 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3828 thisarginfo.op
3829 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3830 thisarginfo.simd_lane_linear
3831 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3832 == boolean_true_node);
3833 /* If loop has been peeled for alignment, we need to adjust it. */
3834 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3835 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3836 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3838 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3839 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3840 tree opt = TREE_TYPE (thisarginfo.op);
3841 bias = fold_convert (TREE_TYPE (step), bias);
3842 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3843 thisarginfo.op
3844 = fold_build2 (POINTER_TYPE_P (opt)
3845 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3846 thisarginfo.op, bias);
3849 else if (!vec_stmt
3850 && thisarginfo.dt != vect_constant_def
3851 && thisarginfo.dt != vect_external_def
3852 && loop_vinfo
3853 && TREE_CODE (op) == SSA_NAME
3854 && simple_iv (loop, loop_containing_stmt (stmt), op,
3855 &iv, false)
3856 && tree_fits_shwi_p (iv.step))
3858 thisarginfo.linear_step = tree_to_shwi (iv.step);
3859 thisarginfo.op = iv.base;
3861 else if ((thisarginfo.dt == vect_constant_def
3862 || thisarginfo.dt == vect_external_def)
3863 && POINTER_TYPE_P (TREE_TYPE (op)))
3864 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3865 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3866 linear too. */
3867 if (POINTER_TYPE_P (TREE_TYPE (op))
3868 && !thisarginfo.linear_step
3869 && !vec_stmt
3870 && thisarginfo.dt != vect_constant_def
3871 && thisarginfo.dt != vect_external_def
3872 && loop_vinfo
3873 && !slp_node
3874 && TREE_CODE (op) == SSA_NAME)
3875 vect_simd_lane_linear (op, loop, &thisarginfo);
3877 arginfo.quick_push (thisarginfo);
3880 unsigned HOST_WIDE_INT vf;
3881 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3883 if (dump_enabled_p ())
3884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3885 "not considering SIMD clones; not yet supported"
3886 " for variable-width vectors.\n");
3887 return false;
3890 unsigned int badness = 0;
3891 struct cgraph_node *bestn = NULL;
3892 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3893 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3894 else
3895 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3896 n = n->simdclone->next_clone)
3898 unsigned int this_badness = 0;
3899 if (n->simdclone->simdlen > vf
3900 || n->simdclone->nargs != nargs)
3901 continue;
3902 if (n->simdclone->simdlen < vf)
3903 this_badness += (exact_log2 (vf)
3904 - exact_log2 (n->simdclone->simdlen)) * 1024;
3905 if (n->simdclone->inbranch)
3906 this_badness += 2048;
3907 int target_badness = targetm.simd_clone.usable (n);
3908 if (target_badness < 0)
3909 continue;
3910 this_badness += target_badness * 512;
3911 /* FORNOW: Have to add code to add the mask argument. */
3912 if (n->simdclone->inbranch)
3913 continue;
3914 for (i = 0; i < nargs; i++)
3916 switch (n->simdclone->args[i].arg_type)
3918 case SIMD_CLONE_ARG_TYPE_VECTOR:
3919 if (!useless_type_conversion_p
3920 (n->simdclone->args[i].orig_type,
3921 TREE_TYPE (gimple_call_arg (stmt, i))))
3922 i = -1;
3923 else if (arginfo[i].dt == vect_constant_def
3924 || arginfo[i].dt == vect_external_def
3925 || arginfo[i].linear_step)
3926 this_badness += 64;
3927 break;
3928 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3929 if (arginfo[i].dt != vect_constant_def
3930 && arginfo[i].dt != vect_external_def)
3931 i = -1;
3932 break;
3933 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3934 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3935 if (arginfo[i].dt == vect_constant_def
3936 || arginfo[i].dt == vect_external_def
3937 || (arginfo[i].linear_step
3938 != n->simdclone->args[i].linear_step))
3939 i = -1;
3940 break;
3941 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3942 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3943 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3944 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3945 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3946 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3947 /* FORNOW */
3948 i = -1;
3949 break;
3950 case SIMD_CLONE_ARG_TYPE_MASK:
3951 gcc_unreachable ();
3953 if (i == (size_t) -1)
3954 break;
3955 if (n->simdclone->args[i].alignment > arginfo[i].align)
3957 i = -1;
3958 break;
3960 if (arginfo[i].align)
3961 this_badness += (exact_log2 (arginfo[i].align)
3962 - exact_log2 (n->simdclone->args[i].alignment));
3964 if (i == (size_t) -1)
3965 continue;
3966 if (bestn == NULL || this_badness < badness)
3968 bestn = n;
3969 badness = this_badness;
3973 if (bestn == NULL)
3974 return false;
3976 for (i = 0; i < nargs; i++)
3977 if ((arginfo[i].dt == vect_constant_def
3978 || arginfo[i].dt == vect_external_def)
3979 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3981 arginfo[i].vectype
3982 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3983 i)));
3984 if (arginfo[i].vectype == NULL
3985 || (simd_clone_subparts (arginfo[i].vectype)
3986 > bestn->simdclone->simdlen))
3987 return false;
3990 fndecl = bestn->decl;
3991 nunits = bestn->simdclone->simdlen;
3992 ncopies = vf / nunits;
3994 /* If the function isn't const, only allow it in simd loops where the user
3995 has asserted that at least nunits consecutive iterations can be
3996 performed using SIMD instructions. */
3997 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3998 && gimple_vuse (stmt))
3999 return false;
4001 /* Sanity check: make sure that at least one copy of the vectorized stmt
4002 needs to be generated. */
4003 gcc_assert (ncopies >= 1);
4005 if (!vec_stmt) /* transformation not required. */
4007 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4008 for (i = 0; i < nargs; i++)
4009 if ((bestn->simdclone->args[i].arg_type
4010 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4011 || (bestn->simdclone->args[i].arg_type
4012 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4014 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4015 + 1);
4016 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4017 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4018 ? size_type_node : TREE_TYPE (arginfo[i].op);
4019 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4020 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4021 tree sll = arginfo[i].simd_lane_linear
4022 ? boolean_true_node : boolean_false_node;
4023 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4025 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4026 if (dump_enabled_p ())
4027 dump_printf_loc (MSG_NOTE, vect_location,
4028 "=== vectorizable_simd_clone_call ===\n");
4029 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4030 return true;
4033 /* Transform. */
4035 if (dump_enabled_p ())
4036 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4038 /* Handle def. */
4039 scalar_dest = gimple_call_lhs (stmt);
4040 vec_dest = NULL_TREE;
4041 rtype = NULL_TREE;
4042 ratype = NULL_TREE;
4043 if (scalar_dest)
4045 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4046 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4047 if (TREE_CODE (rtype) == ARRAY_TYPE)
4049 ratype = rtype;
4050 rtype = TREE_TYPE (ratype);
4054 prev_stmt_info = NULL;
4055 for (j = 0; j < ncopies; ++j)
4057 /* Build argument list for the vectorized call. */
4058 if (j == 0)
4059 vargs.create (nargs);
4060 else
4061 vargs.truncate (0);
4063 for (i = 0; i < nargs; i++)
4065 unsigned int k, l, m, o;
4066 tree atype;
4067 op = gimple_call_arg (stmt, i);
4068 switch (bestn->simdclone->args[i].arg_type)
4070 case SIMD_CLONE_ARG_TYPE_VECTOR:
4071 atype = bestn->simdclone->args[i].vector_type;
4072 o = nunits / simd_clone_subparts (atype);
4073 for (m = j * o; m < (j + 1) * o; m++)
4075 if (simd_clone_subparts (atype)
4076 < simd_clone_subparts (arginfo[i].vectype))
4078 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4079 k = (simd_clone_subparts (arginfo[i].vectype)
4080 / simd_clone_subparts (atype));
4081 gcc_assert ((k & (k - 1)) == 0);
4082 if (m == 0)
4083 vec_oprnd0
4084 = vect_get_vec_def_for_operand (op, stmt);
4085 else
4087 vec_oprnd0 = arginfo[i].op;
4088 if ((m & (k - 1)) == 0)
4089 vec_oprnd0
4090 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4091 vec_oprnd0);
4093 arginfo[i].op = vec_oprnd0;
4094 vec_oprnd0
4095 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4096 bitsize_int (prec),
4097 bitsize_int ((m & (k - 1)) * prec));
4098 new_stmt
4099 = gimple_build_assign (make_ssa_name (atype),
4100 vec_oprnd0);
4101 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4102 vargs.safe_push (gimple_assign_lhs (new_stmt));
4104 else
4106 k = (simd_clone_subparts (atype)
4107 / simd_clone_subparts (arginfo[i].vectype));
4108 gcc_assert ((k & (k - 1)) == 0);
4109 vec<constructor_elt, va_gc> *ctor_elts;
4110 if (k != 1)
4111 vec_alloc (ctor_elts, k);
4112 else
4113 ctor_elts = NULL;
4114 for (l = 0; l < k; l++)
4116 if (m == 0 && l == 0)
4117 vec_oprnd0
4118 = vect_get_vec_def_for_operand (op, stmt);
4119 else
4120 vec_oprnd0
4121 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4122 arginfo[i].op);
4123 arginfo[i].op = vec_oprnd0;
4124 if (k == 1)
4125 break;
4126 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4127 vec_oprnd0);
4129 if (k == 1)
4130 vargs.safe_push (vec_oprnd0);
4131 else
4133 vec_oprnd0 = build_constructor (atype, ctor_elts);
4134 new_stmt
4135 = gimple_build_assign (make_ssa_name (atype),
4136 vec_oprnd0);
4137 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4138 vargs.safe_push (gimple_assign_lhs (new_stmt));
4142 break;
4143 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4144 vargs.safe_push (op);
4145 break;
4146 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4147 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4148 if (j == 0)
4150 gimple_seq stmts;
4151 arginfo[i].op
4152 = force_gimple_operand (arginfo[i].op, &stmts, true,
4153 NULL_TREE);
4154 if (stmts != NULL)
4156 basic_block new_bb;
4157 edge pe = loop_preheader_edge (loop);
4158 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4159 gcc_assert (!new_bb);
4161 if (arginfo[i].simd_lane_linear)
4163 vargs.safe_push (arginfo[i].op);
4164 break;
4166 tree phi_res = copy_ssa_name (op);
4167 gphi *new_phi = create_phi_node (phi_res, loop->header);
4168 set_vinfo_for_stmt (new_phi,
4169 new_stmt_vec_info (new_phi, loop_vinfo));
4170 add_phi_arg (new_phi, arginfo[i].op,
4171 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4172 enum tree_code code
4173 = POINTER_TYPE_P (TREE_TYPE (op))
4174 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4175 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4176 ? sizetype : TREE_TYPE (op);
4177 widest_int cst
4178 = wi::mul (bestn->simdclone->args[i].linear_step,
4179 ncopies * nunits);
4180 tree tcst = wide_int_to_tree (type, cst);
4181 tree phi_arg = copy_ssa_name (op);
4182 new_stmt
4183 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4184 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4185 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4186 set_vinfo_for_stmt (new_stmt,
4187 new_stmt_vec_info (new_stmt, loop_vinfo));
4188 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4189 UNKNOWN_LOCATION);
4190 arginfo[i].op = phi_res;
4191 vargs.safe_push (phi_res);
4193 else
4195 enum tree_code code
4196 = POINTER_TYPE_P (TREE_TYPE (op))
4197 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4198 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4199 ? sizetype : TREE_TYPE (op);
4200 widest_int cst
4201 = wi::mul (bestn->simdclone->args[i].linear_step,
4202 j * nunits);
4203 tree tcst = wide_int_to_tree (type, cst);
4204 new_temp = make_ssa_name (TREE_TYPE (op));
4205 new_stmt = gimple_build_assign (new_temp, code,
4206 arginfo[i].op, tcst);
4207 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4208 vargs.safe_push (new_temp);
4210 break;
4211 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4212 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4213 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4214 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4215 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4216 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4217 default:
4218 gcc_unreachable ();
4222 new_stmt = gimple_build_call_vec (fndecl, vargs);
4223 if (vec_dest)
4225 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4226 if (ratype)
4227 new_temp = create_tmp_var (ratype);
4228 else if (simd_clone_subparts (vectype)
4229 == simd_clone_subparts (rtype))
4230 new_temp = make_ssa_name (vec_dest, new_stmt);
4231 else
4232 new_temp = make_ssa_name (rtype, new_stmt);
4233 gimple_call_set_lhs (new_stmt, new_temp);
4235 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4237 if (vec_dest)
4239 if (simd_clone_subparts (vectype) < nunits)
4241 unsigned int k, l;
4242 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4243 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4244 k = nunits / simd_clone_subparts (vectype);
4245 gcc_assert ((k & (k - 1)) == 0);
4246 for (l = 0; l < k; l++)
4248 tree t;
4249 if (ratype)
4251 t = build_fold_addr_expr (new_temp);
4252 t = build2 (MEM_REF, vectype, t,
4253 build_int_cst (TREE_TYPE (t), l * bytes));
4255 else
4256 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4257 bitsize_int (prec), bitsize_int (l * prec));
4258 new_stmt
4259 = gimple_build_assign (make_ssa_name (vectype), t);
4260 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4261 if (j == 0 && l == 0)
4262 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4263 else
4264 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4266 prev_stmt_info = vinfo_for_stmt (new_stmt);
4269 if (ratype)
4270 vect_clobber_variable (stmt, gsi, new_temp);
4271 continue;
4273 else if (simd_clone_subparts (vectype) > nunits)
4275 unsigned int k = (simd_clone_subparts (vectype)
4276 / simd_clone_subparts (rtype));
4277 gcc_assert ((k & (k - 1)) == 0);
4278 if ((j & (k - 1)) == 0)
4279 vec_alloc (ret_ctor_elts, k);
4280 if (ratype)
4282 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4283 for (m = 0; m < o; m++)
4285 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4286 size_int (m), NULL_TREE, NULL_TREE);
4287 new_stmt
4288 = gimple_build_assign (make_ssa_name (rtype), tem);
4289 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4290 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4291 gimple_assign_lhs (new_stmt));
4293 vect_clobber_variable (stmt, gsi, new_temp);
4295 else
4296 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4297 if ((j & (k - 1)) != k - 1)
4298 continue;
4299 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4300 new_stmt
4301 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4302 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4304 if ((unsigned) j == k - 1)
4305 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4306 else
4307 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4309 prev_stmt_info = vinfo_for_stmt (new_stmt);
4310 continue;
4312 else if (ratype)
4314 tree t = build_fold_addr_expr (new_temp);
4315 t = build2 (MEM_REF, vectype, t,
4316 build_int_cst (TREE_TYPE (t), 0));
4317 new_stmt
4318 = gimple_build_assign (make_ssa_name (vec_dest), t);
4319 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4320 vect_clobber_variable (stmt, gsi, new_temp);
4324 if (j == 0)
4325 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4326 else
4327 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4329 prev_stmt_info = vinfo_for_stmt (new_stmt);
4332 vargs.release ();
4334 /* The call in STMT might prevent it from being removed in dce.
4335 We however cannot remove it here, due to the way the ssa name
4336 it defines is mapped to the new definition. So just replace
4337 rhs of the statement with something harmless. */
4339 if (slp_node)
4340 return true;
4342 if (scalar_dest)
4344 type = TREE_TYPE (scalar_dest);
4345 if (is_pattern_stmt_p (stmt_info))
4346 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
4347 else
4348 lhs = gimple_call_lhs (stmt);
4349 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4351 else
4352 new_stmt = gimple_build_nop ();
4353 set_vinfo_for_stmt (new_stmt, stmt_info);
4354 set_vinfo_for_stmt (stmt, NULL);
4355 STMT_VINFO_STMT (stmt_info) = new_stmt;
4356 gsi_replace (gsi, new_stmt, true);
4357 unlink_stmt_vdef (stmt);
4359 return true;
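
/* A simplified standalone sketch (not the GCC data structures) of the
   clone-selection scoring in the loop over node->simd_clones above: clones
   longer than the vectorization factor are rejected, shorter ones pay 1024
   per halving, inbranch clones pay 2048, and a target-reported penalty is
   scaled by 512; the lowest total wins.  The struct below is a hypothetical
   stand-in for the cgraph simd-clone data.  */

struct clone_pick_info
{
  unsigned int simdlen;		/* lanes the clone processes per call  */
  int inbranch;			/* clone takes a mask argument  */
  int target_score;		/* stand-in for targetm.simd_clone.usable  */
};

static int
clone_badness_sketch (const struct clone_pick_info *c, unsigned int vf)
{
  if (c->simdlen > vf || c->target_score < 0)
    return -1;			/* unusable, like the `continue's above  */

  int badness = 0;
  if (c->simdlen < vf)
    /* Both counts are powers of two here, so ctz is log2.  */
    badness += (__builtin_ctz (vf) - __builtin_ctz (c->simdlen)) * 1024;
  if (c->inbranch)
    badness += 2048;
  badness += c->target_score * 512;
  return badness;
}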
4363 /* Function vect_gen_widened_results_half
4365 Create a vector stmt whose code, number of arguments, and result
4366 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4367 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
4368 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4369 needs to be created (DECL is a function-decl of a target-builtin).
4370 STMT is the original scalar stmt that we are vectorizing. */
4372 static gimple *
4373 vect_gen_widened_results_half (enum tree_code code,
4374 tree decl,
4375 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4376 tree vec_dest, gimple_stmt_iterator *gsi,
4377 gimple *stmt)
4379 gimple *new_stmt;
4380 tree new_temp;
4382 /* Generate half of the widened result: */
4383 if (code == CALL_EXPR)
4385 /* Target specific support */
4386 if (op_type == binary_op)
4387 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4388 else
4389 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4390 new_temp = make_ssa_name (vec_dest, new_stmt);
4391 gimple_call_set_lhs (new_stmt, new_temp);
4393 else
4395 /* Generic support */
4396 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4397 if (op_type != binary_op)
4398 vec_oprnd1 = NULL;
4399 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4400 new_temp = make_ssa_name (vec_dest, new_stmt);
4401 gimple_assign_set_lhs (new_stmt, new_temp);
4403 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4405 return new_stmt;
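
/* An illustrative scalar picture (not GCC internals) of the two "halves" a
   widening operation produces.  Widening eight chars to eight shorts cannot
   fit in one vector of the input's size, so the vectorizer emits two stmts
   (code1/code2, or two calls to DECL): one computes the results for the low
   half of the input elements, the other for the high half.  */

static void
widen_halves_example (const signed char in[8], short lo_half[4],
		      short hi_half[4])
{
  for (int i = 0; i < 4; i++)
    {
      lo_half[i] = in[i];	/* what the CODE1 (lo) stmt computes  */
      hi_half[i] = in[i + 4];	/* what the CODE2 (hi) stmt computes  */
    }
}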
4409 /* Get vectorized definitions for loop-based vectorization. For the first
4410 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4411 scalar operand), and for the rest we get a copy with
4412 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4413 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4414 The vectors are collected into VEC_OPRNDS. */
4416 static void
4417 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
4418 vec<tree> *vec_oprnds, int multi_step_cvt)
4420 tree vec_oprnd;
4422 /* Get first vector operand. */
4423 /* All the vector operands except the very first one (that is scalar oprnd)
4424 are stmt copies. */
4425 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4426 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4427 else
4428 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4430 vec_oprnds->quick_push (vec_oprnd);
4432 /* Get second vector operand. */
4433 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
4434 vec_oprnds->quick_push (vec_oprnd);
4436 *oprnd = vec_oprnd;
4438 /* For conversion in multiple steps, continue to get operands
4439 recursively. */
4440 if (multi_step_cvt)
4441 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4445 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4446 For multi-step conversions store the resulting vectors and call the function
4447 recursively. */
4449 static void
4450 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4451 int multi_step_cvt, gimple *stmt,
4452 vec<tree> vec_dsts,
4453 gimple_stmt_iterator *gsi,
4454 slp_tree slp_node, enum tree_code code,
4455 stmt_vec_info *prev_stmt_info)
4457 unsigned int i;
4458 tree vop0, vop1, new_tmp, vec_dest;
4459 gimple *new_stmt;
4460 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4462 vec_dest = vec_dsts.pop ();
4464 for (i = 0; i < vec_oprnds->length (); i += 2)
4466 /* Create demotion operation. */
4467 vop0 = (*vec_oprnds)[i];
4468 vop1 = (*vec_oprnds)[i + 1];
4469 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4470 new_tmp = make_ssa_name (vec_dest, new_stmt);
4471 gimple_assign_set_lhs (new_stmt, new_tmp);
4472 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4474 if (multi_step_cvt)
4475 /* Store the resulting vector for next recursive call. */
4476 (*vec_oprnds)[i/2] = new_tmp;
4477 else
4479 /* This is the last step of the conversion sequence. Store the
4480 vectors in SLP_NODE or in vector info of the scalar statement
4481 (or in STMT_VINFO_RELATED_STMT chain). */
4482 if (slp_node)
4483 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4484 else
4486 if (!*prev_stmt_info)
4487 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4488 else
4489 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4491 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4496 /* For multi-step demotion operations we first generate demotion operations
4497 from the source type to the intermediate types, and then combine the
4498 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4499 type. */
4500 if (multi_step_cvt)
4502 /* At each level of recursion we have half of the operands we had at the
4503 previous level. */
4504 vec_oprnds->truncate ((i+1)/2);
4505 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4506 stmt, vec_dsts, gsi, slp_node,
4507 VEC_PACK_TRUNC_EXPR,
4508 prev_stmt_info);
4511 vec_dsts.quick_push (vec_dest);
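
/* A standalone sketch (assumptions simplified, not GCC code) of the
   multi-step demotion recursion above: each level packs adjacent pairs of
   vectors with VEC_PACK_TRUNC, so the operand count roughly halves per
   level, which is why the NARROW caller allocates 2 * 2^multi_step_cvt
   operands and the recursion truncates the vector to (i + 1) / 2 entries.  */

static unsigned int
demotion_vectors_after (unsigned int nvectors, int levels)
{
  while (levels-- > 0)
    nvectors = (nvectors + 1) / 2;	/* one VEC_PACK_TRUNC pass  */
  return nvectors;
}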
4515 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4516 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4517 the resulting vectors and call the function recursively. */
4519 static void
4520 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4521 vec<tree> *vec_oprnds1,
4522 gimple *stmt, tree vec_dest,
4523 gimple_stmt_iterator *gsi,
4524 enum tree_code code1,
4525 enum tree_code code2, tree decl1,
4526 tree decl2, int op_type)
4528 int i;
4529 tree vop0, vop1, new_tmp1, new_tmp2;
4530 gimple *new_stmt1, *new_stmt2;
4531 vec<tree> vec_tmp = vNULL;
4533 vec_tmp.create (vec_oprnds0->length () * 2);
4534 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4536 if (op_type == binary_op)
4537 vop1 = (*vec_oprnds1)[i];
4538 else
4539 vop1 = NULL_TREE;
4541 /* Generate the two halves of promotion operation. */
4542 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4543 op_type, vec_dest, gsi, stmt);
4544 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4545 op_type, vec_dest, gsi, stmt);
4546 if (is_gimple_call (new_stmt1))
4548 new_tmp1 = gimple_call_lhs (new_stmt1);
4549 new_tmp2 = gimple_call_lhs (new_stmt2);
4551 else
4553 new_tmp1 = gimple_assign_lhs (new_stmt1);
4554 new_tmp2 = gimple_assign_lhs (new_stmt2);
4557 /* Store the results for the next step. */
4558 vec_tmp.quick_push (new_tmp1);
4559 vec_tmp.quick_push (new_tmp2);
4562 vec_oprnds0->release ();
4563 *vec_oprnds0 = vec_tmp;
4567 /* Check if STMT performs a conversion operation, that can be vectorized.
4568 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4569 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4570 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4572 static bool
4573 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4574 gimple **vec_stmt, slp_tree slp_node,
4575 stmt_vector_for_cost *cost_vec)
4577 tree vec_dest;
4578 tree scalar_dest;
4579 tree op0, op1 = NULL_TREE;
4580 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4581 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4582 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4583 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4584 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4585 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4586 tree new_temp;
4587 gimple *def_stmt;
4588 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4589 int ndts = 2;
4590 gimple *new_stmt = NULL;
4591 stmt_vec_info prev_stmt_info;
4592 poly_uint64 nunits_in;
4593 poly_uint64 nunits_out;
4594 tree vectype_out, vectype_in;
4595 int ncopies, i, j;
4596 tree lhs_type, rhs_type;
4597 enum { NARROW, NONE, WIDEN } modifier;
4598 vec<tree> vec_oprnds0 = vNULL;
4599 vec<tree> vec_oprnds1 = vNULL;
4600 tree vop0;
4601 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4602 vec_info *vinfo = stmt_info->vinfo;
4603 int multi_step_cvt = 0;
4604 vec<tree> interm_types = vNULL;
4605 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4606 int op_type;
4607 unsigned short fltsz;
4609 /* Is STMT a vectorizable conversion? */
4611 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4612 return false;
4614 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4615 && ! vec_stmt)
4616 return false;
4618 if (!is_gimple_assign (stmt))
4619 return false;
4621 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4622 return false;
4624 code = gimple_assign_rhs_code (stmt);
4625 if (!CONVERT_EXPR_CODE_P (code)
4626 && code != FIX_TRUNC_EXPR
4627 && code != FLOAT_EXPR
4628 && code != WIDEN_MULT_EXPR
4629 && code != WIDEN_LSHIFT_EXPR)
4630 return false;
4632 op_type = TREE_CODE_LENGTH (code);
4634 /* Check types of lhs and rhs. */
4635 scalar_dest = gimple_assign_lhs (stmt);
4636 lhs_type = TREE_TYPE (scalar_dest);
4637 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4639 op0 = gimple_assign_rhs1 (stmt);
4640 rhs_type = TREE_TYPE (op0);
4642 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4643 && !((INTEGRAL_TYPE_P (lhs_type)
4644 && INTEGRAL_TYPE_P (rhs_type))
4645 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4646 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4647 return false;
4649 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4650 && ((INTEGRAL_TYPE_P (lhs_type)
4651 && !type_has_mode_precision_p (lhs_type))
4652 || (INTEGRAL_TYPE_P (rhs_type)
4653 && !type_has_mode_precision_p (rhs_type))))
4655 if (dump_enabled_p ())
4656 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4657 "type conversion to/from bit-precision unsupported."
4658 "\n");
4659 return false;
4662 /* Check the operands of the operation. */
4663 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4665 if (dump_enabled_p ())
4666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4667 "use not simple.\n");
4668 return false;
4670 if (op_type == binary_op)
4672 bool ok;
4674 op1 = gimple_assign_rhs2 (stmt);
4675 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4676 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4677 OP1. */
4678 if (CONSTANT_CLASS_P (op0))
4679 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4680 else
4681 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4683 if (!ok)
4685 if (dump_enabled_p ())
4686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4687 "use not simple.\n");
4688 return false;
4692 /* If op0 is an external or constant defs use a vector type of
4693 the same size as the output vector type. */
4694 if (!vectype_in)
4695 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4696 if (vec_stmt)
4697 gcc_assert (vectype_in);
4698 if (!vectype_in)
4700 if (dump_enabled_p ())
4702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4703 "no vectype for scalar type ");
4704 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4705 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4708 return false;
4711 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4712 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4714 if (dump_enabled_p ())
4716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4717 "can't convert between boolean and non "
4718 "boolean vectors");
4719 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4720 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4723 return false;
4726 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4727 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4728 if (known_eq (nunits_out, nunits_in))
4729 modifier = NONE;
4730 else if (multiple_p (nunits_out, nunits_in))
4731 modifier = NARROW;
4732 else
4734 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4735 modifier = WIDEN;
4738 /* Multiple types in SLP are handled by creating the appropriate number of
4739 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4740 case of SLP. */
4741 if (slp_node)
4742 ncopies = 1;
4743 else if (modifier == NARROW)
4744 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4745 else
4746 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4748 /* Sanity check: make sure that at least one copy of the vectorized stmt
4749 needs to be generated. */
4750 gcc_assert (ncopies >= 1);
4752 bool found_mode = false;
4753 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4754 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4755 opt_scalar_mode rhs_mode_iter;
4757 /* Supportable by target? */
4758 switch (modifier)
4760 case NONE:
4761 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4762 return false;
4763 if (supportable_convert_operation (code, vectype_out, vectype_in,
4764 &decl1, &code1))
4765 break;
4766 /* FALLTHRU */
4767 unsupported:
4768 if (dump_enabled_p ())
4769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4770 "conversion not supported by target.\n");
4771 return false;
4773 case WIDEN:
4774 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4775 &code1, &code2, &multi_step_cvt,
4776 &interm_types))
4778 /* Binary widening operation can only be supported directly by the
4779 architecture. */
4780 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4781 break;
4784 if (code != FLOAT_EXPR
4785 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4786 goto unsupported;
4788 fltsz = GET_MODE_SIZE (lhs_mode);
4789 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4791 rhs_mode = rhs_mode_iter.require ();
4792 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4793 break;
4795 cvt_type
4796 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4797 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4798 if (cvt_type == NULL_TREE)
4799 goto unsupported;
4801 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4803 if (!supportable_convert_operation (code, vectype_out,
4804 cvt_type, &decl1, &codecvt1))
4805 goto unsupported;
4807 else if (!supportable_widening_operation (code, stmt, vectype_out,
4808 cvt_type, &codecvt1,
4809 &codecvt2, &multi_step_cvt,
4810 &interm_types))
4811 continue;
4812 else
4813 gcc_assert (multi_step_cvt == 0);
4815 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4816 vectype_in, &code1, &code2,
4817 &multi_step_cvt, &interm_types))
4819 found_mode = true;
4820 break;
4824 if (!found_mode)
4825 goto unsupported;
4827 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4828 codecvt2 = ERROR_MARK;
4829 else
4831 multi_step_cvt++;
4832 interm_types.safe_push (cvt_type);
4833 cvt_type = NULL_TREE;
4835 break;
4837 case NARROW:
4838 gcc_assert (op_type == unary_op);
4839 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4840 &code1, &multi_step_cvt,
4841 &interm_types))
4842 break;
4844 if (code != FIX_TRUNC_EXPR
4845 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4846 goto unsupported;
4848 cvt_type
4849 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4850 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4851 if (cvt_type == NULL_TREE)
4852 goto unsupported;
4853 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4854 &decl1, &codecvt1))
4855 goto unsupported;
4856 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4857 &code1, &multi_step_cvt,
4858 &interm_types))
4859 break;
4860 goto unsupported;
4862 default:
4863 gcc_unreachable ();
4866 if (!vec_stmt) /* transformation not required. */
4868 if (dump_enabled_p ())
4869 dump_printf_loc (MSG_NOTE, vect_location,
4870 "=== vectorizable_conversion ===\n");
4871 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4873 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4874 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4875 cost_vec);
4877 else if (modifier == NARROW)
4879 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4880 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4881 cost_vec);
4883 else
4885 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4886 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4887 cost_vec);
4889 interm_types.release ();
4890 return true;
4893 /* Transform. */
4894 if (dump_enabled_p ())
4895 dump_printf_loc (MSG_NOTE, vect_location,
4896 "transform conversion. ncopies = %d.\n", ncopies);
4898 if (op_type == binary_op)
4900 if (CONSTANT_CLASS_P (op0))
4901 op0 = fold_convert (TREE_TYPE (op1), op0);
4902 else if (CONSTANT_CLASS_P (op1))
4903 op1 = fold_convert (TREE_TYPE (op0), op1);
4906 /* In case of multi-step conversion, we first generate conversion operations
4907 to the intermediate types, and then from those types to the final one.
4908 We create vector destinations for the intermediate type (TYPES) received
4909 from supportable_*_operation, and store them in the correct order
4910 for future use in vect_create_vectorized_*_stmts (). */
4911 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4912 vec_dest = vect_create_destination_var (scalar_dest,
4913 (cvt_type && modifier == WIDEN)
4914 ? cvt_type : vectype_out);
4915 vec_dsts.quick_push (vec_dest);
4917 if (multi_step_cvt)
4919 for (i = interm_types.length () - 1;
4920 interm_types.iterate (i, &intermediate_type); i--)
4922 vec_dest = vect_create_destination_var (scalar_dest,
4923 intermediate_type);
4924 vec_dsts.quick_push (vec_dest);
4928 if (cvt_type)
4929 vec_dest = vect_create_destination_var (scalar_dest,
4930 modifier == WIDEN
4931 ? vectype_out : cvt_type);
4933 if (!slp_node)
4935 if (modifier == WIDEN)
4937 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4938 if (op_type == binary_op)
4939 vec_oprnds1.create (1);
4941 else if (modifier == NARROW)
4942 vec_oprnds0.create (
4943 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4945 else if (code == WIDEN_LSHIFT_EXPR)
4946 vec_oprnds1.create (slp_node->vec_stmts_size);
4948 last_oprnd = op0;
4949 prev_stmt_info = NULL;
4950 switch (modifier)
4952 case NONE:
4953 for (j = 0; j < ncopies; j++)
4955 if (j == 0)
4956 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4957 else
4958 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4960 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4962 /* Arguments are ready, create the new vector stmt. */
4963 if (code1 == CALL_EXPR)
4965 new_stmt = gimple_build_call (decl1, 1, vop0);
4966 new_temp = make_ssa_name (vec_dest, new_stmt);
4967 gimple_call_set_lhs (new_stmt, new_temp);
4969 else
4971 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4972 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4973 new_temp = make_ssa_name (vec_dest, new_stmt);
4974 gimple_assign_set_lhs (new_stmt, new_temp);
4977 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4978 if (slp_node)
4979 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4980 else
4982 if (!prev_stmt_info)
4983 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4984 else
4985 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4986 prev_stmt_info = vinfo_for_stmt (new_stmt);
4990 break;
4992 case WIDEN:
4993 /* In case the vectorization factor (VF) is bigger than the number
4994 of elements that we can fit in a vectype (nunits), we have to
4995 generate more than one vector stmt - i.e. - we need to "unroll"
4996 the vector stmt by a factor VF/nunits. */
4997 for (j = 0; j < ncopies; j++)
4999 /* Handle uses. */
5000 if (j == 0)
5002 if (slp_node)
5004 if (code == WIDEN_LSHIFT_EXPR)
5006 unsigned int k;
5008 vec_oprnd1 = op1;
5009 /* Store vec_oprnd1 for every vector stmt to be created
5010 for SLP_NODE. We check during the analysis that all
5011 the shift arguments are the same. */
5012 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5013 vec_oprnds1.quick_push (vec_oprnd1);
5015 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5016 slp_node);
5018 else
5019 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
5020 &vec_oprnds1, slp_node);
5022 else
5024 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
5025 vec_oprnds0.quick_push (vec_oprnd0);
5026 if (op_type == binary_op)
5028 if (code == WIDEN_LSHIFT_EXPR)
5029 vec_oprnd1 = op1;
5030 else
5031 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
5032 vec_oprnds1.quick_push (vec_oprnd1);
5036 else
5038 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
5039 vec_oprnds0.truncate (0);
5040 vec_oprnds0.quick_push (vec_oprnd0);
5041 if (op_type == binary_op)
5043 if (code == WIDEN_LSHIFT_EXPR)
5044 vec_oprnd1 = op1;
5045 else
5046 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
5047 vec_oprnd1);
5048 vec_oprnds1.truncate (0);
5049 vec_oprnds1.quick_push (vec_oprnd1);
5053 /* Arguments are ready. Create the new vector stmts. */
5054 for (i = multi_step_cvt; i >= 0; i--)
5056 tree this_dest = vec_dsts[i];
5057 enum tree_code c1 = code1, c2 = code2;
5058 if (i == 0 && codecvt2 != ERROR_MARK)
5060 c1 = codecvt1;
5061 c2 = codecvt2;
5063 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5064 &vec_oprnds1,
5065 stmt, this_dest, gsi,
5066 c1, c2, decl1, decl2,
5067 op_type);
5070 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5072 if (cvt_type)
5074 if (codecvt1 == CALL_EXPR)
5076 new_stmt = gimple_build_call (decl1, 1, vop0);
5077 new_temp = make_ssa_name (vec_dest, new_stmt);
5078 gimple_call_set_lhs (new_stmt, new_temp);
5080 else
5082 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5083 new_temp = make_ssa_name (vec_dest);
5084 new_stmt = gimple_build_assign (new_temp, codecvt1,
5085 vop0);
5088 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5090 else
5091 new_stmt = SSA_NAME_DEF_STMT (vop0);
5093 if (slp_node)
5094 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5095 else
5097 if (!prev_stmt_info)
5098 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
5099 else
5100 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5101 prev_stmt_info = vinfo_for_stmt (new_stmt);
5106 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5107 break;
5109 case NARROW:
5110 /* In case the vectorization factor (VF) is bigger than the number
5111 of elements that we can fit in a vectype (nunits), we have to
5112 generate more than one vector stmt - i.e. - we need to "unroll"
5113 the vector stmt by a factor VF/nunits. */
5114 for (j = 0; j < ncopies; j++)
5116 /* Handle uses. */
5117 if (slp_node)
5118 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5119 slp_node);
5120 else
5122 vec_oprnds0.truncate (0);
5123 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
5124 vect_pow2 (multi_step_cvt) - 1);
5127 /* Arguments are ready. Create the new vector stmts. */
5128 if (cvt_type)
5129 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5131 if (codecvt1 == CALL_EXPR)
5133 new_stmt = gimple_build_call (decl1, 1, vop0);
5134 new_temp = make_ssa_name (vec_dest, new_stmt);
5135 gimple_call_set_lhs (new_stmt, new_temp);
5137 else
5139 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5140 new_temp = make_ssa_name (vec_dest);
5141 new_stmt = gimple_build_assign (new_temp, codecvt1,
5142 vop0);
5145 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5146 vec_oprnds0[i] = new_temp;
5149 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5150 stmt, vec_dsts, gsi,
5151 slp_node, code1,
5152 &prev_stmt_info);
5155 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5156 break;
5159 vec_oprnds0.release ();
5160 vec_oprnds1.release ();
5161 interm_types.release ();
5163 return true;
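
/* The lane-count comparison that chooses the conversion modifier above,
   restated as a tiny standalone helper (illustration only; the enum is local
   to this sketch).  Equal lane counts need no width change; an output
   vectype with more lanes than the input means each result element is
   narrower and must be packed (NARROW); fewer lanes means each result is
   wider and must be unpacked (WIDEN).  */

enum conv_modifier_sketch { CONV_NONE, CONV_NARROW, CONV_WIDEN };

static enum conv_modifier_sketch
pick_conversion_modifier (unsigned int nunits_in, unsigned int nunits_out)
{
  if (nunits_out == nunits_in)
    return CONV_NONE;
  if (nunits_out % nunits_in == 0)	/* more lanes out: narrower elements  */
    return CONV_NARROW;
  return CONV_WIDEN;			/* fewer lanes out: wider elements  */
}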
5167 /* Function vectorizable_assignment.
5169 Check if STMT performs an assignment (copy) that can be vectorized.
5170 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5171 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5172 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5174 static bool
5175 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
5176 gimple **vec_stmt, slp_tree slp_node,
5177 stmt_vector_for_cost *cost_vec)
5179 tree vec_dest;
5180 tree scalar_dest;
5181 tree op;
5182 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5183 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5184 tree new_temp;
5185 gimple *def_stmt;
5186 enum vect_def_type dt[1] = {vect_unknown_def_type};
5187 int ndts = 1;
5188 int ncopies;
5189 int i, j;
5190 vec<tree> vec_oprnds = vNULL;
5191 tree vop;
5192 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5193 vec_info *vinfo = stmt_info->vinfo;
5194 gimple *new_stmt = NULL;
5195 stmt_vec_info prev_stmt_info = NULL;
5196 enum tree_code code;
5197 tree vectype_in;
5199 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5200 return false;
5202 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5203 && ! vec_stmt)
5204 return false;
5206 /* Is vectorizable assignment? */
5207 if (!is_gimple_assign (stmt))
5208 return false;
5210 scalar_dest = gimple_assign_lhs (stmt);
5211 if (TREE_CODE (scalar_dest) != SSA_NAME)
5212 return false;
5214 code = gimple_assign_rhs_code (stmt);
5215 if (gimple_assign_single_p (stmt)
5216 || code == PAREN_EXPR
5217 || CONVERT_EXPR_CODE_P (code))
5218 op = gimple_assign_rhs1 (stmt);
5219 else
5220 return false;
5222 if (code == VIEW_CONVERT_EXPR)
5223 op = TREE_OPERAND (op, 0);
5225 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5226 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5228 /* Multiple types in SLP are handled by creating the appropriate number of
5229 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5230 case of SLP. */
5231 if (slp_node)
5232 ncopies = 1;
5233 else
5234 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5236 gcc_assert (ncopies >= 1);
5238 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
5240 if (dump_enabled_p ())
5241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5242 "use not simple.\n");
5243 return false;
5246 /* We can handle NOP_EXPR conversions that do not change the number
5247 of elements or the vector size. */
5248 if ((CONVERT_EXPR_CODE_P (code)
5249 || code == VIEW_CONVERT_EXPR)
5250 && (!vectype_in
5251 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5252 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5253 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5254 return false;
5256 /* We do not handle bit-precision changes. */
5257 if ((CONVERT_EXPR_CODE_P (code)
5258 || code == VIEW_CONVERT_EXPR)
5259 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5260 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5261 || !type_has_mode_precision_p (TREE_TYPE (op)))
5262 /* But a conversion that does not change the bit-pattern is ok. */
5263 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5264 > TYPE_PRECISION (TREE_TYPE (op)))
5265 && TYPE_UNSIGNED (TREE_TYPE (op)))
5266 /* Conversion between boolean types of different sizes is
5267 a simple assignment in case their vectypes are same
5268 boolean vectors. */
5269 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5270 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5272 if (dump_enabled_p ())
5273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5274 "type conversion to/from bit-precision "
5275 "unsupported.\n");
5276 return false;
5279 if (!vec_stmt) /* transformation not required. */
5281 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5282 if (dump_enabled_p ())
5283 dump_printf_loc (MSG_NOTE, vect_location,
5284 "=== vectorizable_assignment ===\n");
5285 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5286 return true;
5289 /* Transform. */
5290 if (dump_enabled_p ())
5291 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5293 /* Handle def. */
5294 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5296 /* Handle use. */
5297 for (j = 0; j < ncopies; j++)
5299 /* Handle uses. */
5300 if (j == 0)
5301 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
5302 else
5303 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
5305 /* Arguments are ready.  Create the new vector stmt.  */
5306 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5308 if (CONVERT_EXPR_CODE_P (code)
5309 || code == VIEW_CONVERT_EXPR)
5310 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5311 new_stmt = gimple_build_assign (vec_dest, vop);
5312 new_temp = make_ssa_name (vec_dest, new_stmt);
5313 gimple_assign_set_lhs (new_stmt, new_temp);
5314 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5315 if (slp_node)
5316 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5319 if (slp_node)
5320 continue;
5322 if (j == 0)
5323 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5324 else
5325 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5327 prev_stmt_info = vinfo_for_stmt (new_stmt);
5330 vec_oprnds.release ();
5331 return true;
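
/* A made-up example (not from the GCC sources) of the kind of scalar
   statement vectorizable_assignment accepts: a plain copy, a PAREN_EXPR, or
   a conversion that changes neither the number of lanes nor the vector
   size, such as the int -> unsigned copy below, which becomes a single
   VIEW_CONVERT of each vector.  */

static void
same_size_copy_example (const int *in, unsigned int *out, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = (unsigned int) in[i];
}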
5335 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5336 either as shift by a scalar or by a vector. */
5338 bool
5339 vect_supportable_shift (enum tree_code code, tree scalar_type)
5342 machine_mode vec_mode;
5343 optab optab;
5344 int icode;
5345 tree vectype;
5347 vectype = get_vectype_for_scalar_type (scalar_type);
5348 if (!vectype)
5349 return false;
5351 optab = optab_for_tree_code (code, vectype, optab_scalar);
5352 if (!optab
5353 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5355 optab = optab_for_tree_code (code, vectype, optab_vector);
5356 if (!optab
5357 || (optab_handler (optab, TYPE_MODE (vectype))
5358 == CODE_FOR_nothing))
5359 return false;
5362 vec_mode = TYPE_MODE (vectype);
5363 icode = (int) optab_handler (optab, vec_mode);
5364 if (icode == CODE_FOR_nothing)
5365 return false;
5367 return true;
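
/* A scalar picture (illustration only, hypothetical names) of the two shift
   flavours vect_supportable_shift probes for: optab_scalar covers shifting
   every lane by one scalar amount, optab_vector covers a per-lane amount;
   either one is enough for the scalar type to count as shiftable.  */

static void
shift_flavours_example (const unsigned int a[4], unsigned int scalar_cnt,
			const unsigned int lane_cnt[4],
			unsigned int by_scalar[4], unsigned int by_vector[4])
{
  for (int i = 0; i < 4; i++)
    {
      by_scalar[i] = a[i] << scalar_cnt;	/* vector shifted by scalar  */
      by_vector[i] = a[i] << lane_cnt[i];	/* vector shifted by vector  */
    }
}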
5371 /* Function vectorizable_shift.
5373 Check if STMT performs a shift operation that can be vectorized.
5374 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5375 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5376 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5378 static bool
5379 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
5380 gimple **vec_stmt, slp_tree slp_node,
5381 stmt_vector_for_cost *cost_vec)
5383 tree vec_dest;
5384 tree scalar_dest;
5385 tree op0, op1 = NULL;
5386 tree vec_oprnd1 = NULL_TREE;
5387 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5388 tree vectype;
5389 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5390 enum tree_code code;
5391 machine_mode vec_mode;
5392 tree new_temp;
5393 optab optab;
5394 int icode;
5395 machine_mode optab_op2_mode;
5396 gimple *def_stmt;
5397 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5398 int ndts = 2;
5399 gimple *new_stmt = NULL;
5400 stmt_vec_info prev_stmt_info;
5401 poly_uint64 nunits_in;
5402 poly_uint64 nunits_out;
5403 tree vectype_out;
5404 tree op1_vectype;
5405 int ncopies;
5406 int j, i;
5407 vec<tree> vec_oprnds0 = vNULL;
5408 vec<tree> vec_oprnds1 = vNULL;
5409 tree vop0, vop1;
5410 unsigned int k;
5411 bool scalar_shift_arg = true;
5412 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5413 vec_info *vinfo = stmt_info->vinfo;
5415 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5416 return false;
5418 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5419 && ! vec_stmt)
5420 return false;
5422 /* Is STMT a vectorizable binary/unary operation? */
5423 if (!is_gimple_assign (stmt))
5424 return false;
5426 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5427 return false;
5429 code = gimple_assign_rhs_code (stmt);
5431 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5432 || code == RROTATE_EXPR))
5433 return false;
5435 scalar_dest = gimple_assign_lhs (stmt);
5436 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5437 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5439 if (dump_enabled_p ())
5440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5441 "bit-precision shifts not supported.\n");
5442 return false;
5445 op0 = gimple_assign_rhs1 (stmt);
5446 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5448 if (dump_enabled_p ())
5449 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5450 "use not simple.\n");
5451 return false;
5453 /* If op0 is an external or constant def use a vector type with
5454 the same size as the output vector type. */
5455 if (!vectype)
5456 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5457 if (vec_stmt)
5458 gcc_assert (vectype);
5459 if (!vectype)
5461 if (dump_enabled_p ())
5462 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5463 "no vectype for scalar type\n");
5464 return false;
5467 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5468 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5469 if (maybe_ne (nunits_out, nunits_in))
5470 return false;
5472 op1 = gimple_assign_rhs2 (stmt);
5473 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
5475 if (dump_enabled_p ())
5476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5477 "use not simple.\n");
5478 return false;
5481 /* Multiple types in SLP are handled by creating the appropriate number of
5482 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5483 case of SLP. */
5484 if (slp_node)
5485 ncopies = 1;
5486 else
5487 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5489 gcc_assert (ncopies >= 1);
5491 /* Determine whether the shift amount is a vector or a scalar. If the
5492 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5494 if ((dt[1] == vect_internal_def
5495 || dt[1] == vect_induction_def)
5496 && !slp_node)
5497 scalar_shift_arg = false;
5498 else if (dt[1] == vect_constant_def
5499 || dt[1] == vect_external_def
5500 || dt[1] == vect_internal_def)
5502 /* In SLP we need to check whether the shift count is the same for
5503 all statements; in loops, a constant or invariant count is always
5504 a scalar shift. */
5505 if (slp_node)
5507 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5508 gimple *slpstmt;
5510 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
5511 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5512 scalar_shift_arg = false;
5515 /* If the shift amount is computed by a pattern stmt we cannot
5516 use the scalar amount directly thus give up and use a vector
5517 shift. */
5518 if (dt[1] == vect_internal_def)
5520 gimple *def = SSA_NAME_DEF_STMT (op1);
5521 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5522 scalar_shift_arg = false;
5525 else
5527 if (dump_enabled_p ())
5528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5529 "operand mode requires invariant argument.\n");
5530 return false;
5533 /* Vector shifted by vector. */
5534 if (!scalar_shift_arg)
5536 optab = optab_for_tree_code (code, vectype, optab_vector);
5537 if (dump_enabled_p ())
5538 dump_printf_loc (MSG_NOTE, vect_location,
5539 "vector/vector shift/rotate found.\n");
5541 if (!op1_vectype)
5542 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5543 if (op1_vectype == NULL_TREE
5544 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5546 if (dump_enabled_p ())
5547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5548 "unusable type for last operand in"
5549 " vector/vector shift/rotate.\n");
5550 return false;
5553 /* See if the machine has a vector shifted by scalar insn and if not
5554 then see if it has a vector shifted by vector insn. */
5555 else
5557 optab = optab_for_tree_code (code, vectype, optab_scalar);
5558 if (optab
5559 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5561 if (dump_enabled_p ())
5562 dump_printf_loc (MSG_NOTE, vect_location,
5563 "vector/scalar shift/rotate found.\n");
5565 else
5567 optab = optab_for_tree_code (code, vectype, optab_vector);
5568 if (optab
5569 && (optab_handler (optab, TYPE_MODE (vectype))
5570 != CODE_FOR_nothing))
5572 scalar_shift_arg = false;
5574 if (dump_enabled_p ())
5575 dump_printf_loc (MSG_NOTE, vect_location,
5576 "vector/vector shift/rotate found.\n");
5578 /* Unlike the other binary operators, shifts/rotates allow the rhs
5579 to be an int rather than the same type as the lhs, so make sure
5580 the scalar shift amount has the right type when we are dealing
5581 with vectors of long long/long/short/char. */
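/* E.g. for a loop shifting "long long" values by the literal 3 (which
   has type int), the vector/vector shift needs the count to have
   long long elements as well, so the constant is converted below
   before the vector of counts is built.  */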
5582 if (dt[1] == vect_constant_def)
5583 op1 = fold_convert (TREE_TYPE (vectype), op1);
5584 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5585 TREE_TYPE (op1)))
5587 if (slp_node
5588 && TYPE_MODE (TREE_TYPE (vectype))
5589 != TYPE_MODE (TREE_TYPE (op1)))
5591 if (dump_enabled_p ())
5592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5593 "unusable type for last operand in"
5594 " vector/vector shift/rotate.\n");
5595 return false;
5597 if (vec_stmt && !slp_node)
5599 op1 = fold_convert (TREE_TYPE (vectype), op1);
5600 op1 = vect_init_vector (stmt, op1,
5601 TREE_TYPE (vectype), NULL);
5608 /* Supportable by target? */
5609 if (!optab)
5611 if (dump_enabled_p ())
5612 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5613 "no optab.\n");
5614 return false;
5616 vec_mode = TYPE_MODE (vectype);
5617 icode = (int) optab_handler (optab, vec_mode);
5618 if (icode == CODE_FOR_nothing)
5620 if (dump_enabled_p ())
5621 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5622 "op not supported by target.\n");
5623 /* Check only during analysis. */
5624 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5625 || (!vec_stmt
5626 && !vect_worthwhile_without_simd_p (vinfo, code)))
5627 return false;
5628 if (dump_enabled_p ())
5629 dump_printf_loc (MSG_NOTE, vect_location,
5630 "proceeding using word mode.\n");
5633 /* Worthwhile without SIMD support? Check only during analysis. */
5634 if (!vec_stmt
5635 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5636 && !vect_worthwhile_without_simd_p (vinfo, code))
5638 if (dump_enabled_p ())
5639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5640 "not worthwhile without SIMD support.\n");
5641 return false;
5644 if (!vec_stmt) /* transformation not required. */
5646 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5647 if (dump_enabled_p ())
5648 dump_printf_loc (MSG_NOTE, vect_location,
5649 "=== vectorizable_shift ===\n");
5650 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5651 return true;
5654 /* Transform. */
5656 if (dump_enabled_p ())
5657 dump_printf_loc (MSG_NOTE, vect_location,
5658 "transform binary/unary operation.\n");
5660 /* Handle def. */
5661 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5663 prev_stmt_info = NULL;
5664 for (j = 0; j < ncopies; j++)
5666 /* Handle uses. */
5667 if (j == 0)
5669 if (scalar_shift_arg)
5671 /* Vector shl and shr insn patterns can be defined with scalar
5672 operand 2 (shift operand). In this case, use constant or loop
5673 invariant op1 directly, without extending it to vector mode
5674 first. */
5675 optab_op2_mode = insn_data[icode].operand[2].mode;
5676 if (!VECTOR_MODE_P (optab_op2_mode))
5678 if (dump_enabled_p ())
5679 dump_printf_loc (MSG_NOTE, vect_location,
5680 "operand 1 using scalar mode.\n");
5681 vec_oprnd1 = op1;
5682 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5683 vec_oprnds1.quick_push (vec_oprnd1);
5684 if (slp_node)
5686 /* Store vec_oprnd1 for every vector stmt to be created
5687 for SLP_NODE. We check during the analysis that all
5688 the shift arguments are the same.
5689 TODO: Allow different constants for different vector
5690 stmts generated for an SLP instance. */
5691 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5692 vec_oprnds1.quick_push (vec_oprnd1);
5697 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5698 (a special case for certain kinds of vector shifts); otherwise,
5699 operand 1 should be of a vector type (the usual case). */
5700 if (vec_oprnd1)
5701 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5702 slp_node);
5703 else
5704 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5705 slp_node);
5707 else
5708 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5710 /* Arguments are ready. Create the new vector stmt. */
5711 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5713 vop1 = vec_oprnds1[i];
5714 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5715 new_temp = make_ssa_name (vec_dest, new_stmt);
5716 gimple_assign_set_lhs (new_stmt, new_temp);
5717 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5718 if (slp_node)
5719 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5722 if (slp_node)
5723 continue;
5725 if (j == 0)
5726 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5727 else
5728 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5729 prev_stmt_info = vinfo_for_stmt (new_stmt);
5732 vec_oprnds0.release ();
5733 vec_oprnds1.release ();
5735 return true;
5739 /* Function vectorizable_operation.
5741 Check if STMT performs a binary, unary or ternary operation that can
5742 be vectorized.
5743 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5744 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5745 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5747 static bool
5748 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5749 gimple **vec_stmt, slp_tree slp_node,
5750 stmt_vector_for_cost *cost_vec)
5752 tree vec_dest;
5753 tree scalar_dest;
5754 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5755 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5756 tree vectype;
5757 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5758 enum tree_code code, orig_code;
5759 machine_mode vec_mode;
5760 tree new_temp;
5761 int op_type;
5762 optab optab;
5763 bool target_support_p;
5764 gimple *def_stmt;
5765 enum vect_def_type dt[3]
5766 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5767 int ndts = 3;
5768 gimple *new_stmt = NULL;
5769 stmt_vec_info prev_stmt_info;
5770 poly_uint64 nunits_in;
5771 poly_uint64 nunits_out;
5772 tree vectype_out;
5773 int ncopies;
5774 int j, i;
5775 vec<tree> vec_oprnds0 = vNULL;
5776 vec<tree> vec_oprnds1 = vNULL;
5777 vec<tree> vec_oprnds2 = vNULL;
5778 tree vop0, vop1, vop2;
5779 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5780 vec_info *vinfo = stmt_info->vinfo;
5782 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5783 return false;
5785 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5786 && ! vec_stmt)
5787 return false;
5789 /* Is STMT a vectorizable unary/binary/ternary operation? */
5790 if (!is_gimple_assign (stmt))
5791 return false;
5793 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5794 return false;
5796 orig_code = code = gimple_assign_rhs_code (stmt);
5798 /* For pointer addition and subtraction, we should use the normal
5799 plus and minus for the vector operation. */
5800 if (code == POINTER_PLUS_EXPR)
5801 code = PLUS_EXPR;
5802 if (code == POINTER_DIFF_EXPR)
5803 code = MINUS_EXPR;
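/* For example (illustrative only):

     int **p, **r;
     for (int i = 0; i < n; i++)
       r[i] = p[i] + 1;     <-- POINTER_PLUS_EXPR on each element

   is vectorized using the ordinary element-wise PLUS_EXPR on the
   pointer-sized unsigned elements.  */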
5805 /* Support only unary, binary and ternary operations. */
5806 op_type = TREE_CODE_LENGTH (code);
5807 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5809 if (dump_enabled_p ())
5810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5811 "num. args = %d (not unary/binary/ternary op).\n",
5812 op_type);
5813 return false;
5816 scalar_dest = gimple_assign_lhs (stmt);
5817 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5819 /* Most operations cannot handle bit-precision types without extra
5820 truncations. */
5821 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5822 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5823 /* Exceptions are bitwise binary operations. */
5824 && code != BIT_IOR_EXPR
5825 && code != BIT_XOR_EXPR
5826 && code != BIT_AND_EXPR)
5828 if (dump_enabled_p ())
5829 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5830 "bit-precision arithmetic not supported.\n");
5831 return false;
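/* E.g. a PLUS_EXPR on a type whose precision is smaller than its mode
   (such as a bit-field type) would need an extra truncation after every
   vector operation and is rejected here; BIT_AND_EXPR, BIT_IOR_EXPR and
   BIT_XOR_EXPR are exact in any precision and are allowed above.  */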
5834 op0 = gimple_assign_rhs1 (stmt);
5835 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5837 if (dump_enabled_p ())
5838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5839 "use not simple.\n");
5840 return false;
5842 /* If op0 is an external or constant def use a vector type with
5843 the same size as the output vector type. */
5844 if (!vectype)
5846 /* For a boolean type we cannot determine the vectype from an
5847 invariant value (we don't know whether it is a vector
5848 of booleans or a vector of integers). Use the output
5849 vectype because operations on booleans don't change
5850 the type. */
5851 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5853 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5855 if (dump_enabled_p ())
5856 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5857 "not supported operation on bool value.\n");
5858 return false;
5860 vectype = vectype_out;
5862 else
5863 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5865 if (vec_stmt)
5866 gcc_assert (vectype);
5867 if (!vectype)
5869 if (dump_enabled_p ())
5871 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5872 "no vectype for scalar type ");
5873 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5874 TREE_TYPE (op0));
5875 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5878 return false;
5881 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5882 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5883 if (maybe_ne (nunits_out, nunits_in))
5884 return false;
5886 if (op_type == binary_op || op_type == ternary_op)
5888 op1 = gimple_assign_rhs2 (stmt);
5889 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5891 if (dump_enabled_p ())
5892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5893 "use not simple.\n");
5894 return false;
5897 if (op_type == ternary_op)
5899 op2 = gimple_assign_rhs3 (stmt);
5900 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5902 if (dump_enabled_p ())
5903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5904 "use not simple.\n");
5905 return false;
5909 /* Multiple types in SLP are handled by creating the appropriate number of
5910 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5911 case of SLP. */
5912 if (slp_node)
5913 ncopies = 1;
5914 else
5915 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5917 gcc_assert (ncopies >= 1);
5919 /* Shifts are handled in vectorizable_shift (). */
5920 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5921 || code == RROTATE_EXPR)
5922 return false;
5924 /* Supportable by target? */
5926 vec_mode = TYPE_MODE (vectype);
5927 if (code == MULT_HIGHPART_EXPR)
5928 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5929 else
5931 optab = optab_for_tree_code (code, vectype, optab_default);
5932 if (!optab)
5934 if (dump_enabled_p ())
5935 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5936 "no optab.\n");
5937 return false;
5939 target_support_p = (optab_handler (optab, vec_mode)
5940 != CODE_FOR_nothing);
5943 if (!target_support_p)
5945 if (dump_enabled_p ())
5946 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5947 "op not supported by target.\n");
5948 /* Check only during analysis. */
5949 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5950 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5951 return false;
5952 if (dump_enabled_p ())
5953 dump_printf_loc (MSG_NOTE, vect_location,
5954 "proceeding using word mode.\n");
5957 /* Worthwhile without SIMD support? Check only during analysis. */
5958 if (!VECTOR_MODE_P (vec_mode)
5959 && !vec_stmt
5960 && !vect_worthwhile_without_simd_p (vinfo, code))
5962 if (dump_enabled_p ())
5963 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5964 "not worthwhile without SIMD support.\n");
5965 return false;
5968 if (!vec_stmt) /* transformation not required. */
5970 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5971 if (dump_enabled_p ())
5972 dump_printf_loc (MSG_NOTE, vect_location,
5973 "=== vectorizable_operation ===\n");
5974 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5975 return true;
5978 /* Transform. */
5980 if (dump_enabled_p ())
5981 dump_printf_loc (MSG_NOTE, vect_location,
5982 "transform binary/unary operation.\n");
5984 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5985 vectors with unsigned elements, but the result is signed. So, we
5986 need to compute the MINUS_EXPR into a temporary of type VECTYPE and
5987 then VIEW_CONVERT_EXPR it into the final VECTYPE_OUT result. */
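/* A sketch of that case (illustrative only):

     int **p, **q;  ptrdiff_t *d;
     for (int i = 0; i < n; i++)
       d[i] = p[i] - q[i];     <-- POINTER_DIFF_EXPR on each element

   the subtraction is carried out on vectors of unsigned pointer-sized
   elements and the result is then VIEW_CONVERT_EXPRed to the signed
   VECTYPE_OUT.  */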
5988 tree vec_cvt_dest = NULL_TREE;
5989 if (orig_code == POINTER_DIFF_EXPR)
5991 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5992 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5994 /* Handle def. */
5995 else
5996 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5998 /* In case the vectorization factor (VF) is bigger than the number
5999 of elements that we can fit in a vectype (nunits), we have to generate
6000 more than one vector stmt - i.e. we need to "unroll" the
6001 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6002 from one copy of the vector stmt to the next, in the field
6003 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6004 stages to find the correct vector defs to be used when vectorizing
6005 stmts that use the defs of the current stmt. The example below
6006 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6007 we need to create 4 vectorized stmts):
6009 before vectorization:
6010 RELATED_STMT VEC_STMT
6011 S1: x = memref - -
6012 S2: z = x + 1 - -
6014 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6015 there):
6016 RELATED_STMT VEC_STMT
6017 VS1_0: vx0 = memref0 VS1_1 -
6018 VS1_1: vx1 = memref1 VS1_2 -
6019 VS1_2: vx2 = memref2 VS1_3 -
6020 VS1_3: vx3 = memref3 - -
6021 S1: x = load - VS1_0
6022 S2: z = x + 1 - -
6024 step2: vectorize stmt S2 (done here):
6025 To vectorize stmt S2 we first need to find the relevant vector
6026 def for the first operand 'x'. This is, as usual, obtained from
6027 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6028 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6029 relevant vector def 'vx0'. Having found 'vx0' we can generate
6030 the vector stmt VS2_0, and as usual, record it in the
6031 STMT_VINFO_VEC_STMT of stmt S2.
6032 When creating the second copy (VS2_1), we obtain the relevant vector
6033 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6034 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6035 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6036 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6037 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6038 chain of stmts and pointers:
6039 RELATED_STMT VEC_STMT
6040 VS1_0: vx0 = memref0 VS1_1 -
6041 VS1_1: vx1 = memref1 VS1_2 -
6042 VS1_2: vx2 = memref2 VS1_3 -
6043 VS1_3: vx3 = memref3 - -
6044 S1: x = load - VS1_0
6045 VS2_0: vz0 = vx0 + v1 VS2_1 -
6046 VS2_1: vz1 = vx1 + v1 VS2_2 -
6047 VS2_2: vz2 = vx2 + v1 VS2_3 -
6048 VS2_3: vz3 = vx3 + v1 - -
6049 S2: z = x + 1 - VS2_0 */
6051 prev_stmt_info = NULL;
6052 for (j = 0; j < ncopies; j++)
6054 /* Handle uses. */
6055 if (j == 0)
6057 if (op_type == binary_op)
6058 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
6059 slp_node);
6060 else if (op_type == ternary_op)
6062 if (slp_node)
6064 auto_vec<tree> ops(3);
6065 ops.quick_push (op0);
6066 ops.quick_push (op1);
6067 ops.quick_push (op2);
6068 auto_vec<vec<tree> > vec_defs(3);
6069 vect_get_slp_defs (ops, slp_node, &vec_defs);
6070 vec_oprnds0 = vec_defs[0];
6071 vec_oprnds1 = vec_defs[1];
6072 vec_oprnds2 = vec_defs[2];
6074 else
6076 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
6077 NULL);
6078 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
6079 NULL);
6082 else
6083 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
6084 slp_node);
6086 else
6088 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
6089 if (op_type == ternary_op)
6091 tree vec_oprnd = vec_oprnds2.pop ();
6092 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
6093 vec_oprnd));
6097 /* Arguments are ready. Create the new vector stmt. */
6098 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6100 vop1 = ((op_type == binary_op || op_type == ternary_op)
6101 ? vec_oprnds1[i] : NULL_TREE);
6102 vop2 = ((op_type == ternary_op)
6103 ? vec_oprnds2[i] : NULL_TREE);
6104 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6105 new_temp = make_ssa_name (vec_dest, new_stmt);
6106 gimple_assign_set_lhs (new_stmt, new_temp);
6107 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6108 if (vec_cvt_dest)
6110 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6111 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6112 new_temp);
6113 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6114 gimple_assign_set_lhs (new_stmt, new_temp);
6115 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6117 if (slp_node)
6118 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6121 if (slp_node)
6122 continue;
6124 if (j == 0)
6125 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6126 else
6127 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6128 prev_stmt_info = vinfo_for_stmt (new_stmt);
6131 vec_oprnds0.release ();
6132 vec_oprnds1.release ();
6133 vec_oprnds2.release ();
6135 return true;
6138 /* A helper function to ensure data reference DR's base alignment. */
6140 static void
6141 ensure_base_align (struct data_reference *dr)
6143 if (!dr->aux)
6144 return;
6146 if (DR_VECT_AUX (dr)->base_misaligned)
6148 tree base_decl = DR_VECT_AUX (dr)->base_decl;
6150 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6152 if (decl_in_symtab_p (base_decl))
6153 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6154 else
6156 SET_DECL_ALIGN (base_decl, align_base_to);
6157 DECL_USER_ALIGN (base_decl) = 1;
6159 DR_VECT_AUX (dr)->base_misaligned = false;
6164 /* Function get_group_alias_ptr_type.
6166 Return the alias type for the group starting at FIRST_STMT. */
6168 static tree
6169 get_group_alias_ptr_type (gimple *first_stmt)
6171 struct data_reference *first_dr, *next_dr;
6172 gimple *next_stmt;
6174 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6175 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
6176 while (next_stmt)
6178 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
6179 if (get_alias_set (DR_REF (first_dr))
6180 != get_alias_set (DR_REF (next_dr)))
6182 if (dump_enabled_p ())
6183 dump_printf_loc (MSG_NOTE, vect_location,
6184 "conflicting alias set types.\n");
6185 return ptr_type_node;
6187 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6189 return reference_alias_ptr_type (DR_REF (first_dr));
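/* For instance, if the stmts of an interleaved group store through an
   "int *" and a "float *" with different alias sets, the code above falls
   back to ptr_type_node (alias set 0) so that the vector accesses built
   for the group conservatively alias everything.  */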
6193 /* Function vectorizable_store.
6195 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
6196 can be vectorized.
6197 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6198 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6199 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6201 static bool
6202 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6203 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
6205 tree data_ref;
6206 tree op;
6207 tree vec_oprnd = NULL_TREE;
6208 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6209 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6210 tree elem_type;
6211 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6212 struct loop *loop = NULL;
6213 machine_mode vec_mode;
6214 tree dummy;
6215 enum dr_alignment_support alignment_support_scheme;
6216 gimple *def_stmt;
6217 enum vect_def_type rhs_dt = vect_unknown_def_type;
6218 enum vect_def_type mask_dt = vect_unknown_def_type;
6219 stmt_vec_info prev_stmt_info = NULL;
6220 tree dataref_ptr = NULL_TREE;
6221 tree dataref_offset = NULL_TREE;
6222 gimple *ptr_incr = NULL;
6223 int ncopies;
6224 int j;
6225 gimple *next_stmt, *first_stmt;
6226 bool grouped_store;
6227 unsigned int group_size, i;
6228 vec<tree> oprnds = vNULL;
6229 vec<tree> result_chain = vNULL;
6230 bool inv_p;
6231 tree offset = NULL_TREE;
6232 vec<tree> vec_oprnds = vNULL;
6233 bool slp = (slp_node != NULL);
6234 unsigned int vec_num;
6235 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6236 vec_info *vinfo = stmt_info->vinfo;
6237 tree aggr_type;
6238 gather_scatter_info gs_info;
6239 gimple *new_stmt;
6240 poly_uint64 vf;
6241 vec_load_store_type vls_type;
6242 tree ref_type;
6244 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6245 return false;
6247 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6248 && ! vec_stmt)
6249 return false;
6251 /* Is vectorizable store? */
6253 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6254 if (is_gimple_assign (stmt))
6256 tree scalar_dest = gimple_assign_lhs (stmt);
6257 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6258 && is_pattern_stmt_p (stmt_info))
6259 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6260 if (TREE_CODE (scalar_dest) != ARRAY_REF
6261 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6262 && TREE_CODE (scalar_dest) != INDIRECT_REF
6263 && TREE_CODE (scalar_dest) != COMPONENT_REF
6264 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6265 && TREE_CODE (scalar_dest) != REALPART_EXPR
6266 && TREE_CODE (scalar_dest) != MEM_REF)
6267 return false;
6269 else
6271 gcall *call = dyn_cast <gcall *> (stmt);
6272 if (!call || !gimple_call_internal_p (call))
6273 return false;
6275 internal_fn ifn = gimple_call_internal_fn (call);
6276 if (!internal_store_fn_p (ifn))
6277 return false;
6279 if (slp_node != NULL)
6281 if (dump_enabled_p ())
6282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6283 "SLP of masked stores not supported.\n");
6284 return false;
6287 int mask_index = internal_fn_mask_index (ifn);
6288 if (mask_index >= 0)
6290 mask = gimple_call_arg (call, mask_index);
6291 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6292 &mask_vectype))
6293 return false;
6297 op = vect_get_store_rhs (stmt);
6299 /* Cannot have hybrid store SLP -- that would mean storing to the
6300 same location twice. */
6301 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6303 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6304 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6306 if (loop_vinfo)
6308 loop = LOOP_VINFO_LOOP (loop_vinfo);
6309 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6311 else
6312 vf = 1;
6314 /* Multiple types in SLP are handled by creating the appropriate number of
6315 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6316 case of SLP. */
6317 if (slp)
6318 ncopies = 1;
6319 else
6320 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6322 gcc_assert (ncopies >= 1);
6324 /* FORNOW. This restriction should be relaxed. */
6325 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6327 if (dump_enabled_p ())
6328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6329 "multiple types in nested loop.\n");
6330 return false;
6333 if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
6334 return false;
6336 elem_type = TREE_TYPE (vectype);
6337 vec_mode = TYPE_MODE (vectype);
6339 if (!STMT_VINFO_DATA_REF (stmt_info))
6340 return false;
6342 vect_memory_access_type memory_access_type;
6343 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
6344 &memory_access_type, &gs_info))
6345 return false;
6347 if (mask)
6349 if (memory_access_type == VMAT_CONTIGUOUS)
6351 if (!VECTOR_MODE_P (vec_mode)
6352 || !can_vec_mask_load_store_p (vec_mode,
6353 TYPE_MODE (mask_vectype), false))
6354 return false;
6356 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6357 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
6359 if (dump_enabled_p ())
6360 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6361 "unsupported access type for masked store.\n");
6362 return false;
6365 else
6367 /* FORNOW. In some cases we can vectorize even if the data type is
6368 not supported (e.g. array initialization with 0). */
6369 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6370 return false;
6373 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6374 && memory_access_type != VMAT_GATHER_SCATTER
6375 && (slp || memory_access_type != VMAT_CONTIGUOUS));
6376 if (grouped_store)
6378 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
6379 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6380 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
6382 else
6384 first_stmt = stmt;
6385 first_dr = dr;
6386 group_size = vec_num = 1;
6389 if (!vec_stmt) /* transformation not required. */
6391 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6393 if (loop_vinfo
6394 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6395 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6396 memory_access_type, &gs_info);
6398 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6399 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6400 vls_type, slp_node, cost_vec);
6401 return true;
6403 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6405 /* Transform. */
6407 ensure_base_align (dr);
6409 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6411 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6412 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6413 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6414 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6415 edge pe = loop_preheader_edge (loop);
6416 gimple_seq seq;
6417 basic_block new_bb;
6418 enum { NARROW, NONE, WIDEN } modifier;
6419 poly_uint64 scatter_off_nunits
6420 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6422 if (known_eq (nunits, scatter_off_nunits))
6423 modifier = NONE;
6424 else if (known_eq (nunits * 2, scatter_off_nunits))
6426 modifier = WIDEN;
6428 /* Currently gathers and scatters are only supported for
6429 fixed-length vectors. */
6430 unsigned int count = scatter_off_nunits.to_constant ();
6431 vec_perm_builder sel (count, count, 1);
6432 for (i = 0; i < (unsigned int) count; ++i)
6433 sel.quick_push (i | (count / 2));
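	  /* E.g. for COUNT == 8 this pushes { 4, 5, 6, 7, 4, 5, 6, 7 },
	     i.e. a permutation selecting the high half of the offset
	     vector, which is used below for the odd-numbered copies.  */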
6435 vec_perm_indices indices (sel, 1, count);
6436 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6437 indices);
6438 gcc_assert (perm_mask != NULL_TREE);
6440 else if (known_eq (nunits, scatter_off_nunits * 2))
6442 modifier = NARROW;
6444 /* Currently gathers and scatters are only supported for
6445 fixed-length vectors. */
6446 unsigned int count = nunits.to_constant ();
6447 vec_perm_builder sel (count, count, 1);
6448 for (i = 0; i < (unsigned int) count; ++i)
6449 sel.quick_push (i | (count / 2));
6451 vec_perm_indices indices (sel, 2, count);
6452 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6453 gcc_assert (perm_mask != NULL_TREE);
6454 ncopies *= 2;
6456 else
6457 gcc_unreachable ();
6459 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6460 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6461 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6462 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6463 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6464 scaletype = TREE_VALUE (arglist);
6466 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6467 && TREE_CODE (rettype) == VOID_TYPE);
6469 ptr = fold_convert (ptrtype, gs_info.base);
6470 if (!is_gimple_min_invariant (ptr))
6472 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6473 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6474 gcc_assert (!new_bb);
6477 /* Currently we support only unconditional scatter stores,
6478 so mask should be all ones. */
6479 mask = build_int_cst (masktype, -1);
6480 mask = vect_init_vector (stmt, mask, masktype, NULL);
6482 scale = build_int_cst (scaletype, gs_info.scale);
6484 prev_stmt_info = NULL;
6485 for (j = 0; j < ncopies; ++j)
6487 if (j == 0)
6489 src = vec_oprnd1
6490 = vect_get_vec_def_for_operand (op, stmt);
6491 op = vec_oprnd0
6492 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6494 else if (modifier != NONE && (j & 1))
6496 if (modifier == WIDEN)
6498 src = vec_oprnd1
6499 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6500 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6501 stmt, gsi);
6503 else if (modifier == NARROW)
6505 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6506 stmt, gsi);
6507 op = vec_oprnd0
6508 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6509 vec_oprnd0);
6511 else
6512 gcc_unreachable ();
6514 else
6516 src = vec_oprnd1
6517 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6518 op = vec_oprnd0
6519 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6520 vec_oprnd0);
6523 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6525 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6526 TYPE_VECTOR_SUBPARTS (srctype)));
6527 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6528 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6529 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6530 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6531 src = var;
6534 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6536 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6537 TYPE_VECTOR_SUBPARTS (idxtype)));
6538 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6539 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6540 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6541 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6542 op = var;
6545 new_stmt
6546 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
6548 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6550 if (prev_stmt_info == NULL)
6551 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6552 else
6553 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6554 prev_stmt_info = vinfo_for_stmt (new_stmt);
6556 return true;
6559 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6561 gimple *group_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
6562 DR_GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
6565 if (grouped_store)
6567 /* FORNOW */
6568 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
6570 /* We vectorize all the stmts of the interleaving group when we
6571 reach the last stmt in the group. */
6572 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6573 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
6574 && !slp)
6576 *vec_stmt = NULL;
6577 return true;
6580 if (slp)
6582 grouped_store = false;
6583 /* VEC_NUM is the number of vect stmts to be created for this
6584 group. */
6585 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6586 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6587 gcc_assert (DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
6588 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6589 op = vect_get_store_rhs (first_stmt);
6591 else
6592 /* VEC_NUM is the number of vect stmts to be created for this
6593 group. */
6594 vec_num = group_size;
6596 ref_type = get_group_alias_ptr_type (first_stmt);
6598 else
6599 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6601 if (dump_enabled_p ())
6602 dump_printf_loc (MSG_NOTE, vect_location,
6603 "transform store. ncopies = %d\n", ncopies);
6605 if (memory_access_type == VMAT_ELEMENTWISE
6606 || memory_access_type == VMAT_STRIDED_SLP)
6608 gimple_stmt_iterator incr_gsi;
6609 bool insert_after;
6610 gimple *incr;
6611 tree offvar;
6612 tree ivstep;
6613 tree running_off;
6614 tree stride_base, stride_step, alias_off;
6615 tree vec_oprnd;
6616 unsigned int g;
6617 /* Checked by get_load_store_type. */
6618 unsigned int const_nunits = nunits.to_constant ();
6620 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6621 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6623 stride_base
6624 = fold_build_pointer_plus
6625 (DR_BASE_ADDRESS (first_dr),
6626 size_binop (PLUS_EXPR,
6627 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6628 convert_to_ptrofftype (DR_INIT (first_dr))));
6629 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6631 /* For a store with loop-invariant (but other than power-of-2)
6632 stride (i.e. not a grouped access) like so:
6634 for (i = 0; i < n; i += stride)
6635 array[i] = ...;
6637 we generate a new induction variable and new stores from
6638 the components of the (vectorized) rhs:
6640 for (j = 0; ; j += VF*stride)
6641 vectemp = ...;
6642 tmp1 = vectemp[0];
6643 array[j] = tmp1;
6644 tmp2 = vectemp[1];
6645 array[j + stride] = tmp2;
6649 unsigned nstores = const_nunits;
6650 unsigned lnel = 1;
6651 tree ltype = elem_type;
6652 tree lvectype = vectype;
6653 if (slp)
6655 if (group_size < const_nunits
6656 && const_nunits % group_size == 0)
6658 nstores = const_nunits / group_size;
6659 lnel = group_size;
6660 ltype = build_vector_type (elem_type, group_size);
6661 lvectype = vectype;
6663 /* First check whether the vec_extract optab supports extracting the
6664 group-sized sub-vectors directly; if not, try the integer punning below. */
6665 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6666 machine_mode vmode;
6667 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6668 || !VECTOR_MODE_P (vmode)
6669 || !targetm.vector_mode_supported_p (vmode)
6670 || (convert_optab_handler (vec_extract_optab,
6671 TYPE_MODE (vectype), vmode)
6672 == CODE_FOR_nothing))
6674 /* Try to avoid emitting extracts of individual vector elements by
6675 instead extracting group-sized chunks: use an integer type of the
6676 same size as a group, view the vector as a vector of those
6677 integers, and extract from that, provided the target
6678 supports it. */
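/* E.g. for an SLP group of two float stores with VECTYPE V4SF, a
   group-sized chunk is 64 bits, so (assuming the target supports a V2DI
   view) the vector is reinterpreted as V2DI and whole DImode elements
   are extracted and stored instead of pairs of SFmode elements.  */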
6679 unsigned lsize
6680 = group_size * GET_MODE_BITSIZE (elmode);
6681 elmode = int_mode_for_size (lsize, 0).require ();
6682 unsigned int lnunits = const_nunits / group_size;
6683 /* If we can't construct such a vector fall back to
6684 element extracts from the original vector type and
6685 element size stores. */
6686 if (mode_for_vector (elmode, lnunits).exists (&vmode)
6687 && VECTOR_MODE_P (vmode)
6688 && targetm.vector_mode_supported_p (vmode)
6689 && (convert_optab_handler (vec_extract_optab,
6690 vmode, elmode)
6691 != CODE_FOR_nothing))
6693 nstores = lnunits;
6694 lnel = group_size;
6695 ltype = build_nonstandard_integer_type (lsize, 1);
6696 lvectype = build_vector_type (ltype, nstores);
6698 /* Else fall back to vector extraction anyway.
6699 Fewer stores are more important than avoiding spilling
6700 of the vector we extract from. Compared to the
6701 construction case in vectorizable_load no store-forwarding
6702 issue exists here for reasonable archs. */
6705 else if (group_size >= const_nunits
6706 && group_size % const_nunits == 0)
6708 nstores = 1;
6709 lnel = const_nunits;
6710 ltype = vectype;
6711 lvectype = vectype;
6713 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6714 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6717 ivstep = stride_step;
6718 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6719 build_int_cst (TREE_TYPE (ivstep), vf));
6721 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6723 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6724 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6725 create_iv (stride_base, ivstep, NULL,
6726 loop, &incr_gsi, insert_after,
6727 &offvar, NULL);
6728 incr = gsi_stmt (incr_gsi);
6729 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6731 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6733 prev_stmt_info = NULL;
6734 alias_off = build_int_cst (ref_type, 0);
6735 next_stmt = first_stmt;
6736 for (g = 0; g < group_size; g++)
6738 running_off = offvar;
6739 if (g)
6741 tree size = TYPE_SIZE_UNIT (ltype);
6742 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6743 size);
6744 tree newoff = copy_ssa_name (running_off, NULL);
6745 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6746 running_off, pos);
6747 vect_finish_stmt_generation (stmt, incr, gsi);
6748 running_off = newoff;
6750 unsigned int group_el = 0;
6751 unsigned HOST_WIDE_INT
6752 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6753 for (j = 0; j < ncopies; j++)
6755 /* We've set op and dt above, from vect_get_store_rhs,
6756 and first_stmt == stmt. */
6757 if (j == 0)
6759 if (slp)
6761 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6762 slp_node);
6763 vec_oprnd = vec_oprnds[0];
6765 else
6767 op = vect_get_store_rhs (next_stmt);
6768 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6771 else
6773 if (slp)
6774 vec_oprnd = vec_oprnds[j];
6775 else
6777 vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
6778 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
6779 vec_oprnd);
6782 /* Pun the vector to extract from if necessary. */
6783 if (lvectype != vectype)
6785 tree tem = make_ssa_name (lvectype);
6786 gimple *pun
6787 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6788 lvectype, vec_oprnd));
6789 vect_finish_stmt_generation (stmt, pun, gsi);
6790 vec_oprnd = tem;
6792 for (i = 0; i < nstores; i++)
6794 tree newref, newoff;
6795 gimple *incr, *assign;
6796 tree size = TYPE_SIZE (ltype);
6797 /* Extract the i'th component. */
6798 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6799 bitsize_int (i), size);
6800 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6801 size, pos);
6803 elem = force_gimple_operand_gsi (gsi, elem, true,
6804 NULL_TREE, true,
6805 GSI_SAME_STMT);
6807 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6808 group_el * elsz);
6809 newref = build2 (MEM_REF, ltype,
6810 running_off, this_off);
6811 vect_copy_ref_info (newref, DR_REF (first_dr));
6813 /* And store it to *running_off. */
6814 assign = gimple_build_assign (newref, elem);
6815 vect_finish_stmt_generation (stmt, assign, gsi);
6817 group_el += lnel;
6818 if (! slp
6819 || group_el == group_size)
6821 newoff = copy_ssa_name (running_off, NULL);
6822 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6823 running_off, stride_step);
6824 vect_finish_stmt_generation (stmt, incr, gsi);
6826 running_off = newoff;
6827 group_el = 0;
6829 if (g == group_size - 1
6830 && !slp)
6832 if (j == 0 && i == 0)
6833 STMT_VINFO_VEC_STMT (stmt_info)
6834 = *vec_stmt = assign;
6835 else
6836 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6837 prev_stmt_info = vinfo_for_stmt (assign);
6841 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6842 if (slp)
6843 break;
6846 vec_oprnds.release ();
6847 return true;
6850 auto_vec<tree> dr_chain (group_size);
6851 oprnds.create (group_size);
6853 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6854 gcc_assert (alignment_support_scheme);
6855 vec_loop_masks *loop_masks
6856 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6857 ? &LOOP_VINFO_MASKS (loop_vinfo)
6858 : NULL);
6859 /* Targets with store-lane instructions must not require explicit
6860 realignment. vect_supportable_dr_alignment always returns either
6861 dr_aligned or dr_unaligned_supported for masked operations. */
6862 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6863 && !mask
6864 && !loop_masks)
6865 || alignment_support_scheme == dr_aligned
6866 || alignment_support_scheme == dr_unaligned_supported);
6868 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6869 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6870 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6872 tree bump;
6873 tree vec_offset = NULL_TREE;
6874 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6876 aggr_type = NULL_TREE;
6877 bump = NULL_TREE;
6879 else if (memory_access_type == VMAT_GATHER_SCATTER)
6881 aggr_type = elem_type;
6882 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
6883 &bump, &vec_offset);
6885 else
6887 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6888 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6889 else
6890 aggr_type = vectype;
6891 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6894 if (mask)
6895 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6897 /* In case the vectorization factor (VF) is bigger than the number
6898 of elements that we can fit in a vectype (nunits), we have to generate
6899 more than one vector stmt - i.e. we need to "unroll" the
6900 vector stmt by a factor VF/nunits. For more details see documentation in
6901 vect_get_vec_def_for_copy_stmt. */
6903 /* In case of interleaving (non-unit grouped access):
6905 S1: &base + 2 = x2
6906 S2: &base = x0
6907 S3: &base + 1 = x1
6908 S4: &base + 3 = x3
6910 We create vectorized stores starting from the base address (the access of
6911 the first stmt in the chain, S2 in the above example) when the last store
6912 stmt of the chain (S4) is reached:
6914 VS1: &base = vx2
6915 VS2: &base + vec_size*1 = vx0
6916 VS3: &base + vec_size*2 = vx1
6917 VS4: &base + vec_size*3 = vx3
6919 Then permutation statements are generated:
6921 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6922 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6925 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6926 (the order of the data-refs in the output of vect_permute_store_chain
6927 corresponds to the order of scalar stmts in the interleaving chain - see
6928 the documentation of vect_permute_store_chain()).
6930 In case of both multiple types and interleaving, above vector stores and
6931 permutation stmts are created for every copy. The result vector stmts are
6932 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6933 STMT_VINFO_RELATED_STMT for the next copies.
6936 prev_stmt_info = NULL;
6937 tree vec_mask = NULL_TREE;
6938 for (j = 0; j < ncopies; j++)
6941 if (j == 0)
6943 if (slp)
6945 /* Get vectorized arguments for SLP_NODE. */
6946 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6947 NULL, slp_node);
6949 vec_oprnd = vec_oprnds[0];
6951 else
6953 /* For interleaved stores we collect vectorized defs for all the
6954 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6955 used as an input to vect_permute_store_chain(), and OPRNDS as
6956 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6958 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6959 OPRNDS are of size 1. */
6960 next_stmt = first_stmt;
6961 for (i = 0; i < group_size; i++)
6963 /* Since gaps are not supported for interleaved stores,
6964 DR_GROUP_SIZE is the exact number of stmts in the chain.
6965 Therefore, NEXT_STMT can't be NULL. If there is no
6966 interleaving, DR_GROUP_SIZE is 1, and only one iteration
6967 of the loop will be executed. */
6968 op = vect_get_store_rhs (next_stmt);
6969 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6970 dr_chain.quick_push (vec_oprnd);
6971 oprnds.quick_push (vec_oprnd);
6972 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6974 if (mask)
6975 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6976 mask_vectype);
6979 /* We should have caught mismatched types earlier. */
6980 gcc_assert (useless_type_conversion_p (vectype,
6981 TREE_TYPE (vec_oprnd)));
6982 bool simd_lane_access_p
6983 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6984 if (simd_lane_access_p
6985 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6986 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6987 && integer_zerop (DR_OFFSET (first_dr))
6988 && integer_zerop (DR_INIT (first_dr))
6989 && alias_sets_conflict_p (get_alias_set (aggr_type),
6990 get_alias_set (TREE_TYPE (ref_type))))
6992 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6993 dataref_offset = build_int_cst (ref_type, 0);
6994 inv_p = false;
6996 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6998 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
6999 &dataref_ptr, &vec_offset);
7000 inv_p = false;
7002 else
7003 dataref_ptr
7004 = vect_create_data_ref_ptr (first_stmt, aggr_type,
7005 simd_lane_access_p ? loop : NULL,
7006 offset, &dummy, gsi, &ptr_incr,
7007 simd_lane_access_p, &inv_p,
7008 NULL_TREE, bump);
7009 gcc_assert (bb_vinfo || !inv_p);
7011 else
7013 /* For interleaved stores we created vectorized defs for all the
7014 defs stored in OPRNDS in the previous iteration (previous copy).
7015 DR_CHAIN is then used as an input to vect_permute_store_chain(),
7016 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
7017 next copy.
7018 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7019 OPRNDS are of size 1. */
7020 for (i = 0; i < group_size; i++)
7022 op = oprnds[i];
7023 vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
7024 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
7025 dr_chain[i] = vec_oprnd;
7026 oprnds[i] = vec_oprnd;
7028 if (mask)
7029 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
7030 if (dataref_offset)
7031 dataref_offset
7032 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7033 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7034 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
7035 vec_offset);
7036 else
7037 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7038 bump);
7041 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7043 tree vec_array;
7045 /* Get an array into which we can store the individual vectors. */
7046 vec_array = create_vector_array (vectype, vec_num);
7048 /* Invalidate the current contents of VEC_ARRAY. This should
7049 become an RTL clobber too, which prevents the vector registers
7050 from being upward-exposed. */
7051 vect_clobber_variable (stmt, gsi, vec_array);
7053 /* Store the individual vectors into the array. */
7054 for (i = 0; i < vec_num; i++)
7056 vec_oprnd = dr_chain[i];
7057 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
7060 tree final_mask = NULL;
7061 if (loop_masks)
7062 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7063 vectype, j);
7064 if (vec_mask)
7065 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7066 vec_mask, gsi);
7068 gcall *call;
7069 if (final_mask)
7071 /* Emit:
7072 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7073 VEC_ARRAY). */
7074 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7075 tree alias_ptr = build_int_cst (ref_type, align);
7076 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7077 dataref_ptr, alias_ptr,
7078 final_mask, vec_array);
7080 else
7082 /* Emit:
7083 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7084 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7085 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7086 vec_array);
7087 gimple_call_set_lhs (call, data_ref);
7089 gimple_call_set_nothrow (call, true);
7090 new_stmt = call;
7091 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7093 /* Record that VEC_ARRAY is now dead. */
7094 vect_clobber_variable (stmt, gsi, vec_array);
7096 else
7098 new_stmt = NULL;
7099 if (grouped_store)
7101 if (j == 0)
7102 result_chain.create (group_size);
7103 /* Permute. */
7104 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
7105 &result_chain);
7108 next_stmt = first_stmt;
7109 for (i = 0; i < vec_num; i++)
7111 unsigned align, misalign;
7113 tree final_mask = NULL_TREE;
7114 if (loop_masks)
7115 final_mask = vect_get_loop_mask (gsi, loop_masks,
7116 vec_num * ncopies,
7117 vectype, vec_num * j + i);
7118 if (vec_mask)
7119 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7120 vec_mask, gsi);
7122 if (memory_access_type == VMAT_GATHER_SCATTER)
7124 tree scale = size_int (gs_info.scale);
7125 gcall *call;
7126 if (loop_masks)
7127 call = gimple_build_call_internal
7128 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7129 scale, vec_oprnd, final_mask);
7130 else
7131 call = gimple_build_call_internal
7132 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7133 scale, vec_oprnd);
7134 gimple_call_set_nothrow (call, true);
7135 new_stmt = call;
7136 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7137 break;
7140 if (i > 0)
7141 /* Bump the vector pointer. */
7142 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7143 stmt, bump);
7145 if (slp)
7146 vec_oprnd = vec_oprnds[i];
7147 else if (grouped_store)
7148 /* For grouped stores vectorized defs are interleaved in
7149 vect_permute_store_chain(). */
7150 vec_oprnd = result_chain[i];
7152 align = DR_TARGET_ALIGNMENT (first_dr);
7153 if (aligned_access_p (first_dr))
7154 misalign = 0;
7155 else if (DR_MISALIGNMENT (first_dr) == -1)
7157 align = dr_alignment (vect_dr_behavior (first_dr));
7158 misalign = 0;
7160 else
7161 misalign = DR_MISALIGNMENT (first_dr);
7162 if (dataref_offset == NULL_TREE
7163 && TREE_CODE (dataref_ptr) == SSA_NAME)
7164 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7165 misalign);
7167 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7169 tree perm_mask = perm_mask_for_reverse (vectype);
7170 tree perm_dest
7171 = vect_create_destination_var (vect_get_store_rhs (stmt),
7172 vectype);
7173 tree new_temp = make_ssa_name (perm_dest);
7175 /* Generate the permute statement. */
7176 gimple *perm_stmt
7177 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7178 vec_oprnd, perm_mask);
7179 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7181 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7182 vec_oprnd = new_temp;
7185 /* Arguments are ready. Create the new vector stmt. */
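	  /* For a masked store this becomes an IFN_MASK_STORE call whose
	     second argument encodes, via its value, the alignment that can
	     be guaranteed, taken below as the least significant set bit of
	     MISALIGN | ALIGN.  */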
7186 if (final_mask)
7188 align = least_bit_hwi (misalign | align);
7189 tree ptr = build_int_cst (ref_type, align);
7190 gcall *call
7191 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7192 dataref_ptr, ptr,
7193 final_mask, vec_oprnd);
7194 gimple_call_set_nothrow (call, true);
7195 new_stmt = call;
7197 else
7199 data_ref = fold_build2 (MEM_REF, vectype,
7200 dataref_ptr,
7201 dataref_offset
7202 ? dataref_offset
7203 : build_int_cst (ref_type, 0));
7204 if (aligned_access_p (first_dr))
7206 else if (DR_MISALIGNMENT (first_dr) == -1)
7207 TREE_TYPE (data_ref)
7208 = build_aligned_type (TREE_TYPE (data_ref),
7209 align * BITS_PER_UNIT);
7210 else
7211 TREE_TYPE (data_ref)
7212 = build_aligned_type (TREE_TYPE (data_ref),
7213 TYPE_ALIGN (elem_type));
7214 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7215 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
7217 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7219 if (slp)
7220 continue;
7222 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
7223 if (!next_stmt)
7224 break;
7227 if (!slp)
7229 if (j == 0)
7230 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7231 else
7232 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7233 prev_stmt_info = vinfo_for_stmt (new_stmt);
7237 oprnds.release ();
7238 result_chain.release ();
7239 vec_oprnds.release ();
7241 return true;
7244 /* Given a vector type VECTYPE, turn permutation SEL into the equivalent
7245 VECTOR_CST mask. No checks are made that the target platform supports the
7246 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7247 vect_gen_perm_mask_checked. */
7249 tree
7250 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7252 tree mask_type;
7254 poly_uint64 nunits = sel.length ();
7255 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7257 mask_type = build_vector_type (ssizetype, nunits);
7258 return vec_perm_indices_to_tree (mask_type, sel);
7261 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7262 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7264 tree
7265 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7267 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7268 return vect_gen_perm_mask_any (vectype, sel);
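/* A minimal usage sketch (mirroring the scatter-store code earlier in this
   file), assuming VECTYPE is an 8-element vector type: build a mask that
   swaps the two halves of a vector:

     vec_perm_builder sel (8, 8, 1);
     for (unsigned int i = 0; i < 8; ++i)
       sel.quick_push ((i + 4) & 7);
     vec_perm_indices indices (sel, 1, 8);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   vect_gen_perm_mask_checked asserts that the target can perform the
   permutation; vect_gen_perm_mask_any leaves that check to the caller.  */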
7271 /* Given vector variables X and Y that were generated for the scalar
7272 STMT, generate instructions to permute the vector elements of X and Y
7273 using permutation mask MASK_VEC, insert them at *GSI and return the
7274 permuted vector variable. */
7276 static tree
7277 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
7278 gimple_stmt_iterator *gsi)
7280 tree vectype = TREE_TYPE (x);
7281 tree perm_dest, data_ref;
7282 gimple *perm_stmt;
7284 tree scalar_dest = gimple_get_lhs (stmt);
7285 if (TREE_CODE (scalar_dest) == SSA_NAME)
7286 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7287 else
7288 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7289 data_ref = make_ssa_name (perm_dest);
7291 /* Generate the permute statement. */
7292 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7293 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7295 return data_ref;
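/* For instance, with a V4SI input where X == Y == vx_1 and a reversal
   mask, the code above emits roughly (SSA names illustrative):

     vect_perm_dest_2 = VEC_PERM_EXPR <vx_1, vx_1, { 3, 2, 1, 0 }>;

   and returns the SSA name vect_perm_dest_2.  */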
7298 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
7299 inserting them on the loop's preheader edge. Returns true if we
7300 were successful in doing so (and thus STMT can then be moved),
7301 otherwise returns false. */
7303 static bool
7304 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
7306 ssa_op_iter i;
7307 tree op;
7308 bool any = false;
7310 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7312 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7313 if (!gimple_nop_p (def_stmt)
7314 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7316 /* Make sure we don't need to recurse. While we could do
7317 so in simple cases, when there are more complex use webs
7318 we don't have an easy way to preserve stmt order to fulfil
7319 dependencies within them. */
7320 tree op2;
7321 ssa_op_iter i2;
7322 if (gimple_code (def_stmt) == GIMPLE_PHI)
7323 return false;
7324 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7326 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7327 if (!gimple_nop_p (def_stmt2)
7328 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7329 return false;
7331 any = true;
7335 if (!any)
7336 return true;
7338 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7340 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7341 if (!gimple_nop_p (def_stmt)
7342 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7344 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7345 gsi_remove (&gsi, false);
7346 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7350 return true;
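/* Illustrative example (variable names invented): given

     loop:
       a_2 = b_1 + 4;
       x_3 = MEM[(int *)a_2];   <-- STMT

   where b_1 is defined outside LOOP, hoist_defs_of_uses moves the
   single-level definition a_2 = b_1 + 4 onto the preheader edge,
   after which the caller is free to hoist the load itself:

     preheader:
       a_2 = b_1 + 4;
     loop:
       x_3 = MEM[(int *)a_2];  */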
7353 /* vectorizable_load.
7355 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
7356 can be vectorized.
7357 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7358 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7359 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7361 static bool
7362 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
7363 slp_tree slp_node, slp_instance slp_node_instance,
7364 stmt_vector_for_cost *cost_vec)
7366 tree scalar_dest;
7367 tree vec_dest = NULL;
7368 tree data_ref = NULL;
7369 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7370 stmt_vec_info prev_stmt_info;
7371 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7372 struct loop *loop = NULL;
7373 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
7374 bool nested_in_vect_loop = false;
7375 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
7376 tree elem_type;
7377 tree new_temp;
7378 machine_mode mode;
7379 gimple *new_stmt = NULL;
7380 tree dummy;
7381 enum dr_alignment_support alignment_support_scheme;
7382 tree dataref_ptr = NULL_TREE;
7383 tree dataref_offset = NULL_TREE;
7384 gimple *ptr_incr = NULL;
7385 int ncopies;
7386 int i, j;
7387 unsigned int group_size;
7388 poly_uint64 group_gap_adj;
7389 tree msq = NULL_TREE, lsq;
7390 tree offset = NULL_TREE;
7391 tree byte_offset = NULL_TREE;
7392 tree realignment_token = NULL_TREE;
7393 gphi *phi = NULL;
7394 vec<tree> dr_chain = vNULL;
7395 bool grouped_load = false;
7396 gimple *first_stmt;
7397 gimple *first_stmt_for_drptr = NULL;
7398 bool inv_p;
7399 bool compute_in_loop = false;
7400 struct loop *at_loop;
7401 int vec_num;
7402 bool slp = (slp_node != NULL);
7403 bool slp_perm = false;
7404 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7405 poly_uint64 vf;
7406 tree aggr_type;
7407 gather_scatter_info gs_info;
7408 vec_info *vinfo = stmt_info->vinfo;
7409 tree ref_type;
7410 enum vect_def_type mask_dt = vect_unknown_def_type;
7412 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7413 return false;
7415 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7416 && ! vec_stmt)
7417 return false;
7419 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7420 if (is_gimple_assign (stmt))
7422 scalar_dest = gimple_assign_lhs (stmt);
7423 if (TREE_CODE (scalar_dest) != SSA_NAME)
7424 return false;
7426 tree_code code = gimple_assign_rhs_code (stmt);
7427 if (code != ARRAY_REF
7428 && code != BIT_FIELD_REF
7429 && code != INDIRECT_REF
7430 && code != COMPONENT_REF
7431 && code != IMAGPART_EXPR
7432 && code != REALPART_EXPR
7433 && code != MEM_REF
7434 && TREE_CODE_CLASS (code) != tcc_declaration)
7435 return false;
7437 else
7439 gcall *call = dyn_cast <gcall *> (stmt);
7440 if (!call || !gimple_call_internal_p (call))
7441 return false;
7443 internal_fn ifn = gimple_call_internal_fn (call);
7444 if (!internal_load_fn_p (ifn))
7445 return false;
7447 scalar_dest = gimple_call_lhs (call);
7448 if (!scalar_dest)
7449 return false;
7451 if (slp_node != NULL)
7453 if (dump_enabled_p ())
7454 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7455 "SLP of masked loads not supported.\n");
7456 return false;
7459 int mask_index = internal_fn_mask_index (ifn);
7460 if (mask_index >= 0)
7462 mask = gimple_call_arg (call, mask_index);
7463 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7464 &mask_vectype))
7465 return false;
7469 if (!STMT_VINFO_DATA_REF (stmt_info))
7470 return false;
7472 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7473 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7475 if (loop_vinfo)
7477 loop = LOOP_VINFO_LOOP (loop_vinfo);
7478 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7479 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7481 else
7482 vf = 1;
7484 /* Multiple types in SLP are handled by creating the appropriate number of
7485 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7486 case of SLP. */
7487 if (slp)
7488 ncopies = 1;
7489 else
7490 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7492 gcc_assert (ncopies >= 1);
7494 /* FORNOW. This restriction should be relaxed. */
7495 if (nested_in_vect_loop && ncopies > 1)
7497 if (dump_enabled_p ())
7498 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7499 "multiple types in nested loop.\n");
7500 return false;
7503 /* Invalidate assumptions made by dependence analysis when vectorization
7504 on the unrolled body effectively re-orders stmts. */
7505 if (ncopies > 1
7506 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7507 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7508 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7510 if (dump_enabled_p ())
7511 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7512 "cannot perform implicit CSE when unrolling "
7513 "with negative dependence distance\n");
7514 return false;
7517 elem_type = TREE_TYPE (vectype);
7518 mode = TYPE_MODE (vectype);
7520 /* FORNOW. In some cases we can vectorize even if the data type is not
7521 supported (e.g. data copies). */
7522 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7524 if (dump_enabled_p ())
7525 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7526 "Aligned load, but unsupported type.\n");
7527 return false;
7530 /* Check if the load is a part of an interleaving chain. */
7531 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7533 grouped_load = true;
7534 /* FORNOW */
7535 gcc_assert (!nested_in_vect_loop);
7536 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7538 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7539 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
7541 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7542 slp_perm = true;
7544 /* Invalidate assumptions made by dependence analysis when vectorization
7545 on the unrolled body effectively re-orders stmts. */
7546 if (!PURE_SLP_STMT (stmt_info)
7547 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7548 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7549 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7551 if (dump_enabled_p ())
7552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7553 "cannot perform implicit CSE when performing "
7554 "group loads with negative dependence distance\n");
7555 return false;
7558 /* Similarly, when the stmt is a load that is both part of an SLP
7559 instance and a loop-vectorized stmt via the same-dr mechanism,
7560 we have to give up. */
7561 if (DR_GROUP_SAME_DR_STMT (stmt_info)
7562 && (STMT_SLP_TYPE (stmt_info)
7563 != STMT_SLP_TYPE (vinfo_for_stmt
7564 (DR_GROUP_SAME_DR_STMT (stmt_info)))))
7566 if (dump_enabled_p ())
7567 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7568 "conflicting SLP types for CSEd load\n");
7569 return false;
7572 else
7573 group_size = 1;
7575 vect_memory_access_type memory_access_type;
7576 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
7577 &memory_access_type, &gs_info))
7578 return false;
7580 if (mask)
7582 if (memory_access_type == VMAT_CONTIGUOUS)
7584 machine_mode vec_mode = TYPE_MODE (vectype);
7585 if (!VECTOR_MODE_P (vec_mode)
7586 || !can_vec_mask_load_store_p (vec_mode,
7587 TYPE_MODE (mask_vectype), true))
7588 return false;
7590 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7592 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7593 tree masktype
7594 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7595 if (TREE_CODE (masktype) == INTEGER_TYPE)
7597 if (dump_enabled_p ())
7598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7599 "masked gather with integer mask not"
7600 " supported.");
7601 return false;
7604 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7605 && memory_access_type != VMAT_GATHER_SCATTER)
7607 if (dump_enabled_p ())
7608 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7609 "unsupported access type for masked load.\n");
7610 return false;
7614 if (!vec_stmt) /* transformation not required. */
7616 if (!slp)
7617 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7619 if (loop_vinfo
7620 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7621 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7622 memory_access_type, &gs_info);
7624 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7625 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7626 slp_node_instance, slp_node, cost_vec);
7627 return true;
7630 if (!slp)
7631 gcc_assert (memory_access_type
7632 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7634 if (dump_enabled_p ())
7635 dump_printf_loc (MSG_NOTE, vect_location,
7636 "transform load. ncopies = %d\n", ncopies);
7638 /* Transform. */
7640 ensure_base_align (dr);
7642 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7644 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7645 mask_dt);
7646 return true;
7649 if (memory_access_type == VMAT_ELEMENTWISE
7650 || memory_access_type == VMAT_STRIDED_SLP)
7652 gimple_stmt_iterator incr_gsi;
7653 bool insert_after;
7654 gimple *incr;
7655 tree offvar;
7656 tree ivstep;
7657 tree running_off;
7658 vec<constructor_elt, va_gc> *v = NULL;
7659 tree stride_base, stride_step, alias_off;
7660 /* Checked by get_load_store_type. */
7661 unsigned int const_nunits = nunits.to_constant ();
7662 unsigned HOST_WIDE_INT cst_offset = 0;
7664 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7665 gcc_assert (!nested_in_vect_loop);
7667 if (grouped_load)
7669 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7670 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7672 else
7674 first_stmt = stmt;
7675 first_dr = dr;
7677 if (slp && grouped_load)
7679 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
7680 ref_type = get_group_alias_ptr_type (first_stmt);
7682 else
7684 if (grouped_load)
7685 cst_offset
7686 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7687 * vect_get_place_in_interleaving_chain (stmt, first_stmt));
7688 group_size = 1;
7689 ref_type = reference_alias_ptr_type (DR_REF (dr));
7692 stride_base
7693 = fold_build_pointer_plus
7694 (DR_BASE_ADDRESS (first_dr),
7695 size_binop (PLUS_EXPR,
7696 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7697 convert_to_ptrofftype (DR_INIT (first_dr))));
7698 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7700 /* For a load with loop-invariant (but other than power-of-2)
7701 stride (i.e. not a grouped access) like so:
7703 for (i = 0; i < n; i += stride)
7704 ... = array[i];
7706 we generate a new induction variable and new accesses to
7707 form a new vector (or vectors, depending on ncopies):
7709 for (j = 0; ; j += VF*stride)
7710 tmp1 = array[j];
7711 tmp2 = array[j + stride];
7713 vectemp = {tmp1, tmp2, ...}
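/* Concretely, assuming a four-element vectype and ncopies == 1, the
   generated body is (sketch):

     for (j = 0; ; j += VF*stride)
       tmp1 = array[j];
       tmp2 = array[j + stride];
       tmp3 = array[j + 2*stride];
       tmp4 = array[j + 3*stride];
       vectemp = {tmp1, tmp2, tmp3, tmp4};  */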
7716 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7717 build_int_cst (TREE_TYPE (stride_step), vf));
7719 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7721 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7722 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7723 create_iv (stride_base, ivstep, NULL,
7724 loop, &incr_gsi, insert_after,
7725 &offvar, NULL);
7726 incr = gsi_stmt (incr_gsi);
7727 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7729 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7731 prev_stmt_info = NULL;
7732 running_off = offvar;
7733 alias_off = build_int_cst (ref_type, 0);
7734 int nloads = const_nunits;
7735 int lnel = 1;
7736 tree ltype = TREE_TYPE (vectype);
7737 tree lvectype = vectype;
7738 auto_vec<tree> dr_chain;
7739 if (memory_access_type == VMAT_STRIDED_SLP)
7741 if (group_size < const_nunits)
7743 /* First check if vec_init optab supports construction from
7744 vector elts directly. */
7745 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7746 machine_mode vmode;
7747 if (mode_for_vector (elmode, group_size).exists (&vmode)
7748 && VECTOR_MODE_P (vmode)
7749 && targetm.vector_mode_supported_p (vmode)
7750 && (convert_optab_handler (vec_init_optab,
7751 TYPE_MODE (vectype), vmode)
7752 != CODE_FOR_nothing))
7754 nloads = const_nunits / group_size;
7755 lnel = group_size;
7756 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7758 else
7760 /* Otherwise avoid emitting a constructor of vector elements
7761 by performing the loads using an integer type of the same
7762 size, constructing a vector of those and then
7763 re-interpreting it as the original vector type.
7764 This avoids a huge runtime penalty due to the general
7765 inability to perform store forwarding from smaller stores
7766 to a larger load. */
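/* For example, assuming a V4SI vectype, group_size == 2 and a target
   that supports the required vec_init, each group of two ints is
   loaded as one 64-bit integer (sketch, SSA names and the uint64
   spelling illustrative):

     tmp1_1 = MEM <uint64> [ptr];
     tmp2_2 = MEM <uint64> [ptr + step];
     v_3 = {tmp1_1, tmp2_2};
     vect_4 = VIEW_CONVERT_EXPR<vector(4) int>(v_3);

   instead of assembling the vector from four 32-bit element loads.  */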
7767 unsigned lsize
7768 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7769 elmode = int_mode_for_size (lsize, 0).require ();
7770 unsigned int lnunits = const_nunits / group_size;
7771 /* If we can't construct such a vector fall back to
7772 element loads of the original vector type. */
7773 if (mode_for_vector (elmode, lnunits).exists (&vmode)
7774 && VECTOR_MODE_P (vmode)
7775 && targetm.vector_mode_supported_p (vmode)
7776 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7777 != CODE_FOR_nothing))
7779 nloads = lnunits;
7780 lnel = group_size;
7781 ltype = build_nonstandard_integer_type (lsize, 1);
7782 lvectype = build_vector_type (ltype, nloads);
7786 else
7788 nloads = 1;
7789 lnel = const_nunits;
7790 ltype = vectype;
7792 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7794 /* Load vector(1) scalar_type if the vectype has just one element. */
7795 else if (nloads == 1)
7796 ltype = vectype;
7798 if (slp)
7800 /* For SLP permutation support we need to load the whole group,
7801 not only the number of vector stmts the permutation result
7802 fits in. */
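/* E.g. with group_size == 2, VF == 4 and a four-element vectype the
   whole group is 2 * 4 == 8 scalar elements, so CEIL (8, 4) == 2
   vector copies are loaded here (numbers illustrative).  */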
7803 if (slp_perm)
7805 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7806 variable VF. */
7807 unsigned int const_vf = vf.to_constant ();
7808 ncopies = CEIL (group_size * const_vf, const_nunits);
7809 dr_chain.create (ncopies);
7811 else
7812 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7814 unsigned int group_el = 0;
7815 unsigned HOST_WIDE_INT
7816 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7817 for (j = 0; j < ncopies; j++)
7819 if (nloads > 1)
7820 vec_alloc (v, nloads);
7821 for (i = 0; i < nloads; i++)
7823 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7824 group_el * elsz + cst_offset);
7825 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7826 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7827 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
7828 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7829 if (nloads > 1)
7830 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7831 gimple_assign_lhs (new_stmt));
7833 group_el += lnel;
7834 if (! slp
7835 || group_el == group_size)
7837 tree newoff = copy_ssa_name (running_off);
7838 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7839 running_off, stride_step);
7840 vect_finish_stmt_generation (stmt, incr, gsi);
7842 running_off = newoff;
7843 group_el = 0;
7846 if (nloads > 1)
7848 tree vec_inv = build_constructor (lvectype, v);
7849 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7850 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7851 if (lvectype != vectype)
7853 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7854 VIEW_CONVERT_EXPR,
7855 build1 (VIEW_CONVERT_EXPR,
7856 vectype, new_temp));
7857 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7861 if (slp)
7863 if (slp_perm)
7864 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7865 else
7866 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7868 else
7870 if (j == 0)
7871 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7872 else
7873 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7874 prev_stmt_info = vinfo_for_stmt (new_stmt);
7877 if (slp_perm)
7879 unsigned n_perms;
7880 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7881 slp_node_instance, false, &n_perms);
7883 return true;
7886 if (memory_access_type == VMAT_GATHER_SCATTER
7887 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
7888 grouped_load = false;
7890 if (grouped_load)
7892 first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info);
7893 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
7894 /* For SLP vectorization we directly vectorize a subchain
7895 without permutation. */
7896 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7897 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7898 /* For BB vectorization always use the first stmt to base
7899 the data ref pointer on. */
7900 if (bb_vinfo)
7901 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7903 /* Check if the chain of loads is already vectorized. */
7904 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7905 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7906 ??? But we can only do so if there is exactly one
7907 as we have no way to get at the rest. Leave the CSE
7908 opportunity alone.
7909 ??? With the group load eventually participating
7910 in multiple different permutations (having multiple
7911 slp nodes which refer to the same group) the CSE
7912 is even wrong code. See PR56270. */
7913 && !slp)
7915 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7916 return true;
7918 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7919 group_gap_adj = 0;
7921 /* VEC_NUM is the number of vect stmts to be created for this group. */
7922 if (slp)
7924 grouped_load = false;
7925 /* For SLP permutation support we need to load the whole group,
7926 not only the number of vector stmts the permutation result
7927 fits in. */
7928 if (slp_perm)
7930 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7931 variable VF. */
7932 unsigned int const_vf = vf.to_constant ();
7933 unsigned int const_nunits = nunits.to_constant ();
7934 vec_num = CEIL (group_size * const_vf, const_nunits);
7935 group_gap_adj = vf * group_size - nunits * vec_num;
7937 else
7939 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7940 group_gap_adj
7941 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7944 else
7945 vec_num = group_size;
7947 ref_type = get_group_alias_ptr_type (first_stmt);
7949 else
7951 first_stmt = stmt;
7952 first_dr = dr;
7953 group_size = vec_num = 1;
7954 group_gap_adj = 0;
7955 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7958 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7959 gcc_assert (alignment_support_scheme);
7960 vec_loop_masks *loop_masks
7961 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7962 ? &LOOP_VINFO_MASKS (loop_vinfo)
7963 : NULL);
7964 /* Targets with load-lanes instructions must not require explicit
7965 realignment. vect_supportable_dr_alignment always returns either
7966 dr_aligned or dr_unaligned_supported for masked operations. */
7967 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7968 && !mask
7969 && !loop_masks)
7970 || alignment_support_scheme == dr_aligned
7971 || alignment_support_scheme == dr_unaligned_supported);
7973 /* In case the vectorization factor (VF) is bigger than the number
7974 of elements that we can fit in a vectype (nunits), we have to generate
7975 more than one vector stmt, i.e., we need to "unroll" the
7976 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7977 from one copy of the vector stmt to the next, in the field
7978 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7979 stages to find the correct vector defs to be used when vectorizing
7980 stmts that use the defs of the current stmt. The example below
7981 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7982 need to create 4 vectorized stmts):
7984 before vectorization:
7985 RELATED_STMT VEC_STMT
7986 S1: x = memref - -
7987 S2: z = x + 1 - -
7989 step 1: vectorize stmt S1:
7990 We first create the vector stmt VS1_0, and, as usual, record a
7991 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7992 Next, we create the vector stmt VS1_1, and record a pointer to
7993 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7994 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7995 stmts and pointers:
7996 RELATED_STMT VEC_STMT
7997 VS1_0: vx0 = memref0 VS1_1 -
7998 VS1_1: vx1 = memref1 VS1_2 -
7999 VS1_2: vx2 = memref2 VS1_3 -
8000 VS1_3: vx3 = memref3 - -
8001 S1: x = load - VS1_0
8002 S2: z = x + 1 - -
8004 See the documentation of vect_get_vec_def_for_stmt_copy for how the
8005 information recorded in the RELATED_STMT field is used to vectorize
8006 stmt S2. */
8008 /* In case of interleaving (non-unit grouped access):
8010 S1: x2 = &base + 2
8011 S2: x0 = &base
8012 S3: x1 = &base + 1
8013 S4: x3 = &base + 3
8015 Vectorized loads are created in the order of memory accesses
8016 starting from the access of the first stmt of the chain:
8018 VS1: vx0 = &base
8019 VS2: vx1 = &base + vec_size*1
8020 VS3: vx2 = &base + vec_size*2
8021 VS4: vx3 = &base + vec_size*3
8023 Then permutation statements are generated:
8025 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8026 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8029 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8030 (the order of the data-refs in the output of vect_permute_load_chain
8031 corresponds to the order of scalar stmts in the interleaving chain - see
8032 the documentation of vect_permute_load_chain()).
8033 The generation of permutation stmts and recording them in
8034 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8036 In case of both multiple types and interleaving, the vector loads and
8037 permutation stmts above are created for every copy. The result vector
8038 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8039 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
8041 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8042 on a target that supports unaligned accesses (dr_unaligned_supported)
8043 we generate the following code:
8044 p = initial_addr;
8045 indx = 0;
8046 loop {
8047 p = p + indx * vectype_size;
8048 vec_dest = *(p);
8049 indx = indx + 1;
8052 Otherwise, the data reference is potentially unaligned on a target that
8053 does not support unaligned accesses (dr_explicit_realign_optimized) -
8054 then generate the following code, in which the data in each iteration is
8055 obtained by two vector loads, one from the previous iteration, and one
8056 from the current iteration:
8057 p1 = initial_addr;
8058 msq_init = *(floor(p1))
8059 p2 = initial_addr + VS - 1;
8060 realignment_token = call target_builtin;
8061 indx = 0;
8062 loop {
8063 p2 = p2 + indx * vectype_size
8064 lsq = *(floor(p2))
8065 vec_dest = realign_load (msq, lsq, realignment_token)
8066 indx = indx + 1;
8067 msq = lsq;
8068 } */
8070 /* If the misalignment remains the same throughout the execution of the
8071 loop, we can create the init_addr and permutation mask at the loop
8072 preheader. Otherwise, it needs to be created inside the loop.
8073 This can only occur when vectorizing memory accesses in the inner-loop
8074 nested within an outer-loop that is being vectorized. */
8076 if (nested_in_vect_loop
8077 && !multiple_p (DR_STEP_ALIGNMENT (dr),
8078 GET_MODE_SIZE (TYPE_MODE (vectype))))
8080 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8081 compute_in_loop = true;
8084 if ((alignment_support_scheme == dr_explicit_realign_optimized
8085 || alignment_support_scheme == dr_explicit_realign)
8086 && !compute_in_loop)
8088 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
8089 alignment_support_scheme, NULL_TREE,
8090 &at_loop);
8091 if (alignment_support_scheme == dr_explicit_realign_optimized)
8093 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
8094 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8095 size_one_node);
8098 else
8099 at_loop = loop;
8101 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8102 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8104 tree bump;
8105 tree vec_offset = NULL_TREE;
8106 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8108 aggr_type = NULL_TREE;
8109 bump = NULL_TREE;
8111 else if (memory_access_type == VMAT_GATHER_SCATTER)
8113 aggr_type = elem_type;
8114 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
8115 &bump, &vec_offset);
8117 else
8119 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8120 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8121 else
8122 aggr_type = vectype;
8123 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
8126 tree vec_mask = NULL_TREE;
8127 prev_stmt_info = NULL;
8128 poly_uint64 group_elt = 0;
8129 for (j = 0; j < ncopies; j++)
8131 /* 1. Create the vector or array pointer update chain. */
8132 if (j == 0)
8134 bool simd_lane_access_p
8135 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8136 if (simd_lane_access_p
8137 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8138 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8139 && integer_zerop (DR_OFFSET (first_dr))
8140 && integer_zerop (DR_INIT (first_dr))
8141 && alias_sets_conflict_p (get_alias_set (aggr_type),
8142 get_alias_set (TREE_TYPE (ref_type)))
8143 && (alignment_support_scheme == dr_aligned
8144 || alignment_support_scheme == dr_unaligned_supported))
8146 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
8147 dataref_offset = build_int_cst (ref_type, 0);
8148 inv_p = false;
8150 else if (first_stmt_for_drptr
8151 && first_stmt != first_stmt_for_drptr)
8153 dataref_ptr
8154 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
8155 at_loop, offset, &dummy, gsi,
8156 &ptr_incr, simd_lane_access_p,
8157 &inv_p, byte_offset, bump);
8158 /* Adjust the pointer by the difference to first_stmt. */
8159 data_reference_p ptrdr
8160 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
8161 tree diff = fold_convert (sizetype,
8162 size_binop (MINUS_EXPR,
8163 DR_INIT (first_dr),
8164 DR_INIT (ptrdr)));
8165 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8166 stmt, diff);
8168 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8170 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8171 &dataref_ptr, &vec_offset);
8172 inv_p = false;
8174 else
8175 dataref_ptr
8176 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
8177 offset, &dummy, gsi, &ptr_incr,
8178 simd_lane_access_p, &inv_p,
8179 byte_offset, bump);
8180 if (mask)
8181 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8182 mask_vectype);
8184 else
8186 if (dataref_offset)
8187 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8188 bump);
8189 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8190 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8191 vec_offset);
8192 else
8193 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8194 stmt, bump);
8195 if (mask)
8196 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
8199 if (grouped_load || slp_perm)
8200 dr_chain.create (vec_num);
8202 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8204 tree vec_array;
8206 vec_array = create_vector_array (vectype, vec_num);
8208 tree final_mask = NULL_TREE;
8209 if (loop_masks)
8210 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8211 vectype, j);
8212 if (vec_mask)
8213 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8214 vec_mask, gsi);
8216 gcall *call;
8217 if (final_mask)
8219 /* Emit:
8220 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8221 VEC_MASK). */
8222 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8223 tree alias_ptr = build_int_cst (ref_type, align);
8224 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8225 dataref_ptr, alias_ptr,
8226 final_mask);
8228 else
8230 /* Emit:
8231 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
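/* In GIMPLE dumps this appears roughly as (types and names
   illustrative, assuming two V4SI vectors per group):

     vect_array.5 = .LOAD_LANES (MEM <int[8]> [(int *) dataref_ptr]);  */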
8232 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8233 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8235 gimple_call_set_lhs (call, vec_array);
8236 gimple_call_set_nothrow (call, true);
8237 new_stmt = call;
8238 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8240 /* Extract each vector into an SSA_NAME. */
8241 for (i = 0; i < vec_num; i++)
8243 new_temp = read_vector_array (stmt, gsi, scalar_dest,
8244 vec_array, i);
8245 dr_chain.quick_push (new_temp);
8248 /* Record the mapping between SSA_NAMEs and statements. */
8249 vect_record_grouped_load_vectors (stmt, dr_chain);
8251 /* Record that VEC_ARRAY is now dead. */
8252 vect_clobber_variable (stmt, gsi, vec_array);
8254 else
8256 for (i = 0; i < vec_num; i++)
8258 tree final_mask = NULL_TREE;
8259 if (loop_masks
8260 && memory_access_type != VMAT_INVARIANT)
8261 final_mask = vect_get_loop_mask (gsi, loop_masks,
8262 vec_num * ncopies,
8263 vectype, vec_num * j + i);
8264 if (vec_mask)
8265 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8266 vec_mask, gsi);
8268 if (i > 0)
8269 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8270 stmt, bump);
8272 /* 2. Create the vector-load in the loop. */
8273 switch (alignment_support_scheme)
8275 case dr_aligned:
8276 case dr_unaligned_supported:
8278 unsigned int align, misalign;
8280 if (memory_access_type == VMAT_GATHER_SCATTER)
8282 tree scale = size_int (gs_info.scale);
8283 gcall *call;
8284 if (loop_masks)
8285 call = gimple_build_call_internal
8286 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8287 vec_offset, scale, final_mask);
8288 else
8289 call = gimple_build_call_internal
8290 (IFN_GATHER_LOAD, 3, dataref_ptr,
8291 vec_offset, scale);
8292 gimple_call_set_nothrow (call, true);
8293 new_stmt = call;
8294 data_ref = NULL_TREE;
8295 break;
8298 align = DR_TARGET_ALIGNMENT (dr);
8299 if (alignment_support_scheme == dr_aligned)
8301 gcc_assert (aligned_access_p (first_dr));
8302 misalign = 0;
8304 else if (DR_MISALIGNMENT (first_dr) == -1)
8306 align = dr_alignment (vect_dr_behavior (first_dr));
8307 misalign = 0;
8309 else
8310 misalign = DR_MISALIGNMENT (first_dr);
8311 if (dataref_offset == NULL_TREE
8312 && TREE_CODE (dataref_ptr) == SSA_NAME)
8313 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8314 align, misalign);
8316 if (final_mask)
8318 align = least_bit_hwi (misalign | align);
8319 tree ptr = build_int_cst (ref_type, align);
8320 gcall *call
8321 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8322 dataref_ptr, ptr,
8323 final_mask);
8324 gimple_call_set_nothrow (call, true);
8325 new_stmt = call;
8326 data_ref = NULL_TREE;
8328 else
8330 data_ref
8331 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8332 dataref_offset
8333 ? dataref_offset
8334 : build_int_cst (ref_type, 0));
8335 if (alignment_support_scheme == dr_aligned)
8337 else if (DR_MISALIGNMENT (first_dr) == -1)
8338 TREE_TYPE (data_ref)
8339 = build_aligned_type (TREE_TYPE (data_ref),
8340 align * BITS_PER_UNIT);
8341 else
8342 TREE_TYPE (data_ref)
8343 = build_aligned_type (TREE_TYPE (data_ref),
8344 TYPE_ALIGN (elem_type));
8346 break;
8348 case dr_explicit_realign:
8350 tree ptr, bump;
8352 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8354 if (compute_in_loop)
8355 msq = vect_setup_realignment (first_stmt, gsi,
8356 &realignment_token,
8357 dr_explicit_realign,
8358 dataref_ptr, NULL);
8360 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8361 ptr = copy_ssa_name (dataref_ptr);
8362 else
8363 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8364 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8365 new_stmt = gimple_build_assign
8366 (ptr, BIT_AND_EXPR, dataref_ptr,
8367 build_int_cst
8368 (TREE_TYPE (dataref_ptr),
8369 -(HOST_WIDE_INT) align));
8370 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8371 data_ref
8372 = build2 (MEM_REF, vectype, ptr,
8373 build_int_cst (ref_type, 0));
8374 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8375 vec_dest = vect_create_destination_var (scalar_dest,
8376 vectype);
8377 new_stmt = gimple_build_assign (vec_dest, data_ref);
8378 new_temp = make_ssa_name (vec_dest, new_stmt);
8379 gimple_assign_set_lhs (new_stmt, new_temp);
8380 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8381 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8382 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8383 msq = new_temp;
8385 bump = size_binop (MULT_EXPR, vs,
8386 TYPE_SIZE_UNIT (elem_type));
8387 bump = size_binop (MINUS_EXPR, bump, size_one_node);
8388 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
8389 new_stmt = gimple_build_assign
8390 (NULL_TREE, BIT_AND_EXPR, ptr,
8391 build_int_cst
8392 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8393 ptr = copy_ssa_name (ptr, new_stmt);
8394 gimple_assign_set_lhs (new_stmt, ptr);
8395 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8396 data_ref
8397 = build2 (MEM_REF, vectype, ptr,
8398 build_int_cst (ref_type, 0));
8399 break;
8401 case dr_explicit_realign_optimized:
8403 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8404 new_temp = copy_ssa_name (dataref_ptr);
8405 else
8406 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8407 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8408 new_stmt = gimple_build_assign
8409 (new_temp, BIT_AND_EXPR, dataref_ptr,
8410 build_int_cst (TREE_TYPE (dataref_ptr),
8411 -(HOST_WIDE_INT) align));
8412 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8413 data_ref
8414 = build2 (MEM_REF, vectype, new_temp,
8415 build_int_cst (ref_type, 0));
8416 break;
8418 default:
8419 gcc_unreachable ();
8421 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8422 /* DATA_REF is null if we've already built the statement. */
8423 if (data_ref)
8425 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8426 new_stmt = gimple_build_assign (vec_dest, data_ref);
8428 new_temp = make_ssa_name (vec_dest, new_stmt);
8429 gimple_set_lhs (new_stmt, new_temp);
8430 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8432 /* 3. Handle explicit realignment if necessary/supported.
8433 Create in loop:
8434 vec_dest = realign_load (msq, lsq, realignment_token) */
8435 if (alignment_support_scheme == dr_explicit_realign_optimized
8436 || alignment_support_scheme == dr_explicit_realign)
8438 lsq = gimple_assign_lhs (new_stmt);
8439 if (!realignment_token)
8440 realignment_token = dataref_ptr;
8441 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8442 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8443 msq, lsq, realignment_token);
8444 new_temp = make_ssa_name (vec_dest, new_stmt);
8445 gimple_assign_set_lhs (new_stmt, new_temp);
8446 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8448 if (alignment_support_scheme == dr_explicit_realign_optimized)
8450 gcc_assert (phi);
8451 if (i == vec_num - 1 && j == ncopies - 1)
8452 add_phi_arg (phi, lsq,
8453 loop_latch_edge (containing_loop),
8454 UNKNOWN_LOCATION);
8455 msq = lsq;
8459 /* 4. Handle invariant-load. */
8460 if (inv_p && !bb_vinfo)
8462 gcc_assert (!grouped_load);
8463 /* If we have versioned for aliasing or the loop doesn't
8464 have any data dependencies that would preclude this,
8465 then we are sure this is a loop invariant load and
8466 thus we can insert it on the preheader edge. */
8467 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8468 && !nested_in_vect_loop
8469 && hoist_defs_of_uses (stmt, loop))
8471 if (dump_enabled_p ())
8473 dump_printf_loc (MSG_NOTE, vect_location,
8474 "hoisting out of the vectorized "
8475 "loop: ");
8476 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8478 tree tem = copy_ssa_name (scalar_dest);
8479 gsi_insert_on_edge_immediate
8480 (loop_preheader_edge (loop),
8481 gimple_build_assign (tem,
8482 unshare_expr
8483 (gimple_assign_rhs1 (stmt))));
8484 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
8485 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8486 set_vinfo_for_stmt (new_stmt,
8487 new_stmt_vec_info (new_stmt, vinfo));
8489 else
8491 gimple_stmt_iterator gsi2 = *gsi;
8492 gsi_next (&gsi2);
8493 new_temp = vect_init_vector (stmt, scalar_dest,
8494 vectype, &gsi2);
8495 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8499 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8501 tree perm_mask = perm_mask_for_reverse (vectype);
8502 new_temp = permute_vec_elements (new_temp, new_temp,
8503 perm_mask, stmt, gsi);
8504 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8507 /* Collect vector loads and later create their permutation in
8508 vect_transform_grouped_load (). */
8509 if (grouped_load || slp_perm)
8510 dr_chain.quick_push (new_temp);
8512 /* Store vector loads in the corresponding SLP_NODE. */
8513 if (slp && !slp_perm)
8514 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8516 /* With SLP permutation we load the gaps as well; without it
8517 we need to skip the gaps after we manage to fully load
8518 all elements. group_gap_adj is DR_GROUP_SIZE here. */
8519 group_elt += nunits;
8520 if (maybe_ne (group_gap_adj, 0U)
8521 && !slp_perm
8522 && known_eq (group_elt, group_size - group_gap_adj))
8524 poly_wide_int bump_val
8525 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8526 * group_gap_adj);
8527 tree bump = wide_int_to_tree (sizetype, bump_val);
8528 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8529 stmt, bump);
8530 group_elt = 0;
8533 /* Bump the vector pointer to account for a gap or for excess
8534 elements loaded for a permuted SLP load. */
8535 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8537 poly_wide_int bump_val
8538 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8539 * group_gap_adj);
8540 tree bump = wide_int_to_tree (sizetype, bump_val);
8541 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8542 stmt, bump);
8546 if (slp && !slp_perm)
8547 continue;
8549 if (slp_perm)
8551 unsigned n_perms;
8552 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8553 slp_node_instance, false,
8554 &n_perms))
8556 dr_chain.release ();
8557 return false;
8560 else
8562 if (grouped_load)
8564 if (memory_access_type != VMAT_LOAD_STORE_LANES)
8565 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
8566 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8568 else
8570 if (j == 0)
8571 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8572 else
8573 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8574 prev_stmt_info = vinfo_for_stmt (new_stmt);
8577 dr_chain.release ();
8580 return true;
8583 /* Function vect_is_simple_cond.
8585 Input:
8586 VINFO - the vectorization info of the loop or block being vectorized.
8587 COND - Condition that is checked for simple use.
8589 Output:
8590 *COMP_VECTYPE - the vector type for the comparison.
8591 *DTS - The def types for the arguments of the comparison.
8593 Returns whether a COND can be vectorized. Checks whether the
8594 condition operands are supportable using vect_is_simple_use. */
8596 static bool
8597 vect_is_simple_cond (tree cond, vec_info *vinfo,
8598 tree *comp_vectype, enum vect_def_type *dts,
8599 tree vectype)
8601 tree lhs, rhs;
8602 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8604 /* Mask case. */
8605 if (TREE_CODE (cond) == SSA_NAME
8606 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8608 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
8609 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
8610 &dts[0], comp_vectype)
8611 || !*comp_vectype
8612 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8613 return false;
8614 return true;
8617 if (!COMPARISON_CLASS_P (cond))
8618 return false;
8620 lhs = TREE_OPERAND (cond, 0);
8621 rhs = TREE_OPERAND (cond, 1);
8623 if (TREE_CODE (lhs) == SSA_NAME)
8625 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
8626 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
8627 return false;
8629 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8630 || TREE_CODE (lhs) == FIXED_CST)
8631 dts[0] = vect_constant_def;
8632 else
8633 return false;
8635 if (TREE_CODE (rhs) == SSA_NAME)
8637 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
8638 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
8639 return false;
8641 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8642 || TREE_CODE (rhs) == FIXED_CST)
8643 dts[1] = vect_constant_def;
8644 else
8645 return false;
8647 if (vectype1 && vectype2
8648 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8649 TYPE_VECTOR_SUBPARTS (vectype2)))
8650 return false;
8652 *comp_vectype = vectype1 ? vectype1 : vectype2;
8653 /* Invariant comparison. */
8654 if (! *comp_vectype && vectype)
8656 tree scalar_type = TREE_TYPE (lhs);
8657 /* If we can widen the comparison to match vectype do so. */
8658 if (INTEGRAL_TYPE_P (scalar_type)
8659 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8660 TYPE_SIZE (TREE_TYPE (vectype))))
8661 scalar_type = build_nonstandard_integer_type
8662 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8663 TYPE_UNSIGNED (scalar_type));
8664 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8667 return true;
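/* For example, both forms of a COND_EXPR condition pass this check
   (names illustrative): a scalar boolean mask defined by an earlier
   comparison,

     _1 = a_2 < b_3;
     x_4 = _1 ? c_5 : d_6;

   where *COMP_VECTYPE becomes the boolean vector type of _1's def, and
   an embedded comparison,

     x_4 = a_2 < b_3 ? c_5 : d_6;

   where *COMP_VECTYPE is derived from the comparison operands.  */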
8670 /* vectorizable_condition.
8672 Check if STMT is a conditional modify expression that can be vectorized.
8673 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8674 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8675 at GSI.
8677 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
8678 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in the
8679 else clause if it is 2).
8681 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8683 bool
8684 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8685 gimple **vec_stmt, tree reduc_def, int reduc_index,
8686 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
8688 tree scalar_dest = NULL_TREE;
8689 tree vec_dest = NULL_TREE;
8690 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8691 tree then_clause, else_clause;
8692 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8693 tree comp_vectype = NULL_TREE;
8694 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8695 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8696 tree vec_compare;
8697 tree new_temp;
8698 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8699 enum vect_def_type dts[4]
8700 = {vect_unknown_def_type, vect_unknown_def_type,
8701 vect_unknown_def_type, vect_unknown_def_type};
8702 int ndts = 4;
8703 int ncopies;
8704 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8705 stmt_vec_info prev_stmt_info = NULL;
8706 int i, j;
8707 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8708 vec<tree> vec_oprnds0 = vNULL;
8709 vec<tree> vec_oprnds1 = vNULL;
8710 vec<tree> vec_oprnds2 = vNULL;
8711 vec<tree> vec_oprnds3 = vNULL;
8712 tree vec_cmp_type;
8713 bool masked = false;
8715 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8716 return false;
8718 vect_reduction_type reduction_type
8719 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8720 if (reduction_type == TREE_CODE_REDUCTION)
8722 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8723 return false;
8725 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8726 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8727 && reduc_def))
8728 return false;
8730 /* FORNOW: not yet supported. */
8731 if (STMT_VINFO_LIVE_P (stmt_info))
8733 if (dump_enabled_p ())
8734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8735 "value used after loop.\n");
8736 return false;
8740 /* Is this a vectorizable conditional operation? */
8741 if (!is_gimple_assign (stmt))
8742 return false;
8744 code = gimple_assign_rhs_code (stmt);
8746 if (code != COND_EXPR)
8747 return false;
8749 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8750 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8752 if (slp_node)
8753 ncopies = 1;
8754 else
8755 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8757 gcc_assert (ncopies >= 1);
8758 if (reduc_index && ncopies > 1)
8759 return false; /* FORNOW */
8761 cond_expr = gimple_assign_rhs1 (stmt);
8762 then_clause = gimple_assign_rhs2 (stmt);
8763 else_clause = gimple_assign_rhs3 (stmt);
8765 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8766 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8767 || !comp_vectype)
8768 return false;
8770 gimple *def_stmt;
8771 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
8772 &vectype1))
8773 return false;
8774 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
8775 &vectype2))
8776 return false;
8778 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8779 return false;
8781 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8782 return false;
8784 masked = !COMPARISON_CLASS_P (cond_expr);
8785 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8787 if (vec_cmp_type == NULL_TREE)
8788 return false;
8790 cond_code = TREE_CODE (cond_expr);
8791 if (!masked)
8793 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8794 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8797 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8799 /* Boolean values may have another representation in vectors
8800 and therefore we prefer bit operations over comparison for
8801 them (which also works for scalar masks). We store opcodes
8802 to use in bitop1 and bitop2. Statement is vectorized as
8803 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8804 depending on bitop1 and bitop2 arity. */
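/* For example, a boolean condition a_1 > b_2 is vectorized with
   bitop1 == BIT_NOT_EXPR and bitop2 == BIT_AND_EXPR, i.e. roughly
   (SSA names illustrative):

     _3 = ~b_2;
     _4 = a_1 & _3;

   and _4 is then used as the VEC_COND_EXPR mask.  */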
8805 switch (cond_code)
8807 case GT_EXPR:
8808 bitop1 = BIT_NOT_EXPR;
8809 bitop2 = BIT_AND_EXPR;
8810 break;
8811 case GE_EXPR:
8812 bitop1 = BIT_NOT_EXPR;
8813 bitop2 = BIT_IOR_EXPR;
8814 break;
8815 case LT_EXPR:
8816 bitop1 = BIT_NOT_EXPR;
8817 bitop2 = BIT_AND_EXPR;
8818 std::swap (cond_expr0, cond_expr1);
8819 break;
8820 case LE_EXPR:
8821 bitop1 = BIT_NOT_EXPR;
8822 bitop2 = BIT_IOR_EXPR;
8823 std::swap (cond_expr0, cond_expr1);
8824 break;
8825 case NE_EXPR:
8826 bitop1 = BIT_XOR_EXPR;
8827 break;
8828 case EQ_EXPR:
8829 bitop1 = BIT_XOR_EXPR;
8830 bitop2 = BIT_NOT_EXPR;
8831 break;
8832 default:
8833 return false;
8835 cond_code = SSA_NAME;
8838 if (!vec_stmt)
8840 if (bitop1 != NOP_EXPR)
8842 machine_mode mode = TYPE_MODE (comp_vectype);
8843 optab optab;
8845 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8846 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8847 return false;
8849 if (bitop2 != NOP_EXPR)
8851 optab = optab_for_tree_code (bitop2, comp_vectype,
8852 optab_default);
8853 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8854 return false;
8857 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8858 cond_code))
8860 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8861 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8862 cost_vec);
8863 return true;
8865 return false;
8868 /* Transform. */
8870 if (!slp_node)
8872 vec_oprnds0.create (1);
8873 vec_oprnds1.create (1);
8874 vec_oprnds2.create (1);
8875 vec_oprnds3.create (1);
8878 /* Handle def. */
8879 scalar_dest = gimple_assign_lhs (stmt);
8880 if (reduction_type != EXTRACT_LAST_REDUCTION)
8881 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8883 /* Handle cond expr. */
8884 for (j = 0; j < ncopies; j++)
8886 gimple *new_stmt = NULL;
8887 if (j == 0)
8889 if (slp_node)
8891 auto_vec<tree, 4> ops;
8892 auto_vec<vec<tree>, 4> vec_defs;
8894 if (masked)
8895 ops.safe_push (cond_expr);
8896 else
8898 ops.safe_push (cond_expr0);
8899 ops.safe_push (cond_expr1);
8901 ops.safe_push (then_clause);
8902 ops.safe_push (else_clause);
8903 vect_get_slp_defs (ops, slp_node, &vec_defs);
8904 vec_oprnds3 = vec_defs.pop ();
8905 vec_oprnds2 = vec_defs.pop ();
8906 if (!masked)
8907 vec_oprnds1 = vec_defs.pop ();
8908 vec_oprnds0 = vec_defs.pop ();
8910 else
8912 gimple *gtemp;
8913 if (masked)
8915 vec_cond_lhs
8916 = vect_get_vec_def_for_operand (cond_expr, stmt,
8917 comp_vectype);
8918 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8919 &gtemp, &dts[0]);
8921 else
8923 vec_cond_lhs
8924 = vect_get_vec_def_for_operand (cond_expr0,
8925 stmt, comp_vectype);
8926 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8928 vec_cond_rhs
8929 = vect_get_vec_def_for_operand (cond_expr1,
8930 stmt, comp_vectype);
8931 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8933 if (reduc_index == 1)
8934 vec_then_clause = reduc_def;
8935 else
8937 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8938 stmt);
8939 vect_is_simple_use (then_clause, loop_vinfo,
8940 &gtemp, &dts[2]);
8942 if (reduc_index == 2)
8943 vec_else_clause = reduc_def;
8944 else
8946 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8947 stmt);
8948 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8952 else
8954 vec_cond_lhs
8955 = vect_get_vec_def_for_stmt_copy (dts[0],
8956 vec_oprnds0.pop ());
8957 if (!masked)
8958 vec_cond_rhs
8959 = vect_get_vec_def_for_stmt_copy (dts[1],
8960 vec_oprnds1.pop ());
8962 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8963 vec_oprnds2.pop ());
8964 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8965 vec_oprnds3.pop ());
8968 if (!slp_node)
8970 vec_oprnds0.quick_push (vec_cond_lhs);
8971 if (!masked)
8972 vec_oprnds1.quick_push (vec_cond_rhs);
8973 vec_oprnds2.quick_push (vec_then_clause);
8974 vec_oprnds3.quick_push (vec_else_clause);
8977 /* Arguments are ready. Create the new vector stmt. */
8978 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8980 vec_then_clause = vec_oprnds2[i];
8981 vec_else_clause = vec_oprnds3[i];
8983 if (masked)
8984 vec_compare = vec_cond_lhs;
8985 else
8987 vec_cond_rhs = vec_oprnds1[i];
8988 if (bitop1 == NOP_EXPR)
8989 vec_compare = build2 (cond_code, vec_cmp_type,
8990 vec_cond_lhs, vec_cond_rhs);
8991 else
8993 new_temp = make_ssa_name (vec_cmp_type);
8994 if (bitop1 == BIT_NOT_EXPR)
8995 new_stmt = gimple_build_assign (new_temp, bitop1,
8996 vec_cond_rhs);
8997 else
8998 new_stmt
8999 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
9000 vec_cond_rhs);
9001 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9002 if (bitop2 == NOP_EXPR)
9003 vec_compare = new_temp;
9004 else if (bitop2 == BIT_NOT_EXPR)
9006 /* Instead of doing ~x ? y : z do x ? z : y. */
9007 vec_compare = new_temp;
9008 std::swap (vec_then_clause, vec_else_clause);
9010 else
9012 vec_compare = make_ssa_name (vec_cmp_type);
9013 new_stmt
9014 = gimple_build_assign (vec_compare, bitop2,
9015 vec_cond_lhs, new_temp);
9016 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9020 if (reduction_type == EXTRACT_LAST_REDUCTION)
9022 if (!is_gimple_val (vec_compare))
9024 tree vec_compare_name = make_ssa_name (vec_cmp_type);
9025 new_stmt = gimple_build_assign (vec_compare_name,
9026 vec_compare);
9027 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9028 vec_compare = vec_compare_name;
9030 gcc_assert (reduc_index == 2);
9031 new_stmt = gimple_build_call_internal
9032 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9033 vec_then_clause);
9034 gimple_call_set_lhs (new_stmt, scalar_dest);
9035 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9036 if (stmt == gsi_stmt (*gsi))
9037 vect_finish_replace_stmt (stmt, new_stmt);
9038 else
9040 /* In this case we're moving the definition to later in the
9041 block. That doesn't matter because the only uses of the
9042 lhs are in phi statements. */
9043 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
9044 gsi_remove (&old_gsi, true);
9045 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9048 else
9050 new_temp = make_ssa_name (vec_dest);
9051 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
9052 vec_compare, vec_then_clause,
9053 vec_else_clause);
9054 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9056 if (slp_node)
9057 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9060 if (slp_node)
9061 continue;
9063 if (j == 0)
9064 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9065 else
9066 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9068 prev_stmt_info = vinfo_for_stmt (new_stmt);
9071 vec_oprnds0.release ();
9072 vec_oprnds1.release ();
9073 vec_oprnds2.release ();
9074 vec_oprnds3.release ();
9076 return true;
9079 /* vectorizable_comparison.
9081 Check if STMT is a comparison expression that can be vectorized.
9082 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
9083 comparison, put it in VEC_STMT, and insert it at GSI.
9085 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
9087 static bool
9088 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
9089 gimple **vec_stmt, tree reduc_def,
9090 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9092 tree lhs, rhs1, rhs2;
9093 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9094 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9095 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9096 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9097 tree new_temp;
9098 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9099 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
9100 int ndts = 2;
9101 poly_uint64 nunits;
9102 int ncopies;
9103 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9104 stmt_vec_info prev_stmt_info = NULL;
9105 int i, j;
9106 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9107 vec<tree> vec_oprnds0 = vNULL;
9108 vec<tree> vec_oprnds1 = vNULL;
9109 gimple *def_stmt;
9110 tree mask_type;
9111 tree mask;
9113 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9114 return false;
9116 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
9117 return false;
9119 mask_type = vectype;
9120 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9122 if (slp_node)
9123 ncopies = 1;
9124 else
9125 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9127 gcc_assert (ncopies >= 1);
9128 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9129 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9130 && reduc_def))
9131 return false;
9133 if (STMT_VINFO_LIVE_P (stmt_info))
9135 if (dump_enabled_p ())
9136 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9137 "value used after loop.\n");
9138 return false;
9141 if (!is_gimple_assign (stmt))
9142 return false;
9144 code = gimple_assign_rhs_code (stmt);
9146 if (TREE_CODE_CLASS (code) != tcc_comparison)
9147 return false;
9149 rhs1 = gimple_assign_rhs1 (stmt);
9150 rhs2 = gimple_assign_rhs2 (stmt);
9152 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
9153 &dts[0], &vectype1))
9154 return false;
9156 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
9157 &dts[1], &vectype2))
9158 return false;
9160 if (vectype1 && vectype2
9161 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9162 TYPE_VECTOR_SUBPARTS (vectype2)))
9163 return false;
9165 vectype = vectype1 ? vectype1 : vectype2;
9167 /* Invariant comparison. */
9168 if (!vectype)
9170 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9171 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9172 return false;
9174 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9175 return false;
9177 /* Can't compare mask and non-mask types. */
9178 if (vectype1 && vectype2
9179 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9180 return false;
9182 /* Boolean values may have another representation in vectors
9183 and therefore we prefer bit operations over comparison for
9184 them (which also works for scalar masks). We store opcodes
9185 to use in bitop1 and bitop2.  The statement is vectorized as
9186 BITOP2 (rhs1 BITOP1 rhs2) or
9187 rhs1 BITOP2 (BITOP1 rhs2)
9188 depending on bitop1 and bitop2 arity. */
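/* Editor's illustration (a sketch of the mapping chosen below, not part
   of the transform itself): for mask operands a and b this lowers

       res = a > b;    to    tmp = ~b;      res = a & tmp;
       res = a <= b;   to    tmp = ~a;      res = b | tmp;
       res = a == b;   to    tmp = a ^ b;   res = ~tmp;
       res = a != b;   to    res = a ^ b;

   which is equivalent because each boolean element only encodes true
   or false.  */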
9189 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9191 if (code == GT_EXPR)
9193 bitop1 = BIT_NOT_EXPR;
9194 bitop2 = BIT_AND_EXPR;
9196 else if (code == GE_EXPR)
9198 bitop1 = BIT_NOT_EXPR;
9199 bitop2 = BIT_IOR_EXPR;
9201 else if (code == LT_EXPR)
9203 bitop1 = BIT_NOT_EXPR;
9204 bitop2 = BIT_AND_EXPR;
9205 std::swap (rhs1, rhs2);
9206 std::swap (dts[0], dts[1]);
9208 else if (code == LE_EXPR)
9210 bitop1 = BIT_NOT_EXPR;
9211 bitop2 = BIT_IOR_EXPR;
9212 std::swap (rhs1, rhs2);
9213 std::swap (dts[0], dts[1]);
9215 else
9217 bitop1 = BIT_XOR_EXPR;
9218 if (code == EQ_EXPR)
9219 bitop2 = BIT_NOT_EXPR;
9223 if (!vec_stmt)
9225 if (bitop1 == NOP_EXPR)
9227 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9228 return false;
9230 else
9232 machine_mode mode = TYPE_MODE (vectype);
9233 optab optab;
9235 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9236 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9237 return false;
9239 if (bitop2 != NOP_EXPR)
9241 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9242 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9243 return false;
9247 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9248 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9249 dts, ndts, slp_node, cost_vec);
9250 return true;
9253 /* Transform. */
9254 if (!slp_node)
9256 vec_oprnds0.create (1);
9257 vec_oprnds1.create (1);
9260 /* Handle def. */
9261 lhs = gimple_assign_lhs (stmt);
9262 mask = vect_create_destination_var (lhs, mask_type);
9264 /* Handle cmp expr. */
9265 for (j = 0; j < ncopies; j++)
9267 gassign *new_stmt = NULL;
9268 if (j == 0)
9270 if (slp_node)
9272 auto_vec<tree, 2> ops;
9273 auto_vec<vec<tree>, 2> vec_defs;
9275 ops.safe_push (rhs1);
9276 ops.safe_push (rhs2);
9277 vect_get_slp_defs (ops, slp_node, &vec_defs);
9278 vec_oprnds1 = vec_defs.pop ();
9279 vec_oprnds0 = vec_defs.pop ();
9281 else
9283 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9284 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
9287 else
9289 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9290 vec_oprnds0.pop ());
9291 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9292 vec_oprnds1.pop ());
9295 if (!slp_node)
9297 vec_oprnds0.quick_push (vec_rhs1);
9298 vec_oprnds1.quick_push (vec_rhs2);
9301 /* Arguments are ready. Create the new vector stmt. */
9302 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9304 vec_rhs2 = vec_oprnds1[i];
9306 new_temp = make_ssa_name (mask);
9307 if (bitop1 == NOP_EXPR)
9309 new_stmt = gimple_build_assign (new_temp, code,
9310 vec_rhs1, vec_rhs2);
9311 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9313 else
9315 if (bitop1 == BIT_NOT_EXPR)
9316 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9317 else
9318 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9319 vec_rhs2);
9320 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9321 if (bitop2 != NOP_EXPR)
9323 tree res = make_ssa_name (mask);
9324 if (bitop2 == BIT_NOT_EXPR)
9325 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9326 else
9327 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9328 new_temp);
9329 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9332 if (slp_node)
9333 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9336 if (slp_node)
9337 continue;
9339 if (j == 0)
9340 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9341 else
9342 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9344 prev_stmt_info = vinfo_for_stmt (new_stmt);
9347 vec_oprnds0.release ();
9348 vec_oprnds1.release ();
9350 return true;
9353 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9354 can handle all live statements in the node. Otherwise return true
9355 if STMT is not live or if vectorizable_live_operation can handle it.
9356 GSI and VEC_STMT are as for vectorizable_live_operation. */
9358 static bool
9359 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
9360 slp_tree slp_node, gimple **vec_stmt,
9361 stmt_vector_for_cost *cost_vec)
9363 if (slp_node)
9365 gimple *slp_stmt;
9366 unsigned int i;
9367 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
9369 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
9370 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9371 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
9372 vec_stmt, cost_vec))
9373 return false;
9376 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
9377 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt,
9378 cost_vec))
9379 return false;
9381 return true;
9384 /* Make sure the statement is vectorizable. */
9386 bool
9387 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
9388 slp_instance node_instance, stmt_vector_for_cost *cost_vec)
9390 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9391 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9392 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9393 bool ok;
9394 gimple *pattern_stmt;
9395 gimple_seq pattern_def_seq;
9397 if (dump_enabled_p ())
9399 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9400 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9403 if (gimple_has_volatile_ops (stmt))
9405 if (dump_enabled_p ())
9406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9407 "not vectorized: stmt has volatile operands\n");
9409 return false;
9412 /* Skip stmts that do not need to be vectorized. In loops this is expected
9413 to include:
9414 - the COND_EXPR which is the loop exit condition
9415 - any LABEL_EXPRs in the loop
9416 - computations that are used only for array indexing or loop control.
9417 In basic blocks we only analyze statements that are a part of some SLP
9418 instance, therefore, all the statements are relevant.
9420 The pattern statement needs to be analyzed instead of the original statement
9421 if the original statement is not relevant.  Otherwise, we analyze both
9422 statements.  In basic blocks we are called from some SLP instance
9423 traversal, so don't analyze pattern stmts here; the pattern stmts
9424 are already part of the SLP instance. */
9426 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
9427 if (!STMT_VINFO_RELEVANT_P (stmt_info)
9428 && !STMT_VINFO_LIVE_P (stmt_info))
9430 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9431 && pattern_stmt
9432 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9433 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9435 /* Analyze PATTERN_STMT instead of the original stmt. */
9436 stmt = pattern_stmt;
9437 stmt_info = vinfo_for_stmt (pattern_stmt);
9438 if (dump_enabled_p ())
9440 dump_printf_loc (MSG_NOTE, vect_location,
9441 "==> examining pattern statement: ");
9442 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9445 else
9447 if (dump_enabled_p ())
9448 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9450 return true;
9453 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9454 && node == NULL
9455 && pattern_stmt
9456 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9457 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9459 /* Analyze PATTERN_STMT too. */
9460 if (dump_enabled_p ())
9462 dump_printf_loc (MSG_NOTE, vect_location,
9463 "==> examining pattern statement: ");
9464 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9467 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
9468 node_instance, cost_vec))
9469 return false;
9472 if (is_pattern_stmt_p (stmt_info)
9473 && node == NULL
9474 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9476 gimple_stmt_iterator si;
9478 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9480 gimple *pattern_def_stmt = gsi_stmt (si);
9481 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
9482 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
9484 /* Analyze def stmt of STMT if it's a pattern stmt. */
9485 if (dump_enabled_p ())
9487 dump_printf_loc (MSG_NOTE, vect_location,
9488 "==> examining pattern def statement: ");
9489 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
9492 if (!vect_analyze_stmt (pattern_def_stmt,
9493 need_to_vectorize, node, node_instance,
9494 cost_vec))
9495 return false;
9500 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9502 case vect_internal_def:
9503 break;
9505 case vect_reduction_def:
9506 case vect_nested_cycle:
9507 gcc_assert (!bb_vinfo
9508 && (relevance == vect_used_in_outer
9509 || relevance == vect_used_in_outer_by_reduction
9510 || relevance == vect_used_by_reduction
9511 || relevance == vect_unused_in_scope
9512 || relevance == vect_used_only_live));
9513 break;
9515 case vect_induction_def:
9516 gcc_assert (!bb_vinfo);
9517 break;
9519 case vect_constant_def:
9520 case vect_external_def:
9521 case vect_unknown_def_type:
9522 default:
9523 gcc_unreachable ();
9526 if (STMT_VINFO_RELEVANT_P (stmt_info))
9528 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
9529 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9530 || (is_gimple_call (stmt)
9531 && gimple_call_lhs (stmt) == NULL_TREE));
9532 *need_to_vectorize = true;
9535 if (PURE_SLP_STMT (stmt_info) && !node)
9537 dump_printf_loc (MSG_NOTE, vect_location,
9538 "handled only by SLP analysis\n");
9539 return true;
9542 ok = true;
9543 if (!bb_vinfo
9544 && (STMT_VINFO_RELEVANT_P (stmt_info)
9545 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9546 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9547 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9548 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9549 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9550 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9551 || vectorizable_load (stmt, NULL, NULL, node, node_instance, cost_vec)
9552 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9553 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9554 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance,
9555 cost_vec)
9556 || vectorizable_induction (stmt, NULL, NULL, node, cost_vec)
9557 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node, cost_vec)
9558 || vectorizable_comparison (stmt, NULL, NULL, NULL, node, cost_vec));
9559 else
9561 if (bb_vinfo)
9562 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9563 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9564 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9565 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9566 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9567 || vectorizable_load (stmt, NULL, NULL, node, node_instance,
9568 cost_vec)
9569 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9570 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9571 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node,
9572 cost_vec)
9573 || vectorizable_comparison (stmt, NULL, NULL, NULL, node,
9574 cost_vec));
9577 if (!ok)
9579 if (dump_enabled_p ())
9581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9582 "not vectorized: relevant stmt not ");
9583 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9584 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9587 return false;
9590 /* Stmts that are (also) "live" (i.e., used outside the loop)
9591 need extra handling, except for vectorizable reductions. */
9592 if (!bb_vinfo
9593 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9594 && !can_vectorize_live_stmts (stmt, NULL, node, NULL, cost_vec))
9596 if (dump_enabled_p ())
9598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9599 "not vectorized: live stmt not supported: ");
9600 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9603 return false;
9606 return true;
9610 /* Function vect_transform_stmt.
9612 Create a vectorized stmt to replace STMT, and insert it at GSI. */
9614 bool
9615 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
9616 bool *grouped_store, slp_tree slp_node,
9617 slp_instance slp_node_instance)
9619 bool is_store = false;
9620 gimple *vec_stmt = NULL;
9621 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9622 bool done;
9624 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9625 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9627 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9628 && nested_in_vect_loop_p
9629 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9630 stmt));
9632 switch (STMT_VINFO_TYPE (stmt_info))
9634 case type_demotion_vec_info_type:
9635 case type_promotion_vec_info_type:
9636 case type_conversion_vec_info_type:
9637 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node, NULL);
9638 gcc_assert (done);
9639 break;
9641 case induc_vec_info_type:
9642 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node, NULL);
9643 gcc_assert (done);
9644 break;
9646 case shift_vec_info_type:
9647 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node, NULL);
9648 gcc_assert (done);
9649 break;
9651 case op_vec_info_type:
9652 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node, NULL);
9653 gcc_assert (done);
9654 break;
9656 case assignment_vec_info_type:
9657 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node, NULL);
9658 gcc_assert (done);
9659 break;
9661 case load_vec_info_type:
9662 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
9663 slp_node_instance, NULL);
9664 gcc_assert (done);
9665 break;
9667 case store_vec_info_type:
9668 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node, NULL);
9669 gcc_assert (done);
9670 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9672 /* In case of interleaving, the whole chain is vectorized when the
9673 last store in the chain is reached. Store stmts before the last
9674 one are skipped, and their stmt_vec_info shouldn't be freed
9675 meanwhile. */
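	  /* Editor's note (a sketch of the mechanism): for an interleaved
	     group of four stores a[4*i] ... a[4*i+3], the first three
	     members only bump DR_GROUP_STORE_COUNT inside vectorizable_store;
	     the vector stores are emitted, and IS_STORE is set here, only
	     once the last member of the group has been transformed.  */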
9676 *grouped_store = true;
9677 stmt_vec_info group_info
9678 = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info));
9679 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
9680 is_store = true;
9682 else
9683 is_store = true;
9684 break;
9686 case condition_vec_info_type:
9687 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node, NULL);
9688 gcc_assert (done);
9689 break;
9691 case comparison_vec_info_type:
9692 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node, NULL);
9693 gcc_assert (done);
9694 break;
9696 case call_vec_info_type:
9697 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node, NULL);
9698 stmt = gsi_stmt (*gsi);
9699 break;
9701 case call_simd_clone_vec_info_type:
9702 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node, NULL);
9703 stmt = gsi_stmt (*gsi);
9704 break;
9706 case reduc_vec_info_type:
9707 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
9708 slp_node_instance, NULL);
9709 gcc_assert (done);
9710 break;
9712 default:
9713 if (!STMT_VINFO_LIVE_P (stmt_info))
9715 if (dump_enabled_p ())
9716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9717 "stmt not supported.\n");
9718 gcc_unreachable ();
9722 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9723 This would break hybrid SLP vectorization. */
9724 if (slp_node)
9725 gcc_assert (!vec_stmt
9726 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
9728 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9729 is being vectorized, but outside the immediately enclosing loop. */
9730 if (vec_stmt
9731 && nested_p
9732 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9733 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9734 || STMT_VINFO_RELEVANT (stmt_info) ==
9735 vect_used_in_outer_by_reduction))
9737 struct loop *innerloop = LOOP_VINFO_LOOP (
9738 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9739 imm_use_iterator imm_iter;
9740 use_operand_p use_p;
9741 tree scalar_dest;
9742 gimple *exit_phi;
9744 if (dump_enabled_p ())
9745 dump_printf_loc (MSG_NOTE, vect_location,
9746 "Record the vdef for outer-loop vectorization.\n");
9748 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9749 (to be used when vectorizing outer-loop stmts that use the DEF of
9750 STMT). */
9751 if (gimple_code (stmt) == GIMPLE_PHI)
9752 scalar_dest = PHI_RESULT (stmt);
9753 else
9754 scalar_dest = gimple_assign_lhs (stmt);
9756 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9758 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9760 exit_phi = USE_STMT (use_p);
9761 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9766 /* Handle stmts whose DEF is used outside the loop-nest that is
9767 being vectorized. */
9768 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9770 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt, NULL);
9771 gcc_assert (done);
9774 if (vec_stmt)
9775 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9777 return is_store;
9781 /* Remove a group of stores (for SLP or interleaving), free their
9782 stmt_vec_info. */
9784 void
9785 vect_remove_stores (gimple *first_stmt)
9787 gimple *next = first_stmt;
9788 gimple *tmp;
9789 gimple_stmt_iterator next_si;
9791 while (next)
9793 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9795 tmp = DR_GROUP_NEXT_ELEMENT (stmt_info);
9796 if (is_pattern_stmt_p (stmt_info))
9797 next = STMT_VINFO_RELATED_STMT (stmt_info);
9798 /* Free the attached stmt_vec_info and remove the stmt. */
9799 next_si = gsi_for_stmt (next);
9800 unlink_stmt_vdef (next);
9801 gsi_remove (&next_si, true);
9802 release_defs (next);
9803 free_stmt_vec_info (next);
9804 next = tmp;
9809 /* Function new_stmt_vec_info.
9811 Create and initialize a new stmt_vec_info struct for STMT. */
9813 stmt_vec_info
9814 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
9816 stmt_vec_info res;
9817 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9819 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9820 STMT_VINFO_STMT (res) = stmt;
9821 res->vinfo = vinfo;
9822 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
9823 STMT_VINFO_LIVE_P (res) = false;
9824 STMT_VINFO_VECTYPE (res) = NULL;
9825 STMT_VINFO_VEC_STMT (res) = NULL;
9826 STMT_VINFO_VECTORIZABLE (res) = true;
9827 STMT_VINFO_IN_PATTERN_P (res) = false;
9828 STMT_VINFO_RELATED_STMT (res) = NULL;
9829 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
9830 STMT_VINFO_DATA_REF (res) = NULL;
9831 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
9832 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
9834 if (gimple_code (stmt) == GIMPLE_PHI
9835 && is_loop_header_bb_p (gimple_bb (stmt)))
9836 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9837 else
9838 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9840 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9841 STMT_SLP_TYPE (res) = loop_vect;
9842 STMT_VINFO_NUM_SLP_USES (res) = 0;
9844 res->first_element = NULL; /* GROUP_FIRST_ELEMENT */
9845 res->next_element = NULL; /* GROUP_NEXT_ELEMENT */
9846 res->size = 0; /* GROUP_SIZE */
9847 res->store_count = 0; /* GROUP_STORE_COUNT */
9848 res->gap = 0; /* GROUP_GAP */
9849 res->same_dr_stmt = NULL; /* GROUP_SAME_DR_STMT */
9851 return res;
9855 /* Set the current stmt_vec_info vector to V. */
9857 void
9858 set_stmt_vec_info_vec (vec<stmt_vec_info> *v)
9860 stmt_vec_info_vec = v;
9863 /* Free the stmt_vec_info entries in V and release V. */
9865 void
9866 free_stmt_vec_infos (vec<stmt_vec_info> *v)
9868 unsigned int i;
9869 stmt_vec_info info;
9870 FOR_EACH_VEC_ELT (*v, i, info)
9871 if (info != NULL)
9872 free_stmt_vec_info (STMT_VINFO_STMT (info));
9873 if (v == stmt_vec_info_vec)
9874 stmt_vec_info_vec = NULL;
9875 v->release ();
9879 /* Free stmt vectorization related info. */
9881 void
9882 free_stmt_vec_info (gimple *stmt)
9884 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9886 if (!stmt_info)
9887 return;
9889 /* Check if this statement has a related "pattern stmt"
9890 (introduced by the vectorizer during the pattern recognition
9891 pass).  Free the pattern's stmt_vec_info and the def stmts'
9892 stmt_vec_infos too. */
9893 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9895 stmt_vec_info patt_info
9896 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9897 if (patt_info)
9899 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9900 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9901 gimple_set_bb (patt_stmt, NULL);
9902 tree lhs = gimple_get_lhs (patt_stmt);
9903 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9904 release_ssa_name (lhs);
9905 if (seq)
9907 gimple_stmt_iterator si;
9908 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9910 gimple *seq_stmt = gsi_stmt (si);
9911 gimple_set_bb (seq_stmt, NULL);
9912 lhs = gimple_get_lhs (seq_stmt);
9913 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9914 release_ssa_name (lhs);
9915 free_stmt_vec_info (seq_stmt);
9918 free_stmt_vec_info (patt_stmt);
9922 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9923 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9924 set_vinfo_for_stmt (stmt, NULL);
9925 free (stmt_info);
9929 /* Function get_vectype_for_scalar_type_and_size.
9931 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9932 by the target. */
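/* Editor's example (hypothetical 16-byte target): with SCALAR_TYPE == int
   and SIZE == 16 this would return V4SI (16 bytes / 4-byte elements ==
   4 units), while SIZE == 0 defers the choice of mode to
   targetm.vectorize.preferred_simd_mode and derives the number of units
   from that mode's size.  */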
9934 tree
9935 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9937 tree orig_scalar_type = scalar_type;
9938 scalar_mode inner_mode;
9939 machine_mode simd_mode;
9940 poly_uint64 nunits;
9941 tree vectype;
9943 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9944 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9945 return NULL_TREE;
9947 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9949 /* For vector types of elements whose mode precision doesn't
9950 match their type's precision we use an element type of mode
9951 precision. The vectorization routines will have to make sure
9952 they support the proper result truncation/extension.
9953 We also make sure to build vector types with INTEGER_TYPE
9954 component type only. */
9955 if (INTEGRAL_TYPE_P (scalar_type)
9956 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9957 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9958 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9959 TYPE_UNSIGNED (scalar_type));
9961 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9962 When the component mode passes the above test simply use a type
9963 corresponding to that mode. The theory is that any use that
9964 would cause problems with this will disable vectorization anyway. */
9965 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9966 && !INTEGRAL_TYPE_P (scalar_type))
9967 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9969 /* We can't build a vector type of elements with alignment bigger than
9970 their size. */
9971 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9972 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9973 TYPE_UNSIGNED (scalar_type));
9975 /* If we fell back to using the mode, fail if there was
9976 no scalar type for it. */
9977 if (scalar_type == NULL_TREE)
9978 return NULL_TREE;
9980 /* If no size was supplied use the mode the target prefers. Otherwise
9981 look up a vector mode of the specified size. */
9982 if (known_eq (size, 0U))
9983 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9984 else if (!multiple_p (size, nbytes, &nunits)
9985 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9986 return NULL_TREE;
9987 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9988 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9989 return NULL_TREE;
9991 vectype = build_vector_type (scalar_type, nunits);
9993 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9994 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9995 return NULL_TREE;
9997 /* Re-attach the address-space qualifier if we canonicalized the scalar
9998 type. */
9999 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
10000 return build_qualified_type
10001 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
10003 return vectype;
10006 poly_uint64 current_vector_size;
10008 /* Function get_vectype_for_scalar_type.
10010 Returns the vector type corresponding to SCALAR_TYPE as supported
10011 by the target. */
10013 tree
10014 get_vectype_for_scalar_type (tree scalar_type)
10016 tree vectype;
10017 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
10018 current_vector_size);
10019 if (vectype
10020 && known_eq (current_vector_size, 0U))
10021 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
10022 return vectype;
10025 /* Function get_mask_type_for_scalar_type.
10027 Returns the mask type corresponding to a result of comparison
10028 of vectors of the specified SCALAR_TYPE, as supported by the target. */
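/* Editor's example (assuming current_vector_size is 16 bytes): for
   SCALAR_TYPE == int the data vector is V4SI, so the mask type returned
   by build_truth_vector_type is a 4-element boolean vector; whether that
   is a byte mask or a single-bit predicate depends on the target.  */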
10030 tree
10031 get_mask_type_for_scalar_type (tree scalar_type)
10033 tree vectype = get_vectype_for_scalar_type (scalar_type);
10035 if (!vectype)
10036 return NULL;
10038 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
10039 current_vector_size);
10042 /* Function get_same_sized_vectype
10044 Returns a vector type corresponding to SCALAR_TYPE with the same
10045 size as VECTOR_TYPE, if supported by the target. */
10047 tree
10048 get_same_sized_vectype (tree scalar_type, tree vector_type)
10050 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10051 return build_same_sized_truth_vector_type (vector_type);
10053 return get_vectype_for_scalar_type_and_size
10054 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
10057 /* Function vect_is_simple_use.
10059 Input:
10060 VINFO - the vect info of the loop or basic block that is being vectorized.
10061 OPERAND - operand in the loop or bb.
10062 Output:
10063 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
10064 DT - the type of definition
10066 Returns whether a stmt with OPERAND can be vectorized.
10067 For loops, supportable operands are constants, loop invariants, and operands
10068 that are defined by the current iteration of the loop. Unsupportable
10069 operands are those that are defined by a previous iteration of the loop (as
10070 is the case in reduction/induction computations).
10071 For basic blocks, supportable operands are constants and bb invariants.
10072 For now, operands defined outside the basic block are not supported. */
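/* Editor's example (a sketch of the classification below): an integer
   constant operand is vect_constant_def; an SSA name defined outside the
   region being vectorized (e.g. a value computed before the loop) is
   vect_external_def; and an SSA name defined by a statement inside the
   region takes whatever STMT_VINFO_DEF_TYPE records for that statement,
   typically vect_internal_def.  */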
10074 bool
10075 vect_is_simple_use (tree operand, vec_info *vinfo,
10076 gimple **def_stmt, enum vect_def_type *dt)
10078 *def_stmt = NULL;
10079 *dt = vect_unknown_def_type;
10081 if (dump_enabled_p ())
10083 dump_printf_loc (MSG_NOTE, vect_location,
10084 "vect_is_simple_use: operand ");
10085 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
10086 dump_printf (MSG_NOTE, "\n");
10089 if (CONSTANT_CLASS_P (operand))
10091 *dt = vect_constant_def;
10092 return true;
10095 if (is_gimple_min_invariant (operand))
10097 *dt = vect_external_def;
10098 return true;
10101 if (TREE_CODE (operand) != SSA_NAME)
10103 if (dump_enabled_p ())
10104 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10105 "not ssa-name.\n");
10106 return false;
10109 if (SSA_NAME_IS_DEFAULT_DEF (operand))
10111 *dt = vect_external_def;
10112 return true;
10115 *def_stmt = SSA_NAME_DEF_STMT (operand);
10116 if (dump_enabled_p ())
10118 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
10119 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
10122 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
10123 *dt = vect_external_def;
10124 else
10126 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
10127 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
10130 if (dump_enabled_p ())
10132 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
10133 switch (*dt)
10135 case vect_uninitialized_def:
10136 dump_printf (MSG_NOTE, "uninitialized\n");
10137 break;
10138 case vect_constant_def:
10139 dump_printf (MSG_NOTE, "constant\n");
10140 break;
10141 case vect_external_def:
10142 dump_printf (MSG_NOTE, "external\n");
10143 break;
10144 case vect_internal_def:
10145 dump_printf (MSG_NOTE, "internal\n");
10146 break;
10147 case vect_induction_def:
10148 dump_printf (MSG_NOTE, "induction\n");
10149 break;
10150 case vect_reduction_def:
10151 dump_printf (MSG_NOTE, "reduction\n");
10152 break;
10153 case vect_double_reduction_def:
10154 dump_printf (MSG_NOTE, "double reduction\n");
10155 break;
10156 case vect_nested_cycle:
10157 dump_printf (MSG_NOTE, "nested cycle\n");
10158 break;
10159 case vect_unknown_def_type:
10160 dump_printf (MSG_NOTE, "unknown\n");
10161 break;
10165 if (*dt == vect_unknown_def_type)
10167 if (dump_enabled_p ())
10168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10169 "Unsupported pattern.\n");
10170 return false;
10173 switch (gimple_code (*def_stmt))
10175 case GIMPLE_PHI:
10176 case GIMPLE_ASSIGN:
10177 case GIMPLE_CALL:
10178 break;
10179 default:
10180 if (dump_enabled_p ())
10181 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10182 "unsupported defining stmt:\n");
10183 return false;
10186 return true;
10189 /* Function vect_is_simple_use.
10191 Same as vect_is_simple_use but also determines the vector operand
10192 type of OPERAND and stores it to *VECTYPE. If the definition of
10193 OPERAND is vect_uninitialized_def, vect_constant_def or
10194 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10195 is responsible for computing the best-suited vector type for the
10196 scalar operand. */
10198 bool
10199 vect_is_simple_use (tree operand, vec_info *vinfo,
10200 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
10202 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
10203 return false;
10205 /* Now get a vector type if the def is internal, otherwise supply
10206 NULL_TREE and leave it up to the caller to figure out a proper
10207 type for the use stmt. */
10208 if (*dt == vect_internal_def
10209 || *dt == vect_induction_def
10210 || *dt == vect_reduction_def
10211 || *dt == vect_double_reduction_def
10212 || *dt == vect_nested_cycle)
10214 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
10216 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10217 && !STMT_VINFO_RELEVANT (stmt_info)
10218 && !STMT_VINFO_LIVE_P (stmt_info))
10219 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
10221 *vectype = STMT_VINFO_VECTYPE (stmt_info);
10222 gcc_assert (*vectype != NULL_TREE);
10224 else if (*dt == vect_uninitialized_def
10225 || *dt == vect_constant_def
10226 || *dt == vect_external_def)
10227 *vectype = NULL_TREE;
10228 else
10229 gcc_unreachable ();
10231 return true;
10235 /* Function supportable_widening_operation
10237 Check whether an operation represented by the code CODE is a
10238 widening operation that is supported by the target platform in
10239 vector form (i.e., when operating on arguments of type VECTYPE_IN
10240 producing a result of type VECTYPE_OUT).
10242 Widening operations we currently support are NOP (CONVERT), FLOAT, FIX_TRUNC,
10243 WIDEN_MULT, WIDEN_LSHIFT, DOT_PROD and SAD.  This function checks if these operations
10244 are supported by the target platform either directly (via vector
10245 tree-codes), or via target builtins.
10247 Output:
10248 - CODE1 and CODE2 are codes of vector operations to be used when
10249 vectorizing the operation, if available.
10250 - MULTI_STEP_CVT determines the number of required intermediate steps in
10251 case of multi-step conversion (like char->short->int - in that case
10252 MULTI_STEP_CVT will be 1).
10253 - INTERM_TYPES contains the intermediate type required to perform the
10254 widening operation (short in the above example). */
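/* Editor's example (illustrative; the exact modes are target-dependent):
   widening char to int usually cannot be done in one step, so for
   CODE == NOP_EXPR with VECTYPE_IN == V16QI and VECTYPE_OUT == V4SI this
   would return CODE1/CODE2 == VEC_UNPACK_LO/HI_EXPR, MULTI_STEP_CVT == 1
   and INTERM_TYPES == { V8HI }, provided the target implements the
   unpack optabs for both steps.  */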
10256 bool
10257 supportable_widening_operation (enum tree_code code, gimple *stmt,
10258 tree vectype_out, tree vectype_in,
10259 enum tree_code *code1, enum tree_code *code2,
10260 int *multi_step_cvt,
10261 vec<tree> *interm_types)
10263 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
10264 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10265 struct loop *vect_loop = NULL;
10266 machine_mode vec_mode;
10267 enum insn_code icode1, icode2;
10268 optab optab1, optab2;
10269 tree vectype = vectype_in;
10270 tree wide_vectype = vectype_out;
10271 enum tree_code c1, c2;
10272 int i;
10273 tree prev_type, intermediate_type;
10274 machine_mode intermediate_mode, prev_mode;
10275 optab optab3, optab4;
10277 *multi_step_cvt = 0;
10278 if (loop_info)
10279 vect_loop = LOOP_VINFO_LOOP (loop_info);
10281 switch (code)
10283 case WIDEN_MULT_EXPR:
10284 /* The result of a vectorized widening operation usually requires
10285 two vectors (because the widened results do not fit into one vector).
10286 The generated vector results would normally be expected to be
10287 generated in the same order as in the original scalar computation,
10288 i.e. if 8 results are generated in each vector iteration, they are
10289 to be organized as follows:
10290 vect1: [res1,res2,res3,res4],
10291 vect2: [res5,res6,res7,res8].
10293 However, in the special case that the result of the widening
10294 operation is used in a reduction computation only, the order doesn't
10295 matter (because when vectorizing a reduction we change the order of
10296 the computation). Some targets can take advantage of this and
10297 generate more efficient code. For example, targets like Altivec,
10298 that support widen_mult using a sequence of {mult_even,mult_odd}
10299 generate the following vectors:
10300 vect1: [res1,res3,res5,res7],
10301 vect2: [res2,res4,res6,res8].
10303 When vectorizing outer-loops, we execute the inner-loop sequentially
10304 (each vectorized inner-loop iteration contributes to VF outer-loop
10305 iterations in parallel).  We therefore don't allow changing the
10306 order of the computation in the inner-loop during outer-loop
10307 vectorization. */
10308 /* TODO: Another case in which order doesn't *really* matter is when we
10309 widen and then contract again, e.g. (short)((int)x * y >> 8).
10310 Normally, pack_trunc performs an even/odd permute, whereas the
10311 repack from an even/odd expansion would be an interleave, which
10312 would be significantly simpler for e.g. AVX2. */
10313 /* In any case, in order to avoid duplicating the code below, recurse
10314 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10315 are properly set up for the caller. If we fail, we'll continue with
10316 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10317 if (vect_loop
10318 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10319 && !nested_in_vect_loop_p (vect_loop, stmt)
10320 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10321 stmt, vectype_out, vectype_in,
10322 code1, code2, multi_step_cvt,
10323 interm_types))
10325 /* Elements in a vector with vect_used_by_reduction property cannot
10326 be reordered if the use chain with this property does not have the
10327 same operation.  One such example is s += a * b, where elements
10328 in a and b cannot be reordered. Here we check if the vector defined
10329 by STMT is only directly used in the reduction statement. */
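	  /* Editor's example: in  s += (int) a[i] * (int) b[i]  the widened
	     product feeds the reduction directly, so the even/odd order is
	     fine; in  c[i] = (short) (((int) a[i] * (int) b[i]) >> 8)  the
	     product has a non-reduction use, the check below fails, and we
	     fall through to the LO/HI variant.  */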
10330 tree lhs = gimple_assign_lhs (stmt);
10331 use_operand_p dummy;
10332 gimple *use_stmt;
10333 stmt_vec_info use_stmt_info = NULL;
10334 if (single_imm_use (lhs, &dummy, &use_stmt)
10335 && (use_stmt_info = vinfo_for_stmt (use_stmt))
10336 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10337 return true;
10339 c1 = VEC_WIDEN_MULT_LO_EXPR;
10340 c2 = VEC_WIDEN_MULT_HI_EXPR;
10341 break;
10343 case DOT_PROD_EXPR:
10344 c1 = DOT_PROD_EXPR;
10345 c2 = DOT_PROD_EXPR;
10346 break;
10348 case SAD_EXPR:
10349 c1 = SAD_EXPR;
10350 c2 = SAD_EXPR;
10351 break;
10353 case VEC_WIDEN_MULT_EVEN_EXPR:
10354 /* Support the recursion induced just above. */
10355 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10356 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10357 break;
10359 case WIDEN_LSHIFT_EXPR:
10360 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10361 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10362 break;
10364 CASE_CONVERT:
10365 c1 = VEC_UNPACK_LO_EXPR;
10366 c2 = VEC_UNPACK_HI_EXPR;
10367 break;
10369 case FLOAT_EXPR:
10370 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10371 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10372 break;
10374 case FIX_TRUNC_EXPR:
10375 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10376 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10377 break;
10379 default:
10380 gcc_unreachable ();
10383 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10384 std::swap (c1, c2);
10386 if (code == FIX_TRUNC_EXPR)
10388 /* The signedness is determined from the output operand. */
10389 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10390 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10392 else
10394 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10395 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10398 if (!optab1 || !optab2)
10399 return false;
10401 vec_mode = TYPE_MODE (vectype);
10402 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10403 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10404 return false;
10406 *code1 = c1;
10407 *code2 = c2;
10409 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10410 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10411 /* For scalar masks we may have different boolean
10412 vector types having the same QImode. Thus we
10413 add an additional check for the number of elements. */
10414 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10415 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10416 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10418 /* Check if it's a multi-step conversion that can be done using intermediate
10419 types. */
10421 prev_type = vectype;
10422 prev_mode = vec_mode;
10424 if (!CONVERT_EXPR_CODE_P (code))
10425 return false;
10427 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10428 intermediate steps in the promotion sequence.  We try
10429 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
10430 not. */
10431 interm_types->create (MAX_INTERM_CVT_STEPS);
10432 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10434 intermediate_mode = insn_data[icode1].operand[0].mode;
10435 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10437 intermediate_type = vect_halve_mask_nunits (prev_type);
10438 if (intermediate_mode != TYPE_MODE (intermediate_type))
10439 return false;
10441 else
10442 intermediate_type
10443 = lang_hooks.types.type_for_mode (intermediate_mode,
10444 TYPE_UNSIGNED (prev_type));
10446 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10447 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10449 if (!optab3 || !optab4
10450 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10451 || insn_data[icode1].operand[0].mode != intermediate_mode
10452 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10453 || insn_data[icode2].operand[0].mode != intermediate_mode
10454 || ((icode1 = optab_handler (optab3, intermediate_mode))
10455 == CODE_FOR_nothing)
10456 || ((icode2 = optab_handler (optab4, intermediate_mode))
10457 == CODE_FOR_nothing))
10458 break;
10460 interm_types->quick_push (intermediate_type);
10461 (*multi_step_cvt)++;
10463 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10464 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10465 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10466 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10467 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10469 prev_type = intermediate_type;
10470 prev_mode = intermediate_mode;
10473 interm_types->release ();
10474 return false;
10478 /* Function supportable_narrowing_operation
10480 Check whether an operation represented by the code CODE is a
10481 narrowing operation that is supported by the target platform in
10482 vector form (i.e., when operating on arguments of type VECTYPE_IN
10483 and producing a result of type VECTYPE_OUT).
10485 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10486 and FLOAT. This function checks if these operations are supported by
10487 the target platform directly via vector tree-codes.
10489 Output:
10490 - CODE1 is the code of a vector operation to be used when
10491 vectorizing the operation, if available.
10492 - MULTI_STEP_CVT determines the number of required intermediate steps in
10493 case of multi-step conversion (like int->short->char - in that case
10494 MULTI_STEP_CVT will be 1).
10495 - INTERM_TYPES contains the intermediate type required to perform the
10496 narrowing operation (short in the above example). */
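/* Editor's example (illustrative; the exact modes are target-dependent):
   narrowing int to char, e.g. CODE == NOP_EXPR with VECTYPE_IN == V4SI
   and VECTYPE_OUT == V16QI, usually needs an intermediate V8HI step, so
   this would return CODE1 == VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT == 1 and
   INTERM_TYPES == { V8HI } when the target provides the pack-trunc optabs
   for both steps.  */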
10498 bool
10499 supportable_narrowing_operation (enum tree_code code,
10500 tree vectype_out, tree vectype_in,
10501 enum tree_code *code1, int *multi_step_cvt,
10502 vec<tree> *interm_types)
10504 machine_mode vec_mode;
10505 enum insn_code icode1;
10506 optab optab1, interm_optab;
10507 tree vectype = vectype_in;
10508 tree narrow_vectype = vectype_out;
10509 enum tree_code c1;
10510 tree intermediate_type, prev_type;
10511 machine_mode intermediate_mode, prev_mode;
10512 int i;
10513 bool uns;
10515 *multi_step_cvt = 0;
10516 switch (code)
10518 CASE_CONVERT:
10519 c1 = VEC_PACK_TRUNC_EXPR;
10520 break;
10522 case FIX_TRUNC_EXPR:
10523 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10524 break;
10526 case FLOAT_EXPR:
10527 c1 = VEC_PACK_FLOAT_EXPR;
10528 break;
10530 default:
10531 gcc_unreachable ();
10534 if (code == FIX_TRUNC_EXPR)
10535 /* The signedness is determined from the output operand. */
10536 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10537 else
10538 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10540 if (!optab1)
10541 return false;
10543 vec_mode = TYPE_MODE (vectype);
10544 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10545 return false;
10547 *code1 = c1;
10549 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10550 /* For scalar masks we may have different boolean
10551 vector types having the same QImode. Thus we
10552 add an additional check for the number of elements. */
10553 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10554 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10555 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10557 if (code == FLOAT_EXPR)
10558 return false;
10560 /* Check if it's a multi-step conversion that can be done using intermediate
10561 types. */
10562 prev_mode = vec_mode;
10563 prev_type = vectype;
10564 if (code == FIX_TRUNC_EXPR)
10565 uns = TYPE_UNSIGNED (vectype_out);
10566 else
10567 uns = TYPE_UNSIGNED (vectype);
10569 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10570 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10571 costly than signed. */
10572 if (code == FIX_TRUNC_EXPR && uns)
10574 enum insn_code icode2;
10576 intermediate_type
10577 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10578 interm_optab
10579 = optab_for_tree_code (c1, intermediate_type, optab_default);
10580 if (interm_optab != unknown_optab
10581 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10582 && insn_data[icode1].operand[0].mode
10583 == insn_data[icode2].operand[0].mode)
10585 uns = false;
10586 optab1 = interm_optab;
10587 icode1 = icode2;
10591 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10592 intermediate steps in the narrowing sequence.  We try
10593 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
10594 interm_types->create (MAX_INTERM_CVT_STEPS);
10595 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10597 intermediate_mode = insn_data[icode1].operand[0].mode;
10598 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10600 intermediate_type = vect_double_mask_nunits (prev_type);
10601 if (intermediate_mode != TYPE_MODE (intermediate_type))
10602 return false;
10604 else
10605 intermediate_type
10606 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10607 interm_optab
10608 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10609 optab_default);
10610 if (!interm_optab
10611 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10612 || insn_data[icode1].operand[0].mode != intermediate_mode
10613 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10614 == CODE_FOR_nothing))
10615 break;
10617 interm_types->quick_push (intermediate_type);
10618 (*multi_step_cvt)++;
10620 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10621 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10622 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10623 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10625 prev_mode = intermediate_mode;
10626 prev_type = intermediate_type;
10627 optab1 = interm_optab;
10630 interm_types->release ();
10631 return false;
10634 /* Generate and return a statement that sets vector mask MASK such that
10635 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
10637 gcall *
10638 vect_gen_while (tree mask, tree start_index, tree end_index)
10640 tree cmp_type = TREE_TYPE (start_index);
10641 tree mask_type = TREE_TYPE (mask);
10642 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10643 cmp_type, mask_type,
10644 OPTIMIZE_FOR_SPEED));
10645 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10646 start_index, end_index,
10647 build_zero_cst (mask_type));
10648 gimple_call_set_lhs (call, mask);
10649 return call;
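/* Editor's worked example for vect_gen_while: with START_INDEX == 13,
   END_INDEX == 16 and a 4-element mask, IFN_WHILE_ULT yields
   { 13 < 16, 14 < 16, 15 < 16, 16 < 16 } == { true, true, true, false },
   i.e. exactly the active lanes of a final partial vector iteration.  */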
10652 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10653 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10655 tree
10656 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10657 tree end_index)
10659 tree tmp = make_ssa_name (mask_type);
10660 gcall *call = vect_gen_while (tmp, start_index, end_index);
10661 gimple_seq_add_stmt (seq, call);
10662 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10665 /* Try to compute the vector types required to vectorize STMT_INFO,
10666 returning true on success and false if vectorization isn't possible.
10668 On success:
10670 - Set *STMT_VECTYPE_OUT to:
10671 - NULL_TREE if the statement doesn't need to be vectorized;
10672 - boolean_type_node if the statement is a boolean operation whose
10673 vector type can only be determined once all the other vector types
10674 are known; and
10675 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10677 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10678 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10679 statement does not help to determine the overall number of units. */
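/* Editor's example (a sketch): for  res = x < y  with int operands and a
   boolean result, *STMT_VECTYPE_OUT is set to the boolean_type_node
   placeholder (the real mask vector type is chosen later, see
   vect_get_mask_type_for_stmt below) while *NUNITS_VECTYPE_OUT is derived
   from the compared int type; for an ordinary int addition both outputs
   are simply the int vector type.  */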
10681 bool
10682 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10683 tree *stmt_vectype_out,
10684 tree *nunits_vectype_out)
10686 gimple *stmt = stmt_info->stmt;
10688 *stmt_vectype_out = NULL_TREE;
10689 *nunits_vectype_out = NULL_TREE;
10691 if (gimple_get_lhs (stmt) == NULL_TREE
10692 /* MASK_STORE has no lhs, but is ok. */
10693 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10695 if (is_a <gcall *> (stmt))
10697 /* Ignore calls with no lhs. These must be calls to
10698 #pragma omp simd functions, and the vectorization factor
10699 they really need can't be determined until
10700 vectorizable_simd_clone_call. */
10701 if (dump_enabled_p ())
10702 dump_printf_loc (MSG_NOTE, vect_location,
10703 "defer to SIMD clone analysis.\n");
10704 return true;
10707 if (dump_enabled_p ())
10709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10710 "not vectorized: irregular stmt.");
10711 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10713 return false;
10716 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10718 if (dump_enabled_p ())
10720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10721 "not vectorized: vector stmt in loop:");
10722 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10724 return false;
10727 tree vectype;
10728 tree scalar_type = NULL_TREE;
10729 if (STMT_VINFO_VECTYPE (stmt_info))
10730 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10731 else
10733 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10734 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10735 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10736 else
10737 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10739 /* Pure bool ops don't participate in number-of-units computation.
10740 For comparisons use the types being compared. */
10741 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10742 && is_gimple_assign (stmt)
10743 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10745 *stmt_vectype_out = boolean_type_node;
10747 tree rhs1 = gimple_assign_rhs1 (stmt);
10748 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10749 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10750 scalar_type = TREE_TYPE (rhs1);
10751 else
10753 if (dump_enabled_p ())
10754 dump_printf_loc (MSG_NOTE, vect_location,
10755 "pure bool operation.\n");
10756 return true;
10760 if (dump_enabled_p ())
10762 dump_printf_loc (MSG_NOTE, vect_location,
10763 "get vectype for scalar type: ");
10764 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10765 dump_printf (MSG_NOTE, "\n");
10767 vectype = get_vectype_for_scalar_type (scalar_type);
10768 if (!vectype)
10770 if (dump_enabled_p ())
10772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10773 "not vectorized: unsupported data-type ");
10774 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10775 scalar_type);
10776 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10778 return false;
10781 if (!*stmt_vectype_out)
10782 *stmt_vectype_out = vectype;
10784 if (dump_enabled_p ())
10786 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10787 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
10788 dump_printf (MSG_NOTE, "\n");
10792 /* Don't try to compute scalar types if the stmt produces a boolean
10793 vector; use the existing vector type instead. */
10794 tree nunits_vectype;
10795 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10796 nunits_vectype = vectype;
10797 else
10799 /* The number of units is set according to the smallest scalar
10800 type (or the largest vector size, but we only support one
10801 vector size per vectorization). */
10802 if (*stmt_vectype_out != boolean_type_node)
10804 HOST_WIDE_INT dummy;
10805 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
10807 if (dump_enabled_p ())
10809 dump_printf_loc (MSG_NOTE, vect_location,
10810 "get vectype for scalar type: ");
10811 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10812 dump_printf (MSG_NOTE, "\n");
10814 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10816 if (!nunits_vectype)
10818 if (dump_enabled_p ())
10820 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10821 "not vectorized: unsupported data-type ");
10822 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
10823 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10825 return false;
10828 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10829 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10831 if (dump_enabled_p ())
10833 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10834 "not vectorized: different sized vector "
10835 "types in statement, ");
10836 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
10837 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10838 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, nunits_vectype);
10839 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10841 return false;
10844 if (dump_enabled_p ())
10846 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10847 dump_generic_expr (MSG_NOTE, TDF_SLIM, nunits_vectype);
10848 dump_printf (MSG_NOTE, "\n");
10850 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10851 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10852 dump_printf (MSG_NOTE, "\n");
10855 *nunits_vectype_out = nunits_vectype;
10856 return true;
10859 /* Try to determine the correct vector type for STMT_INFO, which is a
10860 statement that produces a scalar boolean result. Return the vector
10861 type on success, otherwise return NULL_TREE. */
10863 tree
10864 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10866 gimple *stmt = stmt_info->stmt;
10867 tree mask_type = NULL;
10868 tree vectype, scalar_type;
10870 if (is_gimple_assign (stmt)
10871 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10872 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10874 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10875 mask_type = get_mask_type_for_scalar_type (scalar_type);
10877 if (!mask_type)
10879 if (dump_enabled_p ())
10880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10881 "not vectorized: unsupported mask\n");
10882 return NULL_TREE;
10885 else
10887 tree rhs;
10888 ssa_op_iter iter;
10889 gimple *def_stmt;
10890 enum vect_def_type dt;
10892 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10894 if (!vect_is_simple_use (rhs, stmt_info->vinfo,
10895 &def_stmt, &dt, &vectype))
10897 if (dump_enabled_p ())
10899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10900 "not vectorized: can't compute mask type "
10901 "for statement, ");
10902 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
10905 return NULL_TREE;
10908 /* No vectype probably means external definition.
10909 Allow it in case there is another operand that
10910 allows us to determine the mask type. */
10911 if (!vectype)
10912 continue;
10914 if (!mask_type)
10915 mask_type = vectype;
10916 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10917 TYPE_VECTOR_SUBPARTS (vectype)))
10919 if (dump_enabled_p ())
10921 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10922 "not vectorized: different sized masks "
10923 "types in statement, ");
10924 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10925 mask_type);
10926 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10927 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10928 vectype);
10929 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10931 return NULL_TREE;
10933 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10934 != VECTOR_BOOLEAN_TYPE_P (vectype))
10936 if (dump_enabled_p ())
10938 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10939 "not vectorized: mixed mask and "
10940 "nonmask vector types in statement, ");
10941 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10942 mask_type);
10943 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10944 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10945 vectype);
10946 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10948 return NULL_TREE;
10952 /* We may compare boolean values loaded as a vector of integers.
10953 Fix mask_type in that case. */
10954 if (mask_type
10955 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10956 && gimple_code (stmt) == GIMPLE_ASSIGN
10957 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10958 mask_type = build_same_sized_truth_vector_type (mask_type);
10961 /* A null mask_type should mean a loop-invariant predicate.
10962 This is probably a subject for optimization in if-conversion. */
10963 if (!mask_type && dump_enabled_p ())
10965 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10966 "not vectorized: can't compute mask type "
10967 "for statement, ");
10968 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10970 return mask_type;