[26/46] Make more use of dyn_cast in tree-vect*
[official-gcc.git] / gcc / tree-vect-stmts.c
blob d72dbcbe8c7cde4523907094d768eae18a2a6d73
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
64 return STMT_VINFO_VECTYPE (stmt_info);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
77 if (!loop_vinfo)
78 return false;
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
82 return (bb->loop_father == loop->inner);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 int misalign, enum vect_cost_model_location where)
94 if ((kind == vector_load || kind == unaligned_load)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96 kind = vector_gather_load;
97 if ((kind == vector_store || kind == unaligned_store)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_scatter_store;
101 stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
102 body_cost_vec->safe_push (si);
104 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
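/* For illustration (a minimal sketch using the signature above): a
   caller that wants to account for NCOPIES vector statements plus a
   permutation in the loop body would do something like

     unsigned inside_cost = 0;
     inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
                                      stmt_info, 0, vect_body);
     inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
                                      stmt_info, 0, vect_body);

   The return value is only a preliminary estimate; the entries pushed
   onto the cost vector are what the target cost model finally sees.  */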
109 /* Return a variable of type ELEM_TYPE[NELEMS]. */
111 static tree
112 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
114 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
115 "vect_array");
118 /* ARRAY is an array of vectors created by create_vector_array.
119 Return an SSA_NAME for the vector in index N. The reference
120 is part of the vectorization of STMT and the vector is associated
121 with scalar destination SCALAR_DEST. */
123 static tree
124 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
125 tree array, unsigned HOST_WIDE_INT n)
127 tree vect_type, vect, vect_name, array_ref;
128 gimple *new_stmt;
130 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
131 vect_type = TREE_TYPE (TREE_TYPE (array));
132 vect = vect_create_destination_var (scalar_dest, vect_type);
133 array_ref = build4 (ARRAY_REF, vect_type, array,
134 build_int_cst (size_type_node, n),
135 NULL_TREE, NULL_TREE);
137 new_stmt = gimple_build_assign (vect, array_ref);
138 vect_name = make_ssa_name (vect, new_stmt);
139 gimple_assign_set_lhs (new_stmt, vect_name);
140 vect_finish_stmt_generation (stmt, new_stmt, gsi);
142 return vect_name;
145 /* ARRAY is an array of vectors created by create_vector_array.
146 Emit code to store SSA_NAME VECT in index N of the array.
147 The store is part of the vectorization of STMT. */
149 static void
150 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
151 tree array, unsigned HOST_WIDE_INT n)
153 tree array_ref;
154 gimple *new_stmt;
156 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
157 build_int_cst (size_type_node, n),
158 NULL_TREE, NULL_TREE);
160 new_stmt = gimple_build_assign (array_ref, vect);
161 vect_finish_stmt_generation (stmt, new_stmt, gsi);
164 /* PTR is a pointer to an array of type TYPE. Return a representation
165 of *PTR. The memory reference replaces those in FIRST_DR
166 (and its group). */
168 static tree
169 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
171 tree mem_ref;
173 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
174 /* Arrays have the same alignment as their type. */
175 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
176 return mem_ref;
179 /* Add a clobber of variable VAR to the vectorization of STMT.
180 Emit the clobber before *GSI. */
182 static void
183 vect_clobber_variable (gimple *stmt, gimple_stmt_iterator *gsi, tree var)
185 tree clobber = build_clobber (TREE_TYPE (var));
186 gimple *new_stmt = gimple_build_assign (var, clobber);
187 vect_finish_stmt_generation (stmt, new_stmt, gsi);
190 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
192 /* Function vect_mark_relevant.
194 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
196 static void
197 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
198 enum vect_relevant relevant, bool live_p)
200 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
201 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
202 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
204 if (dump_enabled_p ())
206 dump_printf_loc (MSG_NOTE, vect_location,
207 "mark relevant %d, live %d: ", relevant, live_p);
208 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
211 /* If this stmt is an original stmt in a pattern, we might need to mark its
212 related pattern stmt instead of the original stmt. However, such stmts
213 may have their own uses that are not in any pattern, in such cases the
214 stmt itself should be marked. */
215 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
217 /* This is the last stmt in a sequence that was detected as a
218 pattern that can potentially be vectorized. Don't mark the stmt
219 as relevant/live because it's not going to be vectorized.
220 Instead mark the pattern-stmt that replaces it. */
222 if (dump_enabled_p ())
223 dump_printf_loc (MSG_NOTE, vect_location,
224 "last stmt in pattern. don't mark"
225 " relevant/live.\n");
226 stmt_vec_info old_stmt_info = stmt_info;
227 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
228 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
229 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
230 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
231 stmt = stmt_info->stmt;
234 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
235 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
236 STMT_VINFO_RELEVANT (stmt_info) = relevant;
238 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
239 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
241 if (dump_enabled_p ())
242 dump_printf_loc (MSG_NOTE, vect_location,
243 "already marked relevant/live.\n");
244 return;
247 worklist->safe_push (stmt);
251 /* Function is_simple_and_all_uses_invariant
253 Return true if STMT is simple and all uses of it are invariant. */
255 bool
256 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
258 tree op;
259 ssa_op_iter iter;
261 if (!is_gimple_assign (stmt))
262 return false;
264 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
266 enum vect_def_type dt = vect_uninitialized_def;
268 if (!vect_is_simple_use (op, loop_vinfo, &dt))
270 if (dump_enabled_p ())
271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
272 "use not simple.\n");
273 return false;
276 if (dt != vect_external_def && dt != vect_constant_def)
277 return false;
279 return true;
282 /* Function vect_stmt_relevant_p.
284 Return true if STMT in loop that is represented by LOOP_VINFO is
285 "relevant for vectorization".
287 A stmt is considered "relevant for vectorization" if:
288 - it has uses outside the loop.
289 - it has vdefs (it alters memory).
290 - control stmts in the loop (except for the exit condition).
292 CHECKME: what other side effects would the vectorizer allow? */
294 static bool
295 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
296 enum vect_relevant *relevant, bool *live_p)
298 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
299 ssa_op_iter op_iter;
300 imm_use_iterator imm_iter;
301 use_operand_p use_p;
302 def_operand_p def_p;
304 *relevant = vect_unused_in_scope;
305 *live_p = false;
307 /* cond stmt other than loop exit cond. */
308 if (is_ctrl_stmt (stmt)
309 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
310 != loop_exit_ctrl_vec_info_type)
311 *relevant = vect_used_in_scope;
313 /* changing memory. */
314 if (gimple_code (stmt) != GIMPLE_PHI)
315 if (gimple_vdef (stmt)
316 && !gimple_clobber_p (stmt))
318 if (dump_enabled_p ())
319 dump_printf_loc (MSG_NOTE, vect_location,
320 "vec_stmt_relevant_p: stmt has vdefs.\n");
321 *relevant = vect_used_in_scope;
324 /* uses outside the loop. */
325 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
327 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
329 basic_block bb = gimple_bb (USE_STMT (use_p));
330 if (!flow_bb_inside_loop_p (loop, bb))
332 if (dump_enabled_p ())
333 dump_printf_loc (MSG_NOTE, vect_location,
334 "vec_stmt_relevant_p: used out of loop.\n");
336 if (is_gimple_debug (USE_STMT (use_p)))
337 continue;
339 /* We expect all such uses to be in the loop exit phis
340 (because of loop closed form) */
341 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
342 gcc_assert (bb == single_exit (loop)->dest);
344 *live_p = true;
349 if (*live_p && *relevant == vect_unused_in_scope
350 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
352 if (dump_enabled_p ())
353 dump_printf_loc (MSG_NOTE, vect_location,
354 "vec_stmt_relevant_p: stmt live but not relevant.\n");
355 *relevant = vect_used_only_live;
358 return (*live_p || *relevant);
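/* Illustrative example: in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;     // has a vdef
         t = b[i] * 2;        // t used after the loop
       }
     ... = t;

   the store is marked vect_used_in_scope because it alters memory,
   while the stmt computing t is detected as live through its use in
   the loop exit phi; since its uses are not all invariant it is then
   marked vect_used_only_live.  */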
362 /* Function exist_non_indexing_operands_for_use_p
364 USE is one of the uses attached to STMT. Check if USE is
365 used in STMT for anything other than indexing an array. */
367 static bool
368 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
370 tree operand;
371 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
373 /* USE corresponds to some operand in STMT. If there is no data
374 reference in STMT, then any operand that corresponds to USE
375 is not indexing an array. */
376 if (!STMT_VINFO_DATA_REF (stmt_info))
377 return true;
379 /* STMT has a data_ref. FORNOW this means that it's of one of
380 the following forms:
381 -1- ARRAY_REF = var
382 -2- var = ARRAY_REF
383 (This should have been verified in analyze_data_refs).
385 'var' in the second case corresponds to a def, not a use,
386 so USE cannot correspond to any operands that are not used
387 for array indexing.
389 Therefore, all we need to check is if STMT falls into the
390 first case, and whether var corresponds to USE. */
392 gassign *assign = dyn_cast <gassign *> (stmt);
393 if (!assign || !gimple_assign_copy_p (assign))
395 gcall *call = dyn_cast <gcall *> (stmt);
396 if (call && gimple_call_internal_p (call))
398 internal_fn ifn = gimple_call_internal_fn (call);
399 int mask_index = internal_fn_mask_index (ifn);
400 if (mask_index >= 0
401 && use == gimple_call_arg (call, mask_index))
402 return true;
403 int stored_value_index = internal_fn_stored_value_index (ifn);
404 if (stored_value_index >= 0
405 && use == gimple_call_arg (call, stored_value_index))
406 return true;
407 if (internal_gather_scatter_fn_p (ifn)
408 && use == gimple_call_arg (call, 1))
409 return true;
411 return false;
414 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
415 return false;
416 operand = gimple_assign_rhs1 (assign);
417 if (TREE_CODE (operand) != SSA_NAME)
418 return false;
420 if (operand == use)
421 return true;
423 return false;
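/* For example (illustrative): given

     x_3 = a[i_7];
     a[i_7] = x_3;

   the use of i_7 in either statement only feeds the address
   computation, so this function returns false for it, whereas the use
   of x_3 as the stored value in the second statement makes it return
   true.  */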
428 /* Function process_use.
430 Inputs:
431 - a USE in STMT in a loop represented by LOOP_VINFO
432 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
433 that defined USE. This is done by calling mark_relevant and passing it
434 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
435 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
436 be performed.
438 Outputs:
439 Generally, LIVE_P and RELEVANT are used to define the liveness and
440 relevance info of the DEF_STMT of this USE:
441 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
442 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
443 Exceptions:
444 - case 1: If USE is used only for address computations (e.g. array indexing),
445 which does not need to be directly vectorized, then the liveness/relevance
446 of the respective DEF_STMT is left unchanged.
447 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
448 skip DEF_STMT because it has already been processed.
449 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
450 be modified accordingly.
452 Return true if everything is as expected. Return false otherwise. */
454 static bool
455 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
456 enum vect_relevant relevant, vec<gimple *> *worklist,
457 bool force)
459 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
460 stmt_vec_info dstmt_vinfo;
461 basic_block bb, def_bb;
462 enum vect_def_type dt;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
467 return true;
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
471 if (dump_enabled_p ())
472 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
473 "not vectorized: unsupported use in stmt.\n");
474 return false;
477 if (!dstmt_vinfo)
478 return true;
480 def_bb = gimple_bb (dstmt_vinfo->stmt);
482 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
483 DSTMT_VINFO must have already been processed, because this should be the
484 only way that STMT, which is a reduction-phi, was put in the worklist,
485 as there should be no other uses for DSTMT_VINFO in the loop. So we just
486 check that everything is as expected, and we are done. */
487 bb = gimple_bb (stmt);
488 if (gimple_code (stmt) == GIMPLE_PHI
489 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
490 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
491 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
492 && bb->loop_father == def_bb->loop_father)
494 if (dump_enabled_p ())
495 dump_printf_loc (MSG_NOTE, vect_location,
496 "reduc-stmt defining reduc-phi in the same nest.\n");
497 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
498 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
499 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
500 return true;
503 /* case 3a: outer-loop stmt defining an inner-loop stmt:
504 outer-loop-header-bb:
505 d = dstmt_vinfo
506 inner-loop:
507 stmt # use (d)
508 outer-loop-tail-bb:
509 ... */
510 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
512 if (dump_enabled_p ())
513 dump_printf_loc (MSG_NOTE, vect_location,
514 "outer-loop def-stmt defining inner-loop stmt.\n");
516 switch (relevant)
518 case vect_unused_in_scope:
519 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
520 vect_used_in_scope : vect_unused_in_scope;
521 break;
523 case vect_used_in_outer_by_reduction:
524 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
525 relevant = vect_used_by_reduction;
526 break;
528 case vect_used_in_outer:
529 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
530 relevant = vect_used_in_scope;
531 break;
533 case vect_used_in_scope:
534 break;
536 default:
537 gcc_unreachable ();
541 /* case 3b: inner-loop stmt defining an outer-loop stmt:
542 outer-loop-header-bb:
544 inner-loop:
545 d = dstmt_vinfo
546 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
547 stmt # use (d) */
548 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
550 if (dump_enabled_p ())
551 dump_printf_loc (MSG_NOTE, vect_location,
552 "inner-loop def-stmt defining outer-loop stmt.\n");
554 switch (relevant)
556 case vect_unused_in_scope:
557 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
558 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
559 vect_used_in_outer_by_reduction : vect_unused_in_scope;
560 break;
562 case vect_used_by_reduction:
563 case vect_used_only_live:
564 relevant = vect_used_in_outer_by_reduction;
565 break;
567 case vect_used_in_scope:
568 relevant = vect_used_in_outer;
569 break;
571 default:
572 gcc_unreachable ();
575 /* We are also not interested in uses on loop PHI backedges that are
576 inductions. Otherwise we'll needlessly vectorize the IV increment
577 and cause hybrid SLP for SLP inductions. Unless the PHI is live
578 of course. */
579 else if (gimple_code (stmt) == GIMPLE_PHI
580 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
581 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
582 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
583 == use))
585 if (dump_enabled_p ())
586 dump_printf_loc (MSG_NOTE, vect_location,
587 "induction value on backedge.\n");
588 return true;
592 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
593 return true;
597 /* Function vect_mark_stmts_to_be_vectorized.
599 Not all stmts in the loop need to be vectorized. For example:
601 for i...
602 for j...
603 1. T0 = i + j
604 2. T1 = a[T0]
606 3. j = j + 1
608 Stmts 1 and 3 do not need to be vectorized, because loop control and
609 addressing of vectorized data-refs are handled differently.
611 This pass detects such stmts. */
613 bool
614 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
616 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
617 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
618 unsigned int nbbs = loop->num_nodes;
619 gimple_stmt_iterator si;
620 gimple *stmt;
621 unsigned int i;
622 stmt_vec_info stmt_vinfo;
623 basic_block bb;
624 gimple *phi;
625 bool live_p;
626 enum vect_relevant relevant;
628 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
630 auto_vec<gimple *, 64> worklist;
632 /* 1. Init worklist. */
633 for (i = 0; i < nbbs; i++)
635 bb = bbs[i];
636 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
638 phi = gsi_stmt (si);
639 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
642 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
645 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
646 vect_mark_relevant (&worklist, phi, relevant, live_p);
648 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
650 stmt = gsi_stmt (si);
651 if (dump_enabled_p ())
653 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
654 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
657 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
658 vect_mark_relevant (&worklist, stmt, relevant, live_p);
662 /* 2. Process_worklist */
663 while (worklist.length () > 0)
665 use_operand_p use_p;
666 ssa_op_iter iter;
668 stmt = worklist.pop ();
669 if (dump_enabled_p ())
671 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
672 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
675 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
676 (DEF_STMT) as relevant/irrelevant according to the relevance property
677 of STMT. */
678 stmt_vinfo = vinfo_for_stmt (stmt);
679 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
681 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
682 propagated as is to the DEF_STMTs of its USEs.
684 One exception is when STMT has been identified as defining a reduction
685 variable; in this case we set the relevance to vect_used_by_reduction.
686 This is because we distinguish between two kinds of relevant stmts -
687 those that are used by a reduction computation, and those that are
688 (also) used by a regular computation. This allows us later on to
689 identify stmts that are used solely by a reduction, and therefore the
690 order of the results that they produce does not have to be kept. */
692 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
694 case vect_reduction_def:
695 gcc_assert (relevant != vect_unused_in_scope);
696 if (relevant != vect_unused_in_scope
697 && relevant != vect_used_in_scope
698 && relevant != vect_used_by_reduction
699 && relevant != vect_used_only_live)
701 if (dump_enabled_p ())
702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
703 "unsupported use of reduction.\n");
704 return false;
706 break;
708 case vect_nested_cycle:
709 if (relevant != vect_unused_in_scope
710 && relevant != vect_used_in_outer_by_reduction
711 && relevant != vect_used_in_outer)
713 if (dump_enabled_p ())
714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
715 "unsupported use of nested cycle.\n");
717 return false;
719 break;
721 case vect_double_reduction_def:
722 if (relevant != vect_unused_in_scope
723 && relevant != vect_used_by_reduction
724 && relevant != vect_used_only_live)
726 if (dump_enabled_p ())
727 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
728 "unsupported use of double reduction.\n");
730 return false;
732 break;
734 default:
735 break;
738 if (is_pattern_stmt_p (stmt_vinfo))
740 /* Pattern statements are not inserted into the code, so
741 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
742 have to scan the RHS or function arguments instead. */
743 if (gassign *assign = dyn_cast <gassign *> (stmt))
745 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
746 tree op = gimple_assign_rhs1 (assign);
748 i = 1;
749 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
751 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
752 relevant, &worklist, false)
753 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
754 relevant, &worklist, false))
755 return false;
756 i = 2;
758 for (; i < gimple_num_ops (assign); i++)
760 op = gimple_op (assign, i);
761 if (TREE_CODE (op) == SSA_NAME
762 && !process_use (stmt, op, loop_vinfo, relevant,
763 &worklist, false))
764 return false;
767 else if (gcall *call = dyn_cast <gcall *> (stmt))
769 for (i = 0; i < gimple_call_num_args (call); i++)
771 tree arg = gimple_call_arg (call, i);
772 if (!process_use (stmt, arg, loop_vinfo, relevant,
773 &worklist, false))
774 return false;
778 else
779 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
781 tree op = USE_FROM_PTR (use_p);
782 if (!process_use (stmt, op, loop_vinfo, relevant,
783 &worklist, false))
784 return false;
787 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
789 gather_scatter_info gs_info;
790 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
791 gcc_unreachable ();
792 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
793 &worklist, true))
794 return false;
796 } /* while worklist */
798 return true;
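/* To connect this with the example in the comment before the function
   (illustrative): once stmt 2 (T1 = a[T0]) becomes relevant, e.g.
   because T1 feeds a store, process_use notices that its use of T0
   only serves address computation, so stmt 1 (T0 = i + j) is never
   pushed onto the worklist and, like the induction increment in
   stmt 3, stays unvectorized, matching the note that loop control and
   addressing are handled differently.  */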
801 /* Compute the prologue cost for invariant or constant operands. */
803 static unsigned
804 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
805 unsigned opno, enum vect_def_type dt,
806 stmt_vector_for_cost *cost_vec)
808 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
809 tree op = gimple_op (stmt, opno);
810 unsigned prologue_cost = 0;
812 /* Without looking at the actual initializer a vector of
813 constants can be implemented as load from the constant pool.
814 When all elements are the same we can use a splat. */
815 tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
816 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
817 unsigned num_vects_to_check;
818 unsigned HOST_WIDE_INT const_nunits;
819 unsigned nelt_limit;
820 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
821 && ! multiple_p (const_nunits, group_size))
823 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
824 nelt_limit = const_nunits;
826 else
828 /* If either the vector has variable length or the vectors
829 are composed of repeated whole groups we only need to
830 cost construction once. All vectors will be the same. */
831 num_vects_to_check = 1;
832 nelt_limit = group_size;
834 tree elt = NULL_TREE;
835 unsigned nelt = 0;
836 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
838 unsigned si = j % group_size;
839 if (nelt == 0)
840 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
841 /* ??? We're just tracking whether all operands of a single
842 vector initializer are the same, ideally we'd check if
843 we emitted the same one already. */
844 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
845 opno))
846 elt = NULL_TREE;
847 nelt++;
848 if (nelt == nelt_limit)
850 /* ??? We need to pass down stmt_info for a vector type
851 even if it points to the wrong stmt. */
852 prologue_cost += record_stmt_cost
853 (cost_vec, 1,
854 dt == vect_external_def
855 ? (elt ? scalar_to_vec : vec_construct)
856 : vector_load,
857 stmt_info, 0, vect_prologue);
858 nelt = 0;
862 return prologue_cost;
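/* Small worked example (illustrative): for an SLP node of four scalar
   stmts whose costed operand is an external SSA name identical in all
   four lanes, ELT stays non-NULL and a single scalar_to_vec (splat) is
   recorded; if the external lanes differ, a vec_construct is recorded
   per vector built, while constant operands are costed as a
   vector_load from the constant pool either way.  */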
865 /* Function vect_model_simple_cost.
867 Models cost for simple operations, i.e. those that only emit ncopies of a
868 single op. Right now, this does not account for multiple insns that could
869 be generated for the single vector op. We will handle that shortly. */
871 static void
872 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
873 enum vect_def_type *dt,
874 int ndts,
875 slp_tree node,
876 stmt_vector_for_cost *cost_vec)
878 int inside_cost = 0, prologue_cost = 0;
880 gcc_assert (cost_vec != NULL);
882 /* ??? Somehow we need to fix this at the callers. */
883 if (node)
884 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
886 if (node)
888 /* Scan operands and account for prologue cost of constants/externals.
889 ??? This over-estimates cost for multiple uses and should be
890 re-engineered. */
891 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
892 tree lhs = gimple_get_lhs (stmt);
893 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
895 tree op = gimple_op (stmt, i);
896 enum vect_def_type dt;
897 if (!op || op == lhs)
898 continue;
899 if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
900 && (dt == vect_constant_def || dt == vect_external_def))
901 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
902 i, dt, cost_vec);
905 else
906 /* Cost the "broadcast" of a scalar operand in to a vector operand.
907 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
908 cost model. */
909 for (int i = 0; i < ndts; i++)
910 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
911 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
912 stmt_info, 0, vect_prologue);
914 /* Adjust for two-operator SLP nodes. */
915 if (node && SLP_TREE_TWO_OPERATORS (node))
917 ncopies *= 2;
918 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
919 stmt_info, 0, vect_body);
922 /* Pass the inside-of-loop statements to the target-specific cost model. */
923 inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
924 stmt_info, 0, vect_body);
926 if (dump_enabled_p ())
927 dump_printf_loc (MSG_NOTE, vect_location,
928 "vect_model_simple_cost: inside_cost = %d, "
929 "prologue_cost = %d .\n", inside_cost, prologue_cost);
933 /* Model cost for type demotion and promotion operations. PWR is normally
934 zero for single-step promotions and demotions. It will be one if
935 two-step promotion/demotion is required, and so on. Each additional
936 step doubles the number of instructions required. */
938 static void
939 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
940 enum vect_def_type *dt, int pwr,
941 stmt_vector_for_cost *cost_vec)
943 int i, tmp;
944 int inside_cost = 0, prologue_cost = 0;
946 for (i = 0; i < pwr + 1; i++)
948 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
949 (i + 1) : i;
950 inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
951 vec_promote_demote, stmt_info, 0,
952 vect_body);
955 /* FORNOW: Assuming maximum 2 args per stmts. */
956 for (i = 0; i < 2; i++)
957 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
958 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
959 stmt_info, 0, vect_prologue);
961 if (dump_enabled_p ())
962 dump_printf_loc (MSG_NOTE, vect_location,
963 "vect_model_promotion_demotion_cost: inside_cost = %d, "
964 "prologue_cost = %d .\n", inside_cost, prologue_cost);
967 /* Function vect_model_store_cost
969 Models cost for stores. In the case of grouped accesses, one access
970 has the overhead of the grouped access attributed to it. */
972 static void
973 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
974 enum vect_def_type dt,
975 vect_memory_access_type memory_access_type,
976 vec_load_store_type vls_type, slp_tree slp_node,
977 stmt_vector_for_cost *cost_vec)
979 unsigned int inside_cost = 0, prologue_cost = 0;
980 stmt_vec_info first_stmt_info = stmt_info;
981 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
983 /* ??? Somehow we need to fix this at the callers. */
984 if (slp_node)
985 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
987 if (vls_type == VLS_STORE_INVARIANT)
989 if (slp_node)
990 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
991 1, dt, cost_vec);
992 else
993 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
994 stmt_info, 0, vect_prologue);
997 /* Grouped stores update all elements in the group at once,
998 so we want the DR for the first statement. */
999 if (!slp_node && grouped_access_p)
1000 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1002 /* True if we should include any once-per-group costs as well as
1003 the cost of the statement itself. For SLP we only get called
1004 once per group anyhow. */
1005 bool first_stmt_p = (first_stmt_info == stmt_info);
1007 /* We assume that the cost of a single store-lanes instruction is
1008 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1009 access is instead being provided by a permute-and-store operation,
1010 include the cost of the permutes. */
1011 if (first_stmt_p
1012 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1014 /* Uses high and low interleave or shuffle operations for each
1015 needed permute. */
1016 int group_size = DR_GROUP_SIZE (first_stmt_info);
1017 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1018 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1019 stmt_info, 0, vect_body);
1021 if (dump_enabled_p ())
1022 dump_printf_loc (MSG_NOTE, vect_location,
1023 "vect_model_store_cost: strided group_size = %d .\n",
1024 group_size);
1027 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1028 /* Costs of the stores. */
1029 if (memory_access_type == VMAT_ELEMENTWISE
1030 || memory_access_type == VMAT_GATHER_SCATTER)
1032 /* N scalar stores plus extracting the elements. */
1033 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1034 inside_cost += record_stmt_cost (cost_vec,
1035 ncopies * assumed_nunits,
1036 scalar_store, stmt_info, 0, vect_body);
1038 else
1039 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1041 if (memory_access_type == VMAT_ELEMENTWISE
1042 || memory_access_type == VMAT_STRIDED_SLP)
1044 /* N scalar stores plus extracting the elements. */
1045 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1046 inside_cost += record_stmt_cost (cost_vec,
1047 ncopies * assumed_nunits,
1048 vec_to_scalar, stmt_info, 0, vect_body);
1051 if (dump_enabled_p ())
1052 dump_printf_loc (MSG_NOTE, vect_location,
1053 "vect_model_store_cost: inside_cost = %d, "
1054 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1058 /* Calculate cost of DR's memory access. */
1059 void
1060 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1061 unsigned int *inside_cost,
1062 stmt_vector_for_cost *body_cost_vec)
1064 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1065 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1067 switch (alignment_support_scheme)
1069 case dr_aligned:
1071 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1072 vector_store, stmt_info, 0,
1073 vect_body);
1075 if (dump_enabled_p ())
1076 dump_printf_loc (MSG_NOTE, vect_location,
1077 "vect_model_store_cost: aligned.\n");
1078 break;
1081 case dr_unaligned_supported:
1083 /* Here, we assign an additional cost for the unaligned store. */
1084 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1085 unaligned_store, stmt_info,
1086 DR_MISALIGNMENT (dr), vect_body);
1087 if (dump_enabled_p ())
1088 dump_printf_loc (MSG_NOTE, vect_location,
1089 "vect_model_store_cost: unaligned supported by "
1090 "hardware.\n");
1091 break;
1094 case dr_unaligned_unsupported:
1096 *inside_cost = VECT_MAX_COST;
1098 if (dump_enabled_p ())
1099 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1100 "vect_model_store_cost: unsupported access.\n");
1101 break;
1104 default:
1105 gcc_unreachable ();
1110 /* Function vect_model_load_cost
1112 Models cost for loads. In the case of grouped accesses, one access has
1113 the overhead of the grouped access attributed to it. Since unaligned
1114 accesses are supported for loads, we also account for the costs of the
1115 access scheme chosen. */
1117 static void
1118 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1119 vect_memory_access_type memory_access_type,
1120 slp_instance instance,
1121 slp_tree slp_node,
1122 stmt_vector_for_cost *cost_vec)
1124 unsigned int inside_cost = 0, prologue_cost = 0;
1125 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1127 gcc_assert (cost_vec);
1129 /* ??? Somehow we need to fix this at the callers. */
1130 if (slp_node)
1131 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1133 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1135 /* If the load is permuted then the alignment is determined by
1136 the first group element not by the first scalar stmt DR. */
1137 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1138 /* Record the cost for the permutation. */
1139 unsigned n_perms;
1140 unsigned assumed_nunits
1141 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1142 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1143 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1144 slp_vf, instance, true,
1145 &n_perms);
1146 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1147 first_stmt_info, 0, vect_body);
1148 /* And adjust the number of loads performed. This handles
1149 redundancies as well as loads that are later dead. */
1150 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1151 bitmap_clear (perm);
1152 for (unsigned i = 0;
1153 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1154 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1155 ncopies = 0;
1156 bool load_seen = false;
1157 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1159 if (i % assumed_nunits == 0)
1161 if (load_seen)
1162 ncopies++;
1163 load_seen = false;
1165 if (bitmap_bit_p (perm, i))
1166 load_seen = true;
1168 if (load_seen)
1169 ncopies++;
1170 gcc_assert (ncopies
1171 <= (DR_GROUP_SIZE (first_stmt_info)
1172 - DR_GROUP_GAP (first_stmt_info)
1173 + assumed_nunits - 1) / assumed_nunits);
1176 /* Grouped loads read all elements in the group at once,
1177 so we want the DR for the first statement. */
1178 stmt_vec_info first_stmt_info = stmt_info;
1179 if (!slp_node && grouped_access_p)
1180 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1182 /* True if we should include any once-per-group costs as well as
1183 the cost of the statement itself. For SLP we only get called
1184 once per group anyhow. */
1185 bool first_stmt_p = (first_stmt_info == stmt_info);
1187 /* We assume that the cost of a single load-lanes instruction is
1188 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1189 access is instead being provided by a load-and-permute operation,
1190 include the cost of the permutes. */
1191 if (first_stmt_p
1192 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1194 /* Uses even and odd extract operations or shuffle operations
1195 for each needed permute. */
1196 int group_size = DR_GROUP_SIZE (first_stmt_info);
1197 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1198 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1199 stmt_info, 0, vect_body);
1201 if (dump_enabled_p ())
1202 dump_printf_loc (MSG_NOTE, vect_location,
1203 "vect_model_load_cost: strided group_size = %d .\n",
1204 group_size);
1207 /* The loads themselves. */
1208 if (memory_access_type == VMAT_ELEMENTWISE
1209 || memory_access_type == VMAT_GATHER_SCATTER)
1211 /* N scalar loads plus gathering them into a vector. */
1212 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1213 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1214 inside_cost += record_stmt_cost (cost_vec,
1215 ncopies * assumed_nunits,
1216 scalar_load, stmt_info, 0, vect_body);
1218 else
1219 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1220 &inside_cost, &prologue_cost,
1221 cost_vec, cost_vec, true);
1222 if (memory_access_type == VMAT_ELEMENTWISE
1223 || memory_access_type == VMAT_STRIDED_SLP)
1224 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1225 stmt_info, 0, vect_body);
1227 if (dump_enabled_p ())
1228 dump_printf_loc (MSG_NOTE, vect_location,
1229 "vect_model_load_cost: inside_cost = %d, "
1230 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1234 /* Calculate cost of DR's memory access. */
1235 void
1236 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1237 bool add_realign_cost, unsigned int *inside_cost,
1238 unsigned int *prologue_cost,
1239 stmt_vector_for_cost *prologue_cost_vec,
1240 stmt_vector_for_cost *body_cost_vec,
1241 bool record_prologue_costs)
1243 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1244 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1246 switch (alignment_support_scheme)
1248 case dr_aligned:
1250 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1251 stmt_info, 0, vect_body);
1253 if (dump_enabled_p ())
1254 dump_printf_loc (MSG_NOTE, vect_location,
1255 "vect_model_load_cost: aligned.\n");
1257 break;
1259 case dr_unaligned_supported:
1261 /* Here, we assign an additional cost for the unaligned load. */
1262 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1263 unaligned_load, stmt_info,
1264 DR_MISALIGNMENT (dr), vect_body);
1266 if (dump_enabled_p ())
1267 dump_printf_loc (MSG_NOTE, vect_location,
1268 "vect_model_load_cost: unaligned supported by "
1269 "hardware.\n");
1271 break;
1273 case dr_explicit_realign:
1275 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1276 vector_load, stmt_info, 0, vect_body);
1277 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1278 vec_perm, stmt_info, 0, vect_body);
1280 /* FIXME: If the misalignment remains fixed across the iterations of
1281 the containing loop, the following cost should be added to the
1282 prologue costs. */
1283 if (targetm.vectorize.builtin_mask_for_load)
1284 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1285 stmt_info, 0, vect_body);
1287 if (dump_enabled_p ())
1288 dump_printf_loc (MSG_NOTE, vect_location,
1289 "vect_model_load_cost: explicit realign\n");
1291 break;
1293 case dr_explicit_realign_optimized:
1295 if (dump_enabled_p ())
1296 dump_printf_loc (MSG_NOTE, vect_location,
1297 "vect_model_load_cost: unaligned software "
1298 "pipelined.\n");
1300 /* Unaligned software pipeline has a load of an address, an initial
1301 load, and possibly a mask operation to "prime" the loop. However,
1302 if this is an access in a group of loads, which provide grouped
1303 access, then the above cost should only be considered for one
1304 access in the group. Inside the loop, there is a load op
1305 and a realignment op. */
1307 if (add_realign_cost && record_prologue_costs)
1309 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1310 vector_stmt, stmt_info,
1311 0, vect_prologue);
1312 if (targetm.vectorize.builtin_mask_for_load)
1313 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1314 vector_stmt, stmt_info,
1315 0, vect_prologue);
1318 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1319 stmt_info, 0, vect_body);
1320 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1321 stmt_info, 0, vect_body);
1323 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_NOTE, vect_location,
1325 "vect_model_load_cost: explicit realign optimized"
1326 "\n");
1328 break;
1331 case dr_unaligned_unsupported:
1333 *inside_cost = VECT_MAX_COST;
1335 if (dump_enabled_p ())
1336 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1337 "vect_model_load_cost: unsupported access.\n");
1338 break;
1341 default:
1342 gcc_unreachable ();
1346 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1347 the loop preheader for the vectorized stmt STMT. */
1349 static void
1350 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1352 if (gsi)
1353 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1354 else
1356 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1357 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1359 if (loop_vinfo)
1361 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1362 basic_block new_bb;
1363 edge pe;
1365 if (nested_in_vect_loop_p (loop, stmt))
1366 loop = loop->inner;
1368 pe = loop_preheader_edge (loop);
1369 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1370 gcc_assert (!new_bb);
1372 else
1374 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1375 basic_block bb;
1376 gimple_stmt_iterator gsi_bb_start;
1378 gcc_assert (bb_vinfo);
1379 bb = BB_VINFO_BB (bb_vinfo);
1380 gsi_bb_start = gsi_after_labels (bb);
1381 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1385 if (dump_enabled_p ())
1387 dump_printf_loc (MSG_NOTE, vect_location,
1388 "created new init_stmt: ");
1389 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1393 /* Function vect_init_vector.
1395 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1396 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1397 vector type a vector with all elements equal to VAL is created first.
1398 Place the initialization at BSI if it is not NULL. Otherwise, place the
1399 initialization at the loop preheader.
1400 Return the DEF of INIT_STMT.
1401 It will be used in the vectorization of STMT. */
1403 tree
1404 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1406 gimple *init_stmt;
1407 tree new_temp;
1409 /* We abuse this function to push something to an SSA name with initial 'val'. */
1410 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1412 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1413 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1415 /* Scalar boolean value should be transformed into
1416 all zeros or all ones value before building a vector. */
1417 if (VECTOR_BOOLEAN_TYPE_P (type))
1419 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1420 tree false_val = build_zero_cst (TREE_TYPE (type));
1422 if (CONSTANT_CLASS_P (val))
1423 val = integer_zerop (val) ? false_val : true_val;
1424 else
1426 new_temp = make_ssa_name (TREE_TYPE (type));
1427 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1428 val, true_val, false_val);
1429 vect_init_vector_1 (stmt, init_stmt, gsi);
1430 val = new_temp;
1433 else if (CONSTANT_CLASS_P (val))
1434 val = fold_convert (TREE_TYPE (type), val);
1435 else
1437 new_temp = make_ssa_name (TREE_TYPE (type));
1438 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1439 init_stmt = gimple_build_assign (new_temp,
1440 fold_build1 (VIEW_CONVERT_EXPR,
1441 TREE_TYPE (type),
1442 val));
1443 else
1444 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1445 vect_init_vector_1 (stmt, init_stmt, gsi);
1446 val = new_temp;
1449 val = build_vector_from_val (type, val);
1452 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1453 init_stmt = gimple_build_assign (new_temp, val);
1454 vect_init_vector_1 (stmt, init_stmt, gsi);
1455 return new_temp;
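/* For illustration: initializing a vector boolean from a scalar
   SSA_NAME flag_5 (a hypothetical name) first emits the COND_EXPR
   built above, conceptually

     tmp = flag_5 ? all-ones : 0;

   with constants of the boolean element type, then splats TMP via
   build_vector_from_val and finally emits the init_stmt either at GSI
   or, when GSI is NULL, in the loop preheader (or, for basic-block
   vectorization, after the labels of the block).  */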
1458 /* Function vect_get_vec_def_for_operand_1.
1460 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1461 DT that will be used in the vectorized stmt. */
1463 tree
1464 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1466 tree vec_oprnd;
1467 stmt_vec_info vec_stmt_info;
1468 stmt_vec_info def_stmt_info = NULL;
1470 switch (dt)
1472 /* operand is a constant or a loop invariant. */
1473 case vect_constant_def:
1474 case vect_external_def:
1475 /* Code should use vect_get_vec_def_for_operand. */
1476 gcc_unreachable ();
1478 /* operand is defined inside the loop. */
1479 case vect_internal_def:
1481 /* Get the def from the vectorized stmt. */
1482 def_stmt_info = vinfo_for_stmt (def_stmt);
1484 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1485 /* Get vectorized pattern statement. */
1486 if (!vec_stmt_info
1487 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1488 && !STMT_VINFO_RELEVANT (def_stmt_info))
1489 vec_stmt_info = (STMT_VINFO_VEC_STMT
1490 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1491 gcc_assert (vec_stmt_info);
1492 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1493 vec_oprnd = PHI_RESULT (phi);
1494 else
1495 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1496 return vec_oprnd;
1499 /* operand is defined by a loop header phi. */
1500 case vect_reduction_def:
1501 case vect_double_reduction_def:
1502 case vect_nested_cycle:
1503 case vect_induction_def:
1505 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1507 /* Get the def from the vectorized stmt. */
1508 def_stmt_info = vinfo_for_stmt (def_stmt);
1509 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1510 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1511 vec_oprnd = PHI_RESULT (phi);
1512 else
1513 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1514 return vec_oprnd;
1517 default:
1518 gcc_unreachable ();
1523 /* Function vect_get_vec_def_for_operand.
1525 OP is an operand in STMT. This function returns a (vector) def that will be
1526 used in the vectorized stmt for STMT.
1528 In the case that OP is an SSA_NAME which is defined in the loop, then
1529 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1531 In case OP is an invariant or constant, a new stmt that creates a vector def
1532 needs to be introduced. VECTYPE may be used to specify a required type for
1533 vector invariant. */
1535 tree
1536 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1538 gimple *def_stmt;
1539 enum vect_def_type dt;
1540 bool is_simple_use;
1541 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1542 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1544 if (dump_enabled_p ())
1546 dump_printf_loc (MSG_NOTE, vect_location,
1547 "vect_get_vec_def_for_operand: ");
1548 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1549 dump_printf (MSG_NOTE, "\n");
1552 stmt_vec_info def_stmt_info;
1553 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1554 &def_stmt_info, &def_stmt);
1555 gcc_assert (is_simple_use);
1556 if (def_stmt && dump_enabled_p ())
1558 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1559 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1562 if (dt == vect_constant_def || dt == vect_external_def)
1564 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1565 tree vector_type;
1567 if (vectype)
1568 vector_type = vectype;
1569 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1570 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1571 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1572 else
1573 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1575 gcc_assert (vector_type);
1576 return vect_init_vector (stmt, op, vector_type, NULL);
1578 else
1579 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
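/* Illustrative summary: for  _4 = _3 * x  with _3 defined inside the
   loop and x loop-invariant, the operand _3 resolves through
   STMT_VINFO_VEC_STMT of its (possibly pattern-related) defining
   statement, while x takes the constant/external path above and a
   splat of x is built in the loop preheader by vect_init_vector.  */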
1583 /* Function vect_get_vec_def_for_stmt_copy
1585 Return a vector-def for an operand. This function is used when the
1586 vectorized stmt to be created (by the caller to this function) is a "copy"
1587 created in case the vectorized result cannot fit in one vector, and several
1588 copies of the vector-stmt are required. In this case the vector-def is
1589 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1590 of the stmt that defines VEC_OPRND.
1591 DT is the type of the vector def VEC_OPRND.
1593 Context:
1594 In case the vectorization factor (VF) is bigger than the number
1595 of elements that can fit in a vectype (nunits), we have to generate
1596 more than one vector stmt to vectorize the scalar stmt. This situation
1597 arises when there are multiple data-types operated upon in the loop; the
1598 smallest data-type determines the VF, and as a result, when vectorizing
1599 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1600 vector stmt (each computing a vector of 'nunits' results, and together
1601 computing 'VF' results in each iteration). This function is called when
1602 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1603 which VF=16 and nunits=4, so the number of copies required is 4):
1605 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1607 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1608 VS1.1: vx.1 = memref1 VS1.2
1609 VS1.2: vx.2 = memref2 VS1.3
1610 VS1.3: vx.3 = memref3
1612 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1613 VSnew.1: vz1 = vx.1 + ... VSnew.2
1614 VSnew.2: vz2 = vx.2 + ... VSnew.3
1615 VSnew.3: vz3 = vx.3 + ...
1617 The vectorization of S1 is explained in vectorizable_load.
1618 The vectorization of S2:
1619 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1620 the function 'vect_get_vec_def_for_operand' is called to
1621 get the relevant vector-def for each operand of S2. For operand x it
1622 returns the vector-def 'vx.0'.
1624 To create the remaining copies of the vector-stmt (VSnew.j), this
1625 function is called to get the relevant vector-def for each operand. It is
1626 obtained from the respective VS1.j stmt, which is recorded in the
1627 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1629 For example, to obtain the vector-def 'vx.1' in order to create the
1630 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1631 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1632 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1633 and return its def ('vx.1').
1634 Overall, to create the above sequence this function will be called 3 times:
1635 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1636 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1637 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1639 tree
1640 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1642 gimple *vec_stmt_for_operand;
1643 stmt_vec_info def_stmt_info;
1645 /* Do nothing; can reuse same def. */
1646 if (dt == vect_external_def || dt == vect_constant_def )
1647 return vec_oprnd;
1649 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1650 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1651 gcc_assert (def_stmt_info);
1652 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1653 gcc_assert (vec_stmt_for_operand);
1654 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1655 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1656 else
1657 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1658 return vec_oprnd;
1662 /* Get vectorized definitions for the operands to create a copy of an original
1663 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1665 void
1666 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1667 vec<tree> *vec_oprnds0,
1668 vec<tree> *vec_oprnds1)
1670 tree vec_oprnd = vec_oprnds0->pop ();
1672 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1673 vec_oprnds0->quick_push (vec_oprnd);
1675 if (vec_oprnds1 && vec_oprnds1->length ())
1677 vec_oprnd = vec_oprnds1->pop ();
1678 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1679 vec_oprnds1->quick_push (vec_oprnd);
1684 /* Get vectorized definitions for OP0 and OP1. */
1686 void
1687 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1688 vec<tree> *vec_oprnds0,
1689 vec<tree> *vec_oprnds1,
1690 slp_tree slp_node)
1692 if (slp_node)
1694 int nops = (op1 == NULL_TREE) ? 1 : 2;
1695 auto_vec<tree> ops (nops);
1696 auto_vec<vec<tree> > vec_defs (nops);
1698 ops.quick_push (op0);
1699 if (op1)
1700 ops.quick_push (op1);
1702 vect_get_slp_defs (ops, slp_node, &vec_defs);
1704 *vec_oprnds0 = vec_defs[0];
1705 if (op1)
1706 *vec_oprnds1 = vec_defs[1];
1708 else
1710 tree vec_oprnd;
1712 vec_oprnds0->create (1);
1713 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1714 vec_oprnds0->quick_push (vec_oprnd);
1716 if (op1)
1718 vec_oprnds1->create (1);
1719 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1720 vec_oprnds1->quick_push (vec_oprnd);
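/* Typical use (a sketch only): a caller vectorizing a binary
   operation first does

     vec<tree> vec_oprnds0 = vNULL, vec_oprnds1 = vNULL;
     vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
                        slp_node);

   and, in the non-SLP multi-copy case, refreshes both vectors with
   vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1)
   before generating each further copy.  */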
1725 /* Helper function called by vect_finish_replace_stmt and
1726 vect_finish_stmt_generation. Set the location of the new
1727 statement and create and return a stmt_vec_info for it. */
1729 static stmt_vec_info
1730 vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1732 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1733 vec_info *vinfo = stmt_info->vinfo;
1735 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1737 if (dump_enabled_p ())
1739 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1740 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1743 gimple_set_location (vec_stmt, gimple_location (stmt));
1745 /* While EH edges will generally prevent vectorization, stmt might
1746 e.g. be in a must-not-throw region. Ensure newly created stmts
1747 that could throw are part of the same region. */
1748 int lp_nr = lookup_stmt_eh_lp (stmt);
1749 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1750 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1752 return vec_stmt_info;
1755 /* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1756 which sets the same scalar result as STMT did. Create and return a
1757 stmt_vec_info for VEC_STMT. */
1759 stmt_vec_info
1760 vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1762 gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1764 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1765 gsi_replace (&gsi, vec_stmt, false);
1767 return vect_finish_stmt_generation_1 (stmt, vec_stmt);
1770 /* Add VEC_STMT to the vectorized implementation of STMT and insert it
1771 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1773 stmt_vec_info
1774 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1775 gimple_stmt_iterator *gsi)
1777 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1779 if (!gsi_end_p (*gsi)
1780 && gimple_has_mem_ops (vec_stmt))
1782 gimple *at_stmt = gsi_stmt (*gsi);
1783 tree vuse = gimple_vuse (at_stmt);
1784 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1786 tree vdef = gimple_vdef (at_stmt);
1787 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1788 /* If we have an SSA vuse and insert a store, update virtual
1789 SSA form to avoid triggering the renamer. Do so only
1790 if we can easily see all uses - which is what almost always
1791 happens with the way vectorized stmts are inserted. */
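/* For example (SSA names purely illustrative): if AT_STMT is a scalar
store with vdef .MEM_4 and vuse .MEM_3, the vectorized store inserted
before it gets vuse .MEM_3 and a fresh vdef .MEM_5, and AT_STMT's vuse
is rewritten to .MEM_5.  */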
1792 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1793 && ((is_gimple_assign (vec_stmt)
1794 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1795 || (is_gimple_call (vec_stmt)
1796 && !(gimple_call_flags (vec_stmt)
1797 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1799 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1800 gimple_set_vdef (vec_stmt, new_vdef);
1801 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1805 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1806 return vect_finish_stmt_generation_1 (stmt, vec_stmt);
1809 /* We want to vectorize a call to combined function CFN with function
1810 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1811 as the types of all inputs. Check whether this is possible using
1812 an internal function, returning its code if so or IFN_LAST if not. */
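/* For example, a call that folds to CFN_SQRT can be vectorized as the
direct internal function IFN_SQRT when the target implements the
corresponding optab for the vector types; otherwise IFN_LAST is
returned and the caller falls back to target builtins.  */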
1814 static internal_fn
1815 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1816 tree vectype_out, tree vectype_in)
1818 internal_fn ifn;
1819 if (internal_fn_p (cfn))
1820 ifn = as_internal_fn (cfn);
1821 else
1822 ifn = associated_internal_fn (fndecl);
1823 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1825 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1826 if (info.vectorizable)
1828 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1829 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1830 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1831 OPTIMIZE_FOR_SPEED))
1832 return ifn;
1835 return IFN_LAST;
1839 static tree permute_vec_elements (tree, tree, tree, gimple *,
1840 gimple_stmt_iterator *);
1842 /* Check whether a load or store statement in the loop described by
1843 LOOP_VINFO is possible in a fully-masked loop. This is testing
1844 whether the vectorizer pass has the appropriate support, as well as
1845 whether the target does.
1847 VLS_TYPE says whether the statement is a load or store and VECTYPE
1848 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1849 says how the load or store is going to be implemented and GROUP_SIZE
1850 is the number of load or store statements in the containing group.
1851 If the access is a gather load or scatter store, GS_INFO describes
1852 its arguments.
1854 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1855 supported, otherwise record the required mask types. */
1857 static void
1858 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1859 vec_load_store_type vls_type, int group_size,
1860 vect_memory_access_type memory_access_type,
1861 gather_scatter_info *gs_info)
1863 /* Invariant loads need no special support. */
1864 if (memory_access_type == VMAT_INVARIANT)
1865 return;
1867 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1868 machine_mode vecmode = TYPE_MODE (vectype);
1869 bool is_load = (vls_type == VLS_LOAD);
1870 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1872 if (is_load
1873 ? !vect_load_lanes_supported (vectype, group_size, true)
1874 : !vect_store_lanes_supported (vectype, group_size, true))
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1878 "can't use a fully-masked loop because the"
1879 " target doesn't have an appropriate masked"
1880 " load/store-lanes instruction.\n");
1881 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1882 return;
1884 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1885 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1886 return;
1889 if (memory_access_type == VMAT_GATHER_SCATTER)
1891 internal_fn ifn = (is_load
1892 ? IFN_MASK_GATHER_LOAD
1893 : IFN_MASK_SCATTER_STORE);
1894 tree offset_type = TREE_TYPE (gs_info->offset);
1895 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1896 gs_info->memory_type,
1897 TYPE_SIGN (offset_type),
1898 gs_info->scale))
1900 if (dump_enabled_p ())
1901 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1902 "can't use a fully-masked loop because the"
1903 " target doesn't have an appropriate masked"
1904 " gather load or scatter store instruction.\n");
1905 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1906 return;
1908 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1909 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1910 return;
1913 if (memory_access_type != VMAT_CONTIGUOUS
1914 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1916 /* Element X of the data must come from iteration i * VF + X of the
1917 scalar loop. We need more work to support other mappings. */
1918 if (dump_enabled_p ())
1919 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1920 "can't use a fully-masked loop because an access"
1921 " isn't contiguous.\n");
1922 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1923 return;
1926 machine_mode mask_mode;
1927 if (!(targetm.vectorize.get_mask_mode
1928 (GET_MODE_NUNITS (vecmode),
1929 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1930 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1932 if (dump_enabled_p ())
1933 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1934 "can't use a fully-masked loop because the target"
1935 " doesn't have the appropriate masked load or"
1936 " store.\n");
1937 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1938 return;
1940 /* We might load more scalars than we need for permuting SLP loads.
1941 We checked in get_group_load_store_type that the extra elements
1942 don't leak into a new vector. */
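/* For example (numbers purely illustrative): with GROUP_SIZE == 2, a
vectorization factor of 8 and 4-element vectors, each vector iteration
covers 2 * 8 == 16 scalars and therefore needs 16 / 4 == 4 masks.  */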
1943 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1944 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1945 unsigned int nvectors;
1946 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1947 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1948 else
1949 gcc_unreachable ();
1952 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1953 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1954 that needs to be applied to all loads and stores in a vectorized loop.
1955 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1957 MASK_TYPE is the type of both masks. If new statements are needed,
1958 insert them before GSI. */
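/* For example (mask values purely illustrative): if VEC_MASK is
{ 1, 1, 0, 1 } and LOOP_MASK is { 1, 1, 1, 0 } for the final vector
iteration, a BIT_AND_EXPR is emitted before GSI and the function
returns an SSA name holding { 1, 1, 0, 0 }.  */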
1960 static tree
1961 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1962 gimple_stmt_iterator *gsi)
1964 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1965 if (!loop_mask)
1966 return vec_mask;
1968 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1969 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1970 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1971 vec_mask, loop_mask);
1972 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1973 return and_res;
1976 /* Determine whether we can use a gather load or scatter store to vectorize
1977 strided load or store STMT by truncating the current offset to a smaller
1978 width. We need to be able to construct an offset vector:
1980 { 0, X, X*2, X*3, ... }
1982 without loss of precision, where X is STMT's DR_STEP.
1984 Return true if this is possible, describing the gather load or scatter
1985 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
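/* For example (numbers purely illustrative): for DR_STEP == 8 on 4-byte
elements, SCALE == 4 gives X == 2, so the offset vector
{ 0, 2, 4, 6, ... } only needs to be representable in the 32-bit
element width rather than in the full pointer width.  */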
1987 static bool
1988 vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
1989 bool masked_p,
1990 gather_scatter_info *gs_info)
1992 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1993 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1994 tree step = DR_STEP (dr);
1995 if (TREE_CODE (step) != INTEGER_CST)
1997 /* ??? Perhaps we could use range information here? */
1998 if (dump_enabled_p ())
1999 dump_printf_loc (MSG_NOTE, vect_location,
2000 "cannot truncate variable step.\n");
2001 return false;
2004 /* Get the number of bits in an element. */
2005 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2006 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2007 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2009 /* Set COUNT to one less than the upper limit on the number of elements.
2010 Start with the maximum vectorization factor. */
2011 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2013 /* Try lowering COUNT to the number of scalar latch iterations. */
2014 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2015 widest_int max_iters;
2016 if (max_loop_iterations (loop, &max_iters)
2017 && max_iters < count)
2018 count = max_iters.to_shwi ();
2020 /* Try scales of 1 and the element size. */
2021 int scales[] = { 1, vect_get_scalar_dr_size (dr) };
2022 wi::overflow_type overflow = wi::OVF_NONE;
2023 for (int i = 0; i < 2; ++i)
2025 int scale = scales[i];
2026 widest_int factor;
2027 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2028 continue;
2030 /* See whether we can calculate COUNT * STEP / SCALE
2031 in ELEMENT_BITS bits. */
2032 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2033 if (overflow)
2034 continue;
2035 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2036 if (wi::min_precision (range, sign) > element_bits)
2038 overflow = wi::OVF_UNKNOWN;
2039 continue;
2042 /* See whether the target supports the operation. */
2043 tree memory_type = TREE_TYPE (DR_REF (dr));
2044 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2045 memory_type, element_bits, sign, scale,
2046 &gs_info->ifn, &gs_info->element_type))
2047 continue;
2049 tree offset_type = build_nonstandard_integer_type (element_bits,
2050 sign == UNSIGNED);
2052 gs_info->decl = NULL_TREE;
2053 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2054 but we don't need to store that here. */
2055 gs_info->base = NULL_TREE;
2056 gs_info->offset = fold_convert (offset_type, step);
2057 gs_info->offset_dt = vect_constant_def;
2058 gs_info->offset_vectype = NULL_TREE;
2059 gs_info->scale = scale;
2060 gs_info->memory_type = memory_type;
2061 return true;
2064 if (overflow && dump_enabled_p ())
2065 dump_printf_loc (MSG_NOTE, vect_location,
2066 "truncating gather/scatter offset to %d bits"
2067 " might change its value.\n", element_bits);
2069 return false;
2072 /* Return true if we can use gather/scatter internal functions to
2073 vectorize STMT, which is a grouped or strided load or store.
2074 MASKED_P is true if load or store is conditional. When returning
2075 true, fill in GS_INFO with the information required to perform the
2076 operation. */
2078 static bool
2079 vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
2080 bool masked_p,
2081 gather_scatter_info *gs_info)
2083 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
2084 || gs_info->decl)
2085 return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
2086 masked_p, gs_info);
2088 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2089 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2090 tree offset_type = TREE_TYPE (gs_info->offset);
2091 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2093 /* Enforced by vect_check_gather_scatter. */
2094 gcc_assert (element_bits >= offset_bits);
2096 /* If the elements are wider than the offset, convert the offset to the
2097 same width, without changing its sign. */
2098 if (element_bits > offset_bits)
2100 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2101 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2102 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2105 if (dump_enabled_p ())
2106 dump_printf_loc (MSG_NOTE, vect_location,
2107 "using gather/scatter for strided/grouped access,"
2108 " scale = %d\n", gs_info->scale);
2110 return true;
2113 /* STMT is a non-strided load or store, meaning that it accesses
2114 elements with a known constant step. Return -1 if that step
2115 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2117 static int
2118 compare_step_with_zero (gimple *stmt)
2120 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2121 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2122 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
2123 size_zero_node);
2126 /* If the target supports a permute mask that reverses the elements in
2127 a vector of type VECTYPE, return that mask, otherwise return null. */
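/* For example, for a 4-element vector the selector is { 3, 2, 1, 0 };
only its first three elements need to be encoded explicitly below
because they form a single stepped pattern, which also covers
variable-length vectors.  */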
2129 static tree
2130 perm_mask_for_reverse (tree vectype)
2132 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2134 /* The encoding has a single stepped pattern. */
2135 vec_perm_builder sel (nunits, 1, 3);
2136 for (int i = 0; i < 3; ++i)
2137 sel.quick_push (nunits - 1 - i);
2139 vec_perm_indices indices (sel, 1, nunits);
2140 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2141 return NULL_TREE;
2142 return vect_gen_perm_mask_checked (vectype, indices);
2145 /* STMT is either a masked or unconditional store. Return the value
2146 being stored. */
2148 tree
2149 vect_get_store_rhs (gimple *stmt)
2151 if (gassign *assign = dyn_cast <gassign *> (stmt))
2153 gcc_assert (gimple_assign_single_p (assign));
2154 return gimple_assign_rhs1 (assign);
2156 if (gcall *call = dyn_cast <gcall *> (stmt))
2158 internal_fn ifn = gimple_call_internal_fn (call);
2159 int index = internal_fn_stored_value_index (ifn);
2160 gcc_assert (index >= 0);
2161 return gimple_call_arg (stmt, index);
2163 gcc_unreachable ();
2166 /* A subroutine of get_load_store_type, with a subset of the same
2167 arguments. Handle the case where STMT is part of a grouped load
2168 or store.
2170 For stores, the statements in the group are all consecutive
2171 and there is no gap at the end. For loads, the statements in the
2172 group might not be consecutive; there can be gaps between statements
2173 as well as at the end. */
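/* For example (illustrative only): a group that loads a[3*i] and
a[3*i+1] from records of three elements has a gap of one element at
the end; the vectorized accesses may read that unused element, which
is what OVERRUN_P tracks below.  */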
2175 static bool
2176 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
2177 bool masked_p, vec_load_store_type vls_type,
2178 vect_memory_access_type *memory_access_type,
2179 gather_scatter_info *gs_info)
2181 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2182 vec_info *vinfo = stmt_info->vinfo;
2183 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2184 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2185 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2186 data_reference *first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
2187 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2188 bool single_element_p = (stmt_info == first_stmt_info
2189 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2190 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2191 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2193 /* True if the vectorized statements would access beyond the last
2194 statement in the group. */
2195 bool overrun_p = false;
2197 /* True if we can cope with such overrun by peeling for gaps, so that
2198 there is at least one final scalar iteration after the vector loop. */
2199 bool can_overrun_p = (!masked_p
2200 && vls_type == VLS_LOAD
2201 && loop_vinfo
2202 && !loop->inner);
2204 /* There can only be a gap at the end of the group if the stride is
2205 known at compile time. */
2206 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2208 /* Stores can't yet have gaps. */
2209 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2211 if (slp)
2213 if (STMT_VINFO_STRIDED_P (stmt_info))
2215 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2216 separated by the stride, until we have a complete vector.
2217 Fall back to scalar accesses if that isn't possible. */
2218 if (multiple_p (nunits, group_size))
2219 *memory_access_type = VMAT_STRIDED_SLP;
2220 else
2221 *memory_access_type = VMAT_ELEMENTWISE;
2223 else
2225 overrun_p = loop_vinfo && gap != 0;
2226 if (overrun_p && vls_type != VLS_LOAD)
2228 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2229 "Grouped store with gaps requires"
2230 " non-consecutive accesses\n");
2231 return false;
2233 /* An overrun is fine if the trailing elements are smaller
2234 than the alignment boundary B. Every vector access will
2235 be a multiple of B and so we are guaranteed to access a
2236 non-gap element in the same B-sized block. */
2237 if (overrun_p
2238 && gap < (vect_known_alignment_in_bytes (first_dr)
2239 / vect_get_scalar_dr_size (first_dr)))
2240 overrun_p = false;
2241 if (overrun_p && !can_overrun_p)
2243 if (dump_enabled_p ())
2244 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2245 "Peeling for outer loop is not supported\n");
2246 return false;
2248 *memory_access_type = VMAT_CONTIGUOUS;
2251 else
2253 /* We can always handle this case using elementwise accesses,
2254 but see if something more efficient is available. */
2255 *memory_access_type = VMAT_ELEMENTWISE;
2257 /* If there is a gap at the end of the group then these optimizations
2258 would access excess elements in the last iteration. */
2259 bool would_overrun_p = (gap != 0);
2260 /* An overrun is fine if the trailing elements are smaller than the
2261 alignment boundary B. Every vector access will be a multiple of B
2262 and so we are guaranteed to access a non-gap element in the
2263 same B-sized block. */
2264 if (would_overrun_p
2265 && !masked_p
2266 && gap < (vect_known_alignment_in_bytes (first_dr)
2267 / vect_get_scalar_dr_size (first_dr)))
2268 would_overrun_p = false;
2270 if (!STMT_VINFO_STRIDED_P (stmt_info)
2271 && (can_overrun_p || !would_overrun_p)
2272 && compare_step_with_zero (stmt) > 0)
2274 /* First cope with the degenerate case of a single-element
2275 vector. */
2276 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2277 *memory_access_type = VMAT_CONTIGUOUS;
2279 /* Otherwise try using LOAD/STORE_LANES. */
2280 if (*memory_access_type == VMAT_ELEMENTWISE
2281 && (vls_type == VLS_LOAD
2282 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2283 : vect_store_lanes_supported (vectype, group_size,
2284 masked_p)))
2286 *memory_access_type = VMAT_LOAD_STORE_LANES;
2287 overrun_p = would_overrun_p;
2290 /* If that fails, try using permuting loads. */
2291 if (*memory_access_type == VMAT_ELEMENTWISE
2292 && (vls_type == VLS_LOAD
2293 ? vect_grouped_load_supported (vectype, single_element_p,
2294 group_size)
2295 : vect_grouped_store_supported (vectype, group_size)))
2297 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2298 overrun_p = would_overrun_p;
2302 /* As a last resort, try using a gather load or scatter store.
2304 ??? Although the code can handle all group sizes correctly,
2305 it probably isn't a win to use separate strided accesses based
2306 on nearby locations. Or, even if it's a win over scalar code,
2307 it might not be a win over vectorizing at a lower VF, if that
2308 allows us to use contiguous accesses. */
2309 if (*memory_access_type == VMAT_ELEMENTWISE
2310 && single_element_p
2311 && loop_vinfo
2312 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2313 masked_p, gs_info))
2314 *memory_access_type = VMAT_GATHER_SCATTER;
2317 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2319 /* STMT is the leader of the group. Check the operands of all the
2320 stmts of the group. */
2321 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2322 while (next_stmt_info)
2324 tree op = vect_get_store_rhs (next_stmt_info);
2325 enum vect_def_type dt;
2326 if (!vect_is_simple_use (op, vinfo, &dt))
2328 if (dump_enabled_p ())
2329 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2330 "use not simple.\n");
2331 return false;
2333 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2337 if (overrun_p)
2339 gcc_assert (can_overrun_p);
2340 if (dump_enabled_p ())
2341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2342 "Data access with gaps requires scalar "
2343 "epilogue loop\n");
2344 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2347 return true;
2350 /* A subroutine of get_load_store_type, with a subset of the same
2351 arguments. Handle the case where STMT is a load or store that
2352 accesses consecutive elements with a negative step. */
2354 static vect_memory_access_type
2355 get_negative_load_store_type (gimple *stmt, tree vectype,
2356 vec_load_store_type vls_type,
2357 unsigned int ncopies)
2359 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2360 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2361 dr_alignment_support alignment_support_scheme;
2363 if (ncopies > 1)
2365 if (dump_enabled_p ())
2366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2367 "multiple types with negative step.\n");
2368 return VMAT_ELEMENTWISE;
2371 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2372 if (alignment_support_scheme != dr_aligned
2373 && alignment_support_scheme != dr_unaligned_supported)
2375 if (dump_enabled_p ())
2376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2377 "negative step but alignment required.\n");
2378 return VMAT_ELEMENTWISE;
2381 if (vls_type == VLS_STORE_INVARIANT)
2383 if (dump_enabled_p ())
2384 dump_printf_loc (MSG_NOTE, vect_location,
2385 "negative step with invariant source;"
2386 " no permute needed.\n");
2387 return VMAT_CONTIGUOUS_DOWN;
2390 if (!perm_mask_for_reverse (vectype))
2392 if (dump_enabled_p ())
2393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2394 "negative step and reversing not supported.\n");
2395 return VMAT_ELEMENTWISE;
2398 return VMAT_CONTIGUOUS_REVERSE;
2401 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
2402 if there is a memory access type that the vectorized form can use,
2403 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2404 or scatters, fill in GS_INFO accordingly.
2406 SLP says whether we're performing SLP rather than loop vectorization.
2407 MASKED_P is true if the statement is conditional on a vectorized mask.
2408 VECTYPE is the vector type that the vectorized statements will use.
2409 NCOPIES is the number of vector statements that will be needed. */
2411 static bool
2412 get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
2413 vec_load_store_type vls_type, unsigned int ncopies,
2414 vect_memory_access_type *memory_access_type,
2415 gather_scatter_info *gs_info)
2417 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2418 vec_info *vinfo = stmt_info->vinfo;
2419 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2420 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2421 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2423 *memory_access_type = VMAT_GATHER_SCATTER;
2424 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2425 gcc_unreachable ();
2426 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2427 &gs_info->offset_dt,
2428 &gs_info->offset_vectype))
2430 if (dump_enabled_p ())
2431 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2432 "%s index use not simple.\n",
2433 vls_type == VLS_LOAD ? "gather" : "scatter");
2434 return false;
2437 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2439 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
2440 memory_access_type, gs_info))
2441 return false;
2443 else if (STMT_VINFO_STRIDED_P (stmt_info))
2445 gcc_assert (!slp);
2446 if (loop_vinfo
2447 && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2448 masked_p, gs_info))
2449 *memory_access_type = VMAT_GATHER_SCATTER;
2450 else
2451 *memory_access_type = VMAT_ELEMENTWISE;
2453 else
2455 int cmp = compare_step_with_zero (stmt);
2456 if (cmp < 0)
2457 *memory_access_type = get_negative_load_store_type
2458 (stmt, vectype, vls_type, ncopies);
2459 else if (cmp == 0)
2461 gcc_assert (vls_type == VLS_LOAD);
2462 *memory_access_type = VMAT_INVARIANT;
2464 else
2465 *memory_access_type = VMAT_CONTIGUOUS;
2468 if ((*memory_access_type == VMAT_ELEMENTWISE
2469 || *memory_access_type == VMAT_STRIDED_SLP)
2470 && !nunits.is_constant ())
2472 if (dump_enabled_p ())
2473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2474 "Not using elementwise accesses due to variable "
2475 "vectorization factor.\n");
2476 return false;
2479 /* FIXME: At the moment the cost model seems to underestimate the
2480 cost of using elementwise accesses. This check preserves the
2481 traditional behavior until that can be fixed. */
2482 if (*memory_access_type == VMAT_ELEMENTWISE
2483 && !STMT_VINFO_STRIDED_P (stmt_info)
2484 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2485 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2486 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2488 if (dump_enabled_p ())
2489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2490 "not falling back to elementwise accesses\n");
2491 return false;
2493 return true;
2496 /* Return true if boolean argument MASK is suitable for vectorizing
2497 conditional load or store STMT. When returning true, store the type
2498 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2499 in *MASK_VECTYPE_OUT. */
2501 static bool
2502 vect_check_load_store_mask (gimple *stmt, tree mask,
2503 vect_def_type *mask_dt_out,
2504 tree *mask_vectype_out)
2506 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2508 if (dump_enabled_p ())
2509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2510 "mask argument is not a boolean.\n");
2511 return false;
2514 if (TREE_CODE (mask) != SSA_NAME)
2516 if (dump_enabled_p ())
2517 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2518 "mask argument is not an SSA name.\n");
2519 return false;
2522 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2523 enum vect_def_type mask_dt;
2524 tree mask_vectype;
2525 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2527 if (dump_enabled_p ())
2528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2529 "mask use not simple.\n");
2530 return false;
2533 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2534 if (!mask_vectype)
2535 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2537 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2539 if (dump_enabled_p ())
2540 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2541 "could not find an appropriate vector mask type.\n");
2542 return false;
2545 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2546 TYPE_VECTOR_SUBPARTS (vectype)))
2548 if (dump_enabled_p ())
2550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2551 "vector mask type ");
2552 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2553 dump_printf (MSG_MISSED_OPTIMIZATION,
2554 " does not match vector data type ");
2555 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2556 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2558 return false;
2561 *mask_dt_out = mask_dt;
2562 *mask_vectype_out = mask_vectype;
2563 return true;
2566 /* Return true if stored value RHS is suitable for vectorizing store
2567 statement STMT. When returning true, store the type of the
2568 definition in *RHS_DT_OUT, the type of the vectorized store value in
2569 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2571 static bool
2572 vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2573 tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
2575 /* If this is a store from a constant, make sure
2576 native_encode_expr can handle it. */
2577 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2579 if (dump_enabled_p ())
2580 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2581 "cannot encode constant as a byte sequence.\n");
2582 return false;
2585 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2586 enum vect_def_type rhs_dt;
2587 tree rhs_vectype;
2588 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2590 if (dump_enabled_p ())
2591 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2592 "use not simple.\n");
2593 return false;
2596 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2597 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2599 if (dump_enabled_p ())
2600 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2601 "incompatible vector types.\n");
2602 return false;
2605 *rhs_dt_out = rhs_dt;
2606 *rhs_vectype_out = rhs_vectype;
2607 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2608 *vls_type_out = VLS_STORE_INVARIANT;
2609 else
2610 *vls_type_out = VLS_STORE;
2611 return true;
2614 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2615 Note that we support masks with floating-point type, in which case the
2616 floats are interpreted as a bitmask. */
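/* For example, for a 4-element single-precision MASKTYPE this builds a
vector whose elements all have an all-one-bits representation; only
the bit pattern matters, not the floating-point value it denotes.  */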
2618 static tree
2619 vect_build_all_ones_mask (gimple *stmt, tree masktype)
2621 if (TREE_CODE (masktype) == INTEGER_TYPE)
2622 return build_int_cst (masktype, -1);
2623 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2625 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2626 mask = build_vector_from_val (masktype, mask);
2627 return vect_init_vector (stmt, mask, masktype, NULL);
2629 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2631 REAL_VALUE_TYPE r;
2632 long tmp[6];
2633 for (int j = 0; j < 6; ++j)
2634 tmp[j] = -1;
2635 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2636 tree mask = build_real (TREE_TYPE (masktype), r);
2637 mask = build_vector_from_val (masktype, mask);
2638 return vect_init_vector (stmt, mask, masktype, NULL);
2640 gcc_unreachable ();
2643 /* Build an all-zero merge value of type VECTYPE while vectorizing
2644 STMT as a gather load. */
2646 static tree
2647 vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2649 tree merge;
2650 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2651 merge = build_int_cst (TREE_TYPE (vectype), 0);
2652 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2654 REAL_VALUE_TYPE r;
2655 long tmp[6];
2656 for (int j = 0; j < 6; ++j)
2657 tmp[j] = 0;
2658 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2659 merge = build_real (TREE_TYPE (vectype), r);
2661 else
2662 gcc_unreachable ();
2663 merge = build_vector_from_val (vectype, merge);
2664 return vect_init_vector (stmt, merge, vectype, NULL);
2667 /* Build a gather load call while vectorizing STMT. Insert new instructions
2668 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2669 operation. If the load is conditional, MASK is the unvectorized
2670 condition and MASK_DT is its definition type, otherwise MASK is null. */
2672 static void
2673 vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2674 stmt_vec_info *vec_stmt,
2675 gather_scatter_info *gs_info, tree mask,
2676 vect_def_type mask_dt)
2678 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2679 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2680 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2681 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2682 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2683 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2684 edge pe = loop_preheader_edge (loop);
2685 enum { NARROW, NONE, WIDEN } modifier;
2686 poly_uint64 gather_off_nunits
2687 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2689 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2690 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2691 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2692 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2693 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2694 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2695 tree scaletype = TREE_VALUE (arglist);
2696 gcc_checking_assert (types_compatible_p (srctype, rettype)
2697 && (!mask || types_compatible_p (srctype, masktype)));
2699 tree perm_mask = NULL_TREE;
2700 tree mask_perm_mask = NULL_TREE;
2701 if (known_eq (nunits, gather_off_nunits))
2702 modifier = NONE;
2703 else if (known_eq (nunits * 2, gather_off_nunits))
2705 modifier = WIDEN;
2707 /* Currently widening gathers and scatters are only supported for
2708 fixed-length vectors. */
2709 int count = gather_off_nunits.to_constant ();
2710 vec_perm_builder sel (count, count, 1);
2711 for (int i = 0; i < count; ++i)
2712 sel.quick_push (i | (count / 2));
2714 vec_perm_indices indices (sel, 1, count);
2715 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2716 indices);
2718 else if (known_eq (nunits, gather_off_nunits * 2))
2720 modifier = NARROW;
2722 /* Currently narrowing gathers and scatters are only supported for
2723 fixed-length vectors. */
2724 int count = nunits.to_constant ();
2725 vec_perm_builder sel (count, count, 1);
2726 sel.quick_grow (count);
2727 for (int i = 0; i < count; ++i)
2728 sel[i] = i < count / 2 ? i : i + count / 2;
2729 vec_perm_indices indices (sel, 2, count);
2730 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2732 ncopies *= 2;
2734 if (mask)
2736 for (int i = 0; i < count; ++i)
2737 sel[i] = i | (count / 2);
2738 indices.new_vector (sel, 2, count);
2739 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2742 else
2743 gcc_unreachable ();
2745 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2746 vectype);
2748 tree ptr = fold_convert (ptrtype, gs_info->base);
2749 if (!is_gimple_min_invariant (ptr))
2751 gimple_seq seq;
2752 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2753 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2754 gcc_assert (!new_bb);
2757 tree scale = build_int_cst (scaletype, gs_info->scale);
2759 tree vec_oprnd0 = NULL_TREE;
2760 tree vec_mask = NULL_TREE;
2761 tree src_op = NULL_TREE;
2762 tree mask_op = NULL_TREE;
2763 tree prev_res = NULL_TREE;
2764 stmt_vec_info prev_stmt_info = NULL;
2766 if (!mask)
2768 src_op = vect_build_zero_merge_argument (stmt, rettype);
2769 mask_op = vect_build_all_ones_mask (stmt, masktype);
2772 for (int j = 0; j < ncopies; ++j)
2774 tree op, var;
2775 if (modifier == WIDEN && (j & 1))
2776 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2777 perm_mask, stmt, gsi);
2778 else if (j == 0)
2779 op = vec_oprnd0
2780 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2781 else
2782 op = vec_oprnd0
2783 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2785 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2787 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2788 TYPE_VECTOR_SUBPARTS (idxtype)));
2789 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2790 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2791 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2792 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2793 op = var;
2796 if (mask)
2798 if (mask_perm_mask && (j & 1))
2799 mask_op = permute_vec_elements (mask_op, mask_op,
2800 mask_perm_mask, stmt, gsi);
2801 else
2803 if (j == 0)
2804 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2805 else
2806 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
2808 mask_op = vec_mask;
2809 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2811 gcc_assert
2812 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2813 TYPE_VECTOR_SUBPARTS (masktype)));
2814 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2815 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2816 gassign *new_stmt
2817 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2818 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2819 mask_op = var;
2822 src_op = mask_op;
2825 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2826 mask_op, scale);
2828 stmt_vec_info new_stmt_info;
2829 if (!useless_type_conversion_p (vectype, rettype))
2831 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2832 TYPE_VECTOR_SUBPARTS (rettype)));
2833 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2834 gimple_call_set_lhs (new_call, op);
2835 vect_finish_stmt_generation (stmt, new_call, gsi);
2836 var = make_ssa_name (vec_dest);
2837 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2838 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2839 new_stmt_info = vect_finish_stmt_generation (stmt, new_stmt, gsi);
2841 else
2843 var = make_ssa_name (vec_dest, new_call);
2844 gimple_call_set_lhs (new_call, var);
2845 new_stmt_info = vect_finish_stmt_generation (stmt, new_call, gsi);
2848 if (modifier == NARROW)
2850 if ((j & 1) == 0)
2852 prev_res = var;
2853 continue;
2855 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2856 new_stmt_info = loop_vinfo->lookup_def (var);
2859 if (prev_stmt_info == NULL_STMT_VEC_INFO)
2860 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2861 else
2862 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2863 prev_stmt_info = new_stmt_info;
2867 /* Prepare the base and offset in GS_INFO for vectorization.
2868 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2869 to the vectorized offset argument for the first copy of STMT. STMT
2870 is the statement described by GS_INFO and LOOP is the containing loop. */
2872 static void
2873 vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2874 gather_scatter_info *gs_info,
2875 tree *dataref_ptr, tree *vec_offset)
2877 gimple_seq stmts = NULL;
2878 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2879 if (stmts != NULL)
2881 basic_block new_bb;
2882 edge pe = loop_preheader_edge (loop);
2883 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2884 gcc_assert (!new_bb);
2886 tree offset_type = TREE_TYPE (gs_info->offset);
2887 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2888 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2889 offset_vectype);
2892 /* Prepare to implement a grouped or strided load or store using
2893 the gather load or scatter store operation described by GS_INFO.
2894 STMT is the load or store statement.
2896 Set *DATAREF_BUMP to the amount that should be added to the base
2897 address after each copy of the vectorized statement. Set *VEC_OFFSET
2898 to an invariant offset vector in which element I has the value
2899 I * DR_STEP / SCALE. */
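/* For example (numbers purely illustrative): with DR_STEP == 8 bytes,
SCALE == 4 and 4-element vectors, *VEC_OFFSET is { 0, 2, 4, 6 } and
*DATAREF_BUMP is 8 * 4 == 32 bytes per copy of the statement.  */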
2901 static void
2902 vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2903 gather_scatter_info *gs_info,
2904 tree *dataref_bump, tree *vec_offset)
2906 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2907 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2908 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2909 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2910 gimple_seq stmts;
2912 tree bump = size_binop (MULT_EXPR,
2913 fold_convert (sizetype, DR_STEP (dr)),
2914 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2915 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2916 if (stmts)
2917 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2919 /* The offset given in GS_INFO can have pointer type, so use the element
2920 type of the vector instead. */
2921 tree offset_type = TREE_TYPE (gs_info->offset);
2922 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2923 offset_type = TREE_TYPE (offset_vectype);
2925 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2926 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2927 ssize_int (gs_info->scale));
2928 step = fold_convert (offset_type, step);
2929 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2931 /* Create {0, X, X*2, X*3, ...}. */
2932 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2933 build_zero_cst (offset_type), step);
2934 if (stmts)
2935 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2938 /* Return the amount that should be added to a vector pointer to move
2939 to the next or previous copy of AGGR_TYPE. DR is the data reference
2940 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2941 vectorization. */
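/* For example, an invariant access gets a zero increment, while a
contiguous access with a negative step and a 16-byte AGGR_TYPE gets
an increment of -16.  */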
2943 static tree
2944 vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2945 vect_memory_access_type memory_access_type)
2947 if (memory_access_type == VMAT_INVARIANT)
2948 return size_zero_node;
2950 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2951 tree step = vect_dr_behavior (dr)->step;
2952 if (tree_int_cst_sgn (step) == -1)
2953 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2954 return iv_step;
2957 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2959 static bool
2960 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2961 stmt_vec_info *vec_stmt, slp_tree slp_node,
2962 tree vectype_in, enum vect_def_type *dt,
2963 stmt_vector_for_cost *cost_vec)
2965 tree op, vectype;
2966 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2967 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2968 unsigned ncopies;
2969 unsigned HOST_WIDE_INT nunits, num_bytes;
2971 op = gimple_call_arg (stmt, 0);
2972 vectype = STMT_VINFO_VECTYPE (stmt_info);
2974 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2975 return false;
2977 /* Multiple types in SLP are handled by creating the appropriate number of
2978 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2979 case of SLP. */
2980 if (slp_node)
2981 ncopies = 1;
2982 else
2983 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2985 gcc_assert (ncopies >= 1);
2987 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2988 if (! char_vectype)
2989 return false;
2991 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2992 return false;
2994 unsigned word_bytes = num_bytes / nunits;
2996 /* The encoding uses one stepped pattern for each byte in the word. */
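/* For example, a 32-bit bswap on 16 byte-sized lanes needs the selector
{ 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }; pushing the
first three words below is enough because each byte position within a
word continues as a stepped pattern.  */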
2997 vec_perm_builder elts (num_bytes, word_bytes, 3);
2998 for (unsigned i = 0; i < 3; ++i)
2999 for (unsigned j = 0; j < word_bytes; ++j)
3000 elts.quick_push ((i + 1) * word_bytes - j - 1);
3002 vec_perm_indices indices (elts, 1, num_bytes);
3003 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3004 return false;
3006 if (! vec_stmt)
3008 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3009 DUMP_VECT_SCOPE ("vectorizable_bswap");
3010 if (! slp_node)
3012 record_stmt_cost (cost_vec,
3013 1, vector_stmt, stmt_info, 0, vect_prologue);
3014 record_stmt_cost (cost_vec,
3015 ncopies, vec_perm, stmt_info, 0, vect_body);
3017 return true;
3020 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3022 /* Transform. */
3023 vec<tree> vec_oprnds = vNULL;
3024 stmt_vec_info new_stmt_info = NULL;
3025 stmt_vec_info prev_stmt_info = NULL;
3026 for (unsigned j = 0; j < ncopies; j++)
3028 /* Handle uses. */
3029 if (j == 0)
3030 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
3031 else
3032 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
3034 /* Arguments are ready. Create the new vector stmt. */
3035 unsigned i;
3036 tree vop;
3037 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3039 gimple *new_stmt;
3040 tree tem = make_ssa_name (char_vectype);
3041 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3042 char_vectype, vop));
3043 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3044 tree tem2 = make_ssa_name (char_vectype);
3045 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3046 tem, tem, bswap_vconst);
3047 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3048 tem = make_ssa_name (vectype);
3049 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3050 vectype, tem2));
3051 new_stmt_info = vect_finish_stmt_generation (stmt, new_stmt, gsi);
3052 if (slp_node)
3053 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3056 if (slp_node)
3057 continue;
3059 if (j == 0)
3060 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3061 else
3062 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3064 prev_stmt_info = new_stmt_info;
3067 vec_oprnds.release ();
3068 return true;
3071 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3072 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3073 in a single step. On success, store the binary pack code in
3074 *CONVERT_CODE. */
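/* For example, narrowing a vector of 32-bit integers to a vector of
16-bit integers can typically be done in a single step using a pack
operation such as VEC_PACK_TRUNC_EXPR; multi-step conversions are
rejected here.  */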
3076 static bool
3077 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3078 tree_code *convert_code)
3080 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3081 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3082 return false;
3084 tree_code code;
3085 int multi_step_cvt = 0;
3086 auto_vec <tree, 8> interm_types;
3087 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3088 &code, &multi_step_cvt,
3089 &interm_types)
3090 || multi_step_cvt)
3091 return false;
3093 *convert_code = code;
3094 return true;
3097 /* Function vectorizable_call.
3099 Check if GS performs a function call that can be vectorized.
3100 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3101 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3102 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3104 static bool
3105 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi,
3106 stmt_vec_info *vec_stmt, slp_tree slp_node,
3107 stmt_vector_for_cost *cost_vec)
3109 gcall *stmt;
3110 tree vec_dest;
3111 tree scalar_dest;
3112 tree op;
3113 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3114 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
3115 tree vectype_out, vectype_in;
3116 poly_uint64 nunits_in;
3117 poly_uint64 nunits_out;
3118 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3119 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3120 vec_info *vinfo = stmt_info->vinfo;
3121 tree fndecl, new_temp, rhs_type;
3122 enum vect_def_type dt[4]
3123 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3124 vect_unknown_def_type };
3125 int ndts = ARRAY_SIZE (dt);
3126 int ncopies, j;
3127 auto_vec<tree, 8> vargs;
3128 auto_vec<tree, 8> orig_vargs;
3129 enum { NARROW, NONE, WIDEN } modifier;
3130 size_t i, nargs;
3131 tree lhs;
3133 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3134 return false;
3136 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3137 && ! vec_stmt)
3138 return false;
3140 /* Is GS a vectorizable call? */
3141 stmt = dyn_cast <gcall *> (gs);
3142 if (!stmt)
3143 return false;
3145 if (gimple_call_internal_p (stmt)
3146 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3147 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3148 /* Handled by vectorizable_load and vectorizable_store. */
3149 return false;
3151 if (gimple_call_lhs (stmt) == NULL_TREE
3152 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3153 return false;
3155 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3157 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3159 /* Process function arguments. */
3160 rhs_type = NULL_TREE;
3161 vectype_in = NULL_TREE;
3162 nargs = gimple_call_num_args (stmt);
3164 /* Bail out if the function has more than four arguments; we do not have
3165 interesting builtin functions to vectorize with more than two arguments
3166 except for fma. No arguments is also not good. */
3167 if (nargs == 0 || nargs > 4)
3168 return false;
3170 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
3171 combined_fn cfn = gimple_call_combined_fn (stmt);
3172 if (cfn == CFN_GOMP_SIMD_LANE)
3174 nargs = 0;
3175 rhs_type = unsigned_type_node;
3178 int mask_opno = -1;
3179 if (internal_fn_p (cfn))
3180 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3182 for (i = 0; i < nargs; i++)
3184 tree opvectype;
3186 op = gimple_call_arg (stmt, i);
3187 if (!vect_is_simple_use (op, vinfo, &dt[i], &opvectype))
3189 if (dump_enabled_p ())
3190 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3191 "use not simple.\n");
3192 return false;
3195 /* Skip the mask argument to an internal function. This operand
3196 has been converted via a pattern if necessary. */
3197 if ((int) i == mask_opno)
3198 continue;
3200 /* We can only handle calls with arguments of the same type. */
3201 if (rhs_type
3202 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3204 if (dump_enabled_p ())
3205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3206 "argument types differ.\n");
3207 return false;
3209 if (!rhs_type)
3210 rhs_type = TREE_TYPE (op);
3212 if (!vectype_in)
3213 vectype_in = opvectype;
3214 else if (opvectype
3215 && opvectype != vectype_in)
3217 if (dump_enabled_p ())
3218 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3219 "argument vector types differ.\n");
3220 return false;
3223 /* If all arguments are external or constant defs, use a vector type with
3224 the same size as the output vector type. */
3225 if (!vectype_in)
3226 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3227 if (vec_stmt)
3228 gcc_assert (vectype_in);
3229 if (!vectype_in)
3231 if (dump_enabled_p ())
3233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3234 "no vectype for scalar type ");
3235 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3236 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3239 return false;
3242 /* FORNOW */
3243 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3244 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3245 if (known_eq (nunits_in * 2, nunits_out))
3246 modifier = NARROW;
3247 else if (known_eq (nunits_out, nunits_in))
3248 modifier = NONE;
3249 else if (known_eq (nunits_out * 2, nunits_in))
3250 modifier = WIDEN;
3251 else
3252 return false;
3254 /* We only handle functions that do not read or clobber memory. */
3255 if (gimple_vuse (stmt))
3257 if (dump_enabled_p ())
3258 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3259 "function reads from or writes to memory.\n");
3260 return false;
3263 /* For now, we only vectorize functions if a target specific builtin
3264 is available. TODO -- in some cases, it might be profitable to
3265 insert the calls for pieces of the vector, in order to be able
3266 to vectorize other operations in the loop. */
3267 fndecl = NULL_TREE;
3268 internal_fn ifn = IFN_LAST;
3269 tree callee = gimple_call_fndecl (stmt);
3271 /* First try using an internal function. */
3272 tree_code convert_code = ERROR_MARK;
3273 if (cfn != CFN_LAST
3274 && (modifier == NONE
3275 || (modifier == NARROW
3276 && simple_integer_narrowing (vectype_out, vectype_in,
3277 &convert_code))))
3278 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3279 vectype_in);
3281 /* If that fails, try asking for a target-specific built-in function. */
3282 if (ifn == IFN_LAST)
3284 if (cfn != CFN_LAST)
3285 fndecl = targetm.vectorize.builtin_vectorized_function
3286 (cfn, vectype_out, vectype_in);
3287 else if (callee)
3288 fndecl = targetm.vectorize.builtin_md_vectorized_function
3289 (callee, vectype_out, vectype_in);
3292 if (ifn == IFN_LAST && !fndecl)
3294 if (cfn == CFN_GOMP_SIMD_LANE
3295 && !slp_node
3296 && loop_vinfo
3297 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3298 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3299 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3300 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3302 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3303 { 0, 1, 2, ... vf - 1 } vector. */
3304 gcc_assert (nargs == 0);
3306 else if (modifier == NONE
3307 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3308 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3309 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3310 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
3311 vectype_in, dt, cost_vec);
3312 else
3314 if (dump_enabled_p ())
3315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3316 "function is not vectorizable.\n");
3317 return false;
3321 if (slp_node)
3322 ncopies = 1;
3323 else if (modifier == NARROW && ifn == IFN_LAST)
3324 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3325 else
3326 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3328 /* Sanity check: make sure that at least one copy of the vectorized stmt
3329 needs to be generated. */
3330 gcc_assert (ncopies >= 1);
3332 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3333 if (!vec_stmt) /* transformation not required. */
3335 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3336 DUMP_VECT_SCOPE ("vectorizable_call");
3337 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3338 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3339 record_stmt_cost (cost_vec, ncopies / 2,
3340 vec_promote_demote, stmt_info, 0, vect_body);
3342 if (loop_vinfo && mask_opno >= 0)
3344 unsigned int nvectors = (slp_node
3345 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3346 : ncopies);
3347 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
3349 return true;
3352 /* Transform. */
3354 if (dump_enabled_p ())
3355 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3357 /* Handle def. */
3358 scalar_dest = gimple_call_lhs (stmt);
3359 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3361 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3363 stmt_vec_info new_stmt_info = NULL;
3364 prev_stmt_info = NULL;
3365 if (modifier == NONE || ifn != IFN_LAST)
3367 tree prev_res = NULL_TREE;
3368 vargs.safe_grow (nargs);
3369 orig_vargs.safe_grow (nargs);
3370 for (j = 0; j < ncopies; ++j)
3372 /* Build argument list for the vectorized call. */
3373 if (slp_node)
3375 auto_vec<vec<tree> > vec_defs (nargs);
3376 vec<tree> vec_oprnds0;
3378 for (i = 0; i < nargs; i++)
3379 vargs[i] = gimple_call_arg (stmt, i);
3380 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3381 vec_oprnds0 = vec_defs[0];
3383 /* Arguments are ready. Create the new vector stmt. */
3384 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3386 size_t k;
3387 for (k = 0; k < nargs; k++)
3389 vec<tree> vec_oprndsk = vec_defs[k];
3390 vargs[k] = vec_oprndsk[i];
3392 if (modifier == NARROW)
3394 /* We don't define any narrowing conditional functions
3395 at present. */
3396 gcc_assert (mask_opno < 0);
3397 tree half_res = make_ssa_name (vectype_in);
3398 gcall *call
3399 = gimple_build_call_internal_vec (ifn, vargs);
3400 gimple_call_set_lhs (call, half_res);
3401 gimple_call_set_nothrow (call, true);
3402 new_stmt_info
3403 = vect_finish_stmt_generation (stmt, call, gsi);
3404 if ((i & 1) == 0)
3406 prev_res = half_res;
3407 continue;
3409 new_temp = make_ssa_name (vec_dest);
3410 gimple *new_stmt
3411 = gimple_build_assign (new_temp, convert_code,
3412 prev_res, half_res);
3413 new_stmt_info
3414 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
3416 else
3418 if (mask_opno >= 0 && masked_loop_p)
3420 unsigned int vec_num = vec_oprnds0.length ();
3421 /* Always true for SLP. */
3422 gcc_assert (ncopies == 1);
3423 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3424 vectype_out, i);
3425 vargs[mask_opno] = prepare_load_store_mask
3426 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3429 gcall *call;
3430 if (ifn != IFN_LAST)
3431 call = gimple_build_call_internal_vec (ifn, vargs);
3432 else
3433 call = gimple_build_call_vec (fndecl, vargs);
3434 new_temp = make_ssa_name (vec_dest, call);
3435 gimple_call_set_lhs (call, new_temp);
3436 gimple_call_set_nothrow (call, true);
3437 new_stmt_info
3438 = vect_finish_stmt_generation (stmt, call, gsi);
3440 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3443 for (i = 0; i < nargs; i++)
3445 vec<tree> vec_oprndsi = vec_defs[i];
3446 vec_oprndsi.release ();
3448 continue;
3451 for (i = 0; i < nargs; i++)
3453 op = gimple_call_arg (stmt, i);
3454 if (j == 0)
3455 vec_oprnd0
3456 = vect_get_vec_def_for_operand (op, stmt);
3457 else
3458 vec_oprnd0
3459 = vect_get_vec_def_for_stmt_copy (dt[i], orig_vargs[i]);
3461 orig_vargs[i] = vargs[i] = vec_oprnd0;
3464 if (mask_opno >= 0 && masked_loop_p)
3466 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3467 vectype_out, j);
3468 vargs[mask_opno]
3469 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3470 vargs[mask_opno], gsi);
3473 if (cfn == CFN_GOMP_SIMD_LANE)
3475 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3476 tree new_var
3477 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3478 gimple *init_stmt = gimple_build_assign (new_var, cst);
3479 vect_init_vector_1 (stmt, init_stmt, NULL);
3480 new_temp = make_ssa_name (vec_dest);
3481 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3482 new_stmt_info
3483 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
3485 else if (modifier == NARROW)
3487 /* We don't define any narrowing conditional functions at
3488 present. */
3489 gcc_assert (mask_opno < 0);
3490 tree half_res = make_ssa_name (vectype_in);
3491 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3492 gimple_call_set_lhs (call, half_res);
3493 gimple_call_set_nothrow (call, true);
3494 new_stmt_info = vect_finish_stmt_generation (stmt, call, gsi);
3495 if ((j & 1) == 0)
3497 prev_res = half_res;
3498 continue;
3500 new_temp = make_ssa_name (vec_dest);
3501 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3502 prev_res, half_res);
3503 new_stmt_info
3504 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
3506 else
3508 gcall *call;
3509 if (ifn != IFN_LAST)
3510 call = gimple_build_call_internal_vec (ifn, vargs);
3511 else
3512 call = gimple_build_call_vec (fndecl, vargs);
3513 new_temp = make_ssa_name (vec_dest, call);
3514 gimple_call_set_lhs (call, new_temp);
3515 gimple_call_set_nothrow (call, true);
3516 new_stmt_info = vect_finish_stmt_generation (stmt, call, gsi);
3519 if (j == (modifier == NARROW ? 1 : 0))
3520 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3521 else
3522 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3524 prev_stmt_info = new_stmt_info;
3527 else if (modifier == NARROW)
3529 /* We don't define any narrowing conditional functions at present. */
3530 gcc_assert (mask_opno < 0);
3531 for (j = 0; j < ncopies; ++j)
3533 /* Build argument list for the vectorized call. */
3534 if (j == 0)
3535 vargs.create (nargs * 2);
3536 else
3537 vargs.truncate (0);
3539 if (slp_node)
3541 auto_vec<vec<tree> > vec_defs (nargs);
3542 vec<tree> vec_oprnds0;
3544 for (i = 0; i < nargs; i++)
3545 vargs.quick_push (gimple_call_arg (stmt, i));
3546 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3547 vec_oprnds0 = vec_defs[0];
3549 /* Arguments are ready. Create the new vector stmt. */
3550 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3552 size_t k;
3553 vargs.truncate (0);
3554 for (k = 0; k < nargs; k++)
3556 vec<tree> vec_oprndsk = vec_defs[k];
3557 vargs.quick_push (vec_oprndsk[i]);
3558 vargs.quick_push (vec_oprndsk[i + 1]);
3560 gcall *call;
3561 if (ifn != IFN_LAST)
3562 call = gimple_build_call_internal_vec (ifn, vargs);
3563 else
3564 call = gimple_build_call_vec (fndecl, vargs);
3565 new_temp = make_ssa_name (vec_dest, call);
3566 gimple_call_set_lhs (call, new_temp);
3567 gimple_call_set_nothrow (call, true);
3568 new_stmt_info
3569 = vect_finish_stmt_generation (stmt, call, gsi);
3570 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3573 for (i = 0; i < nargs; i++)
3575 vec<tree> vec_oprndsi = vec_defs[i];
3576 vec_oprndsi.release ();
3578 continue;
3581 for (i = 0; i < nargs; i++)
3583 op = gimple_call_arg (stmt, i);
3584 if (j == 0)
3586 vec_oprnd0
3587 = vect_get_vec_def_for_operand (op, stmt);
3588 vec_oprnd1
3589 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3591 else
3593 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3594 2 * i + 1);
3595 vec_oprnd0
3596 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3597 vec_oprnd1
3598 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3601 vargs.quick_push (vec_oprnd0);
3602 vargs.quick_push (vec_oprnd1);
3605 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3606 new_temp = make_ssa_name (vec_dest, new_stmt);
3607 gimple_call_set_lhs (new_stmt, new_temp);
3608 new_stmt_info = vect_finish_stmt_generation (stmt, new_stmt, gsi);
3610 if (j == 0)
3611 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3612 else
3613 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3615 prev_stmt_info = new_stmt_info;
3618 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3620 else
3621 /* No current target implements this case. */
3622 return false;
3624 vargs.release ();
3626 /* The call in STMT might prevent it from being removed in dce.
3627 We cannot remove it here, however, because of the way the ssa name
3628 it defines is mapped to the new definition. So just replace the
3629 rhs of the statement with something harmless. */
3631 if (slp_node)
3632 return true;
3634 if (is_pattern_stmt_p (stmt_info))
3635 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
3636 lhs = gimple_get_lhs (stmt_info->stmt);
3638 gassign *new_stmt
3639 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3640 set_vinfo_for_stmt (new_stmt, stmt_info);
3641 set_vinfo_for_stmt (stmt_info->stmt, NULL);
3642 STMT_VINFO_STMT (stmt_info) = new_stmt;
3643 gsi_replace (gsi, new_stmt, false);
3645 return true;
3649 struct simd_call_arg_info
3651 tree vectype;
3652 tree op;
3653 HOST_WIDE_INT linear_step;
3654 enum vect_def_type dt;
3655 unsigned int align;
3656 bool simd_lane_linear;
3659 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3660 is linear within simd lane (but not within whole loop), note it in
3661 *ARGINFO. */
3663 static void
3664 vect_simd_lane_linear (tree op, struct loop *loop,
3665 struct simd_call_arg_info *arginfo)
3667 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3669 if (!is_gimple_assign (def_stmt)
3670 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3671 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3672 return;
3674 tree base = gimple_assign_rhs1 (def_stmt);
3675 HOST_WIDE_INT linear_step = 0;
3676 tree v = gimple_assign_rhs2 (def_stmt);
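/* Walk the defining statements of V, folding constant offsets into
   BASE and recording a multiplicative step, until we reach the
   IFN_GOMP_SIMD_LANE call for this loop's simduid.  */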
3677 while (TREE_CODE (v) == SSA_NAME)
3679 tree t;
3680 def_stmt = SSA_NAME_DEF_STMT (v);
3681 if (is_gimple_assign (def_stmt))
3682 switch (gimple_assign_rhs_code (def_stmt))
3684 case PLUS_EXPR:
3685 t = gimple_assign_rhs2 (def_stmt);
3686 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3687 return;
3688 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3689 v = gimple_assign_rhs1 (def_stmt);
3690 continue;
3691 case MULT_EXPR:
3692 t = gimple_assign_rhs2 (def_stmt);
3693 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3694 return;
3695 linear_step = tree_to_shwi (t);
3696 v = gimple_assign_rhs1 (def_stmt);
3697 continue;
3698 CASE_CONVERT:
3699 t = gimple_assign_rhs1 (def_stmt);
3700 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3701 || (TYPE_PRECISION (TREE_TYPE (v))
3702 < TYPE_PRECISION (TREE_TYPE (t))))
3703 return;
3704 if (!linear_step)
3705 linear_step = 1;
3706 v = t;
3707 continue;
3708 default:
3709 return;
3711 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3712 && loop->simduid
3713 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3714 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3715 == loop->simduid))
3717 if (!linear_step)
3718 linear_step = 1;
3719 arginfo->linear_step = linear_step;
3720 arginfo->op = base;
3721 arginfo->simd_lane_linear = true;
3722 return;
3727 /* Return the number of elements in vector type VECTYPE, which is associated
3728 with a SIMD clone. At present these vectors always have a constant
3729 length. */
3731 static unsigned HOST_WIDE_INT
3732 simd_clone_subparts (tree vectype)
3734 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3737 /* Function vectorizable_simd_clone_call.
3739 Check if STMT performs a function call that can be vectorized
3740 by calling a simd clone of the function.
3741 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3742 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3743 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3745 static bool
3746 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3747 stmt_vec_info *vec_stmt, slp_tree slp_node,
3748 stmt_vector_for_cost *)
3750 tree vec_dest;
3751 tree scalar_dest;
3752 tree op, type;
3753 tree vec_oprnd0 = NULL_TREE;
3754 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3755 tree vectype;
3756 unsigned int nunits;
3757 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3758 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3759 vec_info *vinfo = stmt_info->vinfo;
3760 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3761 tree fndecl, new_temp;
3762 int ncopies, j;
3763 auto_vec<simd_call_arg_info> arginfo;
3764 vec<tree> vargs = vNULL;
3765 size_t i, nargs;
3766 tree lhs, rtype, ratype;
3767 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3769 /* Is STMT a vectorizable call? */
3770 if (!is_gimple_call (stmt))
3771 return false;
3773 fndecl = gimple_call_fndecl (stmt);
3774 if (fndecl == NULL_TREE)
3775 return false;
3777 struct cgraph_node *node = cgraph_node::get (fndecl);
3778 if (node == NULL || node->simd_clones == NULL)
3779 return false;
3781 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3782 return false;
3784 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3785 && ! vec_stmt)
3786 return false;
3788 if (gimple_call_lhs (stmt)
3789 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3790 return false;
3792 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3794 vectype = STMT_VINFO_VECTYPE (stmt_info);
3796 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3797 return false;
3799 /* FORNOW */
3800 if (slp_node)
3801 return false;
3803 /* Process function arguments. */
3804 nargs = gimple_call_num_args (stmt);
3806 /* Bail out if the function has zero arguments. */
3807 if (nargs == 0)
3808 return false;
3810 arginfo.reserve (nargs, true);
3812 for (i = 0; i < nargs; i++)
3814 simd_call_arg_info thisarginfo;
3815 affine_iv iv;
3817 thisarginfo.linear_step = 0;
3818 thisarginfo.align = 0;
3819 thisarginfo.op = NULL_TREE;
3820 thisarginfo.simd_lane_linear = false;
3822 op = gimple_call_arg (stmt, i);
3823 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3824 &thisarginfo.vectype)
3825 || thisarginfo.dt == vect_uninitialized_def)
3827 if (dump_enabled_p ())
3828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3829 "use not simple.\n");
3830 return false;
3833 if (thisarginfo.dt == vect_constant_def
3834 || thisarginfo.dt == vect_external_def)
3835 gcc_assert (thisarginfo.vectype == NULL_TREE);
3836 else
3837 gcc_assert (thisarginfo.vectype != NULL_TREE);
3839 /* For linear arguments, the analyze phase should have saved
3840 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3841 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3842 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3844 gcc_assert (vec_stmt);
3845 thisarginfo.linear_step
3846 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3847 thisarginfo.op
3848 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3849 thisarginfo.simd_lane_linear
3850 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3851 == boolean_true_node);
3852 /* If the loop has been peeled for alignment, adjust the linear base accordingly. */
3853 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3854 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3855 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3857 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3858 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3859 tree opt = TREE_TYPE (thisarginfo.op);
3860 bias = fold_convert (TREE_TYPE (step), bias);
3861 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3862 thisarginfo.op
3863 = fold_build2 (POINTER_TYPE_P (opt)
3864 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3865 thisarginfo.op, bias);
3868 else if (!vec_stmt
3869 && thisarginfo.dt != vect_constant_def
3870 && thisarginfo.dt != vect_external_def
3871 && loop_vinfo
3872 && TREE_CODE (op) == SSA_NAME
3873 && simple_iv (loop, loop_containing_stmt (stmt), op,
3874 &iv, false)
3875 && tree_fits_shwi_p (iv.step))
3877 thisarginfo.linear_step = tree_to_shwi (iv.step);
3878 thisarginfo.op = iv.base;
3880 else if ((thisarginfo.dt == vect_constant_def
3881 || thisarginfo.dt == vect_external_def)
3882 && POINTER_TYPE_P (TREE_TYPE (op)))
3883 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3884 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3885 linear too. */
3886 if (POINTER_TYPE_P (TREE_TYPE (op))
3887 && !thisarginfo.linear_step
3888 && !vec_stmt
3889 && thisarginfo.dt != vect_constant_def
3890 && thisarginfo.dt != vect_external_def
3891 && loop_vinfo
3892 && !slp_node
3893 && TREE_CODE (op) == SSA_NAME)
3894 vect_simd_lane_linear (op, loop, &thisarginfo);
3896 arginfo.quick_push (thisarginfo);
3899 unsigned HOST_WIDE_INT vf;
3900 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3902 if (dump_enabled_p ())
3903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3904 "not considering SIMD clones; not yet supported"
3905 " for variable-width vectors.\n");
3906 return false;
3909 unsigned int badness = 0;
3910 struct cgraph_node *bestn = NULL;
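/* Choose the SIMD clone to call: either the one recorded during
   analysis, or the clone of NODE with the smallest badness score,
   penalizing clones with a smaller simdlen than VF, in-branch clones,
   target-reported cost and mismatched or misaligned arguments.  */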
3911 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3912 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3913 else
3914 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3915 n = n->simdclone->next_clone)
3917 unsigned int this_badness = 0;
3918 if (n->simdclone->simdlen > vf
3919 || n->simdclone->nargs != nargs)
3920 continue;
3921 if (n->simdclone->simdlen < vf)
3922 this_badness += (exact_log2 (vf)
3923 - exact_log2 (n->simdclone->simdlen)) * 1024;
3924 if (n->simdclone->inbranch)
3925 this_badness += 2048;
3926 int target_badness = targetm.simd_clone.usable (n);
3927 if (target_badness < 0)
3928 continue;
3929 this_badness += target_badness * 512;
3930 /* FORNOW: Have to add code to add the mask argument. */
3931 if (n->simdclone->inbranch)
3932 continue;
3933 for (i = 0; i < nargs; i++)
3935 switch (n->simdclone->args[i].arg_type)
3937 case SIMD_CLONE_ARG_TYPE_VECTOR:
3938 if (!useless_type_conversion_p
3939 (n->simdclone->args[i].orig_type,
3940 TREE_TYPE (gimple_call_arg (stmt, i))))
3941 i = -1;
3942 else if (arginfo[i].dt == vect_constant_def
3943 || arginfo[i].dt == vect_external_def
3944 || arginfo[i].linear_step)
3945 this_badness += 64;
3946 break;
3947 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3948 if (arginfo[i].dt != vect_constant_def
3949 && arginfo[i].dt != vect_external_def)
3950 i = -1;
3951 break;
3952 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3953 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3954 if (arginfo[i].dt == vect_constant_def
3955 || arginfo[i].dt == vect_external_def
3956 || (arginfo[i].linear_step
3957 != n->simdclone->args[i].linear_step))
3958 i = -1;
3959 break;
3960 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3961 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3962 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3963 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3964 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3965 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3966 /* FORNOW */
3967 i = -1;
3968 break;
3969 case SIMD_CLONE_ARG_TYPE_MASK:
3970 gcc_unreachable ();
3972 if (i == (size_t) -1)
3973 break;
3974 if (n->simdclone->args[i].alignment > arginfo[i].align)
3976 i = -1;
3977 break;
3979 if (arginfo[i].align)
3980 this_badness += (exact_log2 (arginfo[i].align)
3981 - exact_log2 (n->simdclone->args[i].alignment));
3983 if (i == (size_t) -1)
3984 continue;
3985 if (bestn == NULL || this_badness < badness)
3987 bestn = n;
3988 badness = this_badness;
3992 if (bestn == NULL)
3993 return false;
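/* For constant or external arguments that the chosen clone expects in
   vector form, compute the vector type now and give up if it would
   need more elements than the clone's simdlen.  */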
3995 for (i = 0; i < nargs; i++)
3996 if ((arginfo[i].dt == vect_constant_def
3997 || arginfo[i].dt == vect_external_def)
3998 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4000 arginfo[i].vectype
4001 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
4002 i)));
4003 if (arginfo[i].vectype == NULL
4004 || (simd_clone_subparts (arginfo[i].vectype)
4005 > bestn->simdclone->simdlen))
4006 return false;
4009 fndecl = bestn->decl;
4010 nunits = bestn->simdclone->simdlen;
4011 ncopies = vf / nunits;
4013 /* If the function isn't const, only allow it in simd loops where the
4014 user has asserted that at least nunits consecutive iterations can be
4015 performed using SIMD instructions. */
4016 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4017 && gimple_vuse (stmt))
4018 return false;
4020 /* Sanity check: make sure that at least one copy of the vectorized stmt
4021 needs to be generated. */
4022 gcc_assert (ncopies >= 1);
4024 if (!vec_stmt) /* transformation not required. */
4026 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4027 for (i = 0; i < nargs; i++)
4028 if ((bestn->simdclone->args[i].arg_type
4029 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4030 || (bestn->simdclone->args[i].arg_type
4031 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4033 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4034 + 1);
4035 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4036 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4037 ? size_type_node : TREE_TYPE (arginfo[i].op);
4038 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4039 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4040 tree sll = arginfo[i].simd_lane_linear
4041 ? boolean_true_node : boolean_false_node;
4042 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4044 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4045 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4046 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4047 return true;
4050 /* Transform. */
4052 if (dump_enabled_p ())
4053 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4055 /* Handle def. */
4056 scalar_dest = gimple_call_lhs (stmt);
4057 vec_dest = NULL_TREE;
4058 rtype = NULL_TREE;
4059 ratype = NULL_TREE;
4060 if (scalar_dest)
4062 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4063 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4064 if (TREE_CODE (rtype) == ARRAY_TYPE)
4066 ratype = rtype;
4067 rtype = TREE_TYPE (ratype);
4071 prev_stmt_info = NULL;
4072 for (j = 0; j < ncopies; ++j)
4074 /* Build argument list for the vectorized call. */
4075 if (j == 0)
4076 vargs.create (nargs);
4077 else
4078 vargs.truncate (0);
4080 for (i = 0; i < nargs; i++)
4082 unsigned int k, l, m, o;
4083 tree atype;
4084 op = gimple_call_arg (stmt, i);
4085 switch (bestn->simdclone->args[i].arg_type)
4087 case SIMD_CLONE_ARG_TYPE_VECTOR:
4088 atype = bestn->simdclone->args[i].vector_type;
4089 o = nunits / simd_clone_subparts (atype);
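/* The clone may expect its vector argument in a different width than
   ARGINFO[i].vectype: either extract narrower pieces with
   BIT_FIELD_REFs or gather several of our vectors into a
   CONSTRUCTOR.  */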
4090 for (m = j * o; m < (j + 1) * o; m++)
4092 if (simd_clone_subparts (atype)
4093 < simd_clone_subparts (arginfo[i].vectype))
4095 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4096 k = (simd_clone_subparts (arginfo[i].vectype)
4097 / simd_clone_subparts (atype));
4098 gcc_assert ((k & (k - 1)) == 0);
4099 if (m == 0)
4100 vec_oprnd0
4101 = vect_get_vec_def_for_operand (op, stmt);
4102 else
4104 vec_oprnd0 = arginfo[i].op;
4105 if ((m & (k - 1)) == 0)
4106 vec_oprnd0
4107 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4108 vec_oprnd0);
4110 arginfo[i].op = vec_oprnd0;
4111 vec_oprnd0
4112 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4113 bitsize_int (prec),
4114 bitsize_int ((m & (k - 1)) * prec));
4115 gassign *new_stmt
4116 = gimple_build_assign (make_ssa_name (atype),
4117 vec_oprnd0);
4118 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4119 vargs.safe_push (gimple_assign_lhs (new_stmt));
4121 else
4123 k = (simd_clone_subparts (atype)
4124 / simd_clone_subparts (arginfo[i].vectype));
4125 gcc_assert ((k & (k - 1)) == 0);
4126 vec<constructor_elt, va_gc> *ctor_elts;
4127 if (k != 1)
4128 vec_alloc (ctor_elts, k);
4129 else
4130 ctor_elts = NULL;
4131 for (l = 0; l < k; l++)
4133 if (m == 0 && l == 0)
4134 vec_oprnd0
4135 = vect_get_vec_def_for_operand (op, stmt);
4136 else
4137 vec_oprnd0
4138 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
4139 arginfo[i].op);
4140 arginfo[i].op = vec_oprnd0;
4141 if (k == 1)
4142 break;
4143 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4144 vec_oprnd0);
4146 if (k == 1)
4147 vargs.safe_push (vec_oprnd0);
4148 else
4150 vec_oprnd0 = build_constructor (atype, ctor_elts);
4151 gassign *new_stmt
4152 = gimple_build_assign (make_ssa_name (atype),
4153 vec_oprnd0);
4154 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4155 vargs.safe_push (gimple_assign_lhs (new_stmt));
4159 break;
4160 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4161 vargs.safe_push (op);
4162 break;
4163 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4164 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
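/* For a linear argument with a constant step, the first copy creates
   a PHI in the loop header that starts at the invariant base and is
   bumped by STEP * NCOPIES * NUNITS per iteration; later copies just
   add J * NUNITS * STEP to that PHI result.  */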
4165 if (j == 0)
4167 gimple_seq stmts;
4168 arginfo[i].op
4169 = force_gimple_operand (arginfo[i].op, &stmts, true,
4170 NULL_TREE);
4171 if (stmts != NULL)
4173 basic_block new_bb;
4174 edge pe = loop_preheader_edge (loop);
4175 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4176 gcc_assert (!new_bb);
4178 if (arginfo[i].simd_lane_linear)
4180 vargs.safe_push (arginfo[i].op);
4181 break;
4183 tree phi_res = copy_ssa_name (op);
4184 gphi *new_phi = create_phi_node (phi_res, loop->header);
4185 loop_vinfo->add_stmt (new_phi);
4186 add_phi_arg (new_phi, arginfo[i].op,
4187 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4188 enum tree_code code
4189 = POINTER_TYPE_P (TREE_TYPE (op))
4190 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4191 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4192 ? sizetype : TREE_TYPE (op);
4193 widest_int cst
4194 = wi::mul (bestn->simdclone->args[i].linear_step,
4195 ncopies * nunits);
4196 tree tcst = wide_int_to_tree (type, cst);
4197 tree phi_arg = copy_ssa_name (op);
4198 gassign *new_stmt
4199 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4200 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4201 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4202 loop_vinfo->add_stmt (new_stmt);
4203 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4204 UNKNOWN_LOCATION);
4205 arginfo[i].op = phi_res;
4206 vargs.safe_push (phi_res);
4208 else
4210 enum tree_code code
4211 = POINTER_TYPE_P (TREE_TYPE (op))
4212 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4213 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4214 ? sizetype : TREE_TYPE (op);
4215 widest_int cst
4216 = wi::mul (bestn->simdclone->args[i].linear_step,
4217 j * nunits);
4218 tree tcst = wide_int_to_tree (type, cst);
4219 new_temp = make_ssa_name (TREE_TYPE (op));
4220 gassign *new_stmt
4221 = gimple_build_assign (new_temp, code,
4222 arginfo[i].op, tcst);
4223 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4224 vargs.safe_push (new_temp);
4226 break;
4227 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4228 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4229 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4230 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4231 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4232 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4233 default:
4234 gcc_unreachable ();
4238 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4239 if (vec_dest)
4241 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4242 if (ratype)
4243 new_temp = create_tmp_var (ratype);
4244 else if (simd_clone_subparts (vectype)
4245 == simd_clone_subparts (rtype))
4246 new_temp = make_ssa_name (vec_dest, new_call);
4247 else
4248 new_temp = make_ssa_name (rtype, new_call);
4249 gimple_call_set_lhs (new_call, new_temp);
4251 stmt_vec_info new_stmt_info
4252 = vect_finish_stmt_generation (stmt, new_call, gsi);
4254 if (vec_dest)
4256 if (simd_clone_subparts (vectype) < nunits)
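/* The clone's result covers more elements than fit in VECTYPE; split
   it into K vectors, reading from the returned array (RATYPE) or
   extracting pieces with BIT_FIELD_REFs.  */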
4258 unsigned int k, l;
4259 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4260 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4261 k = nunits / simd_clone_subparts (vectype);
4262 gcc_assert ((k & (k - 1)) == 0);
4263 for (l = 0; l < k; l++)
4265 tree t;
4266 if (ratype)
4268 t = build_fold_addr_expr (new_temp);
4269 t = build2 (MEM_REF, vectype, t,
4270 build_int_cst (TREE_TYPE (t), l * bytes));
4272 else
4273 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4274 bitsize_int (prec), bitsize_int (l * prec));
4275 gimple *new_stmt
4276 = gimple_build_assign (make_ssa_name (vectype), t);
4277 new_stmt_info
4278 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
4280 if (j == 0 && l == 0)
4281 STMT_VINFO_VEC_STMT (stmt_info)
4282 = *vec_stmt = new_stmt_info;
4283 else
4284 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4286 prev_stmt_info = new_stmt_info;
4289 if (ratype)
4290 vect_clobber_variable (stmt, gsi, new_temp);
4291 continue;
4293 else if (simd_clone_subparts (vectype) > nunits)
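/* The clone returns fewer elements than VECTYPE holds; collect the
   results of K consecutive copies into a CONSTRUCTOR before building
   one vector of type VECTYPE.  */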
4295 unsigned int k = (simd_clone_subparts (vectype)
4296 / simd_clone_subparts (rtype));
4297 gcc_assert ((k & (k - 1)) == 0);
4298 if ((j & (k - 1)) == 0)
4299 vec_alloc (ret_ctor_elts, k);
4300 if (ratype)
4302 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4303 for (m = 0; m < o; m++)
4305 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4306 size_int (m), NULL_TREE, NULL_TREE);
4307 gimple *new_stmt
4308 = gimple_build_assign (make_ssa_name (rtype), tem);
4309 new_stmt_info
4310 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
4311 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4312 gimple_assign_lhs (new_stmt));
4314 vect_clobber_variable (stmt, gsi, new_temp);
4316 else
4317 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4318 if ((j & (k - 1)) != k - 1)
4319 continue;
4320 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4321 gimple *new_stmt
4322 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4323 new_stmt_info
4324 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
4326 if ((unsigned) j == k - 1)
4327 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4328 else
4329 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4331 prev_stmt_info = new_stmt_info;
4332 continue;
4334 else if (ratype)
4336 tree t = build_fold_addr_expr (new_temp);
4337 t = build2 (MEM_REF, vectype, t,
4338 build_int_cst (TREE_TYPE (t), 0));
4339 gimple *new_stmt
4340 = gimple_build_assign (make_ssa_name (vec_dest), t);
4341 new_stmt_info
4342 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
4343 vect_clobber_variable (stmt, gsi, new_temp);
4347 if (j == 0)
4348 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4349 else
4350 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4352 prev_stmt_info = new_stmt_info;
4355 vargs.release ();
4357 /* The call in STMT might prevent it from being removed in dce.
4358 We cannot remove it here, however, because of the way the ssa name
4359 it defines is mapped to the new definition. So just replace the
4360 rhs of the statement with something harmless. */
4362 if (slp_node)
4363 return true;
4365 gimple *new_stmt;
4366 if (scalar_dest)
4368 type = TREE_TYPE (scalar_dest);
4369 if (is_pattern_stmt_p (stmt_info))
4370 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
4371 else
4372 lhs = gimple_call_lhs (stmt);
4373 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4375 else
4376 new_stmt = gimple_build_nop ();
4377 set_vinfo_for_stmt (new_stmt, stmt_info);
4378 set_vinfo_for_stmt (stmt, NULL);
4379 STMT_VINFO_STMT (stmt_info) = new_stmt;
4380 gsi_replace (gsi, new_stmt, true);
4381 unlink_stmt_vdef (stmt);
4383 return true;
4387 /* Function vect_gen_widened_results_half
4389 Create a vector stmt whose code, type, number of arguments, and result
4390 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4391 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
4392 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4393 needs to be created (DECL is a function-decl of a target-builtin).
4394 STMT is the original scalar stmt that we are vectorizing. */
4396 static gimple *
4397 vect_gen_widened_results_half (enum tree_code code,
4398 tree decl,
4399 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4400 tree vec_dest, gimple_stmt_iterator *gsi,
4401 gimple *stmt)
4403 gimple *new_stmt;
4404 tree new_temp;
4406 /* Generate half of the widened result: */
4407 if (code == CALL_EXPR)
4409 /* Target specific support */
4410 if (op_type == binary_op)
4411 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4412 else
4413 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4414 new_temp = make_ssa_name (vec_dest, new_stmt);
4415 gimple_call_set_lhs (new_stmt, new_temp);
4417 else
4419 /* Generic support */
4420 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4421 if (op_type != binary_op)
4422 vec_oprnd1 = NULL;
4423 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4424 new_temp = make_ssa_name (vec_dest, new_stmt);
4425 gimple_assign_set_lhs (new_stmt, new_temp);
4427 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4429 return new_stmt;
4433 /* Get vectorized definitions for loop-based vectorization. For the first
4434 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4435 the scalar operand), and for the rest we get a copy with
4436 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4437 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4438 The vectors are collected into VEC_OPRNDS. */
4440 static void
4441 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
4442 vec<tree> *vec_oprnds, int multi_step_cvt)
4444 tree vec_oprnd;
4446 /* Get first vector operand. */
4447 /* All the vector operands except the very first one (the scalar oprnd)
4448 are stmt copies. */
4449 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4450 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4451 else
4452 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4454 vec_oprnds->quick_push (vec_oprnd);
4456 /* Get second vector operand. */
4457 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
4458 vec_oprnds->quick_push (vec_oprnd);
4460 *oprnd = vec_oprnd;
4462 /* For conversion in multiple steps, continue to get operands
4463 recursively. */
4464 if (multi_step_cvt)
4465 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4469 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4470 For multi-step conversions store the resulting vectors and call the function
4471 recursively. */
4473 static void
4474 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4475 int multi_step_cvt, gimple *stmt,
4476 vec<tree> vec_dsts,
4477 gimple_stmt_iterator *gsi,
4478 slp_tree slp_node, enum tree_code code,
4479 stmt_vec_info *prev_stmt_info)
4481 unsigned int i;
4482 tree vop0, vop1, new_tmp, vec_dest;
4483 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4485 vec_dest = vec_dsts.pop ();
4487 for (i = 0; i < vec_oprnds->length (); i += 2)
4489 /* Create demotion operation. */
4490 vop0 = (*vec_oprnds)[i];
4491 vop1 = (*vec_oprnds)[i + 1];
4492 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4493 new_tmp = make_ssa_name (vec_dest, new_stmt);
4494 gimple_assign_set_lhs (new_stmt, new_tmp);
4495 stmt_vec_info new_stmt_info
4496 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
4498 if (multi_step_cvt)
4499 /* Store the resulting vector for next recursive call. */
4500 (*vec_oprnds)[i/2] = new_tmp;
4501 else
4503 /* This is the last step of the conversion sequence. Store the
4504 vectors in SLP_NODE or in the vector info of the scalar statement
4505 (or in the STMT_VINFO_RELATED_STMT chain). */
4506 if (slp_node)
4507 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4508 else
4510 if (!*prev_stmt_info)
4511 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4512 else
4513 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4515 *prev_stmt_info = new_stmt_info;
4520 /* For multi-step demotion operations we first generate demotion operations
4521 from the source type to the intermediate types, and then combine the
4522 results (stored in VEC_OPRNDS) with a demotion operation to the
4523 destination type. */
4524 if (multi_step_cvt)
4526 /* At each level of recursion we have half of the operands we had at the
4527 previous level. */
4528 vec_oprnds->truncate ((i+1)/2);
4529 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4530 stmt, vec_dsts, gsi, slp_node,
4531 VEC_PACK_TRUNC_EXPR,
4532 prev_stmt_info);
4535 vec_dsts.quick_push (vec_dest);
4539 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4540 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4541 the resulting vectors and call the function recursively. */
4543 static void
4544 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4545 vec<tree> *vec_oprnds1,
4546 gimple *stmt, tree vec_dest,
4547 gimple_stmt_iterator *gsi,
4548 enum tree_code code1,
4549 enum tree_code code2, tree decl1,
4550 tree decl2, int op_type)
4552 int i;
4553 tree vop0, vop1, new_tmp1, new_tmp2;
4554 gimple *new_stmt1, *new_stmt2;
4555 vec<tree> vec_tmp = vNULL;
4557 vec_tmp.create (vec_oprnds0->length () * 2);
4558 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4560 if (op_type == binary_op)
4561 vop1 = (*vec_oprnds1)[i];
4562 else
4563 vop1 = NULL_TREE;
4565 /* Generate the two halves of the promotion operation. */
4566 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4567 op_type, vec_dest, gsi, stmt);
4568 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4569 op_type, vec_dest, gsi, stmt);
4570 if (is_gimple_call (new_stmt1))
4572 new_tmp1 = gimple_call_lhs (new_stmt1);
4573 new_tmp2 = gimple_call_lhs (new_stmt2);
4575 else
4577 new_tmp1 = gimple_assign_lhs (new_stmt1);
4578 new_tmp2 = gimple_assign_lhs (new_stmt2);
4581 /* Store the results for the next step. */
4582 vec_tmp.quick_push (new_tmp1);
4583 vec_tmp.quick_push (new_tmp2);
4586 vec_oprnds0->release ();
4587 *vec_oprnds0 = vec_tmp;
4591 /* Check if STMT performs a conversion operation, that can be vectorized.
4592 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4593 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4594 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4596 static bool
4597 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4598 stmt_vec_info *vec_stmt, slp_tree slp_node,
4599 stmt_vector_for_cost *cost_vec)
4601 tree vec_dest;
4602 tree scalar_dest;
4603 tree op0, op1 = NULL_TREE;
4604 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4605 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4606 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4607 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4608 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4609 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4610 tree new_temp;
4611 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4612 int ndts = 2;
4613 stmt_vec_info prev_stmt_info;
4614 poly_uint64 nunits_in;
4615 poly_uint64 nunits_out;
4616 tree vectype_out, vectype_in;
4617 int ncopies, i, j;
4618 tree lhs_type, rhs_type;
4619 enum { NARROW, NONE, WIDEN } modifier;
4620 vec<tree> vec_oprnds0 = vNULL;
4621 vec<tree> vec_oprnds1 = vNULL;
4622 tree vop0;
4623 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4624 vec_info *vinfo = stmt_info->vinfo;
4625 int multi_step_cvt = 0;
4626 vec<tree> interm_types = vNULL;
4627 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4628 int op_type;
4629 unsigned short fltsz;
4631 /* Is STMT a vectorizable conversion? */
4633 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4634 return false;
4636 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4637 && ! vec_stmt)
4638 return false;
4640 if (!is_gimple_assign (stmt))
4641 return false;
4643 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4644 return false;
4646 code = gimple_assign_rhs_code (stmt);
4647 if (!CONVERT_EXPR_CODE_P (code)
4648 && code != FIX_TRUNC_EXPR
4649 && code != FLOAT_EXPR
4650 && code != WIDEN_MULT_EXPR
4651 && code != WIDEN_LSHIFT_EXPR)
4652 return false;
4654 op_type = TREE_CODE_LENGTH (code);
4656 /* Check types of lhs and rhs. */
4657 scalar_dest = gimple_assign_lhs (stmt);
4658 lhs_type = TREE_TYPE (scalar_dest);
4659 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4661 op0 = gimple_assign_rhs1 (stmt);
4662 rhs_type = TREE_TYPE (op0);
4664 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4665 && !((INTEGRAL_TYPE_P (lhs_type)
4666 && INTEGRAL_TYPE_P (rhs_type))
4667 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4668 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4669 return false;
4671 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4672 && ((INTEGRAL_TYPE_P (lhs_type)
4673 && !type_has_mode_precision_p (lhs_type))
4674 || (INTEGRAL_TYPE_P (rhs_type)
4675 && !type_has_mode_precision_p (rhs_type))))
4677 if (dump_enabled_p ())
4678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4679 "type conversion to/from bit-precision unsupported."
4680 "\n");
4681 return false;
4684 /* Check the operands of the operation. */
4685 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4687 if (dump_enabled_p ())
4688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4689 "use not simple.\n");
4690 return false;
4692 if (op_type == binary_op)
4694 bool ok;
4696 op1 = gimple_assign_rhs2 (stmt);
4697 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4698 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4699 OP1. */
4700 if (CONSTANT_CLASS_P (op0))
4701 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4702 else
4703 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4705 if (!ok)
4707 if (dump_enabled_p ())
4708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4709 "use not simple.\n");
4710 return false;
4714 /* If op0 is an external or constant def, use a vector type of
4715 the same size as the output vector type. */
4716 if (!vectype_in)
4717 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4718 if (vec_stmt)
4719 gcc_assert (vectype_in);
4720 if (!vectype_in)
4722 if (dump_enabled_p ())
4724 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4725 "no vectype for scalar type ");
4726 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4727 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4730 return false;
4733 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4734 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4736 if (dump_enabled_p ())
4738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4739 "can't convert between boolean and non "
4740 "boolean vectors");
4741 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4742 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4745 return false;
4748 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4749 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4750 if (known_eq (nunits_out, nunits_in))
4751 modifier = NONE;
4752 else if (multiple_p (nunits_out, nunits_in))
4753 modifier = NARROW;
4754 else
4756 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4757 modifier = WIDEN;
4760 /* Multiple types in SLP are handled by creating the appropriate number of
4761 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4762 case of SLP. */
4763 if (slp_node)
4764 ncopies = 1;
4765 else if (modifier == NARROW)
4766 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4767 else
4768 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4770 /* Sanity check: make sure that at least one copy of the vectorized stmt
4771 needs to be generated. */
4772 gcc_assert (ncopies >= 1);
4774 bool found_mode = false;
4775 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4776 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4777 opt_scalar_mode rhs_mode_iter;
4779 /* Supportable by target? */
4780 switch (modifier)
4782 case NONE:
4783 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4784 return false;
4785 if (supportable_convert_operation (code, vectype_out, vectype_in,
4786 &decl1, &code1))
4787 break;
4788 /* FALLTHRU */
4789 unsupported:
4790 if (dump_enabled_p ())
4791 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4792 "conversion not supported by target.\n");
4793 return false;
4795 case WIDEN:
4796 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4797 &code1, &code2, &multi_step_cvt,
4798 &interm_types))
4800 /* Binary widening operation can only be supported directly by the
4801 architecture. */
4802 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4803 break;
4806 if (code != FLOAT_EXPR
4807 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4808 goto unsupported;
4810 fltsz = GET_MODE_SIZE (lhs_mode);
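/* No direct widening support: for integer-to-float conversions try
   successively wider integer modes, looking for one from which the
   float conversion is supported and to which the integer input can
   first be widened.  */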
4811 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4813 rhs_mode = rhs_mode_iter.require ();
4814 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4815 break;
4817 cvt_type
4818 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4819 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4820 if (cvt_type == NULL_TREE)
4821 goto unsupported;
4823 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4825 if (!supportable_convert_operation (code, vectype_out,
4826 cvt_type, &decl1, &codecvt1))
4827 goto unsupported;
4829 else if (!supportable_widening_operation (code, stmt, vectype_out,
4830 cvt_type, &codecvt1,
4831 &codecvt2, &multi_step_cvt,
4832 &interm_types))
4833 continue;
4834 else
4835 gcc_assert (multi_step_cvt == 0);
4837 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4838 vectype_in, &code1, &code2,
4839 &multi_step_cvt, &interm_types))
4841 found_mode = true;
4842 break;
4846 if (!found_mode)
4847 goto unsupported;
4849 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4850 codecvt2 = ERROR_MARK;
4851 else
4853 multi_step_cvt++;
4854 interm_types.safe_push (cvt_type);
4855 cvt_type = NULL_TREE;
4857 break;
4859 case NARROW:
4860 gcc_assert (op_type == unary_op);
4861 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4862 &code1, &multi_step_cvt,
4863 &interm_types))
4864 break;
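/* Direct narrowing is not supported: for a float-to-integer
   conversion that also narrows, first convert to an integer type of
   the source width (CVT_TYPE) and then narrow that result to
   VECTYPE_OUT.  */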
4866 if (code != FIX_TRUNC_EXPR
4867 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4868 goto unsupported;
4870 cvt_type
4871 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4872 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4873 if (cvt_type == NULL_TREE)
4874 goto unsupported;
4875 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4876 &decl1, &codecvt1))
4877 goto unsupported;
4878 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4879 &code1, &multi_step_cvt,
4880 &interm_types))
4881 break;
4882 goto unsupported;
4884 default:
4885 gcc_unreachable ();
4888 if (!vec_stmt) /* transformation not required. */
4890 DUMP_VECT_SCOPE ("vectorizable_conversion");
4891 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4893 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4894 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4895 cost_vec);
4897 else if (modifier == NARROW)
4899 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4900 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4901 cost_vec);
4903 else
4905 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4906 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4907 cost_vec);
4909 interm_types.release ();
4910 return true;
4913 /* Transform. */
4914 if (dump_enabled_p ())
4915 dump_printf_loc (MSG_NOTE, vect_location,
4916 "transform conversion. ncopies = %d.\n", ncopies);
4918 if (op_type == binary_op)
4920 if (CONSTANT_CLASS_P (op0))
4921 op0 = fold_convert (TREE_TYPE (op1), op0);
4922 else if (CONSTANT_CLASS_P (op1))
4923 op1 = fold_convert (TREE_TYPE (op0), op1);
4926 /* In case of multi-step conversion, we first generate conversion operations
4927 to the intermediate types, and then from those types to the final one.
4928 We create vector destinations for the intermediate types (TYPES) received
4929 from supportable_*_operation, and store them in the correct order
4930 for future use in vect_create_vectorized_*_stmts (). */
4931 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4932 vec_dest = vect_create_destination_var (scalar_dest,
4933 (cvt_type && modifier == WIDEN)
4934 ? cvt_type : vectype_out);
4935 vec_dsts.quick_push (vec_dest);
4937 if (multi_step_cvt)
4939 for (i = interm_types.length () - 1;
4940 interm_types.iterate (i, &intermediate_type); i--)
4942 vec_dest = vect_create_destination_var (scalar_dest,
4943 intermediate_type);
4944 vec_dsts.quick_push (vec_dest);
4948 if (cvt_type)
4949 vec_dest = vect_create_destination_var (scalar_dest,
4950 modifier == WIDEN
4951 ? vectype_out : cvt_type);
4953 if (!slp_node)
4955 if (modifier == WIDEN)
4957 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4958 if (op_type == binary_op)
4959 vec_oprnds1.create (1);
4961 else if (modifier == NARROW)
4962 vec_oprnds0.create (
4963 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4965 else if (code == WIDEN_LSHIFT_EXPR)
4966 vec_oprnds1.create (slp_node->vec_stmts_size);
4968 last_oprnd = op0;
4969 prev_stmt_info = NULL;
4970 switch (modifier)
4972 case NONE:
4973 for (j = 0; j < ncopies; j++)
4975 if (j == 0)
4976 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4977 else
4978 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4980 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4982 stmt_vec_info new_stmt_info;
4983 /* Arguments are ready, create the new vector stmt. */
4984 if (code1 == CALL_EXPR)
4986 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
4987 new_temp = make_ssa_name (vec_dest, new_stmt);
4988 gimple_call_set_lhs (new_stmt, new_temp);
4989 new_stmt_info
4990 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
4992 else
4994 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4995 gassign *new_stmt
4996 = gimple_build_assign (vec_dest, code1, vop0);
4997 new_temp = make_ssa_name (vec_dest, new_stmt);
4998 gimple_assign_set_lhs (new_stmt, new_temp);
4999 new_stmt_info
5000 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
5003 if (slp_node)
5004 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5005 else
5007 if (!prev_stmt_info)
5008 STMT_VINFO_VEC_STMT (stmt_info)
5009 = *vec_stmt = new_stmt_info;
5010 else
5011 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5012 prev_stmt_info = new_stmt_info;
5016 break;
5018 case WIDEN:
5019 /* In case the vectorization factor (VF) is bigger than the number
5020 of elements that we can fit in a vectype (nunits), we have to
5021 generate more than one vector stmt - i.e., we need to "unroll"
5022 the vector stmt by a factor VF/nunits. */
5023 for (j = 0; j < ncopies; j++)
5025 /* Handle uses. */
5026 if (j == 0)
5028 if (slp_node)
5030 if (code == WIDEN_LSHIFT_EXPR)
5032 unsigned int k;
5034 vec_oprnd1 = op1;
5035 /* Store vec_oprnd1 for every vector stmt to be created
5036 for SLP_NODE. We check during the analysis that all
5037 the shift arguments are the same. */
5038 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5039 vec_oprnds1.quick_push (vec_oprnd1);
5041 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5042 slp_node);
5044 else
5045 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
5046 &vec_oprnds1, slp_node);
5048 else
5050 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
5051 vec_oprnds0.quick_push (vec_oprnd0);
5052 if (op_type == binary_op)
5054 if (code == WIDEN_LSHIFT_EXPR)
5055 vec_oprnd1 = op1;
5056 else
5057 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
5058 vec_oprnds1.quick_push (vec_oprnd1);
5062 else
5064 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
5065 vec_oprnds0.truncate (0);
5066 vec_oprnds0.quick_push (vec_oprnd0);
5067 if (op_type == binary_op)
5069 if (code == WIDEN_LSHIFT_EXPR)
5070 vec_oprnd1 = op1;
5071 else
5072 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
5073 vec_oprnd1);
5074 vec_oprnds1.truncate (0);
5075 vec_oprnds1.quick_push (vec_oprnd1);
5079 /* Arguments are ready. Create the new vector stmts. */
5080 for (i = multi_step_cvt; i >= 0; i--)
5082 tree this_dest = vec_dsts[i];
5083 enum tree_code c1 = code1, c2 = code2;
5084 if (i == 0 && codecvt2 != ERROR_MARK)
5086 c1 = codecvt1;
5087 c2 = codecvt2;
5089 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5090 &vec_oprnds1,
5091 stmt, this_dest, gsi,
5092 c1, c2, decl1, decl2,
5093 op_type);
5096 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5098 stmt_vec_info new_stmt_info;
5099 if (cvt_type)
5101 if (codecvt1 == CALL_EXPR)
5103 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5104 new_temp = make_ssa_name (vec_dest, new_stmt);
5105 gimple_call_set_lhs (new_stmt, new_temp);
5106 new_stmt_info
5107 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
5109 else
5111 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5112 new_temp = make_ssa_name (vec_dest);
5113 gassign *new_stmt
5114 = gimple_build_assign (new_temp, codecvt1, vop0);
5115 new_stmt_info
5116 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
5119 else
5120 new_stmt_info = vinfo->lookup_def (vop0);
5122 if (slp_node)
5123 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5124 else
5126 if (!prev_stmt_info)
5127 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5128 else
5129 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5130 prev_stmt_info = new_stmt_info;
5135 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5136 break;
5138 case NARROW:
5139 /* In case the vectorization factor (VF) is bigger than the number
5140 of elements that we can fit in a vectype (nunits), we have to
5141 generate more than one vector stmt - i.e., we need to "unroll"
5142 the vector stmt by a factor VF/nunits. */
5143 for (j = 0; j < ncopies; j++)
5145 /* Handle uses. */
5146 if (slp_node)
5147 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5148 slp_node);
5149 else
5151 vec_oprnds0.truncate (0);
5152 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
5153 vect_pow2 (multi_step_cvt) - 1);
5156 /* Arguments are ready. Create the new vector stmts. */
5157 if (cvt_type)
5158 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5160 if (codecvt1 == CALL_EXPR)
5162 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5163 new_temp = make_ssa_name (vec_dest, new_stmt);
5164 gimple_call_set_lhs (new_stmt, new_temp);
5165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5167 else
5169 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5170 new_temp = make_ssa_name (vec_dest);
5171 gassign *new_stmt
5172 = gimple_build_assign (new_temp, codecvt1, vop0);
5173 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5176 vec_oprnds0[i] = new_temp;
5179 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5180 stmt, vec_dsts, gsi,
5181 slp_node, code1,
5182 &prev_stmt_info);
5185 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5186 break;
5189 vec_oprnds0.release ();
5190 vec_oprnds1.release ();
5191 interm_types.release ();
5193 return true;
5197 /* Function vectorizable_assignment.
5199 Check if STMT performs an assignment (copy) that can be vectorized.
5200 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5201 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5202 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5204 static bool
5205 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
5206 stmt_vec_info *vec_stmt, slp_tree slp_node,
5207 stmt_vector_for_cost *cost_vec)
5209 tree vec_dest;
5210 tree scalar_dest;
5211 tree op;
5212 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5213 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5214 tree new_temp;
5215 enum vect_def_type dt[1] = {vect_unknown_def_type};
5216 int ndts = 1;
5217 int ncopies;
5218 int i, j;
5219 vec<tree> vec_oprnds = vNULL;
5220 tree vop;
5221 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5222 vec_info *vinfo = stmt_info->vinfo;
5223 stmt_vec_info prev_stmt_info = NULL;
5224 enum tree_code code;
5225 tree vectype_in;
5227 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5228 return false;
5230 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5231 && ! vec_stmt)
5232 return false;
5234 /* Is vectorizable assignment? */
5235 if (!is_gimple_assign (stmt))
5236 return false;
5238 scalar_dest = gimple_assign_lhs (stmt);
5239 if (TREE_CODE (scalar_dest) != SSA_NAME)
5240 return false;
5242 code = gimple_assign_rhs_code (stmt);
5243 if (gimple_assign_single_p (stmt)
5244 || code == PAREN_EXPR
5245 || CONVERT_EXPR_CODE_P (code))
5246 op = gimple_assign_rhs1 (stmt);
5247 else
5248 return false;
5250 if (code == VIEW_CONVERT_EXPR)
5251 op = TREE_OPERAND (op, 0);
5253 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5254 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5256 /* Multiple types in SLP are handled by creating the appropriate number of
5257 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5258 case of SLP. */
5259 if (slp_node)
5260 ncopies = 1;
5261 else
5262 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5264 gcc_assert (ncopies >= 1);
5266 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5268 if (dump_enabled_p ())
5269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5270 "use not simple.\n");
5271 return false;
5274 /* We can handle NOP_EXPR conversions that do not change the number
5275 of elements or the vector size. */
5276 if ((CONVERT_EXPR_CODE_P (code)
5277 || code == VIEW_CONVERT_EXPR)
5278 && (!vectype_in
5279 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5280 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5281 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5282 return false;
5284 /* We do not handle bit-precision changes. */
5285 if ((CONVERT_EXPR_CODE_P (code)
5286 || code == VIEW_CONVERT_EXPR)
5287 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5288 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5289 || !type_has_mode_precision_p (TREE_TYPE (op)))
5290 /* But a conversion that does not change the bit-pattern is ok. */
5291 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5292 > TYPE_PRECISION (TREE_TYPE (op)))
5293 && TYPE_UNSIGNED (TREE_TYPE (op)))
5294 /* Conversion between boolean types of different sizes is
5295 a simple assignment in case their vectypes are the same
5296 boolean vectors. */
5297 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5298 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5300 if (dump_enabled_p ())
5301 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5302 "type conversion to/from bit-precision "
5303 "unsupported.\n");
5304 return false;
5307 if (!vec_stmt) /* transformation not required. */
5309 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5310 DUMP_VECT_SCOPE ("vectorizable_assignment");
5311 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5312 return true;
5315 /* Transform. */
5316 if (dump_enabled_p ())
5317 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5319 /* Handle def. */
5320 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5322 /* Handle use. */
5323 for (j = 0; j < ncopies; j++)
5325 /* Handle uses. */
5326 if (j == 0)
5327 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
5328 else
5329 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
5331 /* Arguments are ready. Create the new vector stmt. */
5332 stmt_vec_info new_stmt_info = NULL;
5333 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5335 if (CONVERT_EXPR_CODE_P (code)
5336 || code == VIEW_CONVERT_EXPR)
5337 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5338 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5339 new_temp = make_ssa_name (vec_dest, new_stmt);
5340 gimple_assign_set_lhs (new_stmt, new_temp);
5341 new_stmt_info = vect_finish_stmt_generation (stmt, new_stmt, gsi);
5342 if (slp_node)
5343 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5346 if (slp_node)
5347 continue;
5349 if (j == 0)
5350 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5351 else
5352 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5354 prev_stmt_info = new_stmt_info;
5357 vec_oprnds.release ();
5358 return true;
5362 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5363 either as shift by a scalar or by a vector. */
5365 bool
5366 vect_supportable_shift (enum tree_code code, tree scalar_type)
5369 machine_mode vec_mode;
5370 optab optab;
5371 int icode;
5372 tree vectype;
5374 vectype = get_vectype_for_scalar_type (scalar_type);
5375 if (!vectype)
5376 return false;
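/* Prefer the shift-by-scalar optab; if it is not supported for this
   mode, fall back to the vector/vector shift optab.  */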
5378 optab = optab_for_tree_code (code, vectype, optab_scalar);
5379 if (!optab
5380 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5382 optab = optab_for_tree_code (code, vectype, optab_vector);
5383 if (!optab
5384 || (optab_handler (optab, TYPE_MODE (vectype))
5385 == CODE_FOR_nothing))
5386 return false;
5389 vec_mode = TYPE_MODE (vectype);
5390 icode = (int) optab_handler (optab, vec_mode);
5391 if (icode == CODE_FOR_nothing)
5392 return false;
5394 return true;
5398 /* Function vectorizable_shift.
5400 Check if STMT performs a shift operation that can be vectorized.
5401 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5402 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5403 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5405 static bool
5406 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
5407 stmt_vec_info *vec_stmt, slp_tree slp_node,
5408 stmt_vector_for_cost *cost_vec)
5410 tree vec_dest;
5411 tree scalar_dest;
5412 tree op0, op1 = NULL;
5413 tree vec_oprnd1 = NULL_TREE;
5414 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5415 tree vectype;
5416 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5417 enum tree_code code;
5418 machine_mode vec_mode;
5419 tree new_temp;
5420 optab optab;
5421 int icode;
5422 machine_mode optab_op2_mode;
5423 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5424 int ndts = 2;
5425 stmt_vec_info prev_stmt_info;
5426 poly_uint64 nunits_in;
5427 poly_uint64 nunits_out;
5428 tree vectype_out;
5429 tree op1_vectype;
5430 int ncopies;
5431 int j, i;
5432 vec<tree> vec_oprnds0 = vNULL;
5433 vec<tree> vec_oprnds1 = vNULL;
5434 tree vop0, vop1;
5435 unsigned int k;
5436 bool scalar_shift_arg = true;
5437 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5438 vec_info *vinfo = stmt_info->vinfo;
5440 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5441 return false;
5443 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5444 && ! vec_stmt)
5445 return false;
5447 /* Is STMT a vectorizable binary/unary operation? */
5448 if (!is_gimple_assign (stmt))
5449 return false;
5451 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5452 return false;
5454 code = gimple_assign_rhs_code (stmt);
5456 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5457 || code == RROTATE_EXPR))
5458 return false;
5460 scalar_dest = gimple_assign_lhs (stmt);
5461 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5462 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5464 if (dump_enabled_p ())
5465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5466 "bit-precision shifts not supported.\n");
5467 return false;
5470 op0 = gimple_assign_rhs1 (stmt);
5471 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5473 if (dump_enabled_p ())
5474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5475 "use not simple.\n");
5476 return false;
5478 /* If op0 is an external or constant def, use a vector type with
5479 the same size as the output vector type. */
5480 if (!vectype)
5481 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5482 if (vec_stmt)
5483 gcc_assert (vectype);
5484 if (!vectype)
5486 if (dump_enabled_p ())
5487 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5488 "no vectype for scalar type\n");
5489 return false;
5492 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5493 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5494 if (maybe_ne (nunits_out, nunits_in))
5495 return false;
5497 op1 = gimple_assign_rhs2 (stmt);
5498 stmt_vec_info op1_def_stmt_info;
5499 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5500 &op1_def_stmt_info))
5502 if (dump_enabled_p ())
5503 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5504 "use not simple.\n");
5505 return false;
5508 /* Multiple types in SLP are handled by creating the appropriate number of
5509 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5510 case of SLP. */
5511 if (slp_node)
5512 ncopies = 1;
5513 else
5514 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5516 gcc_assert (ncopies >= 1);
5518 /* Determine whether the shift amount is a vector, or scalar. If the
5519 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5521 if ((dt[1] == vect_internal_def
5522 || dt[1] == vect_induction_def)
5523 && !slp_node)
5524 scalar_shift_arg = false;
5525 else if (dt[1] == vect_constant_def
5526 || dt[1] == vect_external_def
5527 || dt[1] == vect_internal_def)
5529 /* In SLP, we need to check whether the shift count is the same
5530 for all statements; in a loop, a constant or invariant count
5531 is always a scalar shift. */
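/* E.g. an SLP group { x0 << n, x1 << n } can keep N as a scalar
operand, whereas { x0 << n0, x1 << n1 } has to fall back to a
vector/vector shift. */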
5532 if (slp_node)
5534 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5535 stmt_vec_info slpstmt_info;
5537 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5539 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5540 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5541 scalar_shift_arg = false;
5545 /* If the shift amount is computed by a pattern stmt we cannot
5546 use the scalar amount directly thus give up and use a vector
5547 shift. */
5548 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5549 scalar_shift_arg = false;
5551 else
5553 if (dump_enabled_p ())
5554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5555 "operand mode requires invariant argument.\n");
5556 return false;
5559 /* Vector shifted by vector. */
5560 if (!scalar_shift_arg)
5562 optab = optab_for_tree_code (code, vectype, optab_vector);
5563 if (dump_enabled_p ())
5564 dump_printf_loc (MSG_NOTE, vect_location,
5565 "vector/vector shift/rotate found.\n");
5567 if (!op1_vectype)
5568 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5569 if (op1_vectype == NULL_TREE
5570 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5572 if (dump_enabled_p ())
5573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5574 "unusable type for last operand in"
5575 " vector/vector shift/rotate.\n");
5576 return false;
5579 /* See if the machine has a vector shifted by scalar insn and if not
5580 then see if it has a vector shifted by vector insn. */
5581 else
5583 optab = optab_for_tree_code (code, vectype, optab_scalar);
5584 if (optab
5585 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5587 if (dump_enabled_p ())
5588 dump_printf_loc (MSG_NOTE, vect_location,
5589 "vector/scalar shift/rotate found.\n");
5591 else
5593 optab = optab_for_tree_code (code, vectype, optab_vector);
5594 if (optab
5595 && (optab_handler (optab, TYPE_MODE (vectype))
5596 != CODE_FOR_nothing))
5598 scalar_shift_arg = false;
5600 if (dump_enabled_p ())
5601 dump_printf_loc (MSG_NOTE, vect_location,
5602 "vector/vector shift/rotate found.\n");
5604 /* Unlike the other binary operators, shifts/rotates allow
5605 the rhs to be an int rather than the same type as the lhs,
5606 so make sure the scalar shift amount has the right type when
5607 we are dealing with vectors of long long/long/short/char. */
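/* E.g. for a vector of long long shifted by the constant 3, the
constant is converted from int to long long here so that it can
later be used directly or broadcast into a vector operand. */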
5608 if (dt[1] == vect_constant_def)
5609 op1 = fold_convert (TREE_TYPE (vectype), op1);
5610 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5611 TREE_TYPE (op1)))
5613 if (slp_node
5614 && TYPE_MODE (TREE_TYPE (vectype))
5615 != TYPE_MODE (TREE_TYPE (op1)))
5617 if (dump_enabled_p ())
5618 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5619 "unusable type for last operand in"
5620 " vector/vector shift/rotate.\n");
5621 return false;
5623 if (vec_stmt && !slp_node)
5625 op1 = fold_convert (TREE_TYPE (vectype), op1);
5626 op1 = vect_init_vector (stmt, op1,
5627 TREE_TYPE (vectype), NULL);
5634 /* Supportable by target? */
5635 if (!optab)
5637 if (dump_enabled_p ())
5638 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5639 "no optab.\n");
5640 return false;
5642 vec_mode = TYPE_MODE (vectype);
5643 icode = (int) optab_handler (optab, vec_mode);
5644 if (icode == CODE_FOR_nothing)
5646 if (dump_enabled_p ())
5647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5648 "op not supported by target.\n");
5649 /* Check only during analysis. */
5650 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5651 || (!vec_stmt
5652 && !vect_worthwhile_without_simd_p (vinfo, code)))
5653 return false;
5654 if (dump_enabled_p ())
5655 dump_printf_loc (MSG_NOTE, vect_location,
5656 "proceeding using word mode.\n");
5659 /* Worthwhile without SIMD support? Check only during analysis. */
5660 if (!vec_stmt
5661 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5662 && !vect_worthwhile_without_simd_p (vinfo, code))
5664 if (dump_enabled_p ())
5665 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5666 "not worthwhile without SIMD support.\n");
5667 return false;
5670 if (!vec_stmt) /* transformation not required. */
5672 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5673 DUMP_VECT_SCOPE ("vectorizable_shift");
5674 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5675 return true;
5678 /* Transform. */
5680 if (dump_enabled_p ())
5681 dump_printf_loc (MSG_NOTE, vect_location,
5682 "transform binary/unary operation.\n");
5684 /* Handle def. */
5685 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5687 prev_stmt_info = NULL;
5688 for (j = 0; j < ncopies; j++)
5690 /* Handle uses. */
5691 if (j == 0)
5693 if (scalar_shift_arg)
5695 /* Vector shl and shr insn patterns can be defined with scalar
5696 operand 2 (shift operand). In this case, use constant or loop
5697 invariant op1 directly, without extending it to vector mode
5698 first. */
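/* I.e. if the pattern's operand 2 mode is not a vector mode, the
invariant OP1 can be passed through unchanged; a vector-mode
operand 2 would instead require broadcasting OP1 into a vector. */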
5699 optab_op2_mode = insn_data[icode].operand[2].mode;
5700 if (!VECTOR_MODE_P (optab_op2_mode))
5702 if (dump_enabled_p ())
5703 dump_printf_loc (MSG_NOTE, vect_location,
5704 "operand 1 using scalar mode.\n");
5705 vec_oprnd1 = op1;
5706 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5707 vec_oprnds1.quick_push (vec_oprnd1);
5708 if (slp_node)
5710 /* Store vec_oprnd1 for every vector stmt to be created
5711 for SLP_NODE. We check during the analysis that all
5712 the shift arguments are the same.
5713 TODO: Allow different constants for different vector
5714 stmts generated for an SLP instance. */
5715 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5716 vec_oprnds1.quick_push (vec_oprnd1);
5721 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5722 (a special case for certain kinds of vector shifts); otherwise,
5723 operand 1 should be of a vector type (the usual case). */
5724 if (vec_oprnd1)
5725 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5726 slp_node);
5727 else
5728 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5729 slp_node);
5731 else
5732 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5734 /* Arguments are ready. Create the new vector stmt. */
5735 stmt_vec_info new_stmt_info = NULL;
5736 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5738 vop1 = vec_oprnds1[i];
5739 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5740 new_temp = make_ssa_name (vec_dest, new_stmt);
5741 gimple_assign_set_lhs (new_stmt, new_temp);
5742 new_stmt_info = vect_finish_stmt_generation (stmt, new_stmt, gsi);
5743 if (slp_node)
5744 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5747 if (slp_node)
5748 continue;
5750 if (j == 0)
5751 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5752 else
5753 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5754 prev_stmt_info = new_stmt_info;
5757 vec_oprnds0.release ();
5758 vec_oprnds1.release ();
5760 return true;
5764 /* Function vectorizable_operation.
5766 Check if STMT performs a binary, unary or ternary operation that can
5767 be vectorized.
5768 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5769 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5770 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5772 static bool
5773 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5774 stmt_vec_info *vec_stmt, slp_tree slp_node,
5775 stmt_vector_for_cost *cost_vec)
5777 tree vec_dest;
5778 tree scalar_dest;
5779 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5780 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5781 tree vectype;
5782 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5783 enum tree_code code, orig_code;
5784 machine_mode vec_mode;
5785 tree new_temp;
5786 int op_type;
5787 optab optab;
5788 bool target_support_p;
5789 enum vect_def_type dt[3]
5790 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5791 int ndts = 3;
5792 stmt_vec_info prev_stmt_info;
5793 poly_uint64 nunits_in;
5794 poly_uint64 nunits_out;
5795 tree vectype_out;
5796 int ncopies;
5797 int j, i;
5798 vec<tree> vec_oprnds0 = vNULL;
5799 vec<tree> vec_oprnds1 = vNULL;
5800 vec<tree> vec_oprnds2 = vNULL;
5801 tree vop0, vop1, vop2;
5802 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5803 vec_info *vinfo = stmt_info->vinfo;
5805 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5806 return false;
5808 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5809 && ! vec_stmt)
5810 return false;
5812 /* Is STMT a vectorizable binary/unary operation? */
5813 if (!is_gimple_assign (stmt))
5814 return false;
5816 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5817 return false;
5819 orig_code = code = gimple_assign_rhs_code (stmt);
5821 /* For pointer addition and subtraction, we should use the normal
5822 plus and minus for the vector operation. */
5823 if (code == POINTER_PLUS_EXPR)
5824 code = PLUS_EXPR;
5825 if (code == POINTER_DIFF_EXPR)
5826 code = MINUS_EXPR;
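/* E.g. a POINTER_PLUS_EXPR becomes an ordinary element-wise PLUS_EXPR
on the (unsigned) vector elements, while for POINTER_DIFF_EXPR the
signed result type is recovered with a VIEW_CONVERT_EXPR during the
transform phase below. */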
5828 /* Support only unary or binary operations. */
5829 op_type = TREE_CODE_LENGTH (code);
5830 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5832 if (dump_enabled_p ())
5833 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5834 "num. args = %d (not unary/binary/ternary op).\n",
5835 op_type);
5836 return false;
5839 scalar_dest = gimple_assign_lhs (stmt);
5840 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5842 /* Most operations cannot handle bit-precision types without extra
5843 truncations. */
5844 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5845 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5846 /* Exception are bitwise binary operations. */
5847 && code != BIT_IOR_EXPR
5848 && code != BIT_XOR_EXPR
5849 && code != BIT_AND_EXPR)
5851 if (dump_enabled_p ())
5852 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5853 "bit-precision arithmetic not supported.\n");
5854 return false;
5857 op0 = gimple_assign_rhs1 (stmt);
5858 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5860 if (dump_enabled_p ())
5861 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5862 "use not simple.\n");
5863 return false;
5865 /* If op0 is an external or constant def, use a vector type with
5866 the same size as the output vector type. */
5867 if (!vectype)
5869 /* For boolean type we cannot determine vectype by
5870 invariant value (don't know whether it is a vector
5871 of booleans or vector of integers). We use output
5872 vectype because operations on boolean don't change
5873 type. */
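/* E.g. for a BIT_AND_EXPR whose first operand is an invariant _Bool,
the invariant alone does not tell us whether a boolean (mask) vector
or an integer vector is wanted, so we reuse the already computed
output vectype. */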
5874 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5876 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5878 if (dump_enabled_p ())
5879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5880 "not supported operation on bool value.\n");
5881 return false;
5883 vectype = vectype_out;
5885 else
5886 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5888 if (vec_stmt)
5889 gcc_assert (vectype);
5890 if (!vectype)
5892 if (dump_enabled_p ())
5894 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5895 "no vectype for scalar type ");
5896 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5897 TREE_TYPE (op0));
5898 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5901 return false;
5904 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5905 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5906 if (maybe_ne (nunits_out, nunits_in))
5907 return false;
5909 if (op_type == binary_op || op_type == ternary_op)
5911 op1 = gimple_assign_rhs2 (stmt);
5912 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
5914 if (dump_enabled_p ())
5915 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5916 "use not simple.\n");
5917 return false;
5920 if (op_type == ternary_op)
5922 op2 = gimple_assign_rhs3 (stmt);
5923 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
5925 if (dump_enabled_p ())
5926 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5927 "use not simple.\n");
5928 return false;
5932 /* Multiple types in SLP are handled by creating the appropriate number of
5933 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5934 case of SLP. */
5935 if (slp_node)
5936 ncopies = 1;
5937 else
5938 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5940 gcc_assert (ncopies >= 1);
5942 /* Shifts are handled in vectorizable_shift (). */
5943 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5944 || code == RROTATE_EXPR)
5945 return false;
5947 /* Supportable by target? */
5949 vec_mode = TYPE_MODE (vectype);
5950 if (code == MULT_HIGHPART_EXPR)
5951 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5952 else
5954 optab = optab_for_tree_code (code, vectype, optab_default);
5955 if (!optab)
5957 if (dump_enabled_p ())
5958 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5959 "no optab.\n");
5960 return false;
5962 target_support_p = (optab_handler (optab, vec_mode)
5963 != CODE_FOR_nothing);
5966 if (!target_support_p)
5968 if (dump_enabled_p ())
5969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5970 "op not supported by target.\n");
5971 /* Check only during analysis. */
5972 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5973 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5974 return false;
5975 if (dump_enabled_p ())
5976 dump_printf_loc (MSG_NOTE, vect_location,
5977 "proceeding using word mode.\n");
5980 /* Worthwhile without SIMD support? Check only during analysis. */
5981 if (!VECTOR_MODE_P (vec_mode)
5982 && !vec_stmt
5983 && !vect_worthwhile_without_simd_p (vinfo, code))
5985 if (dump_enabled_p ())
5986 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5987 "not worthwhile without SIMD support.\n");
5988 return false;
5991 if (!vec_stmt) /* transformation not required. */
5993 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5994 DUMP_VECT_SCOPE ("vectorizable_operation");
5995 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5996 return true;
5999 /* Transform. */
6001 if (dump_enabled_p ())
6002 dump_printf_loc (MSG_NOTE, vect_location,
6003 "transform binary/unary operation.\n");
6005 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6006 vectors with unsigned elements, but the result is signed. So, we
6007 need to compute the MINUS_EXPR into a vectype temporary and
6008 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6009 tree vec_cvt_dest = NULL_TREE;
6010 if (orig_code == POINTER_DIFF_EXPR)
6012 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6013 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6015 /* Handle def. */
6016 else
6017 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6019 /* In case the vectorization factor (VF) is bigger than the number
6020 of elements that we can fit in a vectype (nunits), we have to generate
6021 more than one vector stmt - i.e - we need to "unroll" the
6022 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6023 from one copy of the vector stmt to the next, in the field
6024 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6025 stages to find the correct vector defs to be used when vectorizing
6026 stmts that use the defs of the current stmt. The example below
6027 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6028 we need to create 4 vectorized stmts):
6030 before vectorization:
6031 RELATED_STMT VEC_STMT
6032 S1: x = memref - -
6033 S2: z = x + 1 - -
6035 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6036 there):
6037 RELATED_STMT VEC_STMT
6038 VS1_0: vx0 = memref0 VS1_1 -
6039 VS1_1: vx1 = memref1 VS1_2 -
6040 VS1_2: vx2 = memref2 VS1_3 -
6041 VS1_3: vx3 = memref3 - -
6042 S1: x = load - VS1_0
6043 S2: z = x + 1 - -
6045 step2: vectorize stmt S2 (done here):
6046 To vectorize stmt S2 we first need to find the relevant vector
6047 def for the first operand 'x'. This is, as usual, obtained from
6048 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6049 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6050 relevant vector def 'vx0'. Having found 'vx0' we can generate
6051 the vector stmt VS2_0, and as usual, record it in the
6052 STMT_VINFO_VEC_STMT of stmt S2.
6053 When creating the second copy (VS2_1), we obtain the relevant vector
6054 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6055 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6056 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6057 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6058 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6059 chain of stmts and pointers:
6060 RELATED_STMT VEC_STMT
6061 VS1_0: vx0 = memref0 VS1_1 -
6062 VS1_1: vx1 = memref1 VS1_2 -
6063 VS1_2: vx2 = memref2 VS1_3 -
6064 VS1_3: vx3 = memref3 - -
6065 S1: x = load - VS1_0
6066 VS2_0: vz0 = vx0 + v1 VS2_1 -
6067 VS2_1: vz1 = vx1 + v1 VS2_2 -
6068 VS2_2: vz2 = vx2 + v1 VS2_3 -
6069 VS2_3: vz3 = vx3 + v1 - -
6070 S2: z = x + 1 - VS2_0 */
6072 prev_stmt_info = NULL;
6073 for (j = 0; j < ncopies; j++)
6075 /* Handle uses. */
6076 if (j == 0)
6078 if (op_type == binary_op)
6079 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
6080 slp_node);
6081 else if (op_type == ternary_op)
6083 if (slp_node)
6085 auto_vec<tree> ops(3);
6086 ops.quick_push (op0);
6087 ops.quick_push (op1);
6088 ops.quick_push (op2);
6089 auto_vec<vec<tree> > vec_defs(3);
6090 vect_get_slp_defs (ops, slp_node, &vec_defs);
6091 vec_oprnds0 = vec_defs[0];
6092 vec_oprnds1 = vec_defs[1];
6093 vec_oprnds2 = vec_defs[2];
6095 else
6097 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
6098 NULL);
6099 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
6100 NULL);
6103 else
6104 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
6105 slp_node);
6107 else
6109 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
6110 if (op_type == ternary_op)
6112 tree vec_oprnd = vec_oprnds2.pop ();
6113 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
6114 vec_oprnd));
6118 /* Arguments are ready. Create the new vector stmt. */
6119 stmt_vec_info new_stmt_info = NULL;
6120 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6122 vop1 = ((op_type == binary_op || op_type == ternary_op)
6123 ? vec_oprnds1[i] : NULL_TREE);
6124 vop2 = ((op_type == ternary_op)
6125 ? vec_oprnds2[i] : NULL_TREE);
6126 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6127 vop0, vop1, vop2);
6128 new_temp = make_ssa_name (vec_dest, new_stmt);
6129 gimple_assign_set_lhs (new_stmt, new_temp);
6130 new_stmt_info = vect_finish_stmt_generation (stmt, new_stmt, gsi);
6131 if (vec_cvt_dest)
6133 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6134 gassign *new_stmt
6135 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6136 new_temp);
6137 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6138 gimple_assign_set_lhs (new_stmt, new_temp);
6139 new_stmt_info
6140 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
6142 if (slp_node)
6143 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6146 if (slp_node)
6147 continue;
6149 if (j == 0)
6150 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6151 else
6152 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6153 prev_stmt_info = new_stmt_info;
6156 vec_oprnds0.release ();
6157 vec_oprnds1.release ();
6158 vec_oprnds2.release ();
6160 return true;
6163 /* A helper function to ensure data reference DR's base alignment. */
6165 static void
6166 ensure_base_align (struct data_reference *dr)
6168 if (DR_VECT_AUX (dr)->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6169 return;
6171 if (DR_VECT_AUX (dr)->base_misaligned)
6173 tree base_decl = DR_VECT_AUX (dr)->base_decl;
6175 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6177 if (decl_in_symtab_p (base_decl))
6178 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6179 else
6181 SET_DECL_ALIGN (base_decl, align_base_to);
6182 DECL_USER_ALIGN (base_decl) = 1;
6184 DR_VECT_AUX (dr)->base_misaligned = false;
6189 /* Function get_group_alias_ptr_type.
6191 Return the alias type for the group starting at FIRST_STMT. */
6193 static tree
6194 get_group_alias_ptr_type (gimple *first_stmt)
6196 struct data_reference *first_dr, *next_dr;
6198 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6199 stmt_vec_info next_stmt_info
6200 = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
6201 while (next_stmt_info)
6203 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6204 if (get_alias_set (DR_REF (first_dr))
6205 != get_alias_set (DR_REF (next_dr)))
6207 if (dump_enabled_p ())
6208 dump_printf_loc (MSG_NOTE, vect_location,
6209 "conflicting alias set types.\n");
6210 return ptr_type_node;
6212 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6214 return reference_alias_ptr_type (DR_REF (first_dr));
6218 /* Function vectorizable_store.
6220 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
6221 can be vectorized.
6222 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6223 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6224 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6226 static bool
6227 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi,
6228 stmt_vec_info *vec_stmt, slp_tree slp_node,
6229 stmt_vector_for_cost *cost_vec)
6231 tree data_ref;
6232 tree op;
6233 tree vec_oprnd = NULL_TREE;
6234 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6235 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6236 tree elem_type;
6237 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6238 struct loop *loop = NULL;
6239 machine_mode vec_mode;
6240 tree dummy;
6241 enum dr_alignment_support alignment_support_scheme;
6242 enum vect_def_type rhs_dt = vect_unknown_def_type;
6243 enum vect_def_type mask_dt = vect_unknown_def_type;
6244 stmt_vec_info prev_stmt_info = NULL;
6245 tree dataref_ptr = NULL_TREE;
6246 tree dataref_offset = NULL_TREE;
6247 gimple *ptr_incr = NULL;
6248 int ncopies;
6249 int j;
6250 stmt_vec_info first_stmt_info;
6251 bool grouped_store;
6252 unsigned int group_size, i;
6253 vec<tree> oprnds = vNULL;
6254 vec<tree> result_chain = vNULL;
6255 bool inv_p;
6256 tree offset = NULL_TREE;
6257 vec<tree> vec_oprnds = vNULL;
6258 bool slp = (slp_node != NULL);
6259 unsigned int vec_num;
6260 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6261 vec_info *vinfo = stmt_info->vinfo;
6262 tree aggr_type;
6263 gather_scatter_info gs_info;
6264 poly_uint64 vf;
6265 vec_load_store_type vls_type;
6266 tree ref_type;
6268 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6269 return false;
6271 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6272 && ! vec_stmt)
6273 return false;
6275 /* Is vectorizable store? */
6277 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6278 if (gassign *assign = dyn_cast <gassign *> (stmt))
6280 tree scalar_dest = gimple_assign_lhs (assign);
6281 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6282 && is_pattern_stmt_p (stmt_info))
6283 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6284 if (TREE_CODE (scalar_dest) != ARRAY_REF
6285 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6286 && TREE_CODE (scalar_dest) != INDIRECT_REF
6287 && TREE_CODE (scalar_dest) != COMPONENT_REF
6288 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6289 && TREE_CODE (scalar_dest) != REALPART_EXPR
6290 && TREE_CODE (scalar_dest) != MEM_REF)
6291 return false;
6293 else
6295 gcall *call = dyn_cast <gcall *> (stmt);
6296 if (!call || !gimple_call_internal_p (call))
6297 return false;
6299 internal_fn ifn = gimple_call_internal_fn (call);
6300 if (!internal_store_fn_p (ifn))
6301 return false;
6303 if (slp_node != NULL)
6305 if (dump_enabled_p ())
6306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6307 "SLP of masked stores not supported.\n");
6308 return false;
6311 int mask_index = internal_fn_mask_index (ifn);
6312 if (mask_index >= 0)
6314 mask = gimple_call_arg (call, mask_index);
6315 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6316 &mask_vectype))
6317 return false;
6321 op = vect_get_store_rhs (stmt);
6323 /* Cannot have hybrid store SLP -- that would mean storing to the
6324 same location twice. */
6325 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6327 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6328 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6330 if (loop_vinfo)
6332 loop = LOOP_VINFO_LOOP (loop_vinfo);
6333 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6335 else
6336 vf = 1;
6338 /* Multiple types in SLP are handled by creating the appropriate number of
6339 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6340 case of SLP. */
6341 if (slp)
6342 ncopies = 1;
6343 else
6344 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6346 gcc_assert (ncopies >= 1);
6348 /* FORNOW. This restriction should be relaxed. */
6349 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6351 if (dump_enabled_p ())
6352 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6353 "multiple types in nested loop.\n");
6354 return false;
6357 if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
6358 return false;
6360 elem_type = TREE_TYPE (vectype);
6361 vec_mode = TYPE_MODE (vectype);
6363 if (!STMT_VINFO_DATA_REF (stmt_info))
6364 return false;
6366 vect_memory_access_type memory_access_type;
6367 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
6368 &memory_access_type, &gs_info))
6369 return false;
6371 if (mask)
6373 if (memory_access_type == VMAT_CONTIGUOUS)
6375 if (!VECTOR_MODE_P (vec_mode)
6376 || !can_vec_mask_load_store_p (vec_mode,
6377 TYPE_MODE (mask_vectype), false))
6378 return false;
6380 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6381 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
6383 if (dump_enabled_p ())
6384 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6385 "unsupported access type for masked store.\n");
6386 return false;
6389 else
6391 /* FORNOW. In some cases we can vectorize even if the data type is not
6392 supported (e.g. array initialization with 0). */
6393 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6394 return false;
6397 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6398 && memory_access_type != VMAT_GATHER_SCATTER
6399 && (slp || memory_access_type != VMAT_CONTIGUOUS));
6400 if (grouped_store)
6402 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
6403 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6404 group_size = DR_GROUP_SIZE (first_stmt_info);
6406 else
6408 first_stmt_info = stmt_info;
6409 first_dr = dr;
6410 group_size = vec_num = 1;
6413 if (!vec_stmt) /* transformation not required. */
6415 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6417 if (loop_vinfo
6418 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6419 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6420 memory_access_type, &gs_info);
6422 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6423 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6424 vls_type, slp_node, cost_vec);
6425 return true;
6427 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6429 /* Transform. */
6431 ensure_base_align (dr);
6433 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6435 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6436 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6437 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6438 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6439 edge pe = loop_preheader_edge (loop);
6440 gimple_seq seq;
6441 basic_block new_bb;
6442 enum { NARROW, NONE, WIDEN } modifier;
6443 poly_uint64 scatter_off_nunits
6444 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6446 if (known_eq (nunits, scatter_off_nunits))
6447 modifier = NONE;
6448 else if (known_eq (nunits * 2, scatter_off_nunits))
6450 modifier = WIDEN;
6452 /* Currently gathers and scatters are only supported for
6453 fixed-length vectors. */
6454 unsigned int count = scatter_off_nunits.to_constant ();
6455 vec_perm_builder sel (count, count, 1);
6456 for (i = 0; i < (unsigned int) count; ++i)
6457 sel.quick_push (i | (count / 2));
6459 vec_perm_indices indices (sel, 1, count);
6460 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6461 indices);
6462 gcc_assert (perm_mask != NULL_TREE);
6464 else if (known_eq (nunits, scatter_off_nunits * 2))
6466 modifier = NARROW;
6468 /* Currently gathers and scatters are only supported for
6469 fixed-length vectors. */
6470 unsigned int count = nunits.to_constant ();
6471 vec_perm_builder sel (count, count, 1);
6472 for (i = 0; i < (unsigned int) count; ++i)
6473 sel.quick_push (i | (count / 2));
6475 vec_perm_indices indices (sel, 2, count);
6476 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6477 gcc_assert (perm_mask != NULL_TREE);
6478 ncopies *= 2;
6480 else
6481 gcc_unreachable ();
6483 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6484 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6485 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6486 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6487 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6488 scaletype = TREE_VALUE (arglist);
6490 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6491 && TREE_CODE (rettype) == VOID_TYPE);
6493 ptr = fold_convert (ptrtype, gs_info.base);
6494 if (!is_gimple_min_invariant (ptr))
6496 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6497 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6498 gcc_assert (!new_bb);
6501 /* Currently we support only unconditional scatter stores,
6502 so mask should be all ones. */
6503 mask = build_int_cst (masktype, -1);
6504 mask = vect_init_vector (stmt, mask, masktype, NULL);
6506 scale = build_int_cst (scaletype, gs_info.scale);
6508 prev_stmt_info = NULL;
6509 for (j = 0; j < ncopies; ++j)
6511 if (j == 0)
6513 src = vec_oprnd1
6514 = vect_get_vec_def_for_operand (op, stmt);
6515 op = vec_oprnd0
6516 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6518 else if (modifier != NONE && (j & 1))
6520 if (modifier == WIDEN)
6522 src = vec_oprnd1
6523 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6524 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6525 stmt, gsi);
6527 else if (modifier == NARROW)
6529 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6530 stmt, gsi);
6531 op = vec_oprnd0
6532 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6533 vec_oprnd0);
6535 else
6536 gcc_unreachable ();
6538 else
6540 src = vec_oprnd1
6541 = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6542 op = vec_oprnd0
6543 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6544 vec_oprnd0);
6547 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6549 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6550 TYPE_VECTOR_SUBPARTS (srctype)));
6551 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6552 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6553 gassign *new_stmt
6554 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6555 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6556 src = var;
6559 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6561 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6562 TYPE_VECTOR_SUBPARTS (idxtype)));
6563 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6564 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6565 gassign *new_stmt
6566 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6567 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6568 op = var;
6571 gcall *new_stmt
6572 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
6573 stmt_vec_info new_stmt_info
6574 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
6576 if (prev_stmt_info == NULL_STMT_VEC_INFO)
6577 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6578 else
6579 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6580 prev_stmt_info = new_stmt_info;
6582 return true;
6585 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6586 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
6588 if (grouped_store)
6590 /* FORNOW */
6591 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
6593 /* We vectorize all the stmts of the interleaving group when we
6594 reach the last stmt in the group. */
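/* DR_GROUP_STORE_COUNT (bumped just above) tracks how many stores of
the group have been encountered so far; for a non-SLP store we only
record a NULL vector stmt and return until the count reaches
DR_GROUP_SIZE. */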
6595 if (DR_GROUP_STORE_COUNT (first_stmt_info)
6596 < DR_GROUP_SIZE (first_stmt_info)
6597 && !slp)
6599 *vec_stmt = NULL;
6600 return true;
6603 if (slp)
6605 grouped_store = false;
6606 /* VEC_NUM is the number of vect stmts to be created for this
6607 group. */
6608 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6609 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6610 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
6611 == first_stmt_info);
6612 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6613 op = vect_get_store_rhs (first_stmt_info);
6615 else
6616 /* VEC_NUM is the number of vect stmts to be created for this
6617 group. */
6618 vec_num = group_size;
6620 ref_type = get_group_alias_ptr_type (first_stmt_info);
6622 else
6623 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6625 if (dump_enabled_p ())
6626 dump_printf_loc (MSG_NOTE, vect_location,
6627 "transform store. ncopies = %d\n", ncopies);
6629 if (memory_access_type == VMAT_ELEMENTWISE
6630 || memory_access_type == VMAT_STRIDED_SLP)
6632 gimple_stmt_iterator incr_gsi;
6633 bool insert_after;
6634 gimple *incr;
6635 tree offvar;
6636 tree ivstep;
6637 tree running_off;
6638 tree stride_base, stride_step, alias_off;
6639 tree vec_oprnd;
6640 unsigned int g;
6641 /* Checked by get_load_store_type. */
6642 unsigned int const_nunits = nunits.to_constant ();
6644 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6645 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6647 stride_base
6648 = fold_build_pointer_plus
6649 (DR_BASE_ADDRESS (first_dr),
6650 size_binop (PLUS_EXPR,
6651 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6652 convert_to_ptrofftype (DR_INIT (first_dr))));
6653 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6655 /* For a store with loop-invariant (but other than power-of-2)
6656 stride (i.e. not a grouped access) like so:
6658 for (i = 0; i < n; i += stride)
6659 array[i] = ...;
6661 we generate a new induction variable and new stores from
6662 the components of the (vectorized) rhs:
6664 for (j = 0; ; j += VF*stride)
6665 vectemp = ...;
6666 tmp1 = vectemp[0];
6667 array[j] = tmp1;
6668 tmp2 = vectemp[1];
6669 array[j + stride] = tmp2;
6673 unsigned nstores = const_nunits;
6674 unsigned lnel = 1;
6675 tree ltype = elem_type;
6676 tree lvectype = vectype;
6677 if (slp)
6679 if (group_size < const_nunits
6680 && const_nunits % group_size == 0)
6682 nstores = const_nunits / group_size;
6683 lnel = group_size;
6684 ltype = build_vector_type (elem_type, group_size);
6685 lvectype = vectype;
6687 /* First check if vec_extract optab doesn't support extraction
6688 of vector elts directly. */
6689 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6690 machine_mode vmode;
6691 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6692 || !VECTOR_MODE_P (vmode)
6693 || !targetm.vector_mode_supported_p (vmode)
6694 || (convert_optab_handler (vec_extract_optab,
6695 TYPE_MODE (vectype), vmode)
6696 == CODE_FOR_nothing))
6698 /* Try to avoid emitting an extract of vector elements
6699 by performing the extracts using an integer type of the
6700 same size, extracting from a vector of those and then
6701 re-interpreting it as the original vector type if
6702 supported. */
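/* E.g. for a group of two SImode elements stored from a V4SImode
vector, view the vector as V2DImode and extract/store DImode chunks
so that each scalar store covers one whole group. */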
6703 unsigned lsize
6704 = group_size * GET_MODE_BITSIZE (elmode);
6705 elmode = int_mode_for_size (lsize, 0).require ();
6706 unsigned int lnunits = const_nunits / group_size;
6707 /* If we can't construct such a vector fall back to
6708 element extracts from the original vector type and
6709 element size stores. */
6710 if (mode_for_vector (elmode, lnunits).exists (&vmode)
6711 && VECTOR_MODE_P (vmode)
6712 && targetm.vector_mode_supported_p (vmode)
6713 && (convert_optab_handler (vec_extract_optab,
6714 vmode, elmode)
6715 != CODE_FOR_nothing))
6717 nstores = lnunits;
6718 lnel = group_size;
6719 ltype = build_nonstandard_integer_type (lsize, 1);
6720 lvectype = build_vector_type (ltype, nstores);
6722 /* Else fall back to vector extraction anyway.
6723 Fewer stores are more important than avoiding spilling
6724 of the vector we extract from. Compared to the
6725 construction case in vectorizable_load no store-forwarding
6726 issue exists here for reasonable archs. */
6729 else if (group_size >= const_nunits
6730 && group_size % const_nunits == 0)
6732 nstores = 1;
6733 lnel = const_nunits;
6734 ltype = vectype;
6735 lvectype = vectype;
6737 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6738 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6741 ivstep = stride_step;
6742 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6743 build_int_cst (TREE_TYPE (ivstep), vf));
6745 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6747 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6748 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6749 create_iv (stride_base, ivstep, NULL,
6750 loop, &incr_gsi, insert_after,
6751 &offvar, NULL);
6752 incr = gsi_stmt (incr_gsi);
6753 loop_vinfo->add_stmt (incr);
6755 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6757 prev_stmt_info = NULL;
6758 alias_off = build_int_cst (ref_type, 0);
6759 stmt_vec_info next_stmt_info = first_stmt_info;
6760 for (g = 0; g < group_size; g++)
6762 running_off = offvar;
6763 if (g)
6765 tree size = TYPE_SIZE_UNIT (ltype);
6766 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6767 size);
6768 tree newoff = copy_ssa_name (running_off, NULL);
6769 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6770 running_off, pos);
6771 vect_finish_stmt_generation (stmt, incr, gsi);
6772 running_off = newoff;
6774 unsigned int group_el = 0;
6775 unsigned HOST_WIDE_INT
6776 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6777 for (j = 0; j < ncopies; j++)
6779 /* We've set op and dt above, from vect_get_store_rhs,
6780 and first_stmt_info == stmt_info. */
6781 if (j == 0)
6783 if (slp)
6785 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6786 slp_node);
6787 vec_oprnd = vec_oprnds[0];
6789 else
6791 op = vect_get_store_rhs (next_stmt_info);
6792 vec_oprnd = vect_get_vec_def_for_operand
6793 (op, next_stmt_info);
6796 else
6798 if (slp)
6799 vec_oprnd = vec_oprnds[j];
6800 else
6802 vect_is_simple_use (op, vinfo, &rhs_dt);
6803 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
6804 vec_oprnd);
6807 /* Pun the vector to extract from if necessary. */
6808 if (lvectype != vectype)
6810 tree tem = make_ssa_name (lvectype);
6811 gimple *pun
6812 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6813 lvectype, vec_oprnd));
6814 vect_finish_stmt_generation (stmt, pun, gsi);
6815 vec_oprnd = tem;
6817 for (i = 0; i < nstores; i++)
6819 tree newref, newoff;
6820 gimple *incr, *assign;
6821 tree size = TYPE_SIZE (ltype);
6822 /* Extract the i'th component. */
6823 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6824 bitsize_int (i), size);
6825 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6826 size, pos);
6828 elem = force_gimple_operand_gsi (gsi, elem, true,
6829 NULL_TREE, true,
6830 GSI_SAME_STMT);
6832 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6833 group_el * elsz);
6834 newref = build2 (MEM_REF, ltype,
6835 running_off, this_off);
6836 vect_copy_ref_info (newref, DR_REF (first_dr));
6838 /* And store it to *running_off. */
6839 assign = gimple_build_assign (newref, elem);
6840 stmt_vec_info assign_info
6841 = vect_finish_stmt_generation (stmt, assign, gsi);
6843 group_el += lnel;
6844 if (! slp
6845 || group_el == group_size)
6847 newoff = copy_ssa_name (running_off, NULL);
6848 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6849 running_off, stride_step);
6850 vect_finish_stmt_generation (stmt, incr, gsi);
6852 running_off = newoff;
6853 group_el = 0;
6855 if (g == group_size - 1
6856 && !slp)
6858 if (j == 0 && i == 0)
6859 STMT_VINFO_VEC_STMT (stmt_info)
6860 = *vec_stmt = assign_info;
6861 else
6862 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
6863 prev_stmt_info = assign_info;
6867 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6868 if (slp)
6869 break;
6872 vec_oprnds.release ();
6873 return true;
6876 auto_vec<tree> dr_chain (group_size);
6877 oprnds.create (group_size);
6879 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6880 gcc_assert (alignment_support_scheme);
6881 vec_loop_masks *loop_masks
6882 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6883 ? &LOOP_VINFO_MASKS (loop_vinfo)
6884 : NULL);
6885 /* Targets with store-lane instructions must not require explicit
6886 realignment. vect_supportable_dr_alignment always returns either
6887 dr_aligned or dr_unaligned_supported for masked operations. */
6888 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6889 && !mask
6890 && !loop_masks)
6891 || alignment_support_scheme == dr_aligned
6892 || alignment_support_scheme == dr_unaligned_supported);
6894 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6895 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6896 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6898 tree bump;
6899 tree vec_offset = NULL_TREE;
6900 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6902 aggr_type = NULL_TREE;
6903 bump = NULL_TREE;
6905 else if (memory_access_type == VMAT_GATHER_SCATTER)
6907 aggr_type = elem_type;
6908 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
6909 &bump, &vec_offset);
6911 else
6913 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6914 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6915 else
6916 aggr_type = vectype;
6917 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6920 if (mask)
6921 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6923 /* In case the vectorization factor (VF) is bigger than the number
6924 of elements that we can fit in a vectype (nunits), we have to generate
6925 more than one vector stmt - i.e - we need to "unroll" the
6926 vector stmt by a factor VF/nunits. For more details see documentation in
6927 vect_get_vec_def_for_copy_stmt. */
6929 /* In case of interleaving (non-unit grouped access):
6931 S1: &base + 2 = x2
6932 S2: &base = x0
6933 S3: &base + 1 = x1
6934 S4: &base + 3 = x3
6936 We create vectorized stores starting from base address (the access of the
6937 first stmt in the chain (S2 in the above example), when the last store stmt
6938 of the chain (S4) is reached:
6940 VS1: &base = vx2
6941 VS2: &base + vec_size*1 = vx0
6942 VS3: &base + vec_size*2 = vx1
6943 VS4: &base + vec_size*3 = vx3
6945 Then permutation statements are generated:
6947 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6948 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6951 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6952 (the order of the data-refs in the output of vect_permute_store_chain
6953 corresponds to the order of scalar stmts in the interleaving chain - see
6954 the documentation of vect_permute_store_chain()).
6956 In case of both multiple types and interleaving, the above vector stores and
6957 permutation stmts are created for every copy. The result vector stmts are
6958 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6959 STMT_VINFO_RELATED_STMT for the next copies.
6962 prev_stmt_info = NULL;
6963 tree vec_mask = NULL_TREE;
6964 for (j = 0; j < ncopies; j++)
6966 stmt_vec_info new_stmt_info;
6967 if (j == 0)
6969 if (slp)
6971 /* Get vectorized arguments for SLP_NODE. */
6972 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6973 NULL, slp_node);
6975 vec_oprnd = vec_oprnds[0];
6977 else
6979 /* For interleaved stores we collect vectorized defs for all the
6980 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6981 used as an input to vect_permute_store_chain(), and OPRNDS as
6982 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6984 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6985 OPRNDS are of size 1. */
6986 stmt_vec_info next_stmt_info = first_stmt_info;
6987 for (i = 0; i < group_size; i++)
6989 /* Since gaps are not supported for interleaved stores,
6990 DR_GROUP_SIZE is the exact number of stmts in the chain.
6991 Therefore, NEXT_STMT_INFO can't be NULL. In case there
6992 is no interleaving, DR_GROUP_SIZE is 1,
6993 and only one iteration of the loop will be executed. */
6994 op = vect_get_store_rhs (next_stmt_info);
6995 vec_oprnd = vect_get_vec_def_for_operand
6996 (op, next_stmt_info);
6997 dr_chain.quick_push (vec_oprnd);
6998 oprnds.quick_push (vec_oprnd);
6999 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7001 if (mask)
7002 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
7003 mask_vectype);
7006 /* We should have caught mismatched types earlier. */
7007 gcc_assert (useless_type_conversion_p (vectype,
7008 TREE_TYPE (vec_oprnd)));
7009 bool simd_lane_access_p
7010 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7011 if (simd_lane_access_p
7012 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7013 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7014 && integer_zerop (DR_OFFSET (first_dr))
7015 && integer_zerop (DR_INIT (first_dr))
7016 && alias_sets_conflict_p (get_alias_set (aggr_type),
7017 get_alias_set (TREE_TYPE (ref_type))))
7019 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7020 dataref_offset = build_int_cst (ref_type, 0);
7021 inv_p = false;
7023 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7025 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
7026 &dataref_ptr, &vec_offset);
7027 inv_p = false;
7029 else
7030 dataref_ptr
7031 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
7032 simd_lane_access_p ? loop : NULL,
7033 offset, &dummy, gsi, &ptr_incr,
7034 simd_lane_access_p, &inv_p,
7035 NULL_TREE, bump);
7036 gcc_assert (bb_vinfo || !inv_p);
7038 else
7040 /* For interleaved stores we created vectorized defs for all the
7041 defs stored in OPRNDS in the previous iteration (previous copy).
7042 DR_CHAIN is then used as an input to vect_permute_store_chain(),
7043 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
7044 next copy.
7045 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7046 OPRNDS are of size 1. */
7047 for (i = 0; i < group_size; i++)
7049 op = oprnds[i];
7050 vect_is_simple_use (op, vinfo, &rhs_dt);
7051 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
7052 dr_chain[i] = vec_oprnd;
7053 oprnds[i] = vec_oprnd;
7055 if (mask)
7056 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
7057 if (dataref_offset)
7058 dataref_offset
7059 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7060 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7061 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
7062 vec_offset);
7063 else
7064 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7065 bump);
7068 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7070 tree vec_array;
7072 /* Get an array into which we can store the individual vectors. */
7073 vec_array = create_vector_array (vectype, vec_num);
7075 /* Invalidate the current contents of VEC_ARRAY. This should
7076 become an RTL clobber too, which prevents the vector registers
7077 from being upward-exposed. */
7078 vect_clobber_variable (stmt, gsi, vec_array);
7080 /* Store the individual vectors into the array. */
7081 for (i = 0; i < vec_num; i++)
7083 vec_oprnd = dr_chain[i];
7084 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
7087 tree final_mask = NULL;
7088 if (loop_masks)
7089 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7090 vectype, j);
7091 if (vec_mask)
7092 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7093 vec_mask, gsi);
7095 gcall *call;
7096 if (final_mask)
7098 /* Emit:
7099 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7100 VEC_ARRAY). */
7101 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7102 tree alias_ptr = build_int_cst (ref_type, align);
7103 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7104 dataref_ptr, alias_ptr,
7105 final_mask, vec_array);
7107 else
7109 /* Emit:
7110 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7111 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7112 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7113 vec_array);
7114 gimple_call_set_lhs (call, data_ref);
7116 gimple_call_set_nothrow (call, true);
7117 new_stmt_info = vect_finish_stmt_generation (stmt, call, gsi);
7119 /* Record that VEC_ARRAY is now dead. */
7120 vect_clobber_variable (stmt, gsi, vec_array);
7122 else
7124 new_stmt_info = NULL;
7125 if (grouped_store)
7127 if (j == 0)
7128 result_chain.create (group_size);
7129 /* Permute. */
7130 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
7131 &result_chain);
7134 stmt_vec_info next_stmt_info = first_stmt_info;
7135 for (i = 0; i < vec_num; i++)
7137 unsigned align, misalign;
7139 tree final_mask = NULL_TREE;
7140 if (loop_masks)
7141 final_mask = vect_get_loop_mask (gsi, loop_masks,
7142 vec_num * ncopies,
7143 vectype, vec_num * j + i);
7144 if (vec_mask)
7145 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7146 vec_mask, gsi);
7148 if (memory_access_type == VMAT_GATHER_SCATTER)
7150 tree scale = size_int (gs_info.scale);
7151 gcall *call;
7152 if (loop_masks)
7153 call = gimple_build_call_internal
7154 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7155 scale, vec_oprnd, final_mask);
7156 else
7157 call = gimple_build_call_internal
7158 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7159 scale, vec_oprnd);
7160 gimple_call_set_nothrow (call, true);
7161 new_stmt_info
7162 = vect_finish_stmt_generation (stmt, call, gsi);
7163 break;
7166 if (i > 0)
7167 /* Bump the vector pointer. */
7168 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7169 stmt, bump);
7171 if (slp)
7172 vec_oprnd = vec_oprnds[i];
7173 else if (grouped_store)
7174 /* For grouped stores vectorized defs are interleaved in
7175 vect_permute_store_chain(). */
7176 vec_oprnd = result_chain[i];
7178 align = DR_TARGET_ALIGNMENT (first_dr);
7179 if (aligned_access_p (first_dr))
7180 misalign = 0;
7181 else if (DR_MISALIGNMENT (first_dr) == -1)
7183 align = dr_alignment (vect_dr_behavior (first_dr));
7184 misalign = 0;
7186 else
7187 misalign = DR_MISALIGNMENT (first_dr);
7188 if (dataref_offset == NULL_TREE
7189 && TREE_CODE (dataref_ptr) == SSA_NAME)
7190 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7191 misalign);
7193 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7195 tree perm_mask = perm_mask_for_reverse (vectype);
7196 tree perm_dest
7197 = vect_create_destination_var (vect_get_store_rhs (stmt),
7198 vectype);
7199 tree new_temp = make_ssa_name (perm_dest);
7201 /* Generate the permute statement. */
7202 gimple *perm_stmt
7203 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7204 vec_oprnd, perm_mask);
7205 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7207 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7208 vec_oprnd = new_temp;
7211 /* Arguments are ready. Create the new vector stmt. */
7212 if (final_mask)
7214 align = least_bit_hwi (misalign | align);
7215 tree ptr = build_int_cst (ref_type, align);
7216 gcall *call
7217 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7218 dataref_ptr, ptr,
7219 final_mask, vec_oprnd);
7220 gimple_call_set_nothrow (call, true);
7221 new_stmt_info
7222 = vect_finish_stmt_generation (stmt, call, gsi);
7224 else
7226 data_ref = fold_build2 (MEM_REF, vectype,
7227 dataref_ptr,
7228 dataref_offset
7229 ? dataref_offset
7230 : build_int_cst (ref_type, 0));
7231 if (aligned_access_p (first_dr))
7233 else if (DR_MISALIGNMENT (first_dr) == -1)
7234 TREE_TYPE (data_ref)
7235 = build_aligned_type (TREE_TYPE (data_ref),
7236 align * BITS_PER_UNIT);
7237 else
7238 TREE_TYPE (data_ref)
7239 = build_aligned_type (TREE_TYPE (data_ref),
7240 TYPE_ALIGN (elem_type));
7241 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7242 gassign *new_stmt
7243 = gimple_build_assign (data_ref, vec_oprnd);
7244 new_stmt_info
7245 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
7248 if (slp)
7249 continue;
7251 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7252 if (!next_stmt_info)
7253 break;
7256 if (!slp)
7258 if (j == 0)
7259 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7260 else
7261 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7262 prev_stmt_info = new_stmt_info;
7266 oprnds.release ();
7267 result_chain.release ();
7268 vec_oprnds.release ();
7270 return true;
7273 /* Given a vector type VECTYPE, turn permutation SEL into the equivalent
7274 VECTOR_CST mask. No checks are made that the target platform supports the
7275 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7276 vect_gen_perm_mask_checked. */
7278 tree
7279 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7281 tree mask_type;
7283 poly_uint64 nunits = sel.length ();
7284 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7286 mask_type = build_vector_type (ssizetype, nunits);
7287 return vec_perm_indices_to_tree (mask_type, sel);
7290 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7291 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7293 tree
7294 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7296 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7297 return vect_gen_perm_mask_any (vectype, sel);
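/* Illustration only (an editorial sketch, not vectorizer code): the masks
   built by the two routines above drive VEC_PERM_EXPRs, whose user-level
   equivalent is GCC's generic vector shuffle.  Reversing a 4 x int vector,
   for example, uses the same indices a reverse permutation mask would hold:

     typedef int v4si __attribute__ ((vector_size (16)));

     v4si
     reverse_v4si (v4si x)
     {
       v4si sel = { 3, 2, 1, 0 };
       return __builtin_shuffle (x, sel);
     }

   vect_gen_perm_mask_checked is only used once can_vec_perm_const_p has
   confirmed that the target supports the permutation.  */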
7300 /* Given vector variables X and Y that were generated for the scalar
7301 STMT, generate instructions to permute the vector elements of X and Y
7302 using permutation mask MASK_VEC, insert them at *GSI and return the
7303 permuted vector variable. */
7305 static tree
7306 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
7307 gimple_stmt_iterator *gsi)
7309 tree vectype = TREE_TYPE (x);
7310 tree perm_dest, data_ref;
7311 gimple *perm_stmt;
7313 tree scalar_dest = gimple_get_lhs (stmt);
7314 if (TREE_CODE (scalar_dest) == SSA_NAME)
7315 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7316 else
7317 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7318 data_ref = make_ssa_name (perm_dest);
7320 /* Generate the permute statement. */
7321 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7322 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7324 return data_ref;
7327 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
7328 inserting them on the loop's preheader edge. Returns true if we
7329 were successful in doing so (and thus STMT can then be moved),
7330 otherwise returns false. */
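/* A sketch of the effect (names purely illustrative): for an invariant load
   x_3 = *p_1 inside LOOP whose address computation p_1 = &a_2[k_4] is also
   inside LOOP, the def p_1 = &a_2[k_4] is moved onto the preheader edge,
   after which the caller may hoist the load itself.  PHI defs, or defs that
   in turn depend on other in-loop defs, make the function return false.  */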
7332 static bool
7333 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
7335 ssa_op_iter i;
7336 tree op;
7337 bool any = false;
7339 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7341 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7342 if (!gimple_nop_p (def_stmt)
7343 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7345 /* Make sure we don't need to recurse. While we could do
7346 so in simple cases, when there are more complex use webs
7347 we don't have an easy way to preserve stmt order to fulfil
7348 dependencies within them. */
7349 tree op2;
7350 ssa_op_iter i2;
7351 if (gimple_code (def_stmt) == GIMPLE_PHI)
7352 return false;
7353 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7355 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7356 if (!gimple_nop_p (def_stmt2)
7357 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7358 return false;
7360 any = true;
7364 if (!any)
7365 return true;
7367 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7369 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7370 if (!gimple_nop_p (def_stmt)
7371 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7373 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7374 gsi_remove (&gsi, false);
7375 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7379 return true;
7382 /* vectorizable_load.
7384 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
7385 can be vectorized.
7386 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7387 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7388 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7390 static bool
7391 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi,
7392 stmt_vec_info *vec_stmt, slp_tree slp_node,
7393 slp_instance slp_node_instance,
7394 stmt_vector_for_cost *cost_vec)
7396 tree scalar_dest;
7397 tree vec_dest = NULL;
7398 tree data_ref = NULL;
7399 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7400 stmt_vec_info prev_stmt_info;
7401 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7402 struct loop *loop = NULL;
7403 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
7404 bool nested_in_vect_loop = false;
7405 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
7406 tree elem_type;
7407 tree new_temp;
7408 machine_mode mode;
7409 tree dummy;
7410 enum dr_alignment_support alignment_support_scheme;
7411 tree dataref_ptr = NULL_TREE;
7412 tree dataref_offset = NULL_TREE;
7413 gimple *ptr_incr = NULL;
7414 int ncopies;
7415 int i, j;
7416 unsigned int group_size;
7417 poly_uint64 group_gap_adj;
7418 tree msq = NULL_TREE, lsq;
7419 tree offset = NULL_TREE;
7420 tree byte_offset = NULL_TREE;
7421 tree realignment_token = NULL_TREE;
7422 gphi *phi = NULL;
7423 vec<tree> dr_chain = vNULL;
7424 bool grouped_load = false;
7425 stmt_vec_info first_stmt_info;
7426 stmt_vec_info first_stmt_info_for_drptr = NULL;
7427 bool inv_p;
7428 bool compute_in_loop = false;
7429 struct loop *at_loop;
7430 int vec_num;
7431 bool slp = (slp_node != NULL);
7432 bool slp_perm = false;
7433 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7434 poly_uint64 vf;
7435 tree aggr_type;
7436 gather_scatter_info gs_info;
7437 vec_info *vinfo = stmt_info->vinfo;
7438 tree ref_type;
7439 enum vect_def_type mask_dt = vect_unknown_def_type;
7441 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7442 return false;
7444 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7445 && ! vec_stmt)
7446 return false;
7448 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7449 if (gassign *assign = dyn_cast <gassign *> (stmt))
7451 scalar_dest = gimple_assign_lhs (assign);
7452 if (TREE_CODE (scalar_dest) != SSA_NAME)
7453 return false;
7455 tree_code code = gimple_assign_rhs_code (assign);
7456 if (code != ARRAY_REF
7457 && code != BIT_FIELD_REF
7458 && code != INDIRECT_REF
7459 && code != COMPONENT_REF
7460 && code != IMAGPART_EXPR
7461 && code != REALPART_EXPR
7462 && code != MEM_REF
7463 && TREE_CODE_CLASS (code) != tcc_declaration)
7464 return false;
7466 else
7468 gcall *call = dyn_cast <gcall *> (stmt);
7469 if (!call || !gimple_call_internal_p (call))
7470 return false;
7472 internal_fn ifn = gimple_call_internal_fn (call);
7473 if (!internal_load_fn_p (ifn))
7474 return false;
7476 scalar_dest = gimple_call_lhs (call);
7477 if (!scalar_dest)
7478 return false;
7480 if (slp_node != NULL)
7482 if (dump_enabled_p ())
7483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7484 "SLP of masked loads not supported.\n");
7485 return false;
7488 int mask_index = internal_fn_mask_index (ifn);
7489 if (mask_index >= 0)
7491 mask = gimple_call_arg (call, mask_index);
7492 if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7493 &mask_vectype))
7494 return false;
7498 if (!STMT_VINFO_DATA_REF (stmt_info))
7499 return false;
7501 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7502 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7504 if (loop_vinfo)
7506 loop = LOOP_VINFO_LOOP (loop_vinfo);
7507 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7508 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7510 else
7511 vf = 1;
7513 /* Multiple types in SLP are handled by creating the appropriate number of
7514 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7515 case of SLP. */
7516 if (slp)
7517 ncopies = 1;
7518 else
7519 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7521 gcc_assert (ncopies >= 1);
7523 /* FORNOW. This restriction should be relaxed. */
7524 if (nested_in_vect_loop && ncopies > 1)
7526 if (dump_enabled_p ())
7527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7528 "multiple types in nested loop.\n");
7529 return false;
7532 /* Invalidate assumptions made by dependence analysis when vectorization
7533 on the unrolled body effectively re-orders stmts. */
7534 if (ncopies > 1
7535 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7536 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7537 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7539 if (dump_enabled_p ())
7540 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7541 "cannot perform implicit CSE when unrolling "
7542 "with negative dependence distance\n");
7543 return false;
7546 elem_type = TREE_TYPE (vectype);
7547 mode = TYPE_MODE (vectype);
7549 /* FORNOW. In some cases we can vectorize even if the data type is not
7550 supported (e.g. data copies). */
7551 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7553 if (dump_enabled_p ())
7554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7555 "Aligned load, but unsupported type.\n");
7556 return false;
7559 /* Check if the load is a part of an interleaving chain. */
7560 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7562 grouped_load = true;
7563 /* FORNOW */
7564 gcc_assert (!nested_in_vect_loop);
7565 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7567 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7568 group_size = DR_GROUP_SIZE (first_stmt_info);
7570 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7571 slp_perm = true;
7573 /* Invalidate assumptions made by dependence analysis when vectorization
7574 on the unrolled body effectively re-orders stmts. */
7575 if (!PURE_SLP_STMT (stmt_info)
7576 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7577 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7578 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7580 if (dump_enabled_p ())
7581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7582 "cannot perform implicit CSE when performing "
7583 "group loads with negative dependence distance\n");
7584 return false;
7587 /* Similarly, when the stmt is a load that is both part of an SLP
7588 instance and a loop-vectorized stmt via the same-dr mechanism,
7589 we have to give up. */
7590 if (DR_GROUP_SAME_DR_STMT (stmt_info)
7591 && (STMT_SLP_TYPE (stmt_info)
7592 != STMT_SLP_TYPE (DR_GROUP_SAME_DR_STMT (stmt_info))))
7594 if (dump_enabled_p ())
7595 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7596 "conflicting SLP types for CSEd load\n");
7597 return false;
7600 else
7601 group_size = 1;
7603 vect_memory_access_type memory_access_type;
7604 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
7605 &memory_access_type, &gs_info))
7606 return false;
7608 if (mask)
7610 if (memory_access_type == VMAT_CONTIGUOUS)
7612 machine_mode vec_mode = TYPE_MODE (vectype);
7613 if (!VECTOR_MODE_P (vec_mode)
7614 || !can_vec_mask_load_store_p (vec_mode,
7615 TYPE_MODE (mask_vectype), true))
7616 return false;
7618 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7620 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7621 tree masktype
7622 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7623 if (TREE_CODE (masktype) == INTEGER_TYPE)
7625 if (dump_enabled_p ())
7626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7627 "masked gather with integer mask not"
7628 " supported.");
7629 return false;
7632 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7633 && memory_access_type != VMAT_GATHER_SCATTER)
7635 if (dump_enabled_p ())
7636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7637 "unsupported access type for masked load.\n");
7638 return false;
7642 if (!vec_stmt) /* transformation not required. */
7644 if (!slp)
7645 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7647 if (loop_vinfo
7648 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7649 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7650 memory_access_type, &gs_info);
7652 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7653 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7654 slp_node_instance, slp_node, cost_vec);
7655 return true;
7658 if (!slp)
7659 gcc_assert (memory_access_type
7660 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7662 if (dump_enabled_p ())
7663 dump_printf_loc (MSG_NOTE, vect_location,
7664 "transform load. ncopies = %d\n", ncopies);
7666 /* Transform. */
7668 ensure_base_align (dr);
7670 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7672 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7673 mask_dt);
7674 return true;
7677 if (memory_access_type == VMAT_ELEMENTWISE
7678 || memory_access_type == VMAT_STRIDED_SLP)
7680 gimple_stmt_iterator incr_gsi;
7681 bool insert_after;
7682 gimple *incr;
7683 tree offvar;
7684 tree ivstep;
7685 tree running_off;
7686 vec<constructor_elt, va_gc> *v = NULL;
7687 tree stride_base, stride_step, alias_off;
7688 /* Checked by get_load_store_type. */
7689 unsigned int const_nunits = nunits.to_constant ();
7690 unsigned HOST_WIDE_INT cst_offset = 0;
7692 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7693 gcc_assert (!nested_in_vect_loop);
7695 if (grouped_load)
7697 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7698 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7700 else
7702 first_stmt_info = stmt_info;
7703 first_dr = dr;
7705 if (slp && grouped_load)
7707 group_size = DR_GROUP_SIZE (first_stmt_info);
7708 ref_type = get_group_alias_ptr_type (first_stmt_info);
7710 else
7712 if (grouped_load)
7713 cst_offset
7714 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7715 * vect_get_place_in_interleaving_chain (stmt,
7716 first_stmt_info));
7717 group_size = 1;
7718 ref_type = reference_alias_ptr_type (DR_REF (dr));
7721 stride_base
7722 = fold_build_pointer_plus
7723 (DR_BASE_ADDRESS (first_dr),
7724 size_binop (PLUS_EXPR,
7725 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7726 convert_to_ptrofftype (DR_INIT (first_dr))));
7727 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7729 /* For a load with loop-invariant (but other than power-of-2)
7730 stride (i.e. not a grouped access) like so:
7732 for (i = 0; i < n; i += stride)
7733 ... = array[i];
7735 we generate a new induction variable and new accesses to
7736 form a new vector (or vectors, depending on ncopies):
7738 for (j = 0; ; j += VF*stride)
7739 tmp1 = array[j];
7740 tmp2 = array[j + stride];
7742 vectemp = {tmp1, tmp2, ...}
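/* A concrete instance of the scheme above (illustrative only): with VF = 4
   and a runtime stride S, each vectorized iteration performs

     tmp0 = array[j];
     tmp1 = array[j + S];
     tmp2 = array[j + 2*S];
     tmp3 = array[j + 3*S];
     vectemp = { tmp0, tmp1, tmp2, tmp3 };
     j += 4 * S;

   which is what the induction variable created below (stride_base with
   step stride_step * VF) and the per-element MEM_REFs implement.  */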
7745 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7746 build_int_cst (TREE_TYPE (stride_step), vf));
7748 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7750 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7751 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7752 create_iv (stride_base, ivstep, NULL,
7753 loop, &incr_gsi, insert_after,
7754 &offvar, NULL);
7755 incr = gsi_stmt (incr_gsi);
7756 loop_vinfo->add_stmt (incr);
7758 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7760 prev_stmt_info = NULL;
7761 running_off = offvar;
7762 alias_off = build_int_cst (ref_type, 0);
7763 int nloads = const_nunits;
7764 int lnel = 1;
7765 tree ltype = TREE_TYPE (vectype);
7766 tree lvectype = vectype;
7767 auto_vec<tree> dr_chain;
7768 if (memory_access_type == VMAT_STRIDED_SLP)
7770 if (group_size < const_nunits)
7772 /* First check if vec_init optab supports construction from
7773 vector elts directly. */
7774 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7775 machine_mode vmode;
7776 if (mode_for_vector (elmode, group_size).exists (&vmode)
7777 && VECTOR_MODE_P (vmode)
7778 && targetm.vector_mode_supported_p (vmode)
7779 && (convert_optab_handler (vec_init_optab,
7780 TYPE_MODE (vectype), vmode)
7781 != CODE_FOR_nothing))
7783 nloads = const_nunits / group_size;
7784 lnel = group_size;
7785 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7787 else
7789 /* Otherwise avoid emitting a constructor of vector elements
7790 by performing the loads using an integer type of the same
7791 size, constructing a vector of those and then
7792 re-interpreting it as the original vector type.
7793 This avoids a huge runtime penalty due to the general
7794 inability to perform store forwarding from smaller stores
7795 to a larger load. */
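/* Illustrative example of the fallback above: for a vector(4) float
   vectype and group_size == 2, lsize is 2 * 32 = 64, so each pair of
   adjacent floats is loaded as one 64-bit integer and the result is
   reinterpreted once at the end, roughly:

     _1 = MEM <uint64> [ptr];           covers floats 0 and 1
     _2 = MEM <uint64> [ptr + stride];  covers the next group's floats
     tmp = { _1, _2 };                  vector(2) uint64
     vec = VIEW_CONVERT_EXPR <vector(4) float> (tmp);

   (Shown schematically; the actual modes come from int_mode_for_size
   and mode_for_vector below.)  */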
7796 unsigned lsize
7797 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7798 elmode = int_mode_for_size (lsize, 0).require ();
7799 unsigned int lnunits = const_nunits / group_size;
7800 /* If we can't construct such a vector, fall back to
7801 element loads of the original vector type. */
7802 if (mode_for_vector (elmode, lnunits).exists (&vmode)
7803 && VECTOR_MODE_P (vmode)
7804 && targetm.vector_mode_supported_p (vmode)
7805 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7806 != CODE_FOR_nothing))
7808 nloads = lnunits;
7809 lnel = group_size;
7810 ltype = build_nonstandard_integer_type (lsize, 1);
7811 lvectype = build_vector_type (ltype, nloads);
7815 else
7817 nloads = 1;
7818 lnel = const_nunits;
7819 ltype = vectype;
7821 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7823 /* Load vector(1) scalar_type if the vectype is a single-element vector. */
7824 else if (nloads == 1)
7825 ltype = vectype;
7827 if (slp)
7829 /* For SLP permutation support we need to load the whole group,
7830 not only the number of vector stmts the permutation result
7831 fits in. */
7832 if (slp_perm)
7834 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7835 variable VF. */
7836 unsigned int const_vf = vf.to_constant ();
7837 ncopies = CEIL (group_size * const_vf, const_nunits);
7838 dr_chain.create (ncopies);
7840 else
7841 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7843 unsigned int group_el = 0;
7844 unsigned HOST_WIDE_INT
7845 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7846 for (j = 0; j < ncopies; j++)
7848 if (nloads > 1)
7849 vec_alloc (v, nloads);
7850 stmt_vec_info new_stmt_info = NULL;
7851 for (i = 0; i < nloads; i++)
7853 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7854 group_el * elsz + cst_offset);
7855 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7856 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7857 gassign *new_stmt
7858 = gimple_build_assign (make_ssa_name (ltype), data_ref);
7859 new_stmt_info
7860 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
7861 if (nloads > 1)
7862 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7863 gimple_assign_lhs (new_stmt));
7865 group_el += lnel;
7866 if (! slp
7867 || group_el == group_size)
7869 tree newoff = copy_ssa_name (running_off);
7870 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7871 running_off, stride_step);
7872 vect_finish_stmt_generation (stmt, incr, gsi);
7874 running_off = newoff;
7875 group_el = 0;
7878 if (nloads > 1)
7880 tree vec_inv = build_constructor (lvectype, v);
7881 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7882 new_stmt_info = vinfo->lookup_def (new_temp);
7883 if (lvectype != vectype)
7885 gassign *new_stmt
7886 = gimple_build_assign (make_ssa_name (vectype),
7887 VIEW_CONVERT_EXPR,
7888 build1 (VIEW_CONVERT_EXPR,
7889 vectype, new_temp));
7890 new_stmt_info
7891 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
7895 if (slp)
7897 if (slp_perm)
7898 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
7899 else
7900 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
7902 else
7904 if (j == 0)
7905 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7906 else
7907 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7908 prev_stmt_info = new_stmt_info;
7911 if (slp_perm)
7913 unsigned n_perms;
7914 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7915 slp_node_instance, false, &n_perms);
7917 return true;
7920 if (memory_access_type == VMAT_GATHER_SCATTER
7921 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
7922 grouped_load = false;
7924 if (grouped_load)
7926 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7927 group_size = DR_GROUP_SIZE (first_stmt_info);
7928 /* For SLP vectorization we directly vectorize a subchain
7929 without permutation. */
7930 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7931 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7932 /* For BB vectorization always use the first stmt to base
7933 the data ref pointer on. */
7934 if (bb_vinfo)
7935 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7937 /* Check if the chain of loads is already vectorized. */
7938 if (STMT_VINFO_VEC_STMT (first_stmt_info)
7939 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7940 ??? But we can only do so if there is exactly one
7941 as we have no way to get at the rest. Leave the CSE
7942 opportunity alone.
7943 ??? With the group load eventually participating
7944 in multiple different permutations (having multiple
7945 slp nodes which refer to the same group) the CSE
7946 is even wrong code. See PR56270. */
7947 && !slp)
7949 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7950 return true;
7952 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7953 group_gap_adj = 0;
7955 /* VEC_NUM is the number of vect stmts to be created for this group. */
7956 if (slp)
7958 grouped_load = false;
7959 /* For SLP permutation support we need to load the whole group,
7960 not only the number of vector stmts the permutation result
7961 fits in. */
7962 if (slp_perm)
7964 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7965 variable VF. */
7966 unsigned int const_vf = vf.to_constant ();
7967 unsigned int const_nunits = nunits.to_constant ();
7968 vec_num = CEIL (group_size * const_vf, const_nunits);
7969 group_gap_adj = vf * group_size - nunits * vec_num;
7971 else
7973 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7974 group_gap_adj
7975 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7978 else
7979 vec_num = group_size;
7981 ref_type = get_group_alias_ptr_type (first_stmt_info);
7983 else
7985 first_stmt_info = stmt_info;
7986 first_dr = dr;
7987 group_size = vec_num = 1;
7988 group_gap_adj = 0;
7989 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7992 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7993 gcc_assert (alignment_support_scheme);
7994 vec_loop_masks *loop_masks
7995 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7996 ? &LOOP_VINFO_MASKS (loop_vinfo)
7997 : NULL);
7998 /* Targets with load-lane instructions must not require explicit
7999 realignment. vect_supportable_dr_alignment always returns either
8000 dr_aligned or dr_unaligned_supported for masked operations. */
8001 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8002 && !mask
8003 && !loop_masks)
8004 || alignment_support_scheme == dr_aligned
8005 || alignment_support_scheme == dr_unaligned_supported);
8007 /* In case the vectorization factor (VF) is bigger than the number
8008 of elements that we can fit in a vectype (nunits), we have to generate
8009 more than one vector stmt - i.e. - we need to "unroll" the
8010 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8011 from one copy of the vector stmt to the next, in the field
8012 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8013 stages to find the correct vector defs to be used when vectorizing
8014 stmts that use the defs of the current stmt. The example below
8015 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8016 need to create 4 vectorized stmts):
8018 before vectorization:
8019 RELATED_STMT VEC_STMT
8020 S1: x = memref - -
8021 S2: z = x + 1 - -
8023 step 1: vectorize stmt S1:
8024 We first create the vector stmt VS1_0, and, as usual, record a
8025 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8026 Next, we create the vector stmt VS1_1, and record a pointer to
8027 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8028 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8029 stmts and pointers:
8030 RELATED_STMT VEC_STMT
8031 VS1_0: vx0 = memref0 VS1_1 -
8032 VS1_1: vx1 = memref1 VS1_2 -
8033 VS1_2: vx2 = memref2 VS1_3 -
8034 VS1_3: vx3 = memref3 - -
8035 S1: x = load - VS1_0
8036 S2: z = x + 1 - -
8038 See the documentation of vect_get_vec_def_for_stmt_copy for how the
8039 information we recorded in the RELATED_STMT field is used to vectorize
8040 stmt S2. */
8042 /* In case of interleaving (non-unit grouped access):
8044 S1: x2 = &base + 2
8045 S2: x0 = &base
8046 S3: x1 = &base + 1
8047 S4: x3 = &base + 3
8049 Vectorized loads are created in the order of memory accesses
8050 starting from the access of the first stmt of the chain:
8052 VS1: vx0 = &base
8053 VS2: vx1 = &base + vec_size*1
8054 VS3: vx3 = &base + vec_size*2
8055 VS4: vx4 = &base + vec_size*3
8057 Then permutation statements are generated:
8059 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8060 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8063 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8064 (the order of the data-refs in the output of vect_permute_load_chain
8065 corresponds to the order of scalar stmts in the interleaving chain - see
8066 the documentation of vect_permute_load_chain()).
8067 The generation of permutation stmts and recording them in
8068 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8070 In case of both multiple types and interleaving, the vector loads and
8071 permutation stmts above are created for every copy. The result vector
8072 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8073 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
8075 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8076 on a target that supports unaligned accesses (dr_unaligned_supported)
8077 we generate the following code:
8078 p = initial_addr;
8079 indx = 0;
8080 loop {
8081 p = p + indx * vectype_size;
8082 vec_dest = *(p);
8083 indx = indx + 1;
8086 Otherwise, the data reference is potentially unaligned on a target that
8087 does not support unaligned accesses (dr_explicit_realign_optimized) -
8088 then generate the following code, in which the data in each iteration is
8089 obtained by two vector loads, one from the previous iteration, and one
8090 from the current iteration:
8091 p1 = initial_addr;
8092 msq_init = *(floor(p1))
8093 p2 = initial_addr + VS - 1;
8094 realignment_token = call target_builtin;
8095 indx = 0;
8096 loop {
8097 p2 = p2 + indx * vectype_size
8098 lsq = *(floor(p2))
8099 vec_dest = realign_load (msq, lsq, realignment_token)
8100 indx = indx + 1;
8101 msq = lsq;
8102 } */
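/* Concrete example of the realignment scheme above (illustrative, assuming
   16-byte vectors): if p is misaligned by 4 bytes, floor(p) clears the low
   four address bits, so msq covers bytes [p-4, p+12) and lsq covers bytes
   [p+12, p+28); realign_load then extracts the 16 bytes starting at p from
   the msq/lsq pair using the realignment token.  */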
8104 /* If the misalignment remains the same throughout the execution of the
8105 loop, we can create the init_addr and permutation mask at the loop
8106 preheader. Otherwise, it needs to be created inside the loop.
8107 This can only occur when vectorizing memory accesses in the inner-loop
8108 nested within an outer-loop that is being vectorized. */
8110 if (nested_in_vect_loop
8111 && !multiple_p (DR_STEP_ALIGNMENT (dr),
8112 GET_MODE_SIZE (TYPE_MODE (vectype))))
8114 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8115 compute_in_loop = true;
8118 if ((alignment_support_scheme == dr_explicit_realign_optimized
8119 || alignment_support_scheme == dr_explicit_realign)
8120 && !compute_in_loop)
8122 msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
8123 alignment_support_scheme, NULL_TREE,
8124 &at_loop);
8125 if (alignment_support_scheme == dr_explicit_realign_optimized)
8127 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
8128 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8129 size_one_node);
8132 else
8133 at_loop = loop;
8135 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8136 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8138 tree bump;
8139 tree vec_offset = NULL_TREE;
8140 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8142 aggr_type = NULL_TREE;
8143 bump = NULL_TREE;
8145 else if (memory_access_type == VMAT_GATHER_SCATTER)
8147 aggr_type = elem_type;
8148 vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
8149 &bump, &vec_offset);
8151 else
8153 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8154 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8155 else
8156 aggr_type = vectype;
8157 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
8160 tree vec_mask = NULL_TREE;
8161 prev_stmt_info = NULL;
8162 poly_uint64 group_elt = 0;
8163 for (j = 0; j < ncopies; j++)
8165 stmt_vec_info new_stmt_info = NULL;
8166 /* 1. Create the vector or array pointer update chain. */
8167 if (j == 0)
8169 bool simd_lane_access_p
8170 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8171 if (simd_lane_access_p
8172 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8173 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8174 && integer_zerop (DR_OFFSET (first_dr))
8175 && integer_zerop (DR_INIT (first_dr))
8176 && alias_sets_conflict_p (get_alias_set (aggr_type),
8177 get_alias_set (TREE_TYPE (ref_type)))
8178 && (alignment_support_scheme == dr_aligned
8179 || alignment_support_scheme == dr_unaligned_supported))
8181 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
8182 dataref_offset = build_int_cst (ref_type, 0);
8183 inv_p = false;
8185 else if (first_stmt_info_for_drptr
8186 && first_stmt_info != first_stmt_info_for_drptr)
8188 dataref_ptr
8189 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
8190 aggr_type, at_loop, offset, &dummy,
8191 gsi, &ptr_incr, simd_lane_access_p,
8192 &inv_p, byte_offset, bump);
8193 /* Adjust the pointer by the difference to first_stmt. */
8194 data_reference_p ptrdr
8195 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
8196 tree diff = fold_convert (sizetype,
8197 size_binop (MINUS_EXPR,
8198 DR_INIT (first_dr),
8199 DR_INIT (ptrdr)));
8200 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8201 stmt, diff);
8203 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8205 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8206 &dataref_ptr, &vec_offset);
8207 inv_p = false;
8209 else
8210 dataref_ptr
8211 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
8212 offset, &dummy, gsi, &ptr_incr,
8213 simd_lane_access_p, &inv_p,
8214 byte_offset, bump);
8215 if (mask)
8216 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8217 mask_vectype);
8219 else
8221 if (dataref_offset)
8222 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8223 bump);
8224 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8225 vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8226 vec_offset);
8227 else
8228 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8229 stmt, bump);
8230 if (mask)
8231 vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
8234 if (grouped_load || slp_perm)
8235 dr_chain.create (vec_num);
8237 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8239 tree vec_array;
8241 vec_array = create_vector_array (vectype, vec_num);
8243 tree final_mask = NULL_TREE;
8244 if (loop_masks)
8245 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8246 vectype, j);
8247 if (vec_mask)
8248 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8249 vec_mask, gsi);
8251 gcall *call;
8252 if (final_mask)
8254 /* Emit:
8255 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8256 VEC_MASK). */
8257 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8258 tree alias_ptr = build_int_cst (ref_type, align);
8259 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8260 dataref_ptr, alias_ptr,
8261 final_mask);
8263 else
8265 /* Emit:
8266 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8267 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8268 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8270 gimple_call_set_lhs (call, vec_array);
8271 gimple_call_set_nothrow (call, true);
8272 new_stmt_info = vect_finish_stmt_generation (stmt, call, gsi);
8274 /* Extract each vector into an SSA_NAME. */
8275 for (i = 0; i < vec_num; i++)
8277 new_temp = read_vector_array (stmt, gsi, scalar_dest,
8278 vec_array, i);
8279 dr_chain.quick_push (new_temp);
8282 /* Record the mapping between SSA_NAMEs and statements. */
8283 vect_record_grouped_load_vectors (stmt, dr_chain);
8285 /* Record that VEC_ARRAY is now dead. */
8286 vect_clobber_variable (stmt, gsi, vec_array);
8288 else
8290 for (i = 0; i < vec_num; i++)
8292 tree final_mask = NULL_TREE;
8293 if (loop_masks
8294 && memory_access_type != VMAT_INVARIANT)
8295 final_mask = vect_get_loop_mask (gsi, loop_masks,
8296 vec_num * ncopies,
8297 vectype, vec_num * j + i);
8298 if (vec_mask)
8299 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8300 vec_mask, gsi);
8302 if (i > 0)
8303 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8304 stmt, bump);
8306 /* 2. Create the vector-load in the loop. */
8307 gimple *new_stmt = NULL;
8308 switch (alignment_support_scheme)
8310 case dr_aligned:
8311 case dr_unaligned_supported:
8313 unsigned int align, misalign;
8315 if (memory_access_type == VMAT_GATHER_SCATTER)
8317 tree scale = size_int (gs_info.scale);
8318 gcall *call;
8319 if (loop_masks)
8320 call = gimple_build_call_internal
8321 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8322 vec_offset, scale, final_mask);
8323 else
8324 call = gimple_build_call_internal
8325 (IFN_GATHER_LOAD, 3, dataref_ptr,
8326 vec_offset, scale);
8327 gimple_call_set_nothrow (call, true);
8328 new_stmt = call;
8329 data_ref = NULL_TREE;
8330 break;
8333 align = DR_TARGET_ALIGNMENT (dr);
8334 if (alignment_support_scheme == dr_aligned)
8336 gcc_assert (aligned_access_p (first_dr));
8337 misalign = 0;
8339 else if (DR_MISALIGNMENT (first_dr) == -1)
8341 align = dr_alignment (vect_dr_behavior (first_dr));
8342 misalign = 0;
8344 else
8345 misalign = DR_MISALIGNMENT (first_dr);
8346 if (dataref_offset == NULL_TREE
8347 && TREE_CODE (dataref_ptr) == SSA_NAME)
8348 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8349 align, misalign);
8351 if (final_mask)
8353 align = least_bit_hwi (misalign | align);
8354 tree ptr = build_int_cst (ref_type, align);
8355 gcall *call
8356 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8357 dataref_ptr, ptr,
8358 final_mask);
8359 gimple_call_set_nothrow (call, true);
8360 new_stmt = call;
8361 data_ref = NULL_TREE;
8363 else
8365 data_ref
8366 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8367 dataref_offset
8368 ? dataref_offset
8369 : build_int_cst (ref_type, 0));
8370 if (alignment_support_scheme == dr_aligned)
8372 else if (DR_MISALIGNMENT (first_dr) == -1)
8373 TREE_TYPE (data_ref)
8374 = build_aligned_type (TREE_TYPE (data_ref),
8375 align * BITS_PER_UNIT);
8376 else
8377 TREE_TYPE (data_ref)
8378 = build_aligned_type (TREE_TYPE (data_ref),
8379 TYPE_ALIGN (elem_type));
8381 break;
8383 case dr_explicit_realign:
8385 tree ptr, bump;
8387 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8389 if (compute_in_loop)
8390 msq = vect_setup_realignment (first_stmt_info, gsi,
8391 &realignment_token,
8392 dr_explicit_realign,
8393 dataref_ptr, NULL);
8395 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8396 ptr = copy_ssa_name (dataref_ptr);
8397 else
8398 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8399 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8400 new_stmt = gimple_build_assign
8401 (ptr, BIT_AND_EXPR, dataref_ptr,
8402 build_int_cst
8403 (TREE_TYPE (dataref_ptr),
8404 -(HOST_WIDE_INT) align));
8405 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8406 data_ref
8407 = build2 (MEM_REF, vectype, ptr,
8408 build_int_cst (ref_type, 0));
8409 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8410 vec_dest = vect_create_destination_var (scalar_dest,
8411 vectype);
8412 new_stmt = gimple_build_assign (vec_dest, data_ref);
8413 new_temp = make_ssa_name (vec_dest, new_stmt);
8414 gimple_assign_set_lhs (new_stmt, new_temp);
8415 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8416 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8417 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8418 msq = new_temp;
8420 bump = size_binop (MULT_EXPR, vs,
8421 TYPE_SIZE_UNIT (elem_type));
8422 bump = size_binop (MINUS_EXPR, bump, size_one_node);
8423 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
8424 new_stmt = gimple_build_assign
8425 (NULL_TREE, BIT_AND_EXPR, ptr,
8426 build_int_cst
8427 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8428 ptr = copy_ssa_name (ptr, new_stmt);
8429 gimple_assign_set_lhs (new_stmt, ptr);
8430 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8431 data_ref
8432 = build2 (MEM_REF, vectype, ptr,
8433 build_int_cst (ref_type, 0));
8434 break;
8436 case dr_explicit_realign_optimized:
8438 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8439 new_temp = copy_ssa_name (dataref_ptr);
8440 else
8441 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8442 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8443 new_stmt = gimple_build_assign
8444 (new_temp, BIT_AND_EXPR, dataref_ptr,
8445 build_int_cst (TREE_TYPE (dataref_ptr),
8446 -(HOST_WIDE_INT) align));
8447 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8448 data_ref
8449 = build2 (MEM_REF, vectype, new_temp,
8450 build_int_cst (ref_type, 0));
8451 break;
8453 default:
8454 gcc_unreachable ();
8456 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8457 /* DATA_REF is null if we've already built the statement. */
8458 if (data_ref)
8460 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8461 new_stmt = gimple_build_assign (vec_dest, data_ref);
8463 new_temp = make_ssa_name (vec_dest, new_stmt);
8464 gimple_set_lhs (new_stmt, new_temp);
8465 new_stmt_info
8466 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
8468 /* 3. Handle explicit realignment if necessary/supported.
8469 Create in loop:
8470 vec_dest = realign_load (msq, lsq, realignment_token) */
8471 if (alignment_support_scheme == dr_explicit_realign_optimized
8472 || alignment_support_scheme == dr_explicit_realign)
8474 lsq = gimple_assign_lhs (new_stmt);
8475 if (!realignment_token)
8476 realignment_token = dataref_ptr;
8477 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8478 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8479 msq, lsq, realignment_token);
8480 new_temp = make_ssa_name (vec_dest, new_stmt);
8481 gimple_assign_set_lhs (new_stmt, new_temp);
8482 new_stmt_info
8483 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
8485 if (alignment_support_scheme == dr_explicit_realign_optimized)
8487 gcc_assert (phi);
8488 if (i == vec_num - 1 && j == ncopies - 1)
8489 add_phi_arg (phi, lsq,
8490 loop_latch_edge (containing_loop),
8491 UNKNOWN_LOCATION);
8492 msq = lsq;
8496 /* 4. Handle invariant-load. */
8497 if (inv_p && !bb_vinfo)
8499 gcc_assert (!grouped_load);
8500 /* If we have versioned for aliasing or the loop doesn't
8501 have any data dependencies that would preclude this,
8502 then we are sure this is a loop invariant load and
8503 thus we can insert it on the preheader edge. */
8504 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8505 && !nested_in_vect_loop
8506 && hoist_defs_of_uses (stmt, loop))
8508 if (dump_enabled_p ())
8510 dump_printf_loc (MSG_NOTE, vect_location,
8511 "hoisting out of the vectorized "
8512 "loop: ");
8513 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8515 tree tem = copy_ssa_name (scalar_dest);
8516 gsi_insert_on_edge_immediate
8517 (loop_preheader_edge (loop),
8518 gimple_build_assign (tem,
8519 unshare_expr
8520 (gimple_assign_rhs1 (stmt))));
8521 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
8522 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8523 new_stmt_info = vinfo->add_stmt (new_stmt);
8525 else
8527 gimple_stmt_iterator gsi2 = *gsi;
8528 gsi_next (&gsi2);
8529 new_temp = vect_init_vector (stmt, scalar_dest,
8530 vectype, &gsi2);
8531 new_stmt_info = vinfo->lookup_def (new_temp);
8535 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8537 tree perm_mask = perm_mask_for_reverse (vectype);
8538 new_temp = permute_vec_elements (new_temp, new_temp,
8539 perm_mask, stmt, gsi);
8540 new_stmt_info = vinfo->lookup_def (new_temp);
8543 /* Collect vector loads and later create their permutation in
8544 vect_transform_grouped_load (). */
8545 if (grouped_load || slp_perm)
8546 dr_chain.quick_push (new_temp);
8548 /* Store vector loads in the corresponding SLP_NODE. */
8549 if (slp && !slp_perm)
8550 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8552 /* With SLP permutation we load the gaps as well; without it
8553 we need to skip the gaps after we manage to fully load
8554 all elements. group_gap_adj is DR_GROUP_SIZE here. */
8555 group_elt += nunits;
8556 if (maybe_ne (group_gap_adj, 0U)
8557 && !slp_perm
8558 && known_eq (group_elt, group_size - group_gap_adj))
8560 poly_wide_int bump_val
8561 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8562 * group_gap_adj);
8563 tree bump = wide_int_to_tree (sizetype, bump_val);
8564 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8565 stmt, bump);
8566 group_elt = 0;
8569 /* Bump the vector pointer to account for a gap or for excess
8570 elements loaded for a permuted SLP load. */
8571 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8573 poly_wide_int bump_val
8574 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8575 * group_gap_adj);
8576 tree bump = wide_int_to_tree (sizetype, bump_val);
8577 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8578 stmt, bump);
8582 if (slp && !slp_perm)
8583 continue;
8585 if (slp_perm)
8587 unsigned n_perms;
8588 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8589 slp_node_instance, false,
8590 &n_perms))
8592 dr_chain.release ();
8593 return false;
8596 else
8598 if (grouped_load)
8600 if (memory_access_type != VMAT_LOAD_STORE_LANES)
8601 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
8602 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8604 else
8606 if (j == 0)
8607 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8608 else
8609 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8610 prev_stmt_info = new_stmt_info;
8613 dr_chain.release ();
8616 return true;
8619 /* Function vect_is_simple_cond.
8621 Input:
8622 VINFO - the vec_info for the loop or basic block that is being vectorized.
8623 COND - the condition that is checked for simple use.
8625 Output:
8626 *COMP_VECTYPE - the vector type for the comparison.
8627 *DTS - The def types for the arguments of the comparison
8629 Returns whether a COND can be vectorized. Checks whether
8630 condition operands are supportable using vect_is_simple_use. */
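/* Both forms of condition are accepted here (illustrative GIMPLE):

     mask case:        _5 = a_1 < b_2;  x_3 = _5 ? c_4 : d_6;
     comparison case:  x_3 = a_1 < b_2 ? c_4 : d_6;

   In the first case COND is the boolean SSA name _5; in the second it is
   the embedded comparison itself.  */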
8632 static bool
8633 vect_is_simple_cond (tree cond, vec_info *vinfo,
8634 tree *comp_vectype, enum vect_def_type *dts,
8635 tree vectype)
8637 tree lhs, rhs;
8638 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8640 /* Mask case. */
8641 if (TREE_CODE (cond) == SSA_NAME
8642 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8644 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
8645 || !*comp_vectype
8646 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8647 return false;
8648 return true;
8651 if (!COMPARISON_CLASS_P (cond))
8652 return false;
8654 lhs = TREE_OPERAND (cond, 0);
8655 rhs = TREE_OPERAND (cond, 1);
8657 if (TREE_CODE (lhs) == SSA_NAME)
8659 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
8660 return false;
8662 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8663 || TREE_CODE (lhs) == FIXED_CST)
8664 dts[0] = vect_constant_def;
8665 else
8666 return false;
8668 if (TREE_CODE (rhs) == SSA_NAME)
8670 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
8671 return false;
8673 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8674 || TREE_CODE (rhs) == FIXED_CST)
8675 dts[1] = vect_constant_def;
8676 else
8677 return false;
8679 if (vectype1 && vectype2
8680 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8681 TYPE_VECTOR_SUBPARTS (vectype2)))
8682 return false;
8684 *comp_vectype = vectype1 ? vectype1 : vectype2;
8685 /* Invariant comparison. */
8686 if (! *comp_vectype && vectype)
8688 tree scalar_type = TREE_TYPE (lhs);
8689 /* If we can widen the comparison to match vectype do so. */
8690 if (INTEGRAL_TYPE_P (scalar_type)
8691 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8692 TYPE_SIZE (TREE_TYPE (vectype))))
8693 scalar_type = build_nonstandard_integer_type
8694 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8695 TYPE_UNSIGNED (scalar_type));
8696 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8699 return true;
8702 /* vectorizable_condition.
8704 Check if STMT is a conditional modify expression that can be vectorized.
8705 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8706 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8707 at GSI.
8709 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
8710 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
8711 the else clause if it is 2).
8713 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8715 bool
8716 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8717 stmt_vec_info *vec_stmt, tree reduc_def,
8718 int reduc_index, slp_tree slp_node,
8719 stmt_vector_for_cost *cost_vec)
8721 tree scalar_dest = NULL_TREE;
8722 tree vec_dest = NULL_TREE;
8723 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8724 tree then_clause, else_clause;
8725 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8726 tree comp_vectype = NULL_TREE;
8727 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8728 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8729 tree vec_compare;
8730 tree new_temp;
8731 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8732 enum vect_def_type dts[4]
8733 = {vect_unknown_def_type, vect_unknown_def_type,
8734 vect_unknown_def_type, vect_unknown_def_type};
8735 int ndts = 4;
8736 int ncopies;
8737 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8738 stmt_vec_info prev_stmt_info = NULL;
8739 int i, j;
8740 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8741 vec<tree> vec_oprnds0 = vNULL;
8742 vec<tree> vec_oprnds1 = vNULL;
8743 vec<tree> vec_oprnds2 = vNULL;
8744 vec<tree> vec_oprnds3 = vNULL;
8745 tree vec_cmp_type;
8746 bool masked = false;
8748 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8749 return false;
8751 vect_reduction_type reduction_type
8752 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8753 if (reduction_type == TREE_CODE_REDUCTION)
8755 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8756 return false;
8758 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8759 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8760 && reduc_def))
8761 return false;
8763 /* FORNOW: not yet supported. */
8764 if (STMT_VINFO_LIVE_P (stmt_info))
8766 if (dump_enabled_p ())
8767 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8768 "value used after loop.\n");
8769 return false;
8773 /* Is this a vectorizable conditional operation? */
8774 if (!is_gimple_assign (stmt))
8775 return false;
8777 code = gimple_assign_rhs_code (stmt);
8779 if (code != COND_EXPR)
8780 return false;
8782 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8783 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8785 if (slp_node)
8786 ncopies = 1;
8787 else
8788 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8790 gcc_assert (ncopies >= 1);
8791 if (reduc_index && ncopies > 1)
8792 return false; /* FORNOW */
8794 cond_expr = gimple_assign_rhs1 (stmt);
8795 then_clause = gimple_assign_rhs2 (stmt);
8796 else_clause = gimple_assign_rhs3 (stmt);
8798 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8799 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8800 || !comp_vectype)
8801 return false;
8803 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
8804 return false;
8805 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
8806 return false;
8808 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8809 return false;
8811 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8812 return false;
8814 masked = !COMPARISON_CLASS_P (cond_expr);
8815 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8817 if (vec_cmp_type == NULL_TREE)
8818 return false;
8820 cond_code = TREE_CODE (cond_expr);
8821 if (!masked)
8823 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8824 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8827 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8829 /* Boolean values may have another representation in vectors
8830 and therefore we prefer bit operations over comparison for
8831 them (which also works for scalar masks). We store opcodes
8832 to use in bitop1 and bitop2. Statement is vectorized as
8833 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8834 depending on bitop1 and bitop2 arity. */
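/* For boolean operands this means, for example: a > b becomes a & ~b,
   a <= b becomes b | ~a (after the operand swap below), and a == b
   becomes ~(a ^ b).  */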
8835 switch (cond_code)
8837 case GT_EXPR:
8838 bitop1 = BIT_NOT_EXPR;
8839 bitop2 = BIT_AND_EXPR;
8840 break;
8841 case GE_EXPR:
8842 bitop1 = BIT_NOT_EXPR;
8843 bitop2 = BIT_IOR_EXPR;
8844 break;
8845 case LT_EXPR:
8846 bitop1 = BIT_NOT_EXPR;
8847 bitop2 = BIT_AND_EXPR;
8848 std::swap (cond_expr0, cond_expr1);
8849 break;
8850 case LE_EXPR:
8851 bitop1 = BIT_NOT_EXPR;
8852 bitop2 = BIT_IOR_EXPR;
8853 std::swap (cond_expr0, cond_expr1);
8854 break;
8855 case NE_EXPR:
8856 bitop1 = BIT_XOR_EXPR;
8857 break;
8858 case EQ_EXPR:
8859 bitop1 = BIT_XOR_EXPR;
8860 bitop2 = BIT_NOT_EXPR;
8861 break;
8862 default:
8863 return false;
8865 cond_code = SSA_NAME;
8868 if (!vec_stmt)
8870 if (bitop1 != NOP_EXPR)
8872 machine_mode mode = TYPE_MODE (comp_vectype);
8873 optab optab;
8875 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8876 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8877 return false;
8879 if (bitop2 != NOP_EXPR)
8881 optab = optab_for_tree_code (bitop2, comp_vectype,
8882 optab_default);
8883 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8884 return false;
8887 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8888 cond_code))
8890 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8891 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8892 cost_vec);
8893 return true;
8895 return false;
8898 /* Transform. */
8900 if (!slp_node)
8902 vec_oprnds0.create (1);
8903 vec_oprnds1.create (1);
8904 vec_oprnds2.create (1);
8905 vec_oprnds3.create (1);
8908 /* Handle def. */
8909 scalar_dest = gimple_assign_lhs (stmt);
8910 if (reduction_type != EXTRACT_LAST_REDUCTION)
8911 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8913 /* Handle cond expr. */
8914 for (j = 0; j < ncopies; j++)
8916 stmt_vec_info new_stmt_info = NULL;
8917 if (j == 0)
8919 if (slp_node)
8921 auto_vec<tree, 4> ops;
8922 auto_vec<vec<tree>, 4> vec_defs;
8924 if (masked)
8925 ops.safe_push (cond_expr);
8926 else
8928 ops.safe_push (cond_expr0);
8929 ops.safe_push (cond_expr1);
8931 ops.safe_push (then_clause);
8932 ops.safe_push (else_clause);
8933 vect_get_slp_defs (ops, slp_node, &vec_defs);
8934 vec_oprnds3 = vec_defs.pop ();
8935 vec_oprnds2 = vec_defs.pop ();
8936 if (!masked)
8937 vec_oprnds1 = vec_defs.pop ();
8938 vec_oprnds0 = vec_defs.pop ();
8940 else
8942 if (masked)
8944 vec_cond_lhs
8945 = vect_get_vec_def_for_operand (cond_expr, stmt,
8946 comp_vectype);
8947 vect_is_simple_use (cond_expr, stmt_info->vinfo, &dts[0]);
8949 else
8951 vec_cond_lhs
8952 = vect_get_vec_def_for_operand (cond_expr0,
8953 stmt, comp_vectype);
8954 vect_is_simple_use (cond_expr0, loop_vinfo, &dts[0]);
8956 vec_cond_rhs
8957 = vect_get_vec_def_for_operand (cond_expr1,
8958 stmt, comp_vectype);
8959 vect_is_simple_use (cond_expr1, loop_vinfo, &dts[1]);
8961 if (reduc_index == 1)
8962 vec_then_clause = reduc_def;
8963 else
8965 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8966 stmt);
8967 vect_is_simple_use (then_clause, loop_vinfo, &dts[2]);
8969 if (reduc_index == 2)
8970 vec_else_clause = reduc_def;
8971 else
8973 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8974 stmt);
8975 vect_is_simple_use (else_clause, loop_vinfo, &dts[3]);
8979 else
8981 vec_cond_lhs
8982 = vect_get_vec_def_for_stmt_copy (dts[0],
8983 vec_oprnds0.pop ());
8984 if (!masked)
8985 vec_cond_rhs
8986 = vect_get_vec_def_for_stmt_copy (dts[1],
8987 vec_oprnds1.pop ());
8989 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8990 vec_oprnds2.pop ());
8991 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8992 vec_oprnds3.pop ());
8995 if (!slp_node)
8997 vec_oprnds0.quick_push (vec_cond_lhs);
8998 if (!masked)
8999 vec_oprnds1.quick_push (vec_cond_rhs);
9000 vec_oprnds2.quick_push (vec_then_clause);
9001 vec_oprnds3.quick_push (vec_else_clause);
9004 /* Arguments are ready. Create the new vector stmt. */
9005 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
9007 vec_then_clause = vec_oprnds2[i];
9008 vec_else_clause = vec_oprnds3[i];
9010 if (masked)
9011 vec_compare = vec_cond_lhs;
9012 else
9014 vec_cond_rhs = vec_oprnds1[i];
9015 if (bitop1 == NOP_EXPR)
9016 vec_compare = build2 (cond_code, vec_cmp_type,
9017 vec_cond_lhs, vec_cond_rhs);
9018 else
9020 new_temp = make_ssa_name (vec_cmp_type);
9021 gassign *new_stmt;
9022 if (bitop1 == BIT_NOT_EXPR)
9023 new_stmt = gimple_build_assign (new_temp, bitop1,
9024 vec_cond_rhs);
9025 else
9026 new_stmt
9027 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
9028 vec_cond_rhs);
9029 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9030 if (bitop2 == NOP_EXPR)
9031 vec_compare = new_temp;
9032 else if (bitop2 == BIT_NOT_EXPR)
9034 /* Instead of doing ~x ? y : z do x ? z : y. */
9035 vec_compare = new_temp;
9036 std::swap (vec_then_clause, vec_else_clause);
9038 else
9040 vec_compare = make_ssa_name (vec_cmp_type);
9041 new_stmt
9042 = gimple_build_assign (vec_compare, bitop2,
9043 vec_cond_lhs, new_temp);
9044 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9048 if (reduction_type == EXTRACT_LAST_REDUCTION)
9050 if (!is_gimple_val (vec_compare))
9052 tree vec_compare_name = make_ssa_name (vec_cmp_type);
9053 gassign *new_stmt = gimple_build_assign (vec_compare_name,
9054 vec_compare);
9055 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9056 vec_compare = vec_compare_name;
9058 gcc_assert (reduc_index == 2);
9059 gcall *new_stmt = gimple_build_call_internal
9060 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9061 vec_then_clause);
9062 gimple_call_set_lhs (new_stmt, scalar_dest);
9063 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9064 if (stmt == gsi_stmt (*gsi))
9065 new_stmt_info = vect_finish_replace_stmt (stmt, new_stmt);
9066 else
9068 /* In this case we're moving the definition to later in the
9069 block. That doesn't matter because the only uses of the
9070 lhs are in phi statements. */
9071 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
9072 gsi_remove (&old_gsi, true);
9073 new_stmt_info
9074 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
9077 else
9079 new_temp = make_ssa_name (vec_dest);
9080 gassign *new_stmt
9081 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
9082 vec_then_clause, vec_else_clause);
9083 new_stmt_info
9084 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
9086 if (slp_node)
9087 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9090 if (slp_node)
9091 continue;
9093 if (j == 0)
9094 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9095 else
9096 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9098 prev_stmt_info = new_stmt_info;
9101 vec_oprnds0.release ();
9102 vec_oprnds1.release ();
9103 vec_oprnds2.release ();
9104 vec_oprnds3.release ();
9106 return true;
9109 /* vectorizable_comparison.
9111 Check if STMT is a comparison expression that can be vectorized.
9112 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
9113 comparison, put it in VEC_STMT, and insert it at GSI.
9115 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
9117 static bool
9118 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
9119 stmt_vec_info *vec_stmt, tree reduc_def,
9120 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9122 tree lhs, rhs1, rhs2;
9123 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9124 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9125 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9126 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9127 tree new_temp;
9128 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9129 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
9130 int ndts = 2;
9131 poly_uint64 nunits;
9132 int ncopies;
9133 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9134 stmt_vec_info prev_stmt_info = NULL;
9135 int i, j;
9136 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9137 vec<tree> vec_oprnds0 = vNULL;
9138 vec<tree> vec_oprnds1 = vNULL;
9139 tree mask_type;
9140 tree mask;
9142 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9143 return false;
9145 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
9146 return false;
9148 mask_type = vectype;
9149 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9151 if (slp_node)
9152 ncopies = 1;
9153 else
9154 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9156 gcc_assert (ncopies >= 1);
9157 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9158 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9159 && reduc_def))
9160 return false;
9162 if (STMT_VINFO_LIVE_P (stmt_info))
9164 if (dump_enabled_p ())
9165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9166 "value used after loop.\n");
9167 return false;
9170 if (!is_gimple_assign (stmt))
9171 return false;
9173 code = gimple_assign_rhs_code (stmt);
9175 if (TREE_CODE_CLASS (code) != tcc_comparison)
9176 return false;
9178 rhs1 = gimple_assign_rhs1 (stmt);
9179 rhs2 = gimple_assign_rhs2 (stmt);
9181 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
9182 return false;
9184 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
9185 return false;
9187 if (vectype1 && vectype2
9188 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9189 TYPE_VECTOR_SUBPARTS (vectype2)))
9190 return false;
9192 vectype = vectype1 ? vectype1 : vectype2;
9194 /* Invariant comparison. */
9195 if (!vectype)
9197 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9198 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9199 return false;
9201 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9202 return false;
9204 /* Can't compare mask and non-mask types. */
9205 if (vectype1 && vectype2
9206 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9207 return false;
9209 /* Boolean values may have another representation in vectors
9210 and therefore we prefer bit operations over comparison for
9211 them (which also works for scalar masks). We store opcodes
9212 to use in bitop1 and bitop2. The statement is vectorized as
9213 BITOP2 (rhs1 BITOP1 rhs2) or
9214 rhs1 BITOP2 (BITOP1 rhs2)
9215 depending on bitop1 and bitop2 arity (see the sketch below). */
9216 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9218 if (code == GT_EXPR)
9220 bitop1 = BIT_NOT_EXPR;
9221 bitop2 = BIT_AND_EXPR;
9223 else if (code == GE_EXPR)
9225 bitop1 = BIT_NOT_EXPR;
9226 bitop2 = BIT_IOR_EXPR;
9228 else if (code == LT_EXPR)
9230 bitop1 = BIT_NOT_EXPR;
9231 bitop2 = BIT_AND_EXPR;
9232 std::swap (rhs1, rhs2);
9233 std::swap (dts[0], dts[1]);
9235 else if (code == LE_EXPR)
9237 bitop1 = BIT_NOT_EXPR;
9238 bitop2 = BIT_IOR_EXPR;
9239 std::swap (rhs1, rhs2);
9240 std::swap (dts[0], dts[1]);
9242 else
9244 bitop1 = BIT_XOR_EXPR;
9245 if (code == EQ_EXPR)
9246 bitop2 = BIT_NOT_EXPR;
9250 if (!vec_stmt)
9252 if (bitop1 == NOP_EXPR)
9254 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9255 return false;
9257 else
9259 machine_mode mode = TYPE_MODE (vectype);
9260 optab optab;
9262 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9263 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9264 return false;
9266 if (bitop2 != NOP_EXPR)
9268 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9269 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9270 return false;
9274 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9275 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9276 dts, ndts, slp_node, cost_vec);
9277 return true;
9280 /* Transform. */
9281 if (!slp_node)
9283 vec_oprnds0.create (1);
9284 vec_oprnds1.create (1);
9287 /* Handle def. */
9288 lhs = gimple_assign_lhs (stmt);
9289 mask = vect_create_destination_var (lhs, mask_type);
9291 /* Handle cmp expr. */
9292 for (j = 0; j < ncopies; j++)
9294 stmt_vec_info new_stmt_info = NULL;
9295 if (j == 0)
9297 if (slp_node)
9299 auto_vec<tree, 2> ops;
9300 auto_vec<vec<tree>, 2> vec_defs;
9302 ops.safe_push (rhs1);
9303 ops.safe_push (rhs2);
9304 vect_get_slp_defs (ops, slp_node, &vec_defs);
9305 vec_oprnds1 = vec_defs.pop ();
9306 vec_oprnds0 = vec_defs.pop ();
9308 else
9310 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9311 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
9314 else
9316 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9317 vec_oprnds0.pop ());
9318 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9319 vec_oprnds1.pop ());
9322 if (!slp_node)
9324 vec_oprnds0.quick_push (vec_rhs1);
9325 vec_oprnds1.quick_push (vec_rhs2);
9328 /* Arguments are ready. Create the new vector stmt. */
9329 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9331 vec_rhs2 = vec_oprnds1[i];
9333 new_temp = make_ssa_name (mask);
9334 if (bitop1 == NOP_EXPR)
9336 gassign *new_stmt = gimple_build_assign (new_temp, code,
9337 vec_rhs1, vec_rhs2);
9338 new_stmt_info
9339 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
9341 else
9343 gassign *new_stmt;
9344 if (bitop1 == BIT_NOT_EXPR)
9345 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9346 else
9347 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9348 vec_rhs2);
9349 new_stmt_info
9350 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
9351 if (bitop2 != NOP_EXPR)
9353 tree res = make_ssa_name (mask);
9354 if (bitop2 == BIT_NOT_EXPR)
9355 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9356 else
9357 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9358 new_temp);
9359 new_stmt_info
9360 = vect_finish_stmt_generation (stmt, new_stmt, gsi);
9363 if (slp_node)
9364 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9367 if (slp_node)
9368 continue;
9370 if (j == 0)
9371 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9372 else
9373 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9375 prev_stmt_info = new_stmt_info;
9378 vec_oprnds0.release ();
9379 vec_oprnds1.release ();
9381 return true;
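/* Illustrative sketch, not part of the vectorizer: the boolean-mask
   lowerings selected above, spelled out as scalar bit operations on
   0/1 values (the function name is invented for the example):
     X >  Y  ->  X & ~Y     (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR)
     X >= Y  ->  X | ~Y     (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_IOR_EXPR)
     X == Y  ->  ~(X ^ Y)   (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR)
     X != Y  ->  X ^ Y      (bitop1 = BIT_XOR_EXPR)
   LT and LE reuse the GT and GE forms with the operands swapped.  */

static inline unsigned char
example_mask_gt (unsigned char x, unsigned char y)
{
  /* For one-bit mask values this is equivalent to x > y.  */
  return x & (unsigned char) ~y;
}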
9384 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9385 can handle all live statements in the node. Otherwise return true
9386 if STMT is not live or if vectorizable_live_operation can handle it.
9387 GSI and VEC_STMT are as for vectorizable_live_operation. */
9389 static bool
9390 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
9391 slp_tree slp_node, stmt_vec_info *vec_stmt,
9392 stmt_vector_for_cost *cost_vec)
9394 if (slp_node)
9396 stmt_vec_info slp_stmt_info;
9397 unsigned int i;
9398 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
9400 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9401 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
9402 vec_stmt, cost_vec))
9403 return false;
9406 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
9407 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt,
9408 cost_vec))
9409 return false;
9411 return true;
9414 /* Make sure the statement is vectorizable. */
9416 bool
9417 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
9418 slp_instance node_instance, stmt_vector_for_cost *cost_vec)
9420 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9421 vec_info *vinfo = stmt_info->vinfo;
9422 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9423 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9424 bool ok;
9425 gimple_seq pattern_def_seq;
9427 if (dump_enabled_p ())
9429 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9430 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9433 if (gimple_has_volatile_ops (stmt))
9435 if (dump_enabled_p ())
9436 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9437 "not vectorized: stmt has volatile operands\n");
9439 return false;
9442 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9443 && node == NULL
9444 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9446 gimple_stmt_iterator si;
9448 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9450 gimple *pattern_def_stmt = gsi_stmt (si);
9451 stmt_vec_info pattern_def_stmt_info
9452 = vinfo->lookup_stmt (gsi_stmt (si));
9453 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
9454 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
9456 /* Analyze def stmt of STMT if it's a pattern stmt. */
9457 if (dump_enabled_p ())
9459 dump_printf_loc (MSG_NOTE, vect_location,
9460 "==> examining pattern def statement: ");
9461 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
9464 if (!vect_analyze_stmt (pattern_def_stmt,
9465 need_to_vectorize, node, node_instance,
9466 cost_vec))
9467 return false;
9472 /* Skip stmts that do not need to be vectorized. In loops this is expected
9473 to include:
9474 - the COND_EXPR which is the loop exit condition
9475 - any LABEL_EXPRs in the loop
9476 - computations that are used only for array indexing or loop control.
9477 In basic blocks we only analyze statements that are a part of some SLP
9478 instance; therefore, all the statements are relevant.
9480 A pattern statement needs to be analyzed instead of the original statement
9481 if the original statement is not relevant. Otherwise, we analyze both
9482 statements. In basic blocks we are called from some SLP instance
9483 traversal, so don't analyze pattern stmts here; the pattern stmts
9484 are already part of the SLP instance. */
9486 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
9487 if (!STMT_VINFO_RELEVANT_P (stmt_info)
9488 && !STMT_VINFO_LIVE_P (stmt_info))
9490 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9491 && pattern_stmt_info
9492 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9493 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9495 /* Analyze PATTERN_STMT instead of the original stmt. */
9496 stmt = pattern_stmt_info->stmt;
9497 stmt_info = pattern_stmt_info;
9498 if (dump_enabled_p ())
9500 dump_printf_loc (MSG_NOTE, vect_location,
9501 "==> examining pattern statement: ");
9502 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9505 else
9507 if (dump_enabled_p ())
9508 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9510 return true;
9513 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9514 && node == NULL
9515 && pattern_stmt_info
9516 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9517 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9519 /* Analyze PATTERN_STMT too. */
9520 if (dump_enabled_p ())
9522 dump_printf_loc (MSG_NOTE, vect_location,
9523 "==> examining pattern statement: ");
9524 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9527 if (!vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
9528 node_instance, cost_vec))
9529 return false;
9532 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9534 case vect_internal_def:
9535 break;
9537 case vect_reduction_def:
9538 case vect_nested_cycle:
9539 gcc_assert (!bb_vinfo
9540 && (relevance == vect_used_in_outer
9541 || relevance == vect_used_in_outer_by_reduction
9542 || relevance == vect_used_by_reduction
9543 || relevance == vect_unused_in_scope
9544 || relevance == vect_used_only_live));
9545 break;
9547 case vect_induction_def:
9548 gcc_assert (!bb_vinfo);
9549 break;
9551 case vect_constant_def:
9552 case vect_external_def:
9553 case vect_unknown_def_type:
9554 default:
9555 gcc_unreachable ();
9558 if (STMT_VINFO_RELEVANT_P (stmt_info))
9560 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
9561 gcall *call = dyn_cast <gcall *> (stmt);
9562 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9563 || (call && gimple_call_lhs (call) == NULL_TREE));
9564 *need_to_vectorize = true;
9567 if (PURE_SLP_STMT (stmt_info) && !node)
9569 dump_printf_loc (MSG_NOTE, vect_location,
9570 "handled only by SLP analysis\n");
9571 return true;
9574 ok = true;
9575 if (!bb_vinfo
9576 && (STMT_VINFO_RELEVANT_P (stmt_info)
9577 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9578 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9579 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9580 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9581 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9582 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9583 || vectorizable_load (stmt, NULL, NULL, node, node_instance, cost_vec)
9584 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9585 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9586 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance,
9587 cost_vec)
9588 || vectorizable_induction (stmt, NULL, NULL, node, cost_vec)
9589 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node, cost_vec)
9590 || vectorizable_comparison (stmt, NULL, NULL, NULL, node, cost_vec));
9591 else
9593 if (bb_vinfo)
9594 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node, cost_vec)
9595 || vectorizable_conversion (stmt, NULL, NULL, node, cost_vec)
9596 || vectorizable_shift (stmt, NULL, NULL, node, cost_vec)
9597 || vectorizable_operation (stmt, NULL, NULL, node, cost_vec)
9598 || vectorizable_assignment (stmt, NULL, NULL, node, cost_vec)
9599 || vectorizable_load (stmt, NULL, NULL, node, node_instance,
9600 cost_vec)
9601 || vectorizable_call (stmt, NULL, NULL, node, cost_vec)
9602 || vectorizable_store (stmt, NULL, NULL, node, cost_vec)
9603 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node,
9604 cost_vec)
9605 || vectorizable_comparison (stmt, NULL, NULL, NULL, node,
9606 cost_vec));
9609 if (!ok)
9611 if (dump_enabled_p ())
9613 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9614 "not vectorized: relevant stmt not ");
9615 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9616 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9619 return false;
9622 /* Stmts that are (also) "live" (i.e. used outside the loop)
9623 need extra handling, except for vectorizable reductions. */
9624 if (!bb_vinfo
9625 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9626 && !can_vectorize_live_stmts (stmt, NULL, node, NULL, cost_vec))
9628 if (dump_enabled_p ())
9630 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9631 "not vectorized: live stmt not supported: ");
9632 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9635 return false;
9638 return true;
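/* Illustrative sketch, not part of GCC: the shape of a caller of
   vect_analyze_stmt for a statement outside any SLP instance; the
   function name is invented for the example.  */

static bool
example_analyze_one_stmt (gimple *stmt, stmt_vector_for_cost *cost_vec)
{
  bool need_to_vectorize = false;
  /* NULL node/node_instance means pure loop (non-SLP) analysis.  */
  return vect_analyze_stmt (stmt, &need_to_vectorize, NULL, NULL, cost_vec);
}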
9642 /* Function vect_transform_stmt.
9644 Create a vectorized stmt to replace STMT, and insert it at BSI. */
9646 bool
9647 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
9648 bool *grouped_store, slp_tree slp_node,
9649 slp_instance slp_node_instance)
9651 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9652 vec_info *vinfo = stmt_info->vinfo;
9653 bool is_store = false;
9654 stmt_vec_info vec_stmt = NULL;
9655 bool done;
9657 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9658 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
9660 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9661 && nested_in_vect_loop_p
9662 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9663 stmt));
9665 switch (STMT_VINFO_TYPE (stmt_info))
9667 case type_demotion_vec_info_type:
9668 case type_promotion_vec_info_type:
9669 case type_conversion_vec_info_type:
9670 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node, NULL);
9671 gcc_assert (done);
9672 break;
9674 case induc_vec_info_type:
9675 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node, NULL);
9676 gcc_assert (done);
9677 break;
9679 case shift_vec_info_type:
9680 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node, NULL);
9681 gcc_assert (done);
9682 break;
9684 case op_vec_info_type:
9685 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node, NULL);
9686 gcc_assert (done);
9687 break;
9689 case assignment_vec_info_type:
9690 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node, NULL);
9691 gcc_assert (done);
9692 break;
9694 case load_vec_info_type:
9695 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
9696 slp_node_instance, NULL);
9697 gcc_assert (done);
9698 break;
9700 case store_vec_info_type:
9701 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node, NULL);
9702 gcc_assert (done);
9703 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9705 /* In case of interleaving, the whole chain is vectorized when the
9706 last store in the chain is reached. Store stmts before the last
9707 one are skipped, and their vec_stmt_info shouldn't be freed
9708 meanwhile. */
9709 *grouped_store = true;
9710 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9711 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
9712 is_store = true;
9714 else
9715 is_store = true;
9716 break;
9718 case condition_vec_info_type:
9719 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node, NULL);
9720 gcc_assert (done);
9721 break;
9723 case comparison_vec_info_type:
9724 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node, NULL);
9725 gcc_assert (done);
9726 break;
9728 case call_vec_info_type:
9729 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node, NULL);
9730 stmt = gsi_stmt (*gsi);
9731 break;
9733 case call_simd_clone_vec_info_type:
9734 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node, NULL);
9735 stmt = gsi_stmt (*gsi);
9736 break;
9738 case reduc_vec_info_type:
9739 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
9740 slp_node_instance, NULL);
9741 gcc_assert (done);
9742 break;
9744 default:
9745 if (!STMT_VINFO_LIVE_P (stmt_info))
9747 if (dump_enabled_p ())
9748 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9749 "stmt not supported.\n");
9750 gcc_unreachable ();
9754 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9755 This would break hybrid SLP vectorization. */
9756 if (slp_node)
9757 gcc_assert (!vec_stmt
9758 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
9760 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9761 is being vectorized, but outside the immediately enclosing loop. */
9762 if (vec_stmt
9763 && nested_p
9764 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9765 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9766 || STMT_VINFO_RELEVANT (stmt_info) ==
9767 vect_used_in_outer_by_reduction))
9769 struct loop *innerloop = LOOP_VINFO_LOOP (
9770 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9771 imm_use_iterator imm_iter;
9772 use_operand_p use_p;
9773 tree scalar_dest;
9775 if (dump_enabled_p ())
9776 dump_printf_loc (MSG_NOTE, vect_location,
9777 "Record the vdef for outer-loop vectorization.\n");
9779 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9780 (to be used when vectorizing outer-loop stmts that use the DEF of
9781 STMT). */
9782 if (gimple_code (stmt) == GIMPLE_PHI)
9783 scalar_dest = PHI_RESULT (stmt);
9784 else
9785 scalar_dest = gimple_assign_lhs (stmt);
9787 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9788 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9790 stmt_vec_info exit_phi_info
9791 = vinfo->lookup_stmt (USE_STMT (use_p));
9792 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
9796 /* Handle stmts whose DEF is used outside the loop-nest that is
9797 being vectorized. */
9798 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9800 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt, NULL);
9801 gcc_assert (done);
9804 if (vec_stmt)
9805 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9807 return is_store;
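/* Illustrative sketch, not part of GCC: how vect_transform_stmt is
   typically driven for a non-SLP statement; the function name is
   invented for the example.  */

static bool
example_transform_one_stmt (gimple *stmt, gimple_stmt_iterator *gsi)
{
  bool grouped_store = false;
  /* NULL slp_node/slp_node_instance means plain (non-SLP) loop
     vectorization.  A true return means STMT is a store whose scalar
     form is removed later (see vect_remove_stores).  */
  return vect_transform_stmt (stmt, gsi, &grouped_store, NULL, NULL);
}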
9811 /* Remove a group of stores (for SLP or interleaving), free their
9812 stmt_vec_info. */
9814 void
9815 vect_remove_stores (gimple *first_stmt)
9817 gimple *next = first_stmt;
9818 gimple_stmt_iterator next_si;
9820 while (next)
9822 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9824 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (stmt_info);
9825 if (is_pattern_stmt_p (stmt_info))
9826 next = STMT_VINFO_RELATED_STMT (stmt_info);
9827 /* Free the attached stmt_vec_info and remove the stmt. */
9828 next_si = gsi_for_stmt (next);
9829 unlink_stmt_vdef (next);
9830 gsi_remove (&next_si, true);
9831 release_defs (next);
9832 free_stmt_vec_info (next);
9833 next = tmp;
9838 /* Function new_stmt_vec_info.
9840 Create and initialize a new stmt_vec_info struct for STMT. */
9842 stmt_vec_info
9843 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
9845 stmt_vec_info res;
9846 res = (_stmt_vec_info *) xcalloc (1, sizeof (struct _stmt_vec_info));
9848 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9849 STMT_VINFO_STMT (res) = stmt;
9850 res->vinfo = vinfo;
9851 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
9852 STMT_VINFO_LIVE_P (res) = false;
9853 STMT_VINFO_VECTYPE (res) = NULL;
9854 STMT_VINFO_VEC_STMT (res) = NULL;
9855 STMT_VINFO_VECTORIZABLE (res) = true;
9856 STMT_VINFO_IN_PATTERN_P (res) = false;
9857 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
9858 STMT_VINFO_DATA_REF (res) = NULL;
9859 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
9860 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
9862 if (gimple_code (stmt) == GIMPLE_PHI
9863 && is_loop_header_bb_p (gimple_bb (stmt)))
9864 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9865 else
9866 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9868 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9869 STMT_SLP_TYPE (res) = loop_vect;
9870 STMT_VINFO_NUM_SLP_USES (res) = 0;
9872 res->first_element = NULL; /* GROUP_FIRST_ELEMENT */
9873 res->next_element = NULL; /* GROUP_NEXT_ELEMENT */
9874 res->size = 0; /* GROUP_SIZE */
9875 res->store_count = 0; /* GROUP_STORE_COUNT */
9876 res->gap = 0; /* GROUP_GAP */
9877 res->same_dr_stmt = NULL; /* GROUP_SAME_DR_STMT */
9879 /* This is really "uninitialized" until vect_compute_data_ref_alignment. */
9880 res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;
9882 return res;
9886 /* Set the current stmt_vec_info vector to V. */
9888 void
9889 set_stmt_vec_info_vec (vec<stmt_vec_info> *v)
9891 stmt_vec_info_vec = v;
9894 /* Free the stmt_vec_info entries in V and release V. */
9896 void
9897 free_stmt_vec_infos (vec<stmt_vec_info> *v)
9899 unsigned int i;
9900 stmt_vec_info info;
9901 FOR_EACH_VEC_ELT (*v, i, info)
9902 if (info != NULL_STMT_VEC_INFO)
9903 free_stmt_vec_info (STMT_VINFO_STMT (info));
9904 if (v == stmt_vec_info_vec)
9905 stmt_vec_info_vec = NULL;
9906 v->release ();
9910 /* Free stmt vectorization related info. */
9912 void
9913 free_stmt_vec_info (gimple *stmt)
9915 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9917 if (!stmt_info)
9918 return;
9920 /* Check if this statement has a related "pattern stmt"
9921 (introduced by the vectorizer during the pattern recognition
9922 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9923 too. */
9924 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9926 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
9927 for (gimple_stmt_iterator si = gsi_start (seq);
9928 !gsi_end_p (si); gsi_next (&si))
9930 gimple *seq_stmt = gsi_stmt (si);
9931 gimple_set_bb (seq_stmt, NULL);
9932 tree lhs = gimple_get_lhs (seq_stmt);
9933 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9934 release_ssa_name (lhs);
9935 free_stmt_vec_info (seq_stmt);
9937 stmt_vec_info patt_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
9938 if (patt_stmt_info)
9940 gimple_set_bb (patt_stmt_info->stmt, NULL);
9941 tree lhs = gimple_get_lhs (patt_stmt_info->stmt);
9942 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9943 release_ssa_name (lhs);
9944 free_stmt_vec_info (patt_stmt_info);
9948 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9949 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9950 set_vinfo_for_stmt (stmt, NULL);
9951 free (stmt_info);
9955 /* Function get_vectype_for_scalar_type_and_size.
9957 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9958 by the target. */
9960 tree
9961 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9963 tree orig_scalar_type = scalar_type;
9964 scalar_mode inner_mode;
9965 machine_mode simd_mode;
9966 poly_uint64 nunits;
9967 tree vectype;
9969 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9970 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9971 return NULL_TREE;
9973 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9975 /* For vector types of elements whose mode precision doesn't
9976 match their type's precision we use an element type of mode
9977 precision. The vectorization routines will have to make sure
9978 they support the proper result truncation/extension.
9979 We also make sure to build vector types with INTEGER_TYPE
9980 component type only. */
9981 if (INTEGRAL_TYPE_P (scalar_type)
9982 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9983 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9984 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9985 TYPE_UNSIGNED (scalar_type));
9987 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9988 When the component mode passes the above test, simply use a type
9989 corresponding to that mode. The theory is that any use that
9990 would cause problems with this will disable vectorization anyway. */
9991 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9992 && !INTEGRAL_TYPE_P (scalar_type))
9993 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9995 /* We can't build a vector type of elements with alignment bigger than
9996 their size. */
9997 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9998 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9999 TYPE_UNSIGNED (scalar_type));
10001 /* If we fell back to using the mode, fail if there was
10002 no scalar type for it. */
10003 if (scalar_type == NULL_TREE)
10004 return NULL_TREE;
10006 /* If no size was supplied use the mode the target prefers. Otherwise
10007 look up a vector mode of the specified size. */
10008 if (known_eq (size, 0U))
10009 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
10010 else if (!multiple_p (size, nbytes, &nunits)
10011 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
10012 return NULL_TREE;
10013 /* NOTE: nunits == 1 is allowed to support single element vector types. */
10014 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
10015 return NULL_TREE;
10017 vectype = build_vector_type (scalar_type, nunits);
10019 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
10020 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
10021 return NULL_TREE;
10023 /* Re-attach the address-space qualifier if we canonicalized the scalar
10024 type. */
10025 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
10026 return build_qualified_type
10027 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
10029 return vectype;
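/* Illustrative sketch, not part of GCC: a direct use of the routine
   above.  The element type and byte size below are only an example;
   the result depends on what vector modes the target provides.  */

static tree
example_short_vectype (void)
{
  /* For short elements (typically 16 bits) and a requested size of 16
     bytes this yields an 8-element vector type if the target has a
     matching vector mode, or NULL_TREE otherwise.  */
  poly_uint64 size = 16;
  return get_vectype_for_scalar_type_and_size (short_integer_type_node, size);
}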
10032 poly_uint64 current_vector_size;
10034 /* Function get_vectype_for_scalar_type.
10036 Returns the vector type corresponding to SCALAR_TYPE as supported
10037 by the target. */
10039 tree
10040 get_vectype_for_scalar_type (tree scalar_type)
10042 tree vectype;
10043 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
10044 current_vector_size);
10045 if (vectype
10046 && known_eq (current_vector_size, 0U))
10047 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
10048 return vectype;
10051 /* Function get_mask_type_for_scalar_type.
10053 Returns the mask type corresponding to a result of comparison
10054 of vectors of specified SCALAR_TYPE as supported by target. */
10056 tree
10057 get_mask_type_for_scalar_type (tree scalar_type)
10059 tree vectype = get_vectype_for_scalar_type (scalar_type);
10061 if (!vectype)
10062 return NULL;
10064 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
10065 current_vector_size);
10068 /* Function get_same_sized_vectype
10070 Returns a vector type corresponding to SCALAR_TYPE of size
10071 VECTOR_TYPE if supported by the target. */
10073 tree
10074 get_same_sized_vectype (tree scalar_type, tree vector_type)
10076 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10077 return build_same_sized_truth_vector_type (vector_type);
10079 return get_vectype_for_scalar_type_and_size
10080 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
10083 /* Function vect_is_simple_use.
10085 Input:
10086 VINFO - the vect info of the loop or basic block that is being vectorized.
10087 OPERAND - operand in the loop or bb.
10088 Output:
10089 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
10090 case OPERAND is an SSA_NAME that is defined in the vectorizable region
10091 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
10092 the definition could be anywhere in the function
10093 DT - the type of definition
10095 Returns whether a stmt with OPERAND can be vectorized.
10096 For loops, supportable operands are constants, loop invariants, and operands
10097 that are defined by the current iteration of the loop. Unsupportable
10098 operands are those that are defined by a previous iteration of the loop (as
10099 is the case in reduction/induction computations).
10100 For basic blocks, supportable operands are constants and bb invariants.
10101 For now, operands defined outside the basic block are not supported. */
10103 bool
10104 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10105 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
10107 if (def_stmt_info_out)
10108 *def_stmt_info_out = NULL;
10109 if (def_stmt_out)
10110 *def_stmt_out = NULL;
10111 *dt = vect_unknown_def_type;
10113 if (dump_enabled_p ())
10115 dump_printf_loc (MSG_NOTE, vect_location,
10116 "vect_is_simple_use: operand ");
10117 if (TREE_CODE (operand) == SSA_NAME
10118 && !SSA_NAME_IS_DEFAULT_DEF (operand))
10119 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
10120 else
10121 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
10124 if (CONSTANT_CLASS_P (operand))
10125 *dt = vect_constant_def;
10126 else if (is_gimple_min_invariant (operand))
10127 *dt = vect_external_def;
10128 else if (TREE_CODE (operand) != SSA_NAME)
10129 *dt = vect_unknown_def_type;
10130 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
10131 *dt = vect_external_def;
10132 else
10134 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
10135 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
10136 if (!stmt_vinfo)
10137 *dt = vect_external_def;
10138 else
10140 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
10142 stmt_vinfo = STMT_VINFO_RELATED_STMT (stmt_vinfo);
10143 def_stmt = stmt_vinfo->stmt;
10145 switch (gimple_code (def_stmt))
10147 case GIMPLE_PHI:
10148 case GIMPLE_ASSIGN:
10149 case GIMPLE_CALL:
10150 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
10151 break;
10152 default:
10153 *dt = vect_unknown_def_type;
10154 break;
10156 if (def_stmt_info_out)
10157 *def_stmt_info_out = stmt_vinfo;
10159 if (def_stmt_out)
10160 *def_stmt_out = def_stmt;
10163 if (dump_enabled_p ())
10165 dump_printf (MSG_NOTE, ", type of def: ");
10166 switch (*dt)
10168 case vect_uninitialized_def:
10169 dump_printf (MSG_NOTE, "uninitialized\n");
10170 break;
10171 case vect_constant_def:
10172 dump_printf (MSG_NOTE, "constant\n");
10173 break;
10174 case vect_external_def:
10175 dump_printf (MSG_NOTE, "external\n");
10176 break;
10177 case vect_internal_def:
10178 dump_printf (MSG_NOTE, "internal\n");
10179 break;
10180 case vect_induction_def:
10181 dump_printf (MSG_NOTE, "induction\n");
10182 break;
10183 case vect_reduction_def:
10184 dump_printf (MSG_NOTE, "reduction\n");
10185 break;
10186 case vect_double_reduction_def:
10187 dump_printf (MSG_NOTE, "double reduction\n");
10188 break;
10189 case vect_nested_cycle:
10190 dump_printf (MSG_NOTE, "nested cycle\n");
10191 break;
10192 case vect_unknown_def_type:
10193 dump_printf (MSG_NOTE, "unknown\n");
10194 break;
10198 if (*dt == vect_unknown_def_type)
10200 if (dump_enabled_p ())
10201 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10202 "Unsupported pattern.\n");
10203 return false;
10206 return true;
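/* Illustrative sketch, not part of GCC: the typical way a vectorizable_*
   routine consults vect_is_simple_use; the function name is invented
   for the example.  */

static bool
example_operand_ok_p (tree op, vec_info *vinfo)
{
  enum vect_def_type dt;
  stmt_vec_info def_info;
  gimple *def_stmt;
  /* Fails for operands whose definition the vectorizer cannot handle,
     e.g. a value defined by a previous iteration of the loop.  */
  return vect_is_simple_use (op, vinfo, &dt, &def_info, &def_stmt);
}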
10209 /* Function vect_is_simple_use.
10211 Same as vect_is_simple_use but also determines the vector operand
10212 type of OPERAND and stores it to *VECTYPE. If the definition of
10213 OPERAND is vect_uninitialized_def, vect_constant_def or
10214 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
10215 is responsible for computing the best suited vector type for the
10216 scalar operand. */
10218 bool
10219 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10220 tree *vectype, stmt_vec_info *def_stmt_info_out,
10221 gimple **def_stmt_out)
10223 stmt_vec_info def_stmt_info;
10224 gimple *def_stmt;
10225 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
10226 return false;
10228 if (def_stmt_out)
10229 *def_stmt_out = def_stmt;
10230 if (def_stmt_info_out)
10231 *def_stmt_info_out = def_stmt_info;
10233 /* Now get a vector type if the def is internal, otherwise supply
10234 NULL_TREE and leave it up to the caller to figure out a proper
10235 type for the use stmt. */
10236 if (*dt == vect_internal_def
10237 || *dt == vect_induction_def
10238 || *dt == vect_reduction_def
10239 || *dt == vect_double_reduction_def
10240 || *dt == vect_nested_cycle)
10242 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
10243 gcc_assert (*vectype != NULL_TREE);
10244 if (dump_enabled_p ())
10246 dump_printf_loc (MSG_NOTE, vect_location,
10247 "vect_is_simple_use: vectype ");
10248 dump_generic_expr (MSG_NOTE, TDF_SLIM, *vectype);
10249 dump_printf (MSG_NOTE, "\n");
10252 else if (*dt == vect_uninitialized_def
10253 || *dt == vect_constant_def
10254 || *dt == vect_external_def)
10255 *vectype = NULL_TREE;
10256 else
10257 gcc_unreachable ();
10259 return true;
10263 /* Function supportable_widening_operation
10265 Check whether an operation represented by the code CODE is a
10266 widening operation that is supported by the target platform in
10267 vector form (i.e., when operating on arguments of type VECTYPE_IN
10268 producing a result of type VECTYPE_OUT).
10270 Widening operations we currently support are NOP (CONVERT), FLOAT,
10271 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10272 are supported by the target platform either directly (via vector
10273 tree-codes), or via target builtins.
10275 Output:
10276 - CODE1 and CODE2 are codes of vector operations to be used when
10277 vectorizing the operation, if available.
10278 - MULTI_STEP_CVT determines the number of required intermediate steps in
10279 case of multi-step conversion (like char->short->int - in that case
10280 MULTI_STEP_CVT will be 1).
10281 - INTERM_TYPES contains the intermediate type required to perform the
10282 widening operation (short in the above example). */
10284 bool
10285 supportable_widening_operation (enum tree_code code, gimple *stmt,
10286 tree vectype_out, tree vectype_in,
10287 enum tree_code *code1, enum tree_code *code2,
10288 int *multi_step_cvt,
10289 vec<tree> *interm_types)
10291 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
10292 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10293 struct loop *vect_loop = NULL;
10294 machine_mode vec_mode;
10295 enum insn_code icode1, icode2;
10296 optab optab1, optab2;
10297 tree vectype = vectype_in;
10298 tree wide_vectype = vectype_out;
10299 enum tree_code c1, c2;
10300 int i;
10301 tree prev_type, intermediate_type;
10302 machine_mode intermediate_mode, prev_mode;
10303 optab optab3, optab4;
10305 *multi_step_cvt = 0;
10306 if (loop_info)
10307 vect_loop = LOOP_VINFO_LOOP (loop_info);
10309 switch (code)
10311 case WIDEN_MULT_EXPR:
10312 /* The result of a vectorized widening operation usually requires
10313 two vectors (because the widened results do not fit into one vector).
10314 The generated vector results would normally be expected to be
10315 generated in the same order as in the original scalar computation,
10316 i.e. if 8 results are generated in each vector iteration, they are
10317 to be organized as follows:
10318 vect1: [res1,res2,res3,res4],
10319 vect2: [res5,res6,res7,res8].
10321 However, in the special case that the result of the widening
10322 operation is used in a reduction computation only, the order doesn't
10323 matter (because when vectorizing a reduction we change the order of
10324 the computation). Some targets can take advantage of this and
10325 generate more efficient code. For example, targets like Altivec,
10326 that support widen_mult using a sequence of {mult_even,mult_odd}
10327 generate the following vectors:
10328 vect1: [res1,res3,res5,res7],
10329 vect2: [res2,res4,res6,res8].
10331 When vectorizing outer-loops, we execute the inner-loop sequentially
10332 (each vectorized inner-loop iteration contributes to VF outer-loop
10333 iterations in parallel). We therefore don't allow changing the
10334 order of the computation in the inner-loop during outer-loop
10335 vectorization. */
10336 /* TODO: Another case in which order doesn't *really* matter is when we
10337 widen and then contract again, e.g. (short)((int)x * y >> 8).
10338 Normally, pack_trunc performs an even/odd permute, whereas the
10339 repack from an even/odd expansion would be an interleave, which
10340 would be significantly simpler for e.g. AVX2. */
10341 /* In any case, in order to avoid duplicating the code below, recurse
10342 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10343 are properly set up for the caller. If we fail, we'll continue with
10344 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10345 if (vect_loop
10346 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10347 && !nested_in_vect_loop_p (vect_loop, stmt)
10348 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10349 stmt, vectype_out, vectype_in,
10350 code1, code2, multi_step_cvt,
10351 interm_types))
10353 /* Elements in a vector with vect_used_by_reduction property cannot
10354 be reordered if the use chain with this property does not have the
10355 same operation. One such example is s += a * b, where elements
10356 in a and b cannot be reordered. Here we check if the vector defined
10357 by STMT is only directly used in the reduction statement. */
10358 tree lhs = gimple_assign_lhs (stmt);
10359 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
10360 if (use_stmt_info
10361 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10362 return true;
10364 c1 = VEC_WIDEN_MULT_LO_EXPR;
10365 c2 = VEC_WIDEN_MULT_HI_EXPR;
10366 break;
10368 case DOT_PROD_EXPR:
10369 c1 = DOT_PROD_EXPR;
10370 c2 = DOT_PROD_EXPR;
10371 break;
10373 case SAD_EXPR:
10374 c1 = SAD_EXPR;
10375 c2 = SAD_EXPR;
10376 break;
10378 case VEC_WIDEN_MULT_EVEN_EXPR:
10379 /* Support the recursion induced just above. */
10380 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10381 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10382 break;
10384 case WIDEN_LSHIFT_EXPR:
10385 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10386 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10387 break;
10389 CASE_CONVERT:
10390 c1 = VEC_UNPACK_LO_EXPR;
10391 c2 = VEC_UNPACK_HI_EXPR;
10392 break;
10394 case FLOAT_EXPR:
10395 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10396 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10397 break;
10399 case FIX_TRUNC_EXPR:
10400 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10401 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10402 break;
10404 default:
10405 gcc_unreachable ();
10408 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10409 std::swap (c1, c2);
10411 if (code == FIX_TRUNC_EXPR)
10413 /* The signedness is determined from the output operand. */
10414 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10415 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10417 else
10419 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10420 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10423 if (!optab1 || !optab2)
10424 return false;
10426 vec_mode = TYPE_MODE (vectype);
10427 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10428 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10429 return false;
10431 *code1 = c1;
10432 *code2 = c2;
10434 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10435 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10436 /* For scalar masks we may have different boolean
10437 vector types having the same QImode. Thus we
10438 add an additional check on the number of elements. */
10439 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10440 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10441 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10443 /* Check if it's a multi-step conversion that can be done using intermediate
10444 types. */
10446 prev_type = vectype;
10447 prev_mode = vec_mode;
10449 if (!CONVERT_EXPR_CODE_P (code))
10450 return false;
10452 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10453 intermediate steps in the promotion sequence. We try
10454 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
10455 not. */
10456 interm_types->create (MAX_INTERM_CVT_STEPS);
10457 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10459 intermediate_mode = insn_data[icode1].operand[0].mode;
10460 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10462 intermediate_type = vect_halve_mask_nunits (prev_type);
10463 if (intermediate_mode != TYPE_MODE (intermediate_type))
10464 return false;
10466 else
10467 intermediate_type
10468 = lang_hooks.types.type_for_mode (intermediate_mode,
10469 TYPE_UNSIGNED (prev_type));
10471 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10472 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10474 if (!optab3 || !optab4
10475 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10476 || insn_data[icode1].operand[0].mode != intermediate_mode
10477 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10478 || insn_data[icode2].operand[0].mode != intermediate_mode
10479 || ((icode1 = optab_handler (optab3, intermediate_mode))
10480 == CODE_FOR_nothing)
10481 || ((icode2 = optab_handler (optab4, intermediate_mode))
10482 == CODE_FOR_nothing))
10483 break;
10485 interm_types->quick_push (intermediate_type);
10486 (*multi_step_cvt)++;
10488 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10489 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10490 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10491 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10492 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10494 prev_type = intermediate_type;
10495 prev_mode = intermediate_mode;
10498 interm_types->release ();
10499 return false;
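/* Illustrative sketch, not part of the vectorizer: a widening-multiply
   reduction of the kind discussed in the WIDEN_MULT_EXPR comment above,
   where an even/odd scheme is acceptable because the products only feed
   the reduction.  The function name is invented for the example.  */

static int
example_widen_mult_reduction (const short *a, const short *b, int n)
{
  int sum = 0;
  for (int i = 0; i < n; i++)
    /* Each short * short product is widened to int before the add.  */
    sum += (int) a[i] * b[i];
  return sum;
}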
10503 /* Function supportable_narrowing_operation
10505 Check whether an operation represented by the code CODE is a
10506 narrowing operation that is supported by the target platform in
10507 vector form (i.e., when operating on arguments of type VECTYPE_IN
10508 and producing a result of type VECTYPE_OUT).
10510 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10511 and FLOAT. This function checks if these operations are supported by
10512 the target platform directly via vector tree-codes.
10514 Output:
10515 - CODE1 is the code of a vector operation to be used when
10516 vectorizing the operation, if available.
10517 - MULTI_STEP_CVT determines the number of required intermediate steps in
10518 case of multi-step conversion (like int->short->char - in that case
10519 MULTI_STEP_CVT will be 1).
10520 - INTERM_TYPES contains the intermediate type required to perform the
10521 narrowing operation (short in the above example). */
10523 bool
10524 supportable_narrowing_operation (enum tree_code code,
10525 tree vectype_out, tree vectype_in,
10526 enum tree_code *code1, int *multi_step_cvt,
10527 vec<tree> *interm_types)
10529 machine_mode vec_mode;
10530 enum insn_code icode1;
10531 optab optab1, interm_optab;
10532 tree vectype = vectype_in;
10533 tree narrow_vectype = vectype_out;
10534 enum tree_code c1;
10535 tree intermediate_type, prev_type;
10536 machine_mode intermediate_mode, prev_mode;
10537 int i;
10538 bool uns;
10540 *multi_step_cvt = 0;
10541 switch (code)
10543 CASE_CONVERT:
10544 c1 = VEC_PACK_TRUNC_EXPR;
10545 break;
10547 case FIX_TRUNC_EXPR:
10548 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10549 break;
10551 case FLOAT_EXPR:
10552 c1 = VEC_PACK_FLOAT_EXPR;
10553 break;
10555 default:
10556 gcc_unreachable ();
10559 if (code == FIX_TRUNC_EXPR)
10560 /* The signedness is determined from the output operand. */
10561 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10562 else
10563 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10565 if (!optab1)
10566 return false;
10568 vec_mode = TYPE_MODE (vectype);
10569 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10570 return false;
10572 *code1 = c1;
10574 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10575 /* For scalar masks we may have different boolean
10576 vector types having the same QImode. Thus we
10577 add an additional check on the number of elements. */
10578 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10579 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10580 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10582 if (code == FLOAT_EXPR)
10583 return false;
10585 /* Check if it's a multi-step conversion that can be done using intermediate
10586 types. */
10587 prev_mode = vec_mode;
10588 prev_type = vectype;
10589 if (code == FIX_TRUNC_EXPR)
10590 uns = TYPE_UNSIGNED (vectype_out);
10591 else
10592 uns = TYPE_UNSIGNED (vectype);
10594 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10595 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10596 costly than signed. */
10597 if (code == FIX_TRUNC_EXPR && uns)
10599 enum insn_code icode2;
10601 intermediate_type
10602 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10603 interm_optab
10604 = optab_for_tree_code (c1, intermediate_type, optab_default);
10605 if (interm_optab != unknown_optab
10606 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10607 && insn_data[icode1].operand[0].mode
10608 == insn_data[icode2].operand[0].mode)
10610 uns = false;
10611 optab1 = interm_optab;
10612 icode1 = icode2;
10616 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10617 intermediate steps in the narrowing sequence. We try
10618 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
10619 interm_types->create (MAX_INTERM_CVT_STEPS);
10620 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10622 intermediate_mode = insn_data[icode1].operand[0].mode;
10623 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10625 intermediate_type = vect_double_mask_nunits (prev_type);
10626 if (intermediate_mode != TYPE_MODE (intermediate_type))
10627 return false;
10629 else
10630 intermediate_type
10631 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10632 interm_optab
10633 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10634 optab_default);
10635 if (!interm_optab
10636 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10637 || insn_data[icode1].operand[0].mode != intermediate_mode
10638 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10639 == CODE_FOR_nothing))
10640 break;
10642 interm_types->quick_push (intermediate_type);
10643 (*multi_step_cvt)++;
10645 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10646 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10647 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10648 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10650 prev_mode = intermediate_mode;
10651 prev_type = intermediate_type;
10652 optab1 = interm_optab;
10655 interm_types->release ();
10656 return false;
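/* Illustrative sketch, not part of the vectorizer: a narrowing
   conversion of the kind described above.  Narrowing int to char
   typically goes through short, so MULTI_STEP_CVT would be 1 with
   short as the intermediate type.  The function name is invented
   for the example.  */

static void
example_narrow_int_to_char (signed char *dst, const int *src, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = (signed char) src[i];
}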
10659 /* Generate and return a statement that sets vector mask MASK such that
10660 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
10662 gcall *
10663 vect_gen_while (tree mask, tree start_index, tree end_index)
10665 tree cmp_type = TREE_TYPE (start_index);
10666 tree mask_type = TREE_TYPE (mask);
10667 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10668 cmp_type, mask_type,
10669 OPTIMIZE_FOR_SPEED));
10670 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10671 start_index, end_index,
10672 build_zero_cst (mask_type));
10673 gimple_call_set_lhs (call, mask);
10674 return call;
10677 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10678 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10680 tree
10681 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10682 tree end_index)
10684 tree tmp = make_ssa_name (mask_type);
10685 gcall *call = vect_gen_while (tmp, start_index, end_index);
10686 gimple_seq_add_stmt (seq, call);
10687 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
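/* Illustrative sketch, not part of the vectorizer: the mask produced by
   the IFN_WHILE_ULT call built above, written as scalar code under the
   assumption that START_INDEX + I does not wrap around.  The function
   name is invented for the example.  */

static void
example_while_ult (unsigned char *mask, unsigned int start_index,
                   unsigned int end_index, unsigned int nunits)
{
  for (unsigned int i = 0; i < nunits; i++)
    mask[i] = (start_index + i < end_index);
}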
10690 /* Try to compute the vector types required to vectorize STMT_INFO,
10691 returning true on success and false if vectorization isn't possible.
10693 On success:
10695 - Set *STMT_VECTYPE_OUT to:
10696 - NULL_TREE if the statement doesn't need to be vectorized;
10697 - boolean_type_node if the statement is a boolean operation whose
10698 vector type can only be determined once all the other vector types
10699 are known; and
10700 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10702 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10703 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10704 statement does not help to determine the overall number of units. */
10706 bool
10707 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10708 tree *stmt_vectype_out,
10709 tree *nunits_vectype_out)
10711 gimple *stmt = stmt_info->stmt;
10713 *stmt_vectype_out = NULL_TREE;
10714 *nunits_vectype_out = NULL_TREE;
10716 if (gimple_get_lhs (stmt) == NULL_TREE
10717 /* MASK_STORE has no lhs, but is ok. */
10718 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10720 if (is_a <gcall *> (stmt))
10722 /* Ignore calls with no lhs. These must be calls to
10723 #pragma omp simd functions, and what vectorization factor
10724 it really needs can't be determined until
10725 vectorizable_simd_clone_call. */
10726 if (dump_enabled_p ())
10727 dump_printf_loc (MSG_NOTE, vect_location,
10728 "defer to SIMD clone analysis.\n");
10729 return true;
10732 if (dump_enabled_p ())
10734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10735 "not vectorized: irregular stmt.");
10736 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10738 return false;
10741 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10743 if (dump_enabled_p ())
10745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10746 "not vectorized: vector stmt in loop:");
10747 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10749 return false;
10752 tree vectype;
10753 tree scalar_type = NULL_TREE;
10754 if (STMT_VINFO_VECTYPE (stmt_info))
10755 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10756 else
10758 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10759 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10760 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10761 else
10762 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10764 /* Pure bool ops don't participate in number-of-units computation.
10765 For comparisons use the types being compared. */
10766 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10767 && is_gimple_assign (stmt)
10768 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10770 *stmt_vectype_out = boolean_type_node;
10772 tree rhs1 = gimple_assign_rhs1 (stmt);
10773 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10774 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10775 scalar_type = TREE_TYPE (rhs1);
10776 else
10778 if (dump_enabled_p ())
10779 dump_printf_loc (MSG_NOTE, vect_location,
10780 "pure bool operation.\n");
10781 return true;
10785 if (dump_enabled_p ())
10787 dump_printf_loc (MSG_NOTE, vect_location,
10788 "get vectype for scalar type: ");
10789 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10790 dump_printf (MSG_NOTE, "\n");
10792 vectype = get_vectype_for_scalar_type (scalar_type);
10793 if (!vectype)
10795 if (dump_enabled_p ())
10797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10798 "not vectorized: unsupported data-type ");
10799 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10800 scalar_type);
10801 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10803 return false;
10806 if (!*stmt_vectype_out)
10807 *stmt_vectype_out = vectype;
10809 if (dump_enabled_p ())
10811 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10812 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
10813 dump_printf (MSG_NOTE, "\n");
10817 /* Don't try to compute scalar types if the stmt produces a boolean
10818 vector; use the existing vector type instead. */
10819 tree nunits_vectype;
10820 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10821 nunits_vectype = vectype;
10822 else
10824 /* The number of units is set according to the smallest scalar
10825 type (or the largest vector size, but we only support one
10826 vector size per vectorization). */
10827 if (*stmt_vectype_out != boolean_type_node)
10829 HOST_WIDE_INT dummy;
10830 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
10832 if (dump_enabled_p ())
10834 dump_printf_loc (MSG_NOTE, vect_location,
10835 "get vectype for scalar type: ");
10836 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10837 dump_printf (MSG_NOTE, "\n");
10839 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10841 if (!nunits_vectype)
10843 if (dump_enabled_p ())
10845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10846 "not vectorized: unsupported data-type ");
10847 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
10848 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10850 return false;
10853 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10854 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10856 if (dump_enabled_p ())
10858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10859 "not vectorized: different sized vector "
10860 "types in statement, ");
10861 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
10862 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10863 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, nunits_vectype);
10864 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10866 return false;
10869 if (dump_enabled_p ())
10871 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10872 dump_generic_expr (MSG_NOTE, TDF_SLIM, nunits_vectype);
10873 dump_printf (MSG_NOTE, "\n");
10875 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10876 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10877 dump_printf (MSG_NOTE, "\n");
10880 *nunits_vectype_out = nunits_vectype;
10881 return true;
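/* Illustrative sketch, not part of GCC: how the analysis code consults
   the routine above; the function name is invented for the example.  */

static bool
example_query_vectypes (stmt_vec_info stmt_info)
{
  tree stmt_vectype, nunits_vectype;
  if (!vect_get_vector_types_for_stmt (stmt_info, &stmt_vectype,
                                       &nunits_vectype))
    return false;
  /* NUNITS_VECTYPE, when non-null, constrains the vectorization factor;
     STMT_VECTYPE may be boolean_type_node for pure mask operations.  */
  return true;
}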
10884 /* Try to determine the correct vector type for STMT_INFO, which is a
10885 statement that produces a scalar boolean result. Return the vector
10886 type on success, otherwise return NULL_TREE. */
10888 tree
10889 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10891 gimple *stmt = stmt_info->stmt;
10892 tree mask_type = NULL;
10893 tree vectype, scalar_type;
10895 if (is_gimple_assign (stmt)
10896 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10897 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10899 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10900 mask_type = get_mask_type_for_scalar_type (scalar_type);
10902 if (!mask_type)
10904 if (dump_enabled_p ())
10905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10906 "not vectorized: unsupported mask\n");
10907 return NULL_TREE;
10910 else
10912 tree rhs;
10913 ssa_op_iter iter;
10914 enum vect_def_type dt;
10916 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10918 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
10920 if (dump_enabled_p ())
10922 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10923 "not vectorized: can't compute mask type "
10924 "for statement, ");
10925 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
10928 return NULL_TREE;
10931 /* No vectype probably means external definition.
10932 Allow it in case there is another operand which
10933 can be used to determine the mask type. */
10934 if (!vectype)
10935 continue;
10937 if (!mask_type)
10938 mask_type = vectype;
10939 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10940 TYPE_VECTOR_SUBPARTS (vectype)))
10942 if (dump_enabled_p ())
10944 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10945 "not vectorized: different sized masks "
10946 "types in statement, ");
10947 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10948 mask_type);
10949 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10950 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10951 vectype);
10952 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10954 return NULL_TREE;
10956 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10957 != VECTOR_BOOLEAN_TYPE_P (vectype))
10959 if (dump_enabled_p ())
10961 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10962 "not vectorized: mixed mask and "
10963 "nonmask vector types in statement, ");
10964 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10965 mask_type);
10966 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10967 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10968 vectype);
10969 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10971 return NULL_TREE;
10975 /* We may compare a boolean value loaded as a vector of integers.
10976 Fix mask_type in such a case. */
10977 if (mask_type
10978 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10979 && gimple_code (stmt) == GIMPLE_ASSIGN
10980 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10981 mask_type = build_same_sized_truth_vector_type (mask_type);
10984 /* No mask_type should mean a loop-invariant predicate.
10985 This is probably a subject for optimization in if-conversion. */
10986 if (!mask_type && dump_enabled_p ())
10988 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10989 "not vectorized: can't compute mask type "
10990 "for statement, ");
10991 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10993 return mask_type;