[37/46] dr_aux tweaks
[official-gcc.git] / gcc / tree-vect-stmts.c
blob76ae8519d3c21ddc67a97d281566907e054ae2ae
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
64 return STMT_VINFO_VECTYPE (stmt_info);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
77 if (!loop_vinfo)
78 return false;
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
82 return (bb->loop_father == loop->inner);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 int misalign, enum vect_cost_model_location where)
94 if ((kind == vector_load || kind == unaligned_load)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96 kind = vector_gather_load;
97 if ((kind == vector_store || kind == unaligned_store)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_scatter_store;
101 stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
102 body_cost_vec->safe_push (si);
104 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
109 /* Return a variable of type ELEM_TYPE[NELEMS]. */
111 static tree
112 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
114 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
115 "vect_array");
118 /* ARRAY is an array of vectors created by create_vector_array.
119 Return an SSA_NAME for the vector in index N. The reference
120 is part of the vectorization of STMT_INFO and the vector is associated
121 with scalar destination SCALAR_DEST. */
123 static tree
124 read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
125 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
127 tree vect_type, vect, vect_name, array_ref;
128 gimple *new_stmt;
130 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
131 vect_type = TREE_TYPE (TREE_TYPE (array));
132 vect = vect_create_destination_var (scalar_dest, vect_type);
133 array_ref = build4 (ARRAY_REF, vect_type, array,
134 build_int_cst (size_type_node, n),
135 NULL_TREE, NULL_TREE);
137 new_stmt = gimple_build_assign (vect, array_ref);
138 vect_name = make_ssa_name (vect, new_stmt);
139 gimple_assign_set_lhs (new_stmt, vect_name);
140 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
142 return vect_name;
145 /* ARRAY is an array of vectors created by create_vector_array.
146 Emit code to store SSA_NAME VECT in index N of the array.
147 The store is part of the vectorization of STMT_INFO. */
149 static void
150 write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
151 tree vect, tree array, unsigned HOST_WIDE_INT n)
153 tree array_ref;
154 gimple *new_stmt;
156 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
157 build_int_cst (size_type_node, n),
158 NULL_TREE, NULL_TREE);
160 new_stmt = gimple_build_assign (array_ref, vect);
161 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
164 /* PTR is a pointer to an array of type TYPE. Return a representation
165 of *PTR. The memory reference replaces those in FIRST_DR
166 (and its group). */
168 static tree
169 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
171 tree mem_ref;
173 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
174 /* Arrays have the same alignment as their type. */
175 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
176 return mem_ref;
179 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
180 Emit the clobber before *GSI. */
182 static void
183 vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
184 tree var)
186 tree clobber = build_clobber (TREE_TYPE (var));
187 gimple *new_stmt = gimple_build_assign (var, clobber);
188 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
191 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
193 /* Function vect_mark_relevant.
195 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
197 static void
198 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
199 enum vect_relevant relevant, bool live_p)
201 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
202 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
204 if (dump_enabled_p ())
206 dump_printf_loc (MSG_NOTE, vect_location,
207 "mark relevant %d, live %d: ", relevant, live_p);
208 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
211 /* If this stmt is an original stmt in a pattern, we might need to mark its
212 related pattern stmt instead of the original stmt. However, such stmts
213 may have their own uses that are not in any pattern, in such cases the
214 stmt itself should be marked. */
215 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
217 /* This is the last stmt in a sequence that was detected as a
218 pattern that can potentially be vectorized. Don't mark the stmt
219 as relevant/live because it's not going to be vectorized.
220 Instead mark the pattern-stmt that replaces it. */
222 if (dump_enabled_p ())
223 dump_printf_loc (MSG_NOTE, vect_location,
224 "last stmt in pattern. don't mark"
225 " relevant/live.\n");
226 stmt_vec_info old_stmt_info = stmt_info;
227 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
228 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
229 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
230 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
233 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
234 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
235 STMT_VINFO_RELEVANT (stmt_info) = relevant;
237 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
238 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
240 if (dump_enabled_p ())
241 dump_printf_loc (MSG_NOTE, vect_location,
242 "already marked relevant/live.\n");
243 return;
246 worklist->safe_push (stmt_info);
250 /* Function is_simple_and_all_uses_invariant
252 Return true if STMT_INFO is simple and all uses of it are invariant. */
254 bool
255 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
256 loop_vec_info loop_vinfo)
258 tree op;
259 ssa_op_iter iter;
261 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
262 if (!stmt)
263 return false;
265 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
267 enum vect_def_type dt = vect_uninitialized_def;
269 if (!vect_is_simple_use (op, loop_vinfo, &dt))
271 if (dump_enabled_p ())
272 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
273 "use not simple.\n");
274 return false;
277 if (dt != vect_external_def && dt != vect_constant_def)
278 return false;
280 return true;
283 /* Function vect_stmt_relevant_p.
285 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
286 is "relevant for vectorization".
288 A stmt is considered "relevant for vectorization" if:
289 - it has uses outside the loop.
290 - it has vdefs (it alters memory).
291 - control stmts in the loop (except for the exit condition).
293 CHECKME: what other side effects would the vectorizer allow? */
295 static bool
296 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
297 enum vect_relevant *relevant, bool *live_p)
299 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
300 ssa_op_iter op_iter;
301 imm_use_iterator imm_iter;
302 use_operand_p use_p;
303 def_operand_p def_p;
305 *relevant = vect_unused_in_scope;
306 *live_p = false;
308 /* cond stmt other than loop exit cond. */
309 if (is_ctrl_stmt (stmt_info->stmt)
310 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
311 *relevant = vect_used_in_scope;
313 /* changing memory. */
314 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
315 if (gimple_vdef (stmt_info->stmt)
316 && !gimple_clobber_p (stmt_info->stmt))
318 if (dump_enabled_p ())
319 dump_printf_loc (MSG_NOTE, vect_location,
320 "vec_stmt_relevant_p: stmt has vdefs.\n");
321 *relevant = vect_used_in_scope;
324 /* uses outside the loop. */
325 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
327 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
329 basic_block bb = gimple_bb (USE_STMT (use_p));
330 if (!flow_bb_inside_loop_p (loop, bb))
332 if (dump_enabled_p ())
333 dump_printf_loc (MSG_NOTE, vect_location,
334 "vec_stmt_relevant_p: used out of loop.\n");
336 if (is_gimple_debug (USE_STMT (use_p)))
337 continue;
339 /* We expect all such uses to be in the loop exit phis
340 (because of loop closed form) */
341 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
342 gcc_assert (bb == single_exit (loop)->dest);
344 *live_p = true;
349 if (*live_p && *relevant == vect_unused_in_scope
350 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
352 if (dump_enabled_p ())
353 dump_printf_loc (MSG_NOTE, vect_location,
354 "vec_stmt_relevant_p: stmt live but not relevant.\n");
355 *relevant = vect_used_only_live;
358 return (*live_p || *relevant);
362 /* Function exist_non_indexing_operands_for_use_p
364 USE is one of the uses attached to STMT_INFO. Check if USE is
365 used in STMT_INFO for anything other than indexing an array. */
367 static bool
368 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
370 tree operand;
372 /* USE corresponds to some operand in STMT. If there is no data
373 reference in STMT, then any operand that corresponds to USE
374 is not indexing an array. */
375 if (!STMT_VINFO_DATA_REF (stmt_info))
376 return true;
378 /* STMT has a data_ref. FORNOW this means that its of one of
379 the following forms:
380 -1- ARRAY_REF = var
381 -2- var = ARRAY_REF
382 (This should have been verified in analyze_data_refs).
384 'var' in the second case corresponds to a def, not a use,
385 so USE cannot correspond to any operands that are not used
386 for array indexing.
388 Therefore, all we need to check is if STMT falls into the
389 first case, and whether var corresponds to USE. */
391 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
392 if (!assign || !gimple_assign_copy_p (assign))
394 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
395 if (call && gimple_call_internal_p (call))
397 internal_fn ifn = gimple_call_internal_fn (call);
398 int mask_index = internal_fn_mask_index (ifn);
399 if (mask_index >= 0
400 && use == gimple_call_arg (call, mask_index))
401 return true;
402 int stored_value_index = internal_fn_stored_value_index (ifn);
403 if (stored_value_index >= 0
404 && use == gimple_call_arg (call, stored_value_index))
405 return true;
406 if (internal_gather_scatter_fn_p (ifn)
407 && use == gimple_call_arg (call, 1))
408 return true;
410 return false;
413 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
414 return false;
415 operand = gimple_assign_rhs1 (assign);
416 if (TREE_CODE (operand) != SSA_NAME)
417 return false;
419 if (operand == use)
420 return true;
422 return false;
427 Function process_use.
429 Inputs:
430 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
431 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
432 that defined USE. This is done by calling mark_relevant and passing it
433 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
434 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
435 be performed.
437 Outputs:
438 Generally, LIVE_P and RELEVANT are used to define the liveness and
439 relevance info of the DEF_STMT of this USE:
440 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
441 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
442 Exceptions:
443 - case 1: If USE is used only for address computations (e.g. array indexing),
444 which does not need to be directly vectorized, then the liveness/relevance
445 of the respective DEF_STMT is left unchanged.
446 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
447 we skip DEF_STMT cause it had already been processed.
448 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
449 "relevant" will be modified accordingly.
451 Return true if everything is as expected. Return false otherwise. */
453 static bool
454 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
455 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
456 bool force)
458 stmt_vec_info dstmt_vinfo;
459 basic_block bb, def_bb;
460 enum vect_def_type dt;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
465 return true;
467 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
471 "not vectorized: unsupported use in stmt.\n");
472 return false;
475 if (!dstmt_vinfo)
476 return true;
478 def_bb = gimple_bb (dstmt_vinfo->stmt);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 DSTMT_VINFO must have already been processed, because this should be the
482 only way that STMT, which is a reduction-phi, was put in the worklist,
483 as there should be no other uses for DSTMT_VINFO in the loop. So we just
484 check that everything is as expected, and we are done. */
485 bb = gimple_bb (stmt_vinfo->stmt);
486 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
487 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
488 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
489 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
490 && bb->loop_father == def_bb->loop_father)
492 if (dump_enabled_p ())
493 dump_printf_loc (MSG_NOTE, vect_location,
494 "reduc-stmt defining reduc-phi in the same nest.\n");
495 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
496 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
497 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
498 return true;
501 /* case 3a: outer-loop stmt defining an inner-loop stmt:
502 outer-loop-header-bb:
503 d = dstmt_vinfo
504 inner-loop:
505 stmt # use (d)
506 outer-loop-tail-bb:
507 ... */
508 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
510 if (dump_enabled_p ())
511 dump_printf_loc (MSG_NOTE, vect_location,
512 "outer-loop def-stmt defining inner-loop stmt.\n");
514 switch (relevant)
516 case vect_unused_in_scope:
517 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
518 vect_used_in_scope : vect_unused_in_scope;
519 break;
521 case vect_used_in_outer_by_reduction:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_by_reduction;
524 break;
526 case vect_used_in_outer:
527 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
528 relevant = vect_used_in_scope;
529 break;
531 case vect_used_in_scope:
532 break;
534 default:
535 gcc_unreachable ();
539 /* case 3b: inner-loop stmt defining an outer-loop stmt:
540 outer-loop-header-bb:
542 inner-loop:
543 d = dstmt_vinfo
544 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
545 stmt # use (d) */
546 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
548 if (dump_enabled_p ())
549 dump_printf_loc (MSG_NOTE, vect_location,
550 "inner-loop def-stmt defining outer-loop stmt.\n");
552 switch (relevant)
554 case vect_unused_in_scope:
555 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
556 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
557 vect_used_in_outer_by_reduction : vect_unused_in_scope;
558 break;
560 case vect_used_by_reduction:
561 case vect_used_only_live:
562 relevant = vect_used_in_outer_by_reduction;
563 break;
565 case vect_used_in_scope:
566 relevant = vect_used_in_outer;
567 break;
569 default:
570 gcc_unreachable ();
573 /* We are also not interested in uses on loop PHI backedges that are
574 inductions. Otherwise we'll needlessly vectorize the IV increment
575 and cause hybrid SLP for SLP inductions. Unless the PHI is live
576 of course. */
577 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
578 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
579 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
580 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
581 loop_latch_edge (bb->loop_father))
582 == use))
584 if (dump_enabled_p ())
585 dump_printf_loc (MSG_NOTE, vect_location,
586 "induction value on backedge.\n");
587 return true;
591 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
592 return true;
596 /* Function vect_mark_stmts_to_be_vectorized.
598 Not all stmts in the loop need to be vectorized. For example:
600 for i...
601 for j...
602 1. T0 = i + j
603 2. T1 = a[T0]
605 3. j = j + 1
607 Stmt 1 and 3 do not need to be vectorized, because loop control and
608 addressing of vectorized data-refs are handled differently.
610 This pass detects such stmts. */
612 bool
613 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
615 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
616 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
617 unsigned int nbbs = loop->num_nodes;
618 gimple_stmt_iterator si;
619 unsigned int i;
620 basic_block bb;
621 bool live_p;
622 enum vect_relevant relevant;
624 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
626 auto_vec<stmt_vec_info, 64> worklist;
628 /* 1. Init worklist. */
629 for (i = 0; i < nbbs; i++)
631 bb = bbs[i];
632 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
634 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
635 if (dump_enabled_p ())
637 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
638 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi_info->stmt, 0);
641 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
642 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
644 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
646 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
647 if (dump_enabled_p ())
649 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
650 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
653 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
654 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
658 /* 2. Process_worklist */
659 while (worklist.length () > 0)
661 use_operand_p use_p;
662 ssa_op_iter iter;
664 stmt_vec_info stmt_vinfo = worklist.pop ();
665 if (dump_enabled_p ())
667 dump_printf_loc (MSG_NOTE, vect_location,
668 "worklist: examine stmt: ");
669 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_vinfo->stmt, 0);
672 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
673 (DEF_STMT) as relevant/irrelevant according to the relevance property
674 of STMT. */
675 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
677 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
678 propagated as is to the DEF_STMTs of its USEs.
680 One exception is when STMT has been identified as defining a reduction
681 variable; in this case we set the relevance to vect_used_by_reduction.
682 This is because we distinguish between two kinds of relevant stmts -
683 those that are used by a reduction computation, and those that are
684 (also) used by a regular computation. This allows us later on to
685 identify stmts that are used solely by a reduction, and therefore the
686 order of the results that they produce does not have to be kept. */
688 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
690 case vect_reduction_def:
691 gcc_assert (relevant != vect_unused_in_scope);
692 if (relevant != vect_unused_in_scope
693 && relevant != vect_used_in_scope
694 && relevant != vect_used_by_reduction
695 && relevant != vect_used_only_live)
697 if (dump_enabled_p ())
698 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
699 "unsupported use of reduction.\n");
700 return false;
702 break;
704 case vect_nested_cycle:
705 if (relevant != vect_unused_in_scope
706 && relevant != vect_used_in_outer_by_reduction
707 && relevant != vect_used_in_outer)
709 if (dump_enabled_p ())
710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
711 "unsupported use of nested cycle.\n");
713 return false;
715 break;
717 case vect_double_reduction_def:
718 if (relevant != vect_unused_in_scope
719 && relevant != vect_used_by_reduction
720 && relevant != vect_used_only_live)
722 if (dump_enabled_p ())
723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
724 "unsupported use of double reduction.\n");
726 return false;
728 break;
730 default:
731 break;
734 if (is_pattern_stmt_p (stmt_vinfo))
736 /* Pattern statements are not inserted into the code, so
737 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
738 have to scan the RHS or function arguments instead. */
739 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
741 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
742 tree op = gimple_assign_rhs1 (assign);
744 i = 1;
745 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
747 if (!process_use (stmt_vinfo, TREE_OPERAND (op, 0),
748 loop_vinfo, relevant, &worklist, false)
749 || !process_use (stmt_vinfo, TREE_OPERAND (op, 1),
750 loop_vinfo, relevant, &worklist, false))
751 return false;
752 i = 2;
754 for (; i < gimple_num_ops (assign); i++)
756 op = gimple_op (assign, i);
757 if (TREE_CODE (op) == SSA_NAME
758 && !process_use (stmt_vinfo, op, loop_vinfo, relevant,
759 &worklist, false))
760 return false;
763 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
765 for (i = 0; i < gimple_call_num_args (call); i++)
767 tree arg = gimple_call_arg (call, i);
768 if (!process_use (stmt_vinfo, arg, loop_vinfo, relevant,
769 &worklist, false))
770 return false;
774 else
775 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
777 tree op = USE_FROM_PTR (use_p);
778 if (!process_use (stmt_vinfo, op, loop_vinfo, relevant,
779 &worklist, false))
780 return false;
783 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
785 gather_scatter_info gs_info;
786 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
787 gcc_unreachable ();
788 if (!process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
789 &worklist, true))
790 return false;
792 } /* while worklist */
794 return true;
797 /* Compute the prologue cost for invariant or constant operands. */
799 static unsigned
800 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
801 unsigned opno, enum vect_def_type dt,
802 stmt_vector_for_cost *cost_vec)
804 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
805 tree op = gimple_op (stmt, opno);
806 unsigned prologue_cost = 0;
808 /* Without looking at the actual initializer a vector of
809 constants can be implemented as load from the constant pool.
810 When all elements are the same we can use a splat. */
811 tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
812 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
813 unsigned num_vects_to_check;
814 unsigned HOST_WIDE_INT const_nunits;
815 unsigned nelt_limit;
816 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
817 && ! multiple_p (const_nunits, group_size))
819 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
820 nelt_limit = const_nunits;
822 else
824 /* If either the vector has variable length or the vectors
825 are composed of repeated whole groups we only need to
826 cost construction once. All vectors will be the same. */
827 num_vects_to_check = 1;
828 nelt_limit = group_size;
830 tree elt = NULL_TREE;
831 unsigned nelt = 0;
832 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
834 unsigned si = j % group_size;
835 if (nelt == 0)
836 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
837 /* ??? We're just tracking whether all operands of a single
838 vector initializer are the same, ideally we'd check if
839 we emitted the same one already. */
840 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
841 opno))
842 elt = NULL_TREE;
843 nelt++;
844 if (nelt == nelt_limit)
846 /* ??? We need to pass down stmt_info for a vector type
847 even if it points to the wrong stmt. */
848 prologue_cost += record_stmt_cost
849 (cost_vec, 1,
850 dt == vect_external_def
851 ? (elt ? scalar_to_vec : vec_construct)
852 : vector_load,
853 stmt_info, 0, vect_prologue);
854 nelt = 0;
858 return prologue_cost;
861 /* Function vect_model_simple_cost.
863 Models cost for simple operations, i.e. those that only emit ncopies of a
864 single op. Right now, this does not account for multiple insns that could
865 be generated for the single vector op. We will handle that shortly. */
867 static void
868 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
869 enum vect_def_type *dt,
870 int ndts,
871 slp_tree node,
872 stmt_vector_for_cost *cost_vec)
874 int inside_cost = 0, prologue_cost = 0;
876 gcc_assert (cost_vec != NULL);
878 /* ??? Somehow we need to fix this at the callers. */
879 if (node)
880 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
882 if (node)
884 /* Scan operands and account for prologue cost of constants/externals.
885 ??? This over-estimates cost for multiple uses and should be
886 re-engineered. */
887 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
888 tree lhs = gimple_get_lhs (stmt);
889 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
891 tree op = gimple_op (stmt, i);
892 enum vect_def_type dt;
893 if (!op || op == lhs)
894 continue;
895 if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
896 && (dt == vect_constant_def || dt == vect_external_def))
897 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
898 i, dt, cost_vec);
901 else
902 /* Cost the "broadcast" of a scalar operand in to a vector operand.
903 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
904 cost model. */
905 for (int i = 0; i < ndts; i++)
906 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
907 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
908 stmt_info, 0, vect_prologue);
910 /* Adjust for two-operator SLP nodes. */
911 if (node && SLP_TREE_TWO_OPERATORS (node))
913 ncopies *= 2;
914 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
915 stmt_info, 0, vect_body);
918 /* Pass the inside-of-loop statements to the target-specific cost model. */
919 inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
920 stmt_info, 0, vect_body);
922 if (dump_enabled_p ())
923 dump_printf_loc (MSG_NOTE, vect_location,
924 "vect_model_simple_cost: inside_cost = %d, "
925 "prologue_cost = %d .\n", inside_cost, prologue_cost);
929 /* Model cost for type demotion and promotion operations. PWR is normally
930 zero for single-step promotions and demotions. It will be one if
931 two-step promotion/demotion is required, and so on. Each additional
932 step doubles the number of instructions required. */
934 static void
935 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
936 enum vect_def_type *dt, int pwr,
937 stmt_vector_for_cost *cost_vec)
939 int i, tmp;
940 int inside_cost = 0, prologue_cost = 0;
942 for (i = 0; i < pwr + 1; i++)
944 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
945 (i + 1) : i;
946 inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
947 vec_promote_demote, stmt_info, 0,
948 vect_body);
951 /* FORNOW: Assuming maximum 2 args per stmts. */
952 for (i = 0; i < 2; i++)
953 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
954 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
955 stmt_info, 0, vect_prologue);
957 if (dump_enabled_p ())
958 dump_printf_loc (MSG_NOTE, vect_location,
959 "vect_model_promotion_demotion_cost: inside_cost = %d, "
960 "prologue_cost = %d .\n", inside_cost, prologue_cost);
963 /* Function vect_model_store_cost
965 Models cost for stores. In the case of grouped accesses, one access
966 has the overhead of the grouped access attributed to it. */
968 static void
969 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
970 enum vect_def_type dt,
971 vect_memory_access_type memory_access_type,
972 vec_load_store_type vls_type, slp_tree slp_node,
973 stmt_vector_for_cost *cost_vec)
975 unsigned int inside_cost = 0, prologue_cost = 0;
976 stmt_vec_info first_stmt_info = stmt_info;
977 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
979 /* ??? Somehow we need to fix this at the callers. */
980 if (slp_node)
981 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
983 if (vls_type == VLS_STORE_INVARIANT)
985 if (slp_node)
986 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
987 1, dt, cost_vec);
988 else
989 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
990 stmt_info, 0, vect_prologue);
993 /* Grouped stores update all elements in the group at once,
994 so we want the DR for the first statement. */
995 if (!slp_node && grouped_access_p)
996 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
998 /* True if we should include any once-per-group costs as well as
999 the cost of the statement itself. For SLP we only get called
1000 once per group anyhow. */
1001 bool first_stmt_p = (first_stmt_info == stmt_info);
1003 /* We assume that the cost of a single store-lanes instruction is
1004 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1005 access is instead being provided by a permute-and-store operation,
1006 include the cost of the permutes. */
1007 if (first_stmt_p
1008 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1010 /* Uses a high and low interleave or shuffle operations for each
1011 needed permute. */
1012 int group_size = DR_GROUP_SIZE (first_stmt_info);
1013 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1014 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1015 stmt_info, 0, vect_body);
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_NOTE, vect_location,
1019 "vect_model_store_cost: strided group_size = %d .\n",
1020 group_size);
1023 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1024 /* Costs of the stores. */
1025 if (memory_access_type == VMAT_ELEMENTWISE
1026 || memory_access_type == VMAT_GATHER_SCATTER)
1028 /* N scalar stores plus extracting the elements. */
1029 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1030 inside_cost += record_stmt_cost (cost_vec,
1031 ncopies * assumed_nunits,
1032 scalar_store, stmt_info, 0, vect_body);
1034 else
1035 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1037 if (memory_access_type == VMAT_ELEMENTWISE
1038 || memory_access_type == VMAT_STRIDED_SLP)
1040 /* N scalar stores plus extracting the elements. */
1041 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1042 inside_cost += record_stmt_cost (cost_vec,
1043 ncopies * assumed_nunits,
1044 vec_to_scalar, stmt_info, 0, vect_body);
1047 if (dump_enabled_p ())
1048 dump_printf_loc (MSG_NOTE, vect_location,
1049 "vect_model_store_cost: inside_cost = %d, "
1050 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1054 /* Calculate cost of DR's memory access. */
1055 void
1056 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1057 unsigned int *inside_cost,
1058 stmt_vector_for_cost *body_cost_vec)
1060 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1061 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1063 switch (alignment_support_scheme)
1065 case dr_aligned:
1067 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1068 vector_store, stmt_info, 0,
1069 vect_body);
1071 if (dump_enabled_p ())
1072 dump_printf_loc (MSG_NOTE, vect_location,
1073 "vect_model_store_cost: aligned.\n");
1074 break;
1077 case dr_unaligned_supported:
1079 /* Here, we assign an additional cost for the unaligned store. */
1080 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1081 unaligned_store, stmt_info,
1082 DR_MISALIGNMENT (dr), vect_body);
1083 if (dump_enabled_p ())
1084 dump_printf_loc (MSG_NOTE, vect_location,
1085 "vect_model_store_cost: unaligned supported by "
1086 "hardware.\n");
1087 break;
1090 case dr_unaligned_unsupported:
1092 *inside_cost = VECT_MAX_COST;
1094 if (dump_enabled_p ())
1095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1096 "vect_model_store_cost: unsupported access.\n");
1097 break;
1100 default:
1101 gcc_unreachable ();
1106 /* Function vect_model_load_cost
1108 Models cost for loads. In the case of grouped accesses, one access has
1109 the overhead of the grouped access attributed to it. Since unaligned
1110 accesses are supported for loads, we also account for the costs of the
1111 access scheme chosen. */
1113 static void
1114 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1115 vect_memory_access_type memory_access_type,
1116 slp_instance instance,
1117 slp_tree slp_node,
1118 stmt_vector_for_cost *cost_vec)
1120 unsigned int inside_cost = 0, prologue_cost = 0;
1121 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1123 gcc_assert (cost_vec);
1125 /* ??? Somehow we need to fix this at the callers. */
1126 if (slp_node)
1127 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1129 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1131 /* If the load is permuted then the alignment is determined by
1132 the first group element not by the first scalar stmt DR. */
1133 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1134 /* Record the cost for the permutation. */
1135 unsigned n_perms;
1136 unsigned assumed_nunits
1137 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1138 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1139 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1140 slp_vf, instance, true,
1141 &n_perms);
1142 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1143 first_stmt_info, 0, vect_body);
1144 /* And adjust the number of loads performed. This handles
1145 redundancies as well as loads that are later dead. */
1146 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1147 bitmap_clear (perm);
1148 for (unsigned i = 0;
1149 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1150 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1151 ncopies = 0;
1152 bool load_seen = false;
1153 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1155 if (i % assumed_nunits == 0)
1157 if (load_seen)
1158 ncopies++;
1159 load_seen = false;
1161 if (bitmap_bit_p (perm, i))
1162 load_seen = true;
1164 if (load_seen)
1165 ncopies++;
1166 gcc_assert (ncopies
1167 <= (DR_GROUP_SIZE (first_stmt_info)
1168 - DR_GROUP_GAP (first_stmt_info)
1169 + assumed_nunits - 1) / assumed_nunits);
1172 /* Grouped loads read all elements in the group at once,
1173 so we want the DR for the first statement. */
1174 stmt_vec_info first_stmt_info = stmt_info;
1175 if (!slp_node && grouped_access_p)
1176 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1178 /* True if we should include any once-per-group costs as well as
1179 the cost of the statement itself. For SLP we only get called
1180 once per group anyhow. */
1181 bool first_stmt_p = (first_stmt_info == stmt_info);
1183 /* We assume that the cost of a single load-lanes instruction is
1184 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1185 access is instead being provided by a load-and-permute operation,
1186 include the cost of the permutes. */
1187 if (first_stmt_p
1188 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1190 /* Uses an even and odd extract operations or shuffle operations
1191 for each needed permute. */
1192 int group_size = DR_GROUP_SIZE (first_stmt_info);
1193 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1194 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1195 stmt_info, 0, vect_body);
1197 if (dump_enabled_p ())
1198 dump_printf_loc (MSG_NOTE, vect_location,
1199 "vect_model_load_cost: strided group_size = %d .\n",
1200 group_size);
1203 /* The loads themselves. */
1204 if (memory_access_type == VMAT_ELEMENTWISE
1205 || memory_access_type == VMAT_GATHER_SCATTER)
1207 /* N scalar loads plus gathering them into a vector. */
1208 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1209 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1210 inside_cost += record_stmt_cost (cost_vec,
1211 ncopies * assumed_nunits,
1212 scalar_load, stmt_info, 0, vect_body);
1214 else
1215 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1216 &inside_cost, &prologue_cost,
1217 cost_vec, cost_vec, true);
1218 if (memory_access_type == VMAT_ELEMENTWISE
1219 || memory_access_type == VMAT_STRIDED_SLP)
1220 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1221 stmt_info, 0, vect_body);
1223 if (dump_enabled_p ())
1224 dump_printf_loc (MSG_NOTE, vect_location,
1225 "vect_model_load_cost: inside_cost = %d, "
1226 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1230 /* Calculate cost of DR's memory access. */
1231 void
1232 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1233 bool add_realign_cost, unsigned int *inside_cost,
1234 unsigned int *prologue_cost,
1235 stmt_vector_for_cost *prologue_cost_vec,
1236 stmt_vector_for_cost *body_cost_vec,
1237 bool record_prologue_costs)
1239 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1240 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1242 switch (alignment_support_scheme)
1244 case dr_aligned:
1246 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1247 stmt_info, 0, vect_body);
1249 if (dump_enabled_p ())
1250 dump_printf_loc (MSG_NOTE, vect_location,
1251 "vect_model_load_cost: aligned.\n");
1253 break;
1255 case dr_unaligned_supported:
1257 /* Here, we assign an additional cost for the unaligned load. */
1258 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1259 unaligned_load, stmt_info,
1260 DR_MISALIGNMENT (dr), vect_body);
1262 if (dump_enabled_p ())
1263 dump_printf_loc (MSG_NOTE, vect_location,
1264 "vect_model_load_cost: unaligned supported by "
1265 "hardware.\n");
1267 break;
1269 case dr_explicit_realign:
1271 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1272 vector_load, stmt_info, 0, vect_body);
1273 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1274 vec_perm, stmt_info, 0, vect_body);
1276 /* FIXME: If the misalignment remains fixed across the iterations of
1277 the containing loop, the following cost should be added to the
1278 prologue costs. */
1279 if (targetm.vectorize.builtin_mask_for_load)
1280 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1281 stmt_info, 0, vect_body);
1283 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE, vect_location,
1285 "vect_model_load_cost: explicit realign\n");
1287 break;
1289 case dr_explicit_realign_optimized:
1291 if (dump_enabled_p ())
1292 dump_printf_loc (MSG_NOTE, vect_location,
1293 "vect_model_load_cost: unaligned software "
1294 "pipelined.\n");
1296 /* Unaligned software pipeline has a load of an address, an initial
1297 load, and possibly a mask operation to "prime" the loop. However,
1298 if this is an access in a group of loads, which provide grouped
1299 access, then the above cost should only be considered for one
1300 access in the group. Inside the loop, there is a load op
1301 and a realignment op. */
1303 if (add_realign_cost && record_prologue_costs)
1305 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1306 vector_stmt, stmt_info,
1307 0, vect_prologue);
1308 if (targetm.vectorize.builtin_mask_for_load)
1309 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1310 vector_stmt, stmt_info,
1311 0, vect_prologue);
1314 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1315 stmt_info, 0, vect_body);
1316 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1317 stmt_info, 0, vect_body);
1319 if (dump_enabled_p ())
1320 dump_printf_loc (MSG_NOTE, vect_location,
1321 "vect_model_load_cost: explicit realign optimized"
1322 "\n");
1324 break;
1327 case dr_unaligned_unsupported:
1329 *inside_cost = VECT_MAX_COST;
1331 if (dump_enabled_p ())
1332 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1333 "vect_model_load_cost: unsupported access.\n");
1334 break;
1337 default:
1338 gcc_unreachable ();
1342 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1343 the loop preheader for the vectorized stmt STMT_VINFO. */
1345 static void
1346 vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
1347 gimple_stmt_iterator *gsi)
1349 if (gsi)
1350 vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
1351 else
1353 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1355 if (loop_vinfo)
1357 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1358 basic_block new_bb;
1359 edge pe;
1361 if (nested_in_vect_loop_p (loop, stmt_vinfo))
1362 loop = loop->inner;
1364 pe = loop_preheader_edge (loop);
1365 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1366 gcc_assert (!new_bb);
1368 else
1370 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1371 basic_block bb;
1372 gimple_stmt_iterator gsi_bb_start;
1374 gcc_assert (bb_vinfo);
1375 bb = BB_VINFO_BB (bb_vinfo);
1376 gsi_bb_start = gsi_after_labels (bb);
1377 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1381 if (dump_enabled_p ())
1383 dump_printf_loc (MSG_NOTE, vect_location,
1384 "created new init_stmt: ");
1385 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1389 /* Function vect_init_vector.
1391 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1392 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1393 vector type a vector with all elements equal to VAL is created first.
1394 Place the initialization at BSI if it is not NULL. Otherwise, place the
1395 initialization at the loop preheader.
1396 Return the DEF of INIT_STMT.
1397 It will be used in the vectorization of STMT_INFO. */
1399 tree
1400 vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
1401 gimple_stmt_iterator *gsi)
1403 gimple *init_stmt;
1404 tree new_temp;
1406 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1407 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1409 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1410 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1412 /* Scalar boolean value should be transformed into
1413 all zeros or all ones value before building a vector. */
1414 if (VECTOR_BOOLEAN_TYPE_P (type))
1416 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1417 tree false_val = build_zero_cst (TREE_TYPE (type));
1419 if (CONSTANT_CLASS_P (val))
1420 val = integer_zerop (val) ? false_val : true_val;
1421 else
1423 new_temp = make_ssa_name (TREE_TYPE (type));
1424 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1425 val, true_val, false_val);
1426 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1427 val = new_temp;
1430 else if (CONSTANT_CLASS_P (val))
1431 val = fold_convert (TREE_TYPE (type), val);
1432 else
1434 new_temp = make_ssa_name (TREE_TYPE (type));
1435 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1436 init_stmt = gimple_build_assign (new_temp,
1437 fold_build1 (VIEW_CONVERT_EXPR,
1438 TREE_TYPE (type),
1439 val));
1440 else
1441 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1442 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1443 val = new_temp;
1446 val = build_vector_from_val (type, val);
1449 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1450 init_stmt = gimple_build_assign (new_temp, val);
1451 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1452 return new_temp;
1455 /* Function vect_get_vec_def_for_operand_1.
1457 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1458 with type DT that will be used in the vectorized stmt. */
1460 tree
1461 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1462 enum vect_def_type dt)
1464 tree vec_oprnd;
1465 stmt_vec_info vec_stmt_info;
1467 switch (dt)
1469 /* operand is a constant or a loop invariant. */
1470 case vect_constant_def:
1471 case vect_external_def:
1472 /* Code should use vect_get_vec_def_for_operand. */
1473 gcc_unreachable ();
1475 /* operand is defined inside the loop. */
1476 case vect_internal_def:
1478 /* Get the def from the vectorized stmt. */
1479 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1480 /* Get vectorized pattern statement. */
1481 if (!vec_stmt_info
1482 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1483 && !STMT_VINFO_RELEVANT (def_stmt_info))
1484 vec_stmt_info = (STMT_VINFO_VEC_STMT
1485 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1486 gcc_assert (vec_stmt_info);
1487 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1488 vec_oprnd = PHI_RESULT (phi);
1489 else
1490 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1491 return vec_oprnd;
1494 /* operand is defined by a loop header phi. */
1495 case vect_reduction_def:
1496 case vect_double_reduction_def:
1497 case vect_nested_cycle:
1498 case vect_induction_def:
1500 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI);
1502 /* Get the def from the vectorized stmt. */
1503 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1504 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1505 vec_oprnd = PHI_RESULT (phi);
1506 else
1507 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1508 return vec_oprnd;
1511 default:
1512 gcc_unreachable ();
1517 /* Function vect_get_vec_def_for_operand.
1519 OP is an operand in STMT_VINFO. This function returns a (vector) def
1520 that will be used in the vectorized stmt for STMT_VINFO.
1522 In the case that OP is an SSA_NAME which is defined in the loop, then
1523 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1525 In case OP is an invariant or constant, a new stmt that creates a vector def
1526 needs to be introduced. VECTYPE may be used to specify a required type for
1527 vector invariant. */
1529 tree
1530 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1532 gimple *def_stmt;
1533 enum vect_def_type dt;
1534 bool is_simple_use;
1535 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1537 if (dump_enabled_p ())
1539 dump_printf_loc (MSG_NOTE, vect_location,
1540 "vect_get_vec_def_for_operand: ");
1541 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1542 dump_printf (MSG_NOTE, "\n");
1545 stmt_vec_info def_stmt_info;
1546 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1547 &def_stmt_info, &def_stmt);
1548 gcc_assert (is_simple_use);
1549 if (def_stmt && dump_enabled_p ())
1551 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1552 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1555 if (dt == vect_constant_def || dt == vect_external_def)
1557 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1558 tree vector_type;
1560 if (vectype)
1561 vector_type = vectype;
1562 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1563 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1564 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1565 else
1566 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1568 gcc_assert (vector_type);
1569 return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1571 else
1572 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1576 /* Function vect_get_vec_def_for_stmt_copy
1578 Return a vector-def for an operand. This function is used when the
1579 vectorized stmt to be created (by the caller to this function) is a "copy"
1580 created in case the vectorized result cannot fit in one vector, and several
1581 copies of the vector-stmt are required. In this case the vector-def is
1582 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1583 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1585 Context:
1586 In case the vectorization factor (VF) is bigger than the number
1587 of elements that can fit in a vectype (nunits), we have to generate
1588 more than one vector stmt to vectorize the scalar stmt. This situation
1589 arises when there are multiple data-types operated upon in the loop; the
1590 smallest data-type determines the VF, and as a result, when vectorizing
1591 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1592 vector stmt (each computing a vector of 'nunits' results, and together
1593 computing 'VF' results in each iteration). This function is called when
1594 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1595 which VF=16 and nunits=4, so the number of copies required is 4):
1597 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1599 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1600 VS1.1: vx.1 = memref1 VS1.2
1601 VS1.2: vx.2 = memref2 VS1.3
1602 VS1.3: vx.3 = memref3
1604 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1605 VSnew.1: vz1 = vx.1 + ... VSnew.2
1606 VSnew.2: vz2 = vx.2 + ... VSnew.3
1607 VSnew.3: vz3 = vx.3 + ...
1609 The vectorization of S1 is explained in vectorizable_load.
1610 The vectorization of S2:
1611 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1612 the function 'vect_get_vec_def_for_operand' is called to
1613 get the relevant vector-def for each operand of S2. For operand x it
1614 returns the vector-def 'vx.0'.
1616 To create the remaining copies of the vector-stmt (VSnew.j), this
1617 function is called to get the relevant vector-def for each operand. It is
1618 obtained from the respective VS1.j stmt, which is recorded in the
1619 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1621 For example, to obtain the vector-def 'vx.1' in order to create the
1622 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1623 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1624 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1625 and return its def ('vx.1').
1626 Overall, to create the above sequence this function will be called 3 times:
1627 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1628 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1629 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1631 tree
1632 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1634 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1635 if (!def_stmt_info)
1636 /* Do nothing; can reuse same def. */
1637 return vec_oprnd;
1639 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1640 gcc_assert (def_stmt_info);
1641 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1642 vec_oprnd = PHI_RESULT (phi);
1643 else
1644 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1645 return vec_oprnd;
1649 /* Get vectorized definitions for the operands to create a copy of an original
1650 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1652 void
1653 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1654 vec<tree> *vec_oprnds0,
1655 vec<tree> *vec_oprnds1)
1657 tree vec_oprnd = vec_oprnds0->pop ();
1659 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1660 vec_oprnds0->quick_push (vec_oprnd);
1662 if (vec_oprnds1 && vec_oprnds1->length ())
1664 vec_oprnd = vec_oprnds1->pop ();
1665 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1666 vec_oprnds1->quick_push (vec_oprnd);
1671 /* Get vectorized definitions for OP0 and OP1. */
1673 void
1674 vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
1675 vec<tree> *vec_oprnds0,
1676 vec<tree> *vec_oprnds1,
1677 slp_tree slp_node)
1679 if (slp_node)
1681 int nops = (op1 == NULL_TREE) ? 1 : 2;
1682 auto_vec<tree> ops (nops);
1683 auto_vec<vec<tree> > vec_defs (nops);
1685 ops.quick_push (op0);
1686 if (op1)
1687 ops.quick_push (op1);
1689 vect_get_slp_defs (ops, slp_node, &vec_defs);
1691 *vec_oprnds0 = vec_defs[0];
1692 if (op1)
1693 *vec_oprnds1 = vec_defs[1];
1695 else
1697 tree vec_oprnd;
1699 vec_oprnds0->create (1);
1700 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
1701 vec_oprnds0->quick_push (vec_oprnd);
1703 if (op1)
1705 vec_oprnds1->create (1);
1706 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
1707 vec_oprnds1->quick_push (vec_oprnd);
1712 /* Helper function called by vect_finish_replace_stmt and
1713 vect_finish_stmt_generation. Set the location of the new
1714 statement and create and return a stmt_vec_info for it. */
1716 static stmt_vec_info
1717 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
1719 vec_info *vinfo = stmt_info->vinfo;
1721 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1723 if (dump_enabled_p ())
1725 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1726 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1729 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1731 /* While EH edges will generally prevent vectorization, stmt might
1732 e.g. be in a must-not-throw region. Ensure newly created stmts
1733 that could throw are part of the same region. */
1734 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1735 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1736 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1738 return vec_stmt_info;
1741 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1742 which sets the same scalar result as STMT_INFO did. Create and return a
1743 stmt_vec_info for VEC_STMT. */
1745 stmt_vec_info
1746 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1748 gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
1750 gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
1751 gsi_replace (&gsi, vec_stmt, false);
1753 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1756 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1757 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1759 stmt_vec_info
1760 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1761 gimple_stmt_iterator *gsi)
1763 gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1765 if (!gsi_end_p (*gsi)
1766 && gimple_has_mem_ops (vec_stmt))
1768 gimple *at_stmt = gsi_stmt (*gsi);
1769 tree vuse = gimple_vuse (at_stmt);
1770 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1772 tree vdef = gimple_vdef (at_stmt);
1773 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1774 /* If we have an SSA vuse and insert a store, update virtual
1775 SSA form to avoid triggering the renamer. Do so only
1776 if we can easily see all uses - which is what almost always
1777 happens with the way vectorized stmts are inserted. */
1778 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1779 && ((is_gimple_assign (vec_stmt)
1780 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1781 || (is_gimple_call (vec_stmt)
1782 && !(gimple_call_flags (vec_stmt)
1783 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1785 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1786 gimple_set_vdef (vec_stmt, new_vdef);
1787 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1791 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1792 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1795 /* We want to vectorize a call to combined function CFN with function
1796 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1797 as the types of all inputs. Check whether this is possible using
1798 an internal function, returning its code if so or IFN_LAST if not. */
1800 static internal_fn
1801 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1802 tree vectype_out, tree vectype_in)
1804 internal_fn ifn;
1805 if (internal_fn_p (cfn))
1806 ifn = as_internal_fn (cfn);
1807 else
1808 ifn = associated_internal_fn (fndecl);
1809 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1811 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1812 if (info.vectorizable)
1814 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1815 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1816 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1817 OPTIMIZE_FOR_SPEED))
1818 return ifn;
1821 return IFN_LAST;
1825 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1826 gimple_stmt_iterator *);
1828 /* Check whether a load or store statement in the loop described by
1829 LOOP_VINFO is possible in a fully-masked loop. This is testing
1830 whether the vectorizer pass has the appropriate support, as well as
1831 whether the target does.
1833 VLS_TYPE says whether the statement is a load or store and VECTYPE
1834 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1835 says how the load or store is going to be implemented and GROUP_SIZE
1836 is the number of load or store statements in the containing group.
1837 If the access is a gather load or scatter store, GS_INFO describes
1838 its arguments.
1840 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1841 supported, otherwise record the required mask types. */
1843 static void
1844 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1845 vec_load_store_type vls_type, int group_size,
1846 vect_memory_access_type memory_access_type,
1847 gather_scatter_info *gs_info)
1849 /* Invariant loads need no special support. */
1850 if (memory_access_type == VMAT_INVARIANT)
1851 return;
1853 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1854 machine_mode vecmode = TYPE_MODE (vectype);
1855 bool is_load = (vls_type == VLS_LOAD);
1856 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1858 if (is_load
1859 ? !vect_load_lanes_supported (vectype, group_size, true)
1860 : !vect_store_lanes_supported (vectype, group_size, true))
1862 if (dump_enabled_p ())
1863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1864 "can't use a fully-masked loop because the"
1865 " target doesn't have an appropriate masked"
1866 " load/store-lanes instruction.\n");
1867 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1868 return;
1870 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1871 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1872 return;
1875 if (memory_access_type == VMAT_GATHER_SCATTER)
1877 internal_fn ifn = (is_load
1878 ? IFN_MASK_GATHER_LOAD
1879 : IFN_MASK_SCATTER_STORE);
1880 tree offset_type = TREE_TYPE (gs_info->offset);
1881 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1882 gs_info->memory_type,
1883 TYPE_SIGN (offset_type),
1884 gs_info->scale))
1886 if (dump_enabled_p ())
1887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1888 "can't use a fully-masked loop because the"
1889 " target doesn't have an appropriate masked"
1890 " gather load or scatter store instruction.\n");
1891 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1892 return;
1894 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1895 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1896 return;
1899 if (memory_access_type != VMAT_CONTIGUOUS
1900 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1902 /* Element X of the data must come from iteration i * VF + X of the
1903 scalar loop. We need more work to support other mappings. */
1904 if (dump_enabled_p ())
1905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1906 "can't use a fully-masked loop because an access"
1907 " isn't contiguous.\n");
1908 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1909 return;
1912 machine_mode mask_mode;
1913 if (!(targetm.vectorize.get_mask_mode
1914 (GET_MODE_NUNITS (vecmode),
1915 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1916 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1918 if (dump_enabled_p ())
1919 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1920 "can't use a fully-masked loop because the target"
1921 " doesn't have the appropriate masked load or"
1922 " store.\n");
1923 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1924 return;
1926 /* We might load more scalars than we need for permuting SLP loads.
1927 We checked in get_group_load_store_type that the extra elements
1928 don't leak into a new vector. */
1929 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1930 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1931 unsigned int nvectors;
1932 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1933 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1934 else
1935 gcc_unreachable ();
1938 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1939 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1940 that needs to be applied to all loads and stores in a vectorized loop.
1941 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1943 MASK_TYPE is the type of both masks. If new statements are needed,
1944 insert them before GSI. */
1946 static tree
1947 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1948 gimple_stmt_iterator *gsi)
1950 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1951 if (!loop_mask)
1952 return vec_mask;
1954 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1955 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1956 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1957 vec_mask, loop_mask);
1958 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1959 return and_res;
1962 /* Determine whether we can use a gather load or scatter store to vectorize
1963 strided load or store STMT_INFO by truncating the current offset to a
1964 smaller width. We need to be able to construct an offset vector:
1966 { 0, X, X*2, X*3, ... }
1968 without loss of precision, where X is STMT_INFO's DR_STEP.
1970 Return true if this is possible, describing the gather load or scatter
1971 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1973 static bool
1974 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1975 loop_vec_info loop_vinfo, bool masked_p,
1976 gather_scatter_info *gs_info)
1978 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1979 tree step = DR_STEP (dr);
1980 if (TREE_CODE (step) != INTEGER_CST)
1982 /* ??? Perhaps we could use range information here? */
1983 if (dump_enabled_p ())
1984 dump_printf_loc (MSG_NOTE, vect_location,
1985 "cannot truncate variable step.\n");
1986 return false;
1989 /* Get the number of bits in an element. */
1990 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1991 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1992 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1994 /* Set COUNT to the upper limit on the number of elements - 1.
1995 Start with the maximum vectorization factor. */
1996 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1998 /* Try lowering COUNT to the number of scalar latch iterations. */
1999 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2000 widest_int max_iters;
2001 if (max_loop_iterations (loop, &max_iters)
2002 && max_iters < count)
2003 count = max_iters.to_shwi ();
2005 /* Try scales of 1 and the element size. */
2006 int scales[] = { 1, vect_get_scalar_dr_size (dr) };
2007 wi::overflow_type overflow = wi::OVF_NONE;
2008 for (int i = 0; i < 2; ++i)
2010 int scale = scales[i];
2011 widest_int factor;
2012 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2013 continue;
2015 /* See whether we can calculate (COUNT - 1) * STEP / SCALE
2016 in OFFSET_BITS bits. */
2017 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2018 if (overflow)
2019 continue;
2020 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2021 if (wi::min_precision (range, sign) > element_bits)
2023 overflow = wi::OVF_UNKNOWN;
2024 continue;
2027 /* See whether the target supports the operation. */
2028 tree memory_type = TREE_TYPE (DR_REF (dr));
2029 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2030 memory_type, element_bits, sign, scale,
2031 &gs_info->ifn, &gs_info->element_type))
2032 continue;
2034 tree offset_type = build_nonstandard_integer_type (element_bits,
2035 sign == UNSIGNED);
2037 gs_info->decl = NULL_TREE;
2038 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2039 but we don't need to store that here. */
2040 gs_info->base = NULL_TREE;
2041 gs_info->offset = fold_convert (offset_type, step);
2042 gs_info->offset_dt = vect_constant_def;
2043 gs_info->offset_vectype = NULL_TREE;
2044 gs_info->scale = scale;
2045 gs_info->memory_type = memory_type;
2046 return true;
2049 if (overflow && dump_enabled_p ())
2050 dump_printf_loc (MSG_NOTE, vect_location,
2051 "truncating gather/scatter offset to %d bits"
2052 " might change its value.\n", element_bits);
2054 return false;
2057 /* Return true if we can use gather/scatter internal functions to
2058 vectorize STMT_INFO, which is a grouped or strided load or store.
2059 MASKED_P is true if load or store is conditional. When returning
2060 true, fill in GS_INFO with the information required to perform the
2061 operation. */
2063 static bool
2064 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2065 loop_vec_info loop_vinfo, bool masked_p,
2066 gather_scatter_info *gs_info)
2068 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2069 || gs_info->decl)
2070 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2071 masked_p, gs_info);
2073 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2074 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2075 tree offset_type = TREE_TYPE (gs_info->offset);
2076 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2078 /* Enforced by vect_check_gather_scatter. */
2079 gcc_assert (element_bits >= offset_bits);
2081 /* If the elements are wider than the offset, convert the offset to the
2082 same width, without changing its sign. */
2083 if (element_bits > offset_bits)
2085 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2086 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2087 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2090 if (dump_enabled_p ())
2091 dump_printf_loc (MSG_NOTE, vect_location,
2092 "using gather/scatter for strided/grouped access,"
2093 " scale = %d\n", gs_info->scale);
2095 return true;
2098 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2099 elements with a known constant step. Return -1 if that step
2100 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2102 static int
2103 compare_step_with_zero (stmt_vec_info stmt_info)
2105 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2106 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
2107 size_zero_node);
2110 /* If the target supports a permute mask that reverses the elements in
2111 a vector of type VECTYPE, return that mask, otherwise return null. */
2113 static tree
2114 perm_mask_for_reverse (tree vectype)
2116 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2118 /* The encoding has a single stepped pattern. */
2119 vec_perm_builder sel (nunits, 1, 3);
2120 for (int i = 0; i < 3; ++i)
2121 sel.quick_push (nunits - 1 - i);
2123 vec_perm_indices indices (sel, 1, nunits);
2124 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2125 return NULL_TREE;
2126 return vect_gen_perm_mask_checked (vectype, indices);
2129 /* STMT_INFO is either a masked or unconditional store. Return the value
2130 being stored. */
2132 tree
2133 vect_get_store_rhs (stmt_vec_info stmt_info)
2135 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2137 gcc_assert (gimple_assign_single_p (assign));
2138 return gimple_assign_rhs1 (assign);
2140 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2142 internal_fn ifn = gimple_call_internal_fn (call);
2143 int index = internal_fn_stored_value_index (ifn);
2144 gcc_assert (index >= 0);
2145 return gimple_call_arg (call, index);
2147 gcc_unreachable ();
2150 /* A subroutine of get_load_store_type, with a subset of the same
2151 arguments. Handle the case where STMT_INFO is part of a grouped load
2152 or store.
2154 For stores, the statements in the group are all consecutive
2155 and there is no gap at the end. For loads, the statements in the
2156 group might not be consecutive; there can be gaps between statements
2157 as well as at the end. */
2159 static bool
2160 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2161 bool masked_p, vec_load_store_type vls_type,
2162 vect_memory_access_type *memory_access_type,
2163 gather_scatter_info *gs_info)
2165 vec_info *vinfo = stmt_info->vinfo;
2166 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2167 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2168 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2169 data_reference *first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
2170 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2171 bool single_element_p = (stmt_info == first_stmt_info
2172 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2173 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2174 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2176 /* True if the vectorized statements would access beyond the last
2177 statement in the group. */
2178 bool overrun_p = false;
2180 /* True if we can cope with such overrun by peeling for gaps, so that
2181 there is at least one final scalar iteration after the vector loop. */
2182 bool can_overrun_p = (!masked_p
2183 && vls_type == VLS_LOAD
2184 && loop_vinfo
2185 && !loop->inner);
2187 /* There can only be a gap at the end of the group if the stride is
2188 known at compile time. */
2189 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2191 /* Stores can't yet have gaps. */
2192 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2194 if (slp)
2196 if (STMT_VINFO_STRIDED_P (stmt_info))
2198 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2199 separated by the stride, until we have a complete vector.
2200 Fall back to scalar accesses if that isn't possible. */
2201 if (multiple_p (nunits, group_size))
2202 *memory_access_type = VMAT_STRIDED_SLP;
2203 else
2204 *memory_access_type = VMAT_ELEMENTWISE;
2206 else
2208 overrun_p = loop_vinfo && gap != 0;
2209 if (overrun_p && vls_type != VLS_LOAD)
2211 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2212 "Grouped store with gaps requires"
2213 " non-consecutive accesses\n");
2214 return false;
2216 /* An overrun is fine if the trailing elements are smaller
2217 than the alignment boundary B. Every vector access will
2218 be a multiple of B and so we are guaranteed to access a
2219 non-gap element in the same B-sized block. */
2220 if (overrun_p
2221 && gap < (vect_known_alignment_in_bytes (first_dr)
2222 / vect_get_scalar_dr_size (first_dr)))
2223 overrun_p = false;
2224 if (overrun_p && !can_overrun_p)
2226 if (dump_enabled_p ())
2227 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2228 "Peeling for outer loop is not supported\n");
2229 return false;
2231 *memory_access_type = VMAT_CONTIGUOUS;
2234 else
2236 /* We can always handle this case using elementwise accesses,
2237 but see if something more efficient is available. */
2238 *memory_access_type = VMAT_ELEMENTWISE;
2240 /* If there is a gap at the end of the group then these optimizations
2241 would access excess elements in the last iteration. */
2242 bool would_overrun_p = (gap != 0);
2243 /* An overrun is fine if the trailing elements are smaller than the
2244 alignment boundary B. Every vector access will be a multiple of B
2245 and so we are guaranteed to access a non-gap element in the
2246 same B-sized block. */
2247 if (would_overrun_p
2248 && !masked_p
2249 && gap < (vect_known_alignment_in_bytes (first_dr)
2250 / vect_get_scalar_dr_size (first_dr)))
2251 would_overrun_p = false;
2253 if (!STMT_VINFO_STRIDED_P (stmt_info)
2254 && (can_overrun_p || !would_overrun_p)
2255 && compare_step_with_zero (stmt_info) > 0)
2257 /* First cope with the degenerate case of a single-element
2258 vector. */
2259 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2260 *memory_access_type = VMAT_CONTIGUOUS;
2262 /* Otherwise try using LOAD/STORE_LANES. */
2263 if (*memory_access_type == VMAT_ELEMENTWISE
2264 && (vls_type == VLS_LOAD
2265 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2266 : vect_store_lanes_supported (vectype, group_size,
2267 masked_p)))
2269 *memory_access_type = VMAT_LOAD_STORE_LANES;
2270 overrun_p = would_overrun_p;
2273 /* If that fails, try using permuting loads. */
2274 if (*memory_access_type == VMAT_ELEMENTWISE
2275 && (vls_type == VLS_LOAD
2276 ? vect_grouped_load_supported (vectype, single_element_p,
2277 group_size)
2278 : vect_grouped_store_supported (vectype, group_size)))
2280 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2281 overrun_p = would_overrun_p;
2285 /* As a last resort, trying using a gather load or scatter store.
2287 ??? Although the code can handle all group sizes correctly,
2288 it probably isn't a win to use separate strided accesses based
2289 on nearby locations. Or, even if it's a win over scalar code,
2290 it might not be a win over vectorizing at a lower VF, if that
2291 allows us to use contiguous accesses. */
2292 if (*memory_access_type == VMAT_ELEMENTWISE
2293 && single_element_p
2294 && loop_vinfo
2295 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2296 masked_p, gs_info))
2297 *memory_access_type = VMAT_GATHER_SCATTER;
2300 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2302 /* STMT is the leader of the group. Check the operands of all the
2303 stmts of the group. */
2304 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2305 while (next_stmt_info)
2307 tree op = vect_get_store_rhs (next_stmt_info);
2308 enum vect_def_type dt;
2309 if (!vect_is_simple_use (op, vinfo, &dt))
2311 if (dump_enabled_p ())
2312 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2313 "use not simple.\n");
2314 return false;
2316 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2320 if (overrun_p)
2322 gcc_assert (can_overrun_p);
2323 if (dump_enabled_p ())
2324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2325 "Data access with gaps requires scalar "
2326 "epilogue loop\n");
2327 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2330 return true;
2333 /* A subroutine of get_load_store_type, with a subset of the same
2334 arguments. Handle the case where STMT_INFO is a load or store that
2335 accesses consecutive elements with a negative step. */
2337 static vect_memory_access_type
2338 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2339 vec_load_store_type vls_type,
2340 unsigned int ncopies)
2342 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2343 dr_alignment_support alignment_support_scheme;
2345 if (ncopies > 1)
2347 if (dump_enabled_p ())
2348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2349 "multiple types with negative step.\n");
2350 return VMAT_ELEMENTWISE;
2353 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2354 if (alignment_support_scheme != dr_aligned
2355 && alignment_support_scheme != dr_unaligned_supported)
2357 if (dump_enabled_p ())
2358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2359 "negative step but alignment required.\n");
2360 return VMAT_ELEMENTWISE;
2363 if (vls_type == VLS_STORE_INVARIANT)
2365 if (dump_enabled_p ())
2366 dump_printf_loc (MSG_NOTE, vect_location,
2367 "negative step with invariant source;"
2368 " no permute needed.\n");
2369 return VMAT_CONTIGUOUS_DOWN;
2372 if (!perm_mask_for_reverse (vectype))
2374 if (dump_enabled_p ())
2375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2376 "negative step and reversing not supported.\n");
2377 return VMAT_ELEMENTWISE;
2380 return VMAT_CONTIGUOUS_REVERSE;
2383 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2384 if there is a memory access type that the vectorized form can use,
2385 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2386 or scatters, fill in GS_INFO accordingly.
2388 SLP says whether we're performing SLP rather than loop vectorization.
2389 MASKED_P is true if the statement is conditional on a vectorized mask.
2390 VECTYPE is the vector type that the vectorized statements will use.
2391 NCOPIES is the number of vector statements that will be needed. */
2393 static bool
2394 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2395 bool masked_p, vec_load_store_type vls_type,
2396 unsigned int ncopies,
2397 vect_memory_access_type *memory_access_type,
2398 gather_scatter_info *gs_info)
2400 vec_info *vinfo = stmt_info->vinfo;
2401 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2402 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2403 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2405 *memory_access_type = VMAT_GATHER_SCATTER;
2406 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2407 gcc_unreachable ();
2408 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2409 &gs_info->offset_dt,
2410 &gs_info->offset_vectype))
2412 if (dump_enabled_p ())
2413 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2414 "%s index use not simple.\n",
2415 vls_type == VLS_LOAD ? "gather" : "scatter");
2416 return false;
2419 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2421 if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2422 vls_type, memory_access_type, gs_info))
2423 return false;
2425 else if (STMT_VINFO_STRIDED_P (stmt_info))
2427 gcc_assert (!slp);
2428 if (loop_vinfo
2429 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2430 masked_p, gs_info))
2431 *memory_access_type = VMAT_GATHER_SCATTER;
2432 else
2433 *memory_access_type = VMAT_ELEMENTWISE;
2435 else
2437 int cmp = compare_step_with_zero (stmt_info);
2438 if (cmp < 0)
2439 *memory_access_type = get_negative_load_store_type
2440 (stmt_info, vectype, vls_type, ncopies);
2441 else if (cmp == 0)
2443 gcc_assert (vls_type == VLS_LOAD);
2444 *memory_access_type = VMAT_INVARIANT;
2446 else
2447 *memory_access_type = VMAT_CONTIGUOUS;
2450 if ((*memory_access_type == VMAT_ELEMENTWISE
2451 || *memory_access_type == VMAT_STRIDED_SLP)
2452 && !nunits.is_constant ())
2454 if (dump_enabled_p ())
2455 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2456 "Not using elementwise accesses due to variable "
2457 "vectorization factor.\n");
2458 return false;
2461 /* FIXME: At the moment the cost model seems to underestimate the
2462 cost of using elementwise accesses. This check preserves the
2463 traditional behavior until that can be fixed. */
2464 if (*memory_access_type == VMAT_ELEMENTWISE
2465 && !STMT_VINFO_STRIDED_P (stmt_info)
2466 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2467 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2468 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2470 if (dump_enabled_p ())
2471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2472 "not falling back to elementwise accesses\n");
2473 return false;
2475 return true;
2478 /* Return true if boolean argument MASK is suitable for vectorizing
2479 conditional load or store STMT_INFO. When returning true, store the type
2480 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2481 in *MASK_VECTYPE_OUT. */
2483 static bool
2484 vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
2485 vect_def_type *mask_dt_out,
2486 tree *mask_vectype_out)
2488 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2490 if (dump_enabled_p ())
2491 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2492 "mask argument is not a boolean.\n");
2493 return false;
2496 if (TREE_CODE (mask) != SSA_NAME)
2498 if (dump_enabled_p ())
2499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2500 "mask argument is not an SSA name.\n");
2501 return false;
2504 enum vect_def_type mask_dt;
2505 tree mask_vectype;
2506 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2508 if (dump_enabled_p ())
2509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2510 "mask use not simple.\n");
2511 return false;
2514 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2515 if (!mask_vectype)
2516 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2518 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2520 if (dump_enabled_p ())
2521 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2522 "could not find an appropriate vector mask type.\n");
2523 return false;
2526 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2527 TYPE_VECTOR_SUBPARTS (vectype)))
2529 if (dump_enabled_p ())
2531 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2532 "vector mask type ");
2533 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2534 dump_printf (MSG_MISSED_OPTIMIZATION,
2535 " does not match vector data type ");
2536 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2537 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2539 return false;
2542 *mask_dt_out = mask_dt;
2543 *mask_vectype_out = mask_vectype;
2544 return true;
2547 /* Return true if stored value RHS is suitable for vectorizing store
2548 statement STMT_INFO. When returning true, store the type of the
2549 definition in *RHS_DT_OUT, the type of the vectorized store value in
2550 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2552 static bool
2553 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2554 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2555 vec_load_store_type *vls_type_out)
2557 /* In the case this is a store from a constant make sure
2558 native_encode_expr can handle it. */
2559 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2561 if (dump_enabled_p ())
2562 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2563 "cannot encode constant as a byte sequence.\n");
2564 return false;
2567 enum vect_def_type rhs_dt;
2568 tree rhs_vectype;
2569 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2571 if (dump_enabled_p ())
2572 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2573 "use not simple.\n");
2574 return false;
2577 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2578 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2580 if (dump_enabled_p ())
2581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2582 "incompatible vector types.\n");
2583 return false;
2586 *rhs_dt_out = rhs_dt;
2587 *rhs_vectype_out = rhs_vectype;
2588 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2589 *vls_type_out = VLS_STORE_INVARIANT;
2590 else
2591 *vls_type_out = VLS_STORE;
2592 return true;
2595 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2596 Note that we support masks with floating-point type, in which case the
2597 floats are interpreted as a bitmask. */
2599 static tree
2600 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2602 if (TREE_CODE (masktype) == INTEGER_TYPE)
2603 return build_int_cst (masktype, -1);
2604 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2606 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2607 mask = build_vector_from_val (masktype, mask);
2608 return vect_init_vector (stmt_info, mask, masktype, NULL);
2610 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2612 REAL_VALUE_TYPE r;
2613 long tmp[6];
2614 for (int j = 0; j < 6; ++j)
2615 tmp[j] = -1;
2616 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2617 tree mask = build_real (TREE_TYPE (masktype), r);
2618 mask = build_vector_from_val (masktype, mask);
2619 return vect_init_vector (stmt_info, mask, masktype, NULL);
2621 gcc_unreachable ();
2624 /* Build an all-zero merge value of type VECTYPE while vectorizing
2625 STMT_INFO as a gather load. */
2627 static tree
2628 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2630 tree merge;
2631 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2632 merge = build_int_cst (TREE_TYPE (vectype), 0);
2633 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2635 REAL_VALUE_TYPE r;
2636 long tmp[6];
2637 for (int j = 0; j < 6; ++j)
2638 tmp[j] = 0;
2639 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2640 merge = build_real (TREE_TYPE (vectype), r);
2642 else
2643 gcc_unreachable ();
2644 merge = build_vector_from_val (vectype, merge);
2645 return vect_init_vector (stmt_info, merge, vectype, NULL);
2648 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2649 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2650 the gather load operation. If the load is conditional, MASK is the
2651 unvectorized condition and MASK_DT is its definition type, otherwise
2652 MASK is null. */
2654 static void
2655 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2656 gimple_stmt_iterator *gsi,
2657 stmt_vec_info *vec_stmt,
2658 gather_scatter_info *gs_info,
2659 tree mask)
2661 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2662 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2663 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2664 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2665 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2666 edge pe = loop_preheader_edge (loop);
2667 enum { NARROW, NONE, WIDEN } modifier;
2668 poly_uint64 gather_off_nunits
2669 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2671 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2672 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2673 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2674 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2675 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2676 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2677 tree scaletype = TREE_VALUE (arglist);
2678 gcc_checking_assert (types_compatible_p (srctype, rettype)
2679 && (!mask || types_compatible_p (srctype, masktype)));
2681 tree perm_mask = NULL_TREE;
2682 tree mask_perm_mask = NULL_TREE;
2683 if (known_eq (nunits, gather_off_nunits))
2684 modifier = NONE;
2685 else if (known_eq (nunits * 2, gather_off_nunits))
2687 modifier = WIDEN;
2689 /* Currently widening gathers and scatters are only supported for
2690 fixed-length vectors. */
2691 int count = gather_off_nunits.to_constant ();
2692 vec_perm_builder sel (count, count, 1);
2693 for (int i = 0; i < count; ++i)
2694 sel.quick_push (i | (count / 2));
2696 vec_perm_indices indices (sel, 1, count);
2697 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2698 indices);
2700 else if (known_eq (nunits, gather_off_nunits * 2))
2702 modifier = NARROW;
2704 /* Currently narrowing gathers and scatters are only supported for
2705 fixed-length vectors. */
2706 int count = nunits.to_constant ();
2707 vec_perm_builder sel (count, count, 1);
2708 sel.quick_grow (count);
2709 for (int i = 0; i < count; ++i)
2710 sel[i] = i < count / 2 ? i : i + count / 2;
2711 vec_perm_indices indices (sel, 2, count);
2712 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2714 ncopies *= 2;
2716 if (mask)
2718 for (int i = 0; i < count; ++i)
2719 sel[i] = i | (count / 2);
2720 indices.new_vector (sel, 2, count);
2721 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2724 else
2725 gcc_unreachable ();
2727 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2728 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2730 tree ptr = fold_convert (ptrtype, gs_info->base);
2731 if (!is_gimple_min_invariant (ptr))
2733 gimple_seq seq;
2734 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2735 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2736 gcc_assert (!new_bb);
2739 tree scale = build_int_cst (scaletype, gs_info->scale);
2741 tree vec_oprnd0 = NULL_TREE;
2742 tree vec_mask = NULL_TREE;
2743 tree src_op = NULL_TREE;
2744 tree mask_op = NULL_TREE;
2745 tree prev_res = NULL_TREE;
2746 stmt_vec_info prev_stmt_info = NULL;
2748 if (!mask)
2750 src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2751 mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2754 for (int j = 0; j < ncopies; ++j)
2756 tree op, var;
2757 if (modifier == WIDEN && (j & 1))
2758 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2759 perm_mask, stmt_info, gsi);
2760 else if (j == 0)
2761 op = vec_oprnd0
2762 = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2763 else
2764 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2765 vec_oprnd0);
2767 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2769 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2770 TYPE_VECTOR_SUBPARTS (idxtype)));
2771 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2772 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2773 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2774 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2775 op = var;
2778 if (mask)
2780 if (mask_perm_mask && (j & 1))
2781 mask_op = permute_vec_elements (mask_op, mask_op,
2782 mask_perm_mask, stmt_info, gsi);
2783 else
2785 if (j == 0)
2786 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2787 else
2788 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2789 vec_mask);
2791 mask_op = vec_mask;
2792 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2794 gcc_assert
2795 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2796 TYPE_VECTOR_SUBPARTS (masktype)));
2797 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2798 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2799 gassign *new_stmt
2800 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2801 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2802 mask_op = var;
2805 src_op = mask_op;
2808 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2809 mask_op, scale);
2811 stmt_vec_info new_stmt_info;
2812 if (!useless_type_conversion_p (vectype, rettype))
2814 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2815 TYPE_VECTOR_SUBPARTS (rettype)));
2816 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2817 gimple_call_set_lhs (new_call, op);
2818 vect_finish_stmt_generation (stmt_info, new_call, gsi);
2819 var = make_ssa_name (vec_dest);
2820 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2821 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2822 new_stmt_info
2823 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2825 else
2827 var = make_ssa_name (vec_dest, new_call);
2828 gimple_call_set_lhs (new_call, var);
2829 new_stmt_info
2830 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2833 if (modifier == NARROW)
2835 if ((j & 1) == 0)
2837 prev_res = var;
2838 continue;
2840 var = permute_vec_elements (prev_res, var, perm_mask,
2841 stmt_info, gsi);
2842 new_stmt_info = loop_vinfo->lookup_def (var);
2845 if (prev_stmt_info == NULL_STMT_VEC_INFO)
2846 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2847 else
2848 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2849 prev_stmt_info = new_stmt_info;
2853 /* Prepare the base and offset in GS_INFO for vectorization.
2854 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2855 to the vectorized offset argument for the first copy of STMT_INFO.
2856 STMT_INFO is the statement described by GS_INFO and LOOP is the
2857 containing loop. */
2859 static void
2860 vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info,
2861 gather_scatter_info *gs_info,
2862 tree *dataref_ptr, tree *vec_offset)
2864 gimple_seq stmts = NULL;
2865 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2866 if (stmts != NULL)
2868 basic_block new_bb;
2869 edge pe = loop_preheader_edge (loop);
2870 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2871 gcc_assert (!new_bb);
2873 tree offset_type = TREE_TYPE (gs_info->offset);
2874 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2875 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
2876 offset_vectype);
2879 /* Prepare to implement a grouped or strided load or store using
2880 the gather load or scatter store operation described by GS_INFO.
2881 STMT_INFO is the load or store statement.
2883 Set *DATAREF_BUMP to the amount that should be added to the base
2884 address after each copy of the vectorized statement. Set *VEC_OFFSET
2885 to an invariant offset vector in which element I has the value
2886 I * DR_STEP / SCALE. */
2888 static void
2889 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2890 loop_vec_info loop_vinfo,
2891 gather_scatter_info *gs_info,
2892 tree *dataref_bump, tree *vec_offset)
2894 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2895 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2896 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2897 gimple_seq stmts;
2899 tree bump = size_binop (MULT_EXPR,
2900 fold_convert (sizetype, DR_STEP (dr)),
2901 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2902 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2903 if (stmts)
2904 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2906 /* The offset given in GS_INFO can have pointer type, so use the element
2907 type of the vector instead. */
2908 tree offset_type = TREE_TYPE (gs_info->offset);
2909 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2910 offset_type = TREE_TYPE (offset_vectype);
2912 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2913 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2914 ssize_int (gs_info->scale));
2915 step = fold_convert (offset_type, step);
2916 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2918 /* Create {0, X, X*2, X*3, ...}. */
2919 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2920 build_zero_cst (offset_type), step);
2921 if (stmts)
2922 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2925 /* Return the amount that should be added to a vector pointer to move
2926 to the next or previous copy of AGGR_TYPE. DR is the data reference
2927 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2928 vectorization. */
2930 static tree
2931 vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2932 vect_memory_access_type memory_access_type)
2934 if (memory_access_type == VMAT_INVARIANT)
2935 return size_zero_node;
2937 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2938 tree step = vect_dr_behavior (dr)->step;
2939 if (tree_int_cst_sgn (step) == -1)
2940 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2941 return iv_step;
2944 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2946 static bool
2947 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2948 stmt_vec_info *vec_stmt, slp_tree slp_node,
2949 tree vectype_in, stmt_vector_for_cost *cost_vec)
2951 tree op, vectype;
2952 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2953 vec_info *vinfo = stmt_info->vinfo;
2954 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2955 unsigned ncopies;
2956 unsigned HOST_WIDE_INT nunits, num_bytes;
2958 op = gimple_call_arg (stmt, 0);
2959 vectype = STMT_VINFO_VECTYPE (stmt_info);
2961 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2962 return false;
2964 /* Multiple types in SLP are handled by creating the appropriate number of
2965 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2966 case of SLP. */
2967 if (slp_node)
2968 ncopies = 1;
2969 else
2970 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2972 gcc_assert (ncopies >= 1);
2974 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2975 if (! char_vectype)
2976 return false;
2978 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2979 return false;
2981 unsigned word_bytes = num_bytes / nunits;
2983 /* The encoding uses one stepped pattern for each byte in the word. */
2984 vec_perm_builder elts (num_bytes, word_bytes, 3);
2985 for (unsigned i = 0; i < 3; ++i)
2986 for (unsigned j = 0; j < word_bytes; ++j)
2987 elts.quick_push ((i + 1) * word_bytes - j - 1);
2989 vec_perm_indices indices (elts, 1, num_bytes);
2990 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2991 return false;
2993 if (! vec_stmt)
2995 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2996 DUMP_VECT_SCOPE ("vectorizable_bswap");
2997 if (! slp_node)
2999 record_stmt_cost (cost_vec,
3000 1, vector_stmt, stmt_info, 0, vect_prologue);
3001 record_stmt_cost (cost_vec,
3002 ncopies, vec_perm, stmt_info, 0, vect_body);
3004 return true;
3007 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3009 /* Transform. */
3010 vec<tree> vec_oprnds = vNULL;
3011 stmt_vec_info new_stmt_info = NULL;
3012 stmt_vec_info prev_stmt_info = NULL;
3013 for (unsigned j = 0; j < ncopies; j++)
3015 /* Handle uses. */
3016 if (j == 0)
3017 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
3018 else
3019 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3021 /* Arguments are ready. create the new vector stmt. */
3022 unsigned i;
3023 tree vop;
3024 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3026 gimple *new_stmt;
3027 tree tem = make_ssa_name (char_vectype);
3028 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3029 char_vectype, vop));
3030 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3031 tree tem2 = make_ssa_name (char_vectype);
3032 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3033 tem, tem, bswap_vconst);
3034 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3035 tem = make_ssa_name (vectype);
3036 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3037 vectype, tem2));
3038 new_stmt_info
3039 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3040 if (slp_node)
3041 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3044 if (slp_node)
3045 continue;
3047 if (j == 0)
3048 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3049 else
3050 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3052 prev_stmt_info = new_stmt_info;
3055 vec_oprnds.release ();
3056 return true;
3059 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3060 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3061 in a single step. On success, store the binary pack code in
3062 *CONVERT_CODE. */
3064 static bool
3065 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3066 tree_code *convert_code)
3068 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3069 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3070 return false;
3072 tree_code code;
3073 int multi_step_cvt = 0;
3074 auto_vec <tree, 8> interm_types;
3075 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3076 &code, &multi_step_cvt,
3077 &interm_types)
3078 || multi_step_cvt)
3079 return false;
3081 *convert_code = code;
3082 return true;
3085 /* Function vectorizable_call.
3087 Check if STMT_INFO performs a function call that can be vectorized.
3088 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3089 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3090 Return true if STMT_INFO is vectorizable in this way. */
3092 static bool
3093 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3094 stmt_vec_info *vec_stmt, slp_tree slp_node,
3095 stmt_vector_for_cost *cost_vec)
3097 gcall *stmt;
3098 tree vec_dest;
3099 tree scalar_dest;
3100 tree op;
3101 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3102 stmt_vec_info prev_stmt_info;
3103 tree vectype_out, vectype_in;
3104 poly_uint64 nunits_in;
3105 poly_uint64 nunits_out;
3106 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3107 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3108 vec_info *vinfo = stmt_info->vinfo;
3109 tree fndecl, new_temp, rhs_type;
3110 enum vect_def_type dt[4]
3111 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3112 vect_unknown_def_type };
3113 int ndts = ARRAY_SIZE (dt);
3114 int ncopies, j;
3115 auto_vec<tree, 8> vargs;
3116 auto_vec<tree, 8> orig_vargs;
3117 enum { NARROW, NONE, WIDEN } modifier;
3118 size_t i, nargs;
3119 tree lhs;
3121 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3122 return false;
3124 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3125 && ! vec_stmt)
3126 return false;
3128 /* Is STMT_INFO a vectorizable call? */
3129 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3130 if (!stmt)
3131 return false;
3133 if (gimple_call_internal_p (stmt)
3134 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3135 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3136 /* Handled by vectorizable_load and vectorizable_store. */
3137 return false;
3139 if (gimple_call_lhs (stmt) == NULL_TREE
3140 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3141 return false;
3143 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3145 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3147 /* Process function arguments. */
3148 rhs_type = NULL_TREE;
3149 vectype_in = NULL_TREE;
3150 nargs = gimple_call_num_args (stmt);
3152 /* Bail out if the function has more than three arguments, we do not have
3153 interesting builtin functions to vectorize with more than two arguments
3154 except for fma. No arguments is also not good. */
3155 if (nargs == 0 || nargs > 4)
3156 return false;
3158 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3159 combined_fn cfn = gimple_call_combined_fn (stmt);
3160 if (cfn == CFN_GOMP_SIMD_LANE)
3162 nargs = 0;
3163 rhs_type = unsigned_type_node;
3166 int mask_opno = -1;
3167 if (internal_fn_p (cfn))
3168 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3170 for (i = 0; i < nargs; i++)
3172 tree opvectype;
3174 op = gimple_call_arg (stmt, i);
3175 if (!vect_is_simple_use (op, vinfo, &dt[i], &opvectype))
3177 if (dump_enabled_p ())
3178 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3179 "use not simple.\n");
3180 return false;
3183 /* Skip the mask argument to an internal function. This operand
3184 has been converted via a pattern if necessary. */
3185 if ((int) i == mask_opno)
3186 continue;
3188 /* We can only handle calls with arguments of the same type. */
3189 if (rhs_type
3190 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3192 if (dump_enabled_p ())
3193 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3194 "argument types differ.\n");
3195 return false;
3197 if (!rhs_type)
3198 rhs_type = TREE_TYPE (op);
3200 if (!vectype_in)
3201 vectype_in = opvectype;
3202 else if (opvectype
3203 && opvectype != vectype_in)
3205 if (dump_enabled_p ())
3206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3207 "argument vector types differ.\n");
3208 return false;
3211 /* If all arguments are external or constant defs use a vector type with
3212 the same size as the output vector type. */
3213 if (!vectype_in)
3214 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3215 if (vec_stmt)
3216 gcc_assert (vectype_in);
3217 if (!vectype_in)
3219 if (dump_enabled_p ())
3221 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3222 "no vectype for scalar type ");
3223 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3224 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3227 return false;
3230 /* FORNOW */
3231 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3232 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3233 if (known_eq (nunits_in * 2, nunits_out))
3234 modifier = NARROW;
3235 else if (known_eq (nunits_out, nunits_in))
3236 modifier = NONE;
3237 else if (known_eq (nunits_out * 2, nunits_in))
3238 modifier = WIDEN;
3239 else
3240 return false;
3242 /* We only handle functions that do not read or clobber memory. */
3243 if (gimple_vuse (stmt))
3245 if (dump_enabled_p ())
3246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3247 "function reads from or writes to memory.\n");
3248 return false;
3251 /* For now, we only vectorize functions if a target specific builtin
3252 is available. TODO -- in some cases, it might be profitable to
3253 insert the calls for pieces of the vector, in order to be able
3254 to vectorize other operations in the loop. */
3255 fndecl = NULL_TREE;
3256 internal_fn ifn = IFN_LAST;
3257 tree callee = gimple_call_fndecl (stmt);
3259 /* First try using an internal function. */
3260 tree_code convert_code = ERROR_MARK;
3261 if (cfn != CFN_LAST
3262 && (modifier == NONE
3263 || (modifier == NARROW
3264 && simple_integer_narrowing (vectype_out, vectype_in,
3265 &convert_code))))
3266 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3267 vectype_in);
3269 /* If that fails, try asking for a target-specific built-in function. */
3270 if (ifn == IFN_LAST)
3272 if (cfn != CFN_LAST)
3273 fndecl = targetm.vectorize.builtin_vectorized_function
3274 (cfn, vectype_out, vectype_in);
3275 else if (callee)
3276 fndecl = targetm.vectorize.builtin_md_vectorized_function
3277 (callee, vectype_out, vectype_in);
3280 if (ifn == IFN_LAST && !fndecl)
3282 if (cfn == CFN_GOMP_SIMD_LANE
3283 && !slp_node
3284 && loop_vinfo
3285 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3286 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3287 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3288 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3290 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3291 { 0, 1, 2, ... vf - 1 } vector. */
3292 gcc_assert (nargs == 0);
3294 else if (modifier == NONE
3295 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3296 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3297 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3298 return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3299 vectype_in, cost_vec);
3300 else
3302 if (dump_enabled_p ())
3303 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3304 "function is not vectorizable.\n");
3305 return false;
3309 if (slp_node)
3310 ncopies = 1;
3311 else if (modifier == NARROW && ifn == IFN_LAST)
3312 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3313 else
3314 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3316 /* Sanity check: make sure that at least one copy of the vectorized stmt
3317 needs to be generated. */
3318 gcc_assert (ncopies >= 1);
3320 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3321 if (!vec_stmt) /* transformation not required. */
3323 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3324 DUMP_VECT_SCOPE ("vectorizable_call");
3325 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3326 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3327 record_stmt_cost (cost_vec, ncopies / 2,
3328 vec_promote_demote, stmt_info, 0, vect_body);
3330 if (loop_vinfo && mask_opno >= 0)
3332 unsigned int nvectors = (slp_node
3333 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3334 : ncopies);
3335 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
3337 return true;
3340 /* Transform. */
3342 if (dump_enabled_p ())
3343 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3345 /* Handle def. */
3346 scalar_dest = gimple_call_lhs (stmt);
3347 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3349 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3351 stmt_vec_info new_stmt_info = NULL;
3352 prev_stmt_info = NULL;
3353 if (modifier == NONE || ifn != IFN_LAST)
3355 tree prev_res = NULL_TREE;
3356 vargs.safe_grow (nargs);
3357 orig_vargs.safe_grow (nargs);
3358 for (j = 0; j < ncopies; ++j)
3360 /* Build argument list for the vectorized call. */
3361 if (slp_node)
3363 auto_vec<vec<tree> > vec_defs (nargs);
3364 vec<tree> vec_oprnds0;
3366 for (i = 0; i < nargs; i++)
3367 vargs[i] = gimple_call_arg (stmt, i);
3368 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3369 vec_oprnds0 = vec_defs[0];
3371 /* Arguments are ready. Create the new vector stmt. */
3372 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3374 size_t k;
3375 for (k = 0; k < nargs; k++)
3377 vec<tree> vec_oprndsk = vec_defs[k];
3378 vargs[k] = vec_oprndsk[i];
3380 if (modifier == NARROW)
3382 /* We don't define any narrowing conditional functions
3383 at present. */
3384 gcc_assert (mask_opno < 0);
3385 tree half_res = make_ssa_name (vectype_in);
3386 gcall *call
3387 = gimple_build_call_internal_vec (ifn, vargs);
3388 gimple_call_set_lhs (call, half_res);
3389 gimple_call_set_nothrow (call, true);
3390 new_stmt_info
3391 = vect_finish_stmt_generation (stmt_info, call, gsi);
3392 if ((i & 1) == 0)
3394 prev_res = half_res;
3395 continue;
3397 new_temp = make_ssa_name (vec_dest);
3398 gimple *new_stmt
3399 = gimple_build_assign (new_temp, convert_code,
3400 prev_res, half_res);
3401 new_stmt_info
3402 = vect_finish_stmt_generation (stmt_info, new_stmt,
3403 gsi);
3405 else
3407 if (mask_opno >= 0 && masked_loop_p)
3409 unsigned int vec_num = vec_oprnds0.length ();
3410 /* Always true for SLP. */
3411 gcc_assert (ncopies == 1);
3412 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3413 vectype_out, i);
3414 vargs[mask_opno] = prepare_load_store_mask
3415 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3418 gcall *call;
3419 if (ifn != IFN_LAST)
3420 call = gimple_build_call_internal_vec (ifn, vargs);
3421 else
3422 call = gimple_build_call_vec (fndecl, vargs);
3423 new_temp = make_ssa_name (vec_dest, call);
3424 gimple_call_set_lhs (call, new_temp);
3425 gimple_call_set_nothrow (call, true);
3426 new_stmt_info
3427 = vect_finish_stmt_generation (stmt_info, call, gsi);
3429 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3432 for (i = 0; i < nargs; i++)
3434 vec<tree> vec_oprndsi = vec_defs[i];
3435 vec_oprndsi.release ();
3437 continue;
3440 for (i = 0; i < nargs; i++)
3442 op = gimple_call_arg (stmt, i);
3443 if (j == 0)
3444 vec_oprnd0
3445 = vect_get_vec_def_for_operand (op, stmt_info);
3446 else
3447 vec_oprnd0
3448 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3450 orig_vargs[i] = vargs[i] = vec_oprnd0;
3453 if (mask_opno >= 0 && masked_loop_p)
3455 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3456 vectype_out, j);
3457 vargs[mask_opno]
3458 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3459 vargs[mask_opno], gsi);
3462 if (cfn == CFN_GOMP_SIMD_LANE)
3464 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3465 tree new_var
3466 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3467 gimple *init_stmt = gimple_build_assign (new_var, cst);
3468 vect_init_vector_1 (stmt_info, init_stmt, NULL);
3469 new_temp = make_ssa_name (vec_dest);
3470 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3471 new_stmt_info
3472 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3474 else if (modifier == NARROW)
3476 /* We don't define any narrowing conditional functions at
3477 present. */
3478 gcc_assert (mask_opno < 0);
3479 tree half_res = make_ssa_name (vectype_in);
3480 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3481 gimple_call_set_lhs (call, half_res);
3482 gimple_call_set_nothrow (call, true);
3483 new_stmt_info
3484 = vect_finish_stmt_generation (stmt_info, call, gsi);
3485 if ((j & 1) == 0)
3487 prev_res = half_res;
3488 continue;
3490 new_temp = make_ssa_name (vec_dest);
3491 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3492 prev_res, half_res);
3493 new_stmt_info
3494 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3496 else
3498 gcall *call;
3499 if (ifn != IFN_LAST)
3500 call = gimple_build_call_internal_vec (ifn, vargs);
3501 else
3502 call = gimple_build_call_vec (fndecl, vargs);
3503 new_temp = make_ssa_name (vec_dest, call);
3504 gimple_call_set_lhs (call, new_temp);
3505 gimple_call_set_nothrow (call, true);
3506 new_stmt_info
3507 = vect_finish_stmt_generation (stmt_info, call, gsi);
3510 if (j == (modifier == NARROW ? 1 : 0))
3511 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3512 else
3513 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3515 prev_stmt_info = new_stmt_info;
3518 else if (modifier == NARROW)
3520 /* We don't define any narrowing conditional functions at present. */
3521 gcc_assert (mask_opno < 0);
3522 for (j = 0; j < ncopies; ++j)
3524 /* Build argument list for the vectorized call. */
3525 if (j == 0)
3526 vargs.create (nargs * 2);
3527 else
3528 vargs.truncate (0);
3530 if (slp_node)
3532 auto_vec<vec<tree> > vec_defs (nargs);
3533 vec<tree> vec_oprnds0;
3535 for (i = 0; i < nargs; i++)
3536 vargs.quick_push (gimple_call_arg (stmt, i));
3537 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3538 vec_oprnds0 = vec_defs[0];
3540 /* Arguments are ready. Create the new vector stmt. */
3541 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3543 size_t k;
3544 vargs.truncate (0);
3545 for (k = 0; k < nargs; k++)
3547 vec<tree> vec_oprndsk = vec_defs[k];
3548 vargs.quick_push (vec_oprndsk[i]);
3549 vargs.quick_push (vec_oprndsk[i + 1]);
3551 gcall *call;
3552 if (ifn != IFN_LAST)
3553 call = gimple_build_call_internal_vec (ifn, vargs);
3554 else
3555 call = gimple_build_call_vec (fndecl, vargs);
3556 new_temp = make_ssa_name (vec_dest, call);
3557 gimple_call_set_lhs (call, new_temp);
3558 gimple_call_set_nothrow (call, true);
3559 new_stmt_info
3560 = vect_finish_stmt_generation (stmt_info, call, gsi);
3561 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3564 for (i = 0; i < nargs; i++)
3566 vec<tree> vec_oprndsi = vec_defs[i];
3567 vec_oprndsi.release ();
3569 continue;
3572 for (i = 0; i < nargs; i++)
3574 op = gimple_call_arg (stmt, i);
3575 if (j == 0)
3577 vec_oprnd0
3578 = vect_get_vec_def_for_operand (op, stmt_info);
3579 vec_oprnd1
3580 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3582 else
3584 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3585 2 * i + 1);
3586 vec_oprnd0
3587 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3588 vec_oprnd1
3589 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3592 vargs.quick_push (vec_oprnd0);
3593 vargs.quick_push (vec_oprnd1);
3596 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3597 new_temp = make_ssa_name (vec_dest, new_stmt);
3598 gimple_call_set_lhs (new_stmt, new_temp);
3599 new_stmt_info
3600 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3602 if (j == 0)
3603 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3604 else
3605 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3607 prev_stmt_info = new_stmt_info;
3610 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3612 else
3613 /* No current target implements this case. */
3614 return false;
3616 vargs.release ();
3618 /* The call in STMT might prevent it from being removed in dce.
3619 We however cannot remove it here, due to the way the ssa name
3620 it defines is mapped to the new definition. So just replace
3621 rhs of the statement with something harmless. */
3623 if (slp_node)
3624 return true;
3626 if (is_pattern_stmt_p (stmt_info))
3627 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
3628 lhs = gimple_get_lhs (stmt_info->stmt);
3630 gassign *new_stmt
3631 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3632 set_vinfo_for_stmt (new_stmt, stmt_info);
3633 set_vinfo_for_stmt (stmt_info->stmt, NULL);
3634 STMT_VINFO_STMT (stmt_info) = new_stmt;
3635 gsi_replace (gsi, new_stmt, false);
3637 return true;
3641 struct simd_call_arg_info
3643 tree vectype;
3644 tree op;
3645 HOST_WIDE_INT linear_step;
3646 enum vect_def_type dt;
3647 unsigned int align;
3648 bool simd_lane_linear;
3651 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3652 is linear within simd lane (but not within whole loop), note it in
3653 *ARGINFO. */
3655 static void
3656 vect_simd_lane_linear (tree op, struct loop *loop,
3657 struct simd_call_arg_info *arginfo)
3659 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3661 if (!is_gimple_assign (def_stmt)
3662 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3663 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3664 return;
3666 tree base = gimple_assign_rhs1 (def_stmt);
3667 HOST_WIDE_INT linear_step = 0;
3668 tree v = gimple_assign_rhs2 (def_stmt);
3669 while (TREE_CODE (v) == SSA_NAME)
3671 tree t;
3672 def_stmt = SSA_NAME_DEF_STMT (v);
3673 if (is_gimple_assign (def_stmt))
3674 switch (gimple_assign_rhs_code (def_stmt))
3676 case PLUS_EXPR:
3677 t = gimple_assign_rhs2 (def_stmt);
3678 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3679 return;
3680 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3681 v = gimple_assign_rhs1 (def_stmt);
3682 continue;
3683 case MULT_EXPR:
3684 t = gimple_assign_rhs2 (def_stmt);
3685 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3686 return;
3687 linear_step = tree_to_shwi (t);
3688 v = gimple_assign_rhs1 (def_stmt);
3689 continue;
3690 CASE_CONVERT:
3691 t = gimple_assign_rhs1 (def_stmt);
3692 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3693 || (TYPE_PRECISION (TREE_TYPE (v))
3694 < TYPE_PRECISION (TREE_TYPE (t))))
3695 return;
3696 if (!linear_step)
3697 linear_step = 1;
3698 v = t;
3699 continue;
3700 default:
3701 return;
3703 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3704 && loop->simduid
3705 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3706 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3707 == loop->simduid))
3709 if (!linear_step)
3710 linear_step = 1;
3711 arginfo->linear_step = linear_step;
3712 arginfo->op = base;
3713 arginfo->simd_lane_linear = true;
3714 return;
3719 /* Return the number of elements in vector type VECTYPE, which is associated
3720 with a SIMD clone. At present these vectors always have a constant
3721 length. */
3723 static unsigned HOST_WIDE_INT
3724 simd_clone_subparts (tree vectype)
3726 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3729 /* Function vectorizable_simd_clone_call.
3731 Check if STMT_INFO performs a function call that can be vectorized
3732 by calling a simd clone of the function.
3733 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3734 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3735 Return true if STMT_INFO is vectorizable in this way. */
3737 static bool
3738 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3739 gimple_stmt_iterator *gsi,
3740 stmt_vec_info *vec_stmt, slp_tree slp_node,
3741 stmt_vector_for_cost *)
3743 tree vec_dest;
3744 tree scalar_dest;
3745 tree op, type;
3746 tree vec_oprnd0 = NULL_TREE;
3747 stmt_vec_info prev_stmt_info;
3748 tree vectype;
3749 unsigned int nunits;
3750 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3751 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3752 vec_info *vinfo = stmt_info->vinfo;
3753 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3754 tree fndecl, new_temp;
3755 int ncopies, j;
3756 auto_vec<simd_call_arg_info> arginfo;
3757 vec<tree> vargs = vNULL;
3758 size_t i, nargs;
3759 tree lhs, rtype, ratype;
3760 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3762 /* Is STMT a vectorizable call? */
3763 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3764 if (!stmt)
3765 return false;
3767 fndecl = gimple_call_fndecl (stmt);
3768 if (fndecl == NULL_TREE)
3769 return false;
3771 struct cgraph_node *node = cgraph_node::get (fndecl);
3772 if (node == NULL || node->simd_clones == NULL)
3773 return false;
3775 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3776 return false;
3778 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3779 && ! vec_stmt)
3780 return false;
3782 if (gimple_call_lhs (stmt)
3783 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3784 return false;
3786 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3788 vectype = STMT_VINFO_VECTYPE (stmt_info);
3790 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3791 return false;
3793 /* FORNOW */
3794 if (slp_node)
3795 return false;
3797 /* Process function arguments. */
3798 nargs = gimple_call_num_args (stmt);
3800 /* Bail out if the function has zero arguments. */
3801 if (nargs == 0)
3802 return false;
3804 arginfo.reserve (nargs, true);
3806 for (i = 0; i < nargs; i++)
3808 simd_call_arg_info thisarginfo;
3809 affine_iv iv;
3811 thisarginfo.linear_step = 0;
3812 thisarginfo.align = 0;
3813 thisarginfo.op = NULL_TREE;
3814 thisarginfo.simd_lane_linear = false;
3816 op = gimple_call_arg (stmt, i);
3817 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3818 &thisarginfo.vectype)
3819 || thisarginfo.dt == vect_uninitialized_def)
3821 if (dump_enabled_p ())
3822 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3823 "use not simple.\n");
3824 return false;
3827 if (thisarginfo.dt == vect_constant_def
3828 || thisarginfo.dt == vect_external_def)
3829 gcc_assert (thisarginfo.vectype == NULL_TREE);
3830 else
3831 gcc_assert (thisarginfo.vectype != NULL_TREE);
3833 /* For linear arguments, the analyze phase should have saved
3834 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3835 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3836 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3838 gcc_assert (vec_stmt);
3839 thisarginfo.linear_step
3840 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3841 thisarginfo.op
3842 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3843 thisarginfo.simd_lane_linear
3844 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3845 == boolean_true_node);
3846 /* If loop has been peeled for alignment, we need to adjust it. */
3847 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3848 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3849 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3851 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3852 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3853 tree opt = TREE_TYPE (thisarginfo.op);
3854 bias = fold_convert (TREE_TYPE (step), bias);
3855 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3856 thisarginfo.op
3857 = fold_build2 (POINTER_TYPE_P (opt)
3858 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3859 thisarginfo.op, bias);
3862 else if (!vec_stmt
3863 && thisarginfo.dt != vect_constant_def
3864 && thisarginfo.dt != vect_external_def
3865 && loop_vinfo
3866 && TREE_CODE (op) == SSA_NAME
3867 && simple_iv (loop, loop_containing_stmt (stmt), op,
3868 &iv, false)
3869 && tree_fits_shwi_p (iv.step))
3871 thisarginfo.linear_step = tree_to_shwi (iv.step);
3872 thisarginfo.op = iv.base;
3874 else if ((thisarginfo.dt == vect_constant_def
3875 || thisarginfo.dt == vect_external_def)
3876 && POINTER_TYPE_P (TREE_TYPE (op)))
3877 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3878 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3879 linear too. */
3880 if (POINTER_TYPE_P (TREE_TYPE (op))
3881 && !thisarginfo.linear_step
3882 && !vec_stmt
3883 && thisarginfo.dt != vect_constant_def
3884 && thisarginfo.dt != vect_external_def
3885 && loop_vinfo
3886 && !slp_node
3887 && TREE_CODE (op) == SSA_NAME)
3888 vect_simd_lane_linear (op, loop, &thisarginfo);
3890 arginfo.quick_push (thisarginfo);
3893 unsigned HOST_WIDE_INT vf;
3894 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3896 if (dump_enabled_p ())
3897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3898 "not considering SIMD clones; not yet supported"
3899 " for variable-width vectors.\n");
3900 return NULL;
3903 unsigned int badness = 0;
3904 struct cgraph_node *bestn = NULL;
3905 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3906 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3907 else
3908 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3909 n = n->simdclone->next_clone)
3911 unsigned int this_badness = 0;
3912 if (n->simdclone->simdlen > vf
3913 || n->simdclone->nargs != nargs)
3914 continue;
3915 if (n->simdclone->simdlen < vf)
3916 this_badness += (exact_log2 (vf)
3917 - exact_log2 (n->simdclone->simdlen)) * 1024;
3918 if (n->simdclone->inbranch)
3919 this_badness += 2048;
3920 int target_badness = targetm.simd_clone.usable (n);
3921 if (target_badness < 0)
3922 continue;
3923 this_badness += target_badness * 512;
3924 /* FORNOW: Have to add code to add the mask argument. */
3925 if (n->simdclone->inbranch)
3926 continue;
3927 for (i = 0; i < nargs; i++)
3929 switch (n->simdclone->args[i].arg_type)
3931 case SIMD_CLONE_ARG_TYPE_VECTOR:
3932 if (!useless_type_conversion_p
3933 (n->simdclone->args[i].orig_type,
3934 TREE_TYPE (gimple_call_arg (stmt, i))))
3935 i = -1;
3936 else if (arginfo[i].dt == vect_constant_def
3937 || arginfo[i].dt == vect_external_def
3938 || arginfo[i].linear_step)
3939 this_badness += 64;
3940 break;
3941 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3942 if (arginfo[i].dt != vect_constant_def
3943 && arginfo[i].dt != vect_external_def)
3944 i = -1;
3945 break;
3946 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3947 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3948 if (arginfo[i].dt == vect_constant_def
3949 || arginfo[i].dt == vect_external_def
3950 || (arginfo[i].linear_step
3951 != n->simdclone->args[i].linear_step))
3952 i = -1;
3953 break;
3954 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3955 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3956 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3957 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3958 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3959 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3960 /* FORNOW */
3961 i = -1;
3962 break;
3963 case SIMD_CLONE_ARG_TYPE_MASK:
3964 gcc_unreachable ();
3966 if (i == (size_t) -1)
3967 break;
3968 if (n->simdclone->args[i].alignment > arginfo[i].align)
3970 i = -1;
3971 break;
3973 if (arginfo[i].align)
3974 this_badness += (exact_log2 (arginfo[i].align)
3975 - exact_log2 (n->simdclone->args[i].alignment));
3977 if (i == (size_t) -1)
3978 continue;
3979 if (bestn == NULL || this_badness < badness)
3981 bestn = n;
3982 badness = this_badness;
3986 if (bestn == NULL)
3987 return false;
3989 for (i = 0; i < nargs; i++)
3990 if ((arginfo[i].dt == vect_constant_def
3991 || arginfo[i].dt == vect_external_def)
3992 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3994 arginfo[i].vectype
3995 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3996 i)));
3997 if (arginfo[i].vectype == NULL
3998 || (simd_clone_subparts (arginfo[i].vectype)
3999 > bestn->simdclone->simdlen))
4000 return false;
4003 fndecl = bestn->decl;
4004 nunits = bestn->simdclone->simdlen;
4005 ncopies = vf / nunits;
4007 /* If the function isn't const, only allow it in simd loops where user
4008 has asserted that at least nunits consecutive iterations can be
4009 performed using SIMD instructions. */
4010 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4011 && gimple_vuse (stmt))
4012 return false;
4014 /* Sanity check: make sure that at least one copy of the vectorized stmt
4015 needs to be generated. */
4016 gcc_assert (ncopies >= 1);
4018 if (!vec_stmt) /* transformation not required. */
4020 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4021 for (i = 0; i < nargs; i++)
4022 if ((bestn->simdclone->args[i].arg_type
4023 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4024 || (bestn->simdclone->args[i].arg_type
4025 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4027 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4028 + 1);
4029 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4030 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4031 ? size_type_node : TREE_TYPE (arginfo[i].op);
4032 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4033 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4034 tree sll = arginfo[i].simd_lane_linear
4035 ? boolean_true_node : boolean_false_node;
4036 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4038 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4039 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4040 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4041 return true;
4044 /* Transform. */
4046 if (dump_enabled_p ())
4047 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4049 /* Handle def. */
4050 scalar_dest = gimple_call_lhs (stmt);
4051 vec_dest = NULL_TREE;
4052 rtype = NULL_TREE;
4053 ratype = NULL_TREE;
4054 if (scalar_dest)
4056 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4057 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4058 if (TREE_CODE (rtype) == ARRAY_TYPE)
4060 ratype = rtype;
4061 rtype = TREE_TYPE (ratype);
4065 prev_stmt_info = NULL;
4066 for (j = 0; j < ncopies; ++j)
4068 /* Build argument list for the vectorized call. */
4069 if (j == 0)
4070 vargs.create (nargs);
4071 else
4072 vargs.truncate (0);
4074 for (i = 0; i < nargs; i++)
4076 unsigned int k, l, m, o;
4077 tree atype;
4078 op = gimple_call_arg (stmt, i);
4079 switch (bestn->simdclone->args[i].arg_type)
4081 case SIMD_CLONE_ARG_TYPE_VECTOR:
4082 atype = bestn->simdclone->args[i].vector_type;
4083 o = nunits / simd_clone_subparts (atype);
4084 for (m = j * o; m < (j + 1) * o; m++)
4086 if (simd_clone_subparts (atype)
4087 < simd_clone_subparts (arginfo[i].vectype))
4089 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4090 k = (simd_clone_subparts (arginfo[i].vectype)
4091 / simd_clone_subparts (atype));
4092 gcc_assert ((k & (k - 1)) == 0);
4093 if (m == 0)
4094 vec_oprnd0
4095 = vect_get_vec_def_for_operand (op, stmt_info);
4096 else
4098 vec_oprnd0 = arginfo[i].op;
4099 if ((m & (k - 1)) == 0)
4100 vec_oprnd0
4101 = vect_get_vec_def_for_stmt_copy (vinfo,
4102 vec_oprnd0);
4104 arginfo[i].op = vec_oprnd0;
4105 vec_oprnd0
4106 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4107 bitsize_int (prec),
4108 bitsize_int ((m & (k - 1)) * prec));
4109 gassign *new_stmt
4110 = gimple_build_assign (make_ssa_name (atype),
4111 vec_oprnd0);
4112 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4113 vargs.safe_push (gimple_assign_lhs (new_stmt));
4115 else
4117 k = (simd_clone_subparts (atype)
4118 / simd_clone_subparts (arginfo[i].vectype));
4119 gcc_assert ((k & (k - 1)) == 0);
4120 vec<constructor_elt, va_gc> *ctor_elts;
4121 if (k != 1)
4122 vec_alloc (ctor_elts, k);
4123 else
4124 ctor_elts = NULL;
4125 for (l = 0; l < k; l++)
4127 if (m == 0 && l == 0)
4128 vec_oprnd0
4129 = vect_get_vec_def_for_operand (op, stmt_info);
4130 else
4131 vec_oprnd0
4132 = vect_get_vec_def_for_stmt_copy (vinfo,
4133 arginfo[i].op);
4134 arginfo[i].op = vec_oprnd0;
4135 if (k == 1)
4136 break;
4137 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4138 vec_oprnd0);
4140 if (k == 1)
4141 vargs.safe_push (vec_oprnd0);
4142 else
4144 vec_oprnd0 = build_constructor (atype, ctor_elts);
4145 gassign *new_stmt
4146 = gimple_build_assign (make_ssa_name (atype),
4147 vec_oprnd0);
4148 vect_finish_stmt_generation (stmt_info, new_stmt,
4149 gsi);
4150 vargs.safe_push (gimple_assign_lhs (new_stmt));
4154 break;
4155 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4156 vargs.safe_push (op);
4157 break;
4158 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4159 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4160 if (j == 0)
4162 gimple_seq stmts;
4163 arginfo[i].op
4164 = force_gimple_operand (arginfo[i].op, &stmts, true,
4165 NULL_TREE);
4166 if (stmts != NULL)
4168 basic_block new_bb;
4169 edge pe = loop_preheader_edge (loop);
4170 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4171 gcc_assert (!new_bb);
4173 if (arginfo[i].simd_lane_linear)
4175 vargs.safe_push (arginfo[i].op);
4176 break;
4178 tree phi_res = copy_ssa_name (op);
4179 gphi *new_phi = create_phi_node (phi_res, loop->header);
4180 loop_vinfo->add_stmt (new_phi);
4181 add_phi_arg (new_phi, arginfo[i].op,
4182 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4183 enum tree_code code
4184 = POINTER_TYPE_P (TREE_TYPE (op))
4185 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4186 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4187 ? sizetype : TREE_TYPE (op);
4188 widest_int cst
4189 = wi::mul (bestn->simdclone->args[i].linear_step,
4190 ncopies * nunits);
4191 tree tcst = wide_int_to_tree (type, cst);
4192 tree phi_arg = copy_ssa_name (op);
4193 gassign *new_stmt
4194 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4195 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4196 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4197 loop_vinfo->add_stmt (new_stmt);
4198 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4199 UNKNOWN_LOCATION);
4200 arginfo[i].op = phi_res;
4201 vargs.safe_push (phi_res);
4203 else
4205 enum tree_code code
4206 = POINTER_TYPE_P (TREE_TYPE (op))
4207 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4208 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4209 ? sizetype : TREE_TYPE (op);
4210 widest_int cst
4211 = wi::mul (bestn->simdclone->args[i].linear_step,
4212 j * nunits);
4213 tree tcst = wide_int_to_tree (type, cst);
4214 new_temp = make_ssa_name (TREE_TYPE (op));
4215 gassign *new_stmt
4216 = gimple_build_assign (new_temp, code,
4217 arginfo[i].op, tcst);
4218 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4219 vargs.safe_push (new_temp);
4221 break;
4222 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4223 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4224 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4225 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4226 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4227 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4228 default:
4229 gcc_unreachable ();
4233 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4234 if (vec_dest)
4236 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4237 if (ratype)
4238 new_temp = create_tmp_var (ratype);
4239 else if (simd_clone_subparts (vectype)
4240 == simd_clone_subparts (rtype))
4241 new_temp = make_ssa_name (vec_dest, new_call);
4242 else
4243 new_temp = make_ssa_name (rtype, new_call);
4244 gimple_call_set_lhs (new_call, new_temp);
4246 stmt_vec_info new_stmt_info
4247 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
4249 if (vec_dest)
4251 if (simd_clone_subparts (vectype) < nunits)
4253 unsigned int k, l;
4254 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4255 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4256 k = nunits / simd_clone_subparts (vectype);
4257 gcc_assert ((k & (k - 1)) == 0);
4258 for (l = 0; l < k; l++)
4260 tree t;
4261 if (ratype)
4263 t = build_fold_addr_expr (new_temp);
4264 t = build2 (MEM_REF, vectype, t,
4265 build_int_cst (TREE_TYPE (t), l * bytes));
4267 else
4268 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4269 bitsize_int (prec), bitsize_int (l * prec));
4270 gimple *new_stmt
4271 = gimple_build_assign (make_ssa_name (vectype), t);
4272 new_stmt_info
4273 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4275 if (j == 0 && l == 0)
4276 STMT_VINFO_VEC_STMT (stmt_info)
4277 = *vec_stmt = new_stmt_info;
4278 else
4279 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4281 prev_stmt_info = new_stmt_info;
4284 if (ratype)
4285 vect_clobber_variable (stmt_info, gsi, new_temp);
4286 continue;
4288 else if (simd_clone_subparts (vectype) > nunits)
4290 unsigned int k = (simd_clone_subparts (vectype)
4291 / simd_clone_subparts (rtype));
4292 gcc_assert ((k & (k - 1)) == 0);
4293 if ((j & (k - 1)) == 0)
4294 vec_alloc (ret_ctor_elts, k);
4295 if (ratype)
4297 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4298 for (m = 0; m < o; m++)
4300 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4301 size_int (m), NULL_TREE, NULL_TREE);
4302 gimple *new_stmt
4303 = gimple_build_assign (make_ssa_name (rtype), tem);
4304 new_stmt_info
4305 = vect_finish_stmt_generation (stmt_info, new_stmt,
4306 gsi);
4307 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4308 gimple_assign_lhs (new_stmt));
4310 vect_clobber_variable (stmt_info, gsi, new_temp);
4312 else
4313 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4314 if ((j & (k - 1)) != k - 1)
4315 continue;
4316 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4317 gimple *new_stmt
4318 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4319 new_stmt_info
4320 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4322 if ((unsigned) j == k - 1)
4323 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4324 else
4325 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4327 prev_stmt_info = new_stmt_info;
4328 continue;
4330 else if (ratype)
4332 tree t = build_fold_addr_expr (new_temp);
4333 t = build2 (MEM_REF, vectype, t,
4334 build_int_cst (TREE_TYPE (t), 0));
4335 gimple *new_stmt
4336 = gimple_build_assign (make_ssa_name (vec_dest), t);
4337 new_stmt_info
4338 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4339 vect_clobber_variable (stmt_info, gsi, new_temp);
4343 if (j == 0)
4344 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4345 else
4346 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4348 prev_stmt_info = new_stmt_info;
4351 vargs.release ();
4353 /* The call in STMT might prevent it from being removed in dce.
4354 We however cannot remove it here, due to the way the ssa name
4355 it defines is mapped to the new definition. So just replace
4356 rhs of the statement with something harmless. */
4358 if (slp_node)
4359 return true;
4361 gimple *new_stmt;
4362 if (scalar_dest)
4364 type = TREE_TYPE (scalar_dest);
4365 if (is_pattern_stmt_p (stmt_info))
4366 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
4367 else
4368 lhs = gimple_call_lhs (stmt);
4369 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4371 else
4372 new_stmt = gimple_build_nop ();
4373 set_vinfo_for_stmt (new_stmt, stmt_info);
4374 set_vinfo_for_stmt (stmt, NULL);
4375 STMT_VINFO_STMT (stmt_info) = new_stmt;
4376 gsi_replace (gsi, new_stmt, true);
4377 unlink_stmt_vdef (stmt);
4379 return true;
4383 /* Function vect_gen_widened_results_half
4385 Create a vector stmt whose code, type, number of arguments, and result
4386 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4387 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
4388 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4389 needs to be created (DECL is a function-decl of a target-builtin).
4390 STMT_INFO is the original scalar stmt that we are vectorizing. */
4392 static gimple *
4393 vect_gen_widened_results_half (enum tree_code code,
4394 tree decl,
4395 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4396 tree vec_dest, gimple_stmt_iterator *gsi,
4397 stmt_vec_info stmt_info)
4399 gimple *new_stmt;
4400 tree new_temp;
4402 /* Generate half of the widened result: */
4403 if (code == CALL_EXPR)
4405 /* Target specific support */
4406 if (op_type == binary_op)
4407 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4408 else
4409 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4410 new_temp = make_ssa_name (vec_dest, new_stmt);
4411 gimple_call_set_lhs (new_stmt, new_temp);
4413 else
4415 /* Generic support */
4416 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4417 if (op_type != binary_op)
4418 vec_oprnd1 = NULL;
4419 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4420 new_temp = make_ssa_name (vec_dest, new_stmt);
4421 gimple_assign_set_lhs (new_stmt, new_temp);
4423 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4425 return new_stmt;
4429 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4430 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4431 containing scalar operand), and for the rest we get a copy with
4432 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4433 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4434 The vectors are collected into VEC_OPRNDS. */
4436 static void
4437 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4438 vec<tree> *vec_oprnds, int multi_step_cvt)
4440 vec_info *vinfo = stmt_info->vinfo;
4441 tree vec_oprnd;
4443 /* Get first vector operand. */
4444 /* All the vector operands except the very first one (that is scalar oprnd)
4445 are stmt copies. */
4446 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4447 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4448 else
4449 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4451 vec_oprnds->quick_push (vec_oprnd);
4453 /* Get second vector operand. */
4454 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4455 vec_oprnds->quick_push (vec_oprnd);
4457 *oprnd = vec_oprnd;
4459 /* For conversion in multiple steps, continue to get operands
4460 recursively. */
4461 if (multi_step_cvt)
4462 vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4463 multi_step_cvt - 1);
4467 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4468 For multi-step conversions store the resulting vectors and call the function
4469 recursively. */
4471 static void
4472 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4473 int multi_step_cvt,
4474 stmt_vec_info stmt_info,
4475 vec<tree> vec_dsts,
4476 gimple_stmt_iterator *gsi,
4477 slp_tree slp_node, enum tree_code code,
4478 stmt_vec_info *prev_stmt_info)
4480 unsigned int i;
4481 tree vop0, vop1, new_tmp, vec_dest;
4483 vec_dest = vec_dsts.pop ();
4485 for (i = 0; i < vec_oprnds->length (); i += 2)
4487 /* Create demotion operation. */
4488 vop0 = (*vec_oprnds)[i];
4489 vop1 = (*vec_oprnds)[i + 1];
4490 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4491 new_tmp = make_ssa_name (vec_dest, new_stmt);
4492 gimple_assign_set_lhs (new_stmt, new_tmp);
4493 stmt_vec_info new_stmt_info
4494 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4496 if (multi_step_cvt)
4497 /* Store the resulting vector for next recursive call. */
4498 (*vec_oprnds)[i/2] = new_tmp;
4499 else
4501 /* This is the last step of the conversion sequence. Store the
4502 vectors in SLP_NODE or in vector info of the scalar statement
4503 (or in STMT_VINFO_RELATED_STMT chain). */
4504 if (slp_node)
4505 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4506 else
4508 if (!*prev_stmt_info)
4509 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4510 else
4511 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4513 *prev_stmt_info = new_stmt_info;
4518 /* For multi-step demotion operations we first generate demotion operations
4519 from the source type to the intermediate types, and then combine the
4520 results (stored in VEC_OPRNDS) in demotion operation to the destination
4521 type. */
4522 if (multi_step_cvt)
4524 /* At each level of recursion we have half of the operands we had at the
4525 previous level. */
4526 vec_oprnds->truncate ((i+1)/2);
4527 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4528 stmt_info, vec_dsts, gsi,
4529 slp_node, VEC_PACK_TRUNC_EXPR,
4530 prev_stmt_info);
4533 vec_dsts.quick_push (vec_dest);
4537 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4538 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4539 STMT_INFO. For multi-step conversions store the resulting vectors and
4540 call the function recursively. */
4542 static void
4543 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4544 vec<tree> *vec_oprnds1,
4545 stmt_vec_info stmt_info, tree vec_dest,
4546 gimple_stmt_iterator *gsi,
4547 enum tree_code code1,
4548 enum tree_code code2, tree decl1,
4549 tree decl2, int op_type)
4551 int i;
4552 tree vop0, vop1, new_tmp1, new_tmp2;
4553 gimple *new_stmt1, *new_stmt2;
4554 vec<tree> vec_tmp = vNULL;
4556 vec_tmp.create (vec_oprnds0->length () * 2);
4557 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4559 if (op_type == binary_op)
4560 vop1 = (*vec_oprnds1)[i];
4561 else
4562 vop1 = NULL_TREE;
4564 /* Generate the two halves of promotion operation. */
4565 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4566 op_type, vec_dest, gsi,
4567 stmt_info);
4568 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4569 op_type, vec_dest, gsi,
4570 stmt_info);
4571 if (is_gimple_call (new_stmt1))
4573 new_tmp1 = gimple_call_lhs (new_stmt1);
4574 new_tmp2 = gimple_call_lhs (new_stmt2);
4576 else
4578 new_tmp1 = gimple_assign_lhs (new_stmt1);
4579 new_tmp2 = gimple_assign_lhs (new_stmt2);
4582 /* Store the results for the next step. */
4583 vec_tmp.quick_push (new_tmp1);
4584 vec_tmp.quick_push (new_tmp2);
4587 vec_oprnds0->release ();
4588 *vec_oprnds0 = vec_tmp;
4592 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4593 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4594 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4595 Return true if STMT_INFO is vectorizable in this way. */
4597 static bool
4598 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4599 stmt_vec_info *vec_stmt, slp_tree slp_node,
4600 stmt_vector_for_cost *cost_vec)
4602 tree vec_dest;
4603 tree scalar_dest;
4604 tree op0, op1 = NULL_TREE;
4605 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4606 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4607 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4608 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4609 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4610 tree new_temp;
4611 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4612 int ndts = 2;
4613 stmt_vec_info prev_stmt_info;
4614 poly_uint64 nunits_in;
4615 poly_uint64 nunits_out;
4616 tree vectype_out, vectype_in;
4617 int ncopies, i, j;
4618 tree lhs_type, rhs_type;
4619 enum { NARROW, NONE, WIDEN } modifier;
4620 vec<tree> vec_oprnds0 = vNULL;
4621 vec<tree> vec_oprnds1 = vNULL;
4622 tree vop0;
4623 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4624 vec_info *vinfo = stmt_info->vinfo;
4625 int multi_step_cvt = 0;
4626 vec<tree> interm_types = vNULL;
4627 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4628 int op_type;
4629 unsigned short fltsz;
4631 /* Is STMT a vectorizable conversion? */
4633 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4634 return false;
4636 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4637 && ! vec_stmt)
4638 return false;
4640 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4641 if (!stmt)
4642 return false;
4644 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4645 return false;
4647 code = gimple_assign_rhs_code (stmt);
4648 if (!CONVERT_EXPR_CODE_P (code)
4649 && code != FIX_TRUNC_EXPR
4650 && code != FLOAT_EXPR
4651 && code != WIDEN_MULT_EXPR
4652 && code != WIDEN_LSHIFT_EXPR)
4653 return false;
4655 op_type = TREE_CODE_LENGTH (code);
4657 /* Check types of lhs and rhs. */
4658 scalar_dest = gimple_assign_lhs (stmt);
4659 lhs_type = TREE_TYPE (scalar_dest);
4660 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4662 op0 = gimple_assign_rhs1 (stmt);
4663 rhs_type = TREE_TYPE (op0);
4665 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4666 && !((INTEGRAL_TYPE_P (lhs_type)
4667 && INTEGRAL_TYPE_P (rhs_type))
4668 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4669 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4670 return false;
4672 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4673 && ((INTEGRAL_TYPE_P (lhs_type)
4674 && !type_has_mode_precision_p (lhs_type))
4675 || (INTEGRAL_TYPE_P (rhs_type)
4676 && !type_has_mode_precision_p (rhs_type))))
4678 if (dump_enabled_p ())
4679 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4680 "type conversion to/from bit-precision unsupported."
4681 "\n");
4682 return false;
4685 /* Check the operands of the operation. */
4686 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4688 if (dump_enabled_p ())
4689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4690 "use not simple.\n");
4691 return false;
4693 if (op_type == binary_op)
4695 bool ok;
4697 op1 = gimple_assign_rhs2 (stmt);
4698 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4699 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4700 OP1. */
4701 if (CONSTANT_CLASS_P (op0))
4702 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4703 else
4704 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4706 if (!ok)
4708 if (dump_enabled_p ())
4709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4710 "use not simple.\n");
4711 return false;
4715 /* If op0 is an external or constant defs use a vector type of
4716 the same size as the output vector type. */
4717 if (!vectype_in)
4718 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4719 if (vec_stmt)
4720 gcc_assert (vectype_in);
4721 if (!vectype_in)
4723 if (dump_enabled_p ())
4725 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4726 "no vectype for scalar type ");
4727 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4728 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4731 return false;
4734 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4735 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4737 if (dump_enabled_p ())
4739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4740 "can't convert between boolean and non "
4741 "boolean vectors");
4742 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4743 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4746 return false;
4749 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4750 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4751 if (known_eq (nunits_out, nunits_in))
4752 modifier = NONE;
4753 else if (multiple_p (nunits_out, nunits_in))
4754 modifier = NARROW;
4755 else
4757 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4758 modifier = WIDEN;
4761 /* Multiple types in SLP are handled by creating the appropriate number of
4762 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4763 case of SLP. */
4764 if (slp_node)
4765 ncopies = 1;
4766 else if (modifier == NARROW)
4767 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4768 else
4769 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4771 /* Sanity check: make sure that at least one copy of the vectorized stmt
4772 needs to be generated. */
4773 gcc_assert (ncopies >= 1);
4775 bool found_mode = false;
4776 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4777 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4778 opt_scalar_mode rhs_mode_iter;
4780 /* Supportable by target? */
4781 switch (modifier)
4783 case NONE:
4784 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4785 return false;
4786 if (supportable_convert_operation (code, vectype_out, vectype_in,
4787 &decl1, &code1))
4788 break;
4789 /* FALLTHRU */
4790 unsupported:
4791 if (dump_enabled_p ())
4792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4793 "conversion not supported by target.\n");
4794 return false;
4796 case WIDEN:
4797 if (supportable_widening_operation (code, stmt_info, vectype_out,
4798 vectype_in, &code1, &code2,
4799 &multi_step_cvt, &interm_types))
4801 /* Binary widening operation can only be supported directly by the
4802 architecture. */
4803 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4804 break;
4807 if (code != FLOAT_EXPR
4808 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4809 goto unsupported;
4811 fltsz = GET_MODE_SIZE (lhs_mode);
4812 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4814 rhs_mode = rhs_mode_iter.require ();
4815 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4816 break;
4818 cvt_type
4819 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4820 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4821 if (cvt_type == NULL_TREE)
4822 goto unsupported;
4824 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4826 if (!supportable_convert_operation (code, vectype_out,
4827 cvt_type, &decl1, &codecvt1))
4828 goto unsupported;
4830 else if (!supportable_widening_operation (code, stmt_info,
4831 vectype_out, cvt_type,
4832 &codecvt1, &codecvt2,
4833 &multi_step_cvt,
4834 &interm_types))
4835 continue;
4836 else
4837 gcc_assert (multi_step_cvt == 0);
4839 if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4840 vectype_in, &code1, &code2,
4841 &multi_step_cvt, &interm_types))
4843 found_mode = true;
4844 break;
4848 if (!found_mode)
4849 goto unsupported;
4851 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4852 codecvt2 = ERROR_MARK;
4853 else
4855 multi_step_cvt++;
4856 interm_types.safe_push (cvt_type);
4857 cvt_type = NULL_TREE;
4859 break;
4861 case NARROW:
4862 gcc_assert (op_type == unary_op);
4863 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4864 &code1, &multi_step_cvt,
4865 &interm_types))
4866 break;
4868 if (code != FIX_TRUNC_EXPR
4869 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4870 goto unsupported;
4872 cvt_type
4873 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4874 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4875 if (cvt_type == NULL_TREE)
4876 goto unsupported;
4877 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4878 &decl1, &codecvt1))
4879 goto unsupported;
4880 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4881 &code1, &multi_step_cvt,
4882 &interm_types))
4883 break;
4884 goto unsupported;
4886 default:
4887 gcc_unreachable ();
4890 if (!vec_stmt) /* transformation not required. */
4892 DUMP_VECT_SCOPE ("vectorizable_conversion");
4893 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4895 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4896 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4897 cost_vec);
4899 else if (modifier == NARROW)
4901 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4902 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4903 cost_vec);
4905 else
4907 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4908 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4909 cost_vec);
4911 interm_types.release ();
4912 return true;
4915 /* Transform. */
4916 if (dump_enabled_p ())
4917 dump_printf_loc (MSG_NOTE, vect_location,
4918 "transform conversion. ncopies = %d.\n", ncopies);
4920 if (op_type == binary_op)
4922 if (CONSTANT_CLASS_P (op0))
4923 op0 = fold_convert (TREE_TYPE (op1), op0);
4924 else if (CONSTANT_CLASS_P (op1))
4925 op1 = fold_convert (TREE_TYPE (op0), op1);
4928 /* In case of multi-step conversion, we first generate conversion operations
4929 to the intermediate types, and then from that types to the final one.
4930 We create vector destinations for the intermediate type (TYPES) received
4931 from supportable_*_operation, and store them in the correct order
4932 for future use in vect_create_vectorized_*_stmts (). */
4933 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4934 vec_dest = vect_create_destination_var (scalar_dest,
4935 (cvt_type && modifier == WIDEN)
4936 ? cvt_type : vectype_out);
4937 vec_dsts.quick_push (vec_dest);
4939 if (multi_step_cvt)
4941 for (i = interm_types.length () - 1;
4942 interm_types.iterate (i, &intermediate_type); i--)
4944 vec_dest = vect_create_destination_var (scalar_dest,
4945 intermediate_type);
4946 vec_dsts.quick_push (vec_dest);
4950 if (cvt_type)
4951 vec_dest = vect_create_destination_var (scalar_dest,
4952 modifier == WIDEN
4953 ? vectype_out : cvt_type);
4955 if (!slp_node)
4957 if (modifier == WIDEN)
4959 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4960 if (op_type == binary_op)
4961 vec_oprnds1.create (1);
4963 else if (modifier == NARROW)
4964 vec_oprnds0.create (
4965 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4967 else if (code == WIDEN_LSHIFT_EXPR)
4968 vec_oprnds1.create (slp_node->vec_stmts_size);
4970 last_oprnd = op0;
4971 prev_stmt_info = NULL;
4972 switch (modifier)
4974 case NONE:
4975 for (j = 0; j < ncopies; j++)
4977 if (j == 0)
4978 vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
4979 NULL, slp_node);
4980 else
4981 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
4983 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4985 stmt_vec_info new_stmt_info;
4986 /* Arguments are ready, create the new vector stmt. */
4987 if (code1 == CALL_EXPR)
4989 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
4990 new_temp = make_ssa_name (vec_dest, new_stmt);
4991 gimple_call_set_lhs (new_stmt, new_temp);
4992 new_stmt_info
4993 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4995 else
4997 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4998 gassign *new_stmt
4999 = gimple_build_assign (vec_dest, code1, vop0);
5000 new_temp = make_ssa_name (vec_dest, new_stmt);
5001 gimple_assign_set_lhs (new_stmt, new_temp);
5002 new_stmt_info
5003 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5006 if (slp_node)
5007 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5008 else
5010 if (!prev_stmt_info)
5011 STMT_VINFO_VEC_STMT (stmt_info)
5012 = *vec_stmt = new_stmt_info;
5013 else
5014 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5015 prev_stmt_info = new_stmt_info;
5019 break;
5021 case WIDEN:
5022 /* In case the vectorization factor (VF) is bigger than the number
5023 of elements that we can fit in a vectype (nunits), we have to
5024 generate more than one vector stmt - i.e - we need to "unroll"
5025 the vector stmt by a factor VF/nunits. */
5026 for (j = 0; j < ncopies; j++)
5028 /* Handle uses. */
5029 if (j == 0)
5031 if (slp_node)
5033 if (code == WIDEN_LSHIFT_EXPR)
5035 unsigned int k;
5037 vec_oprnd1 = op1;
5038 /* Store vec_oprnd1 for every vector stmt to be created
5039 for SLP_NODE. We check during the analysis that all
5040 the shift arguments are the same. */
5041 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5042 vec_oprnds1.quick_push (vec_oprnd1);
5044 vect_get_vec_defs (op0, NULL_TREE, stmt_info,
5045 &vec_oprnds0, NULL, slp_node);
5047 else
5048 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
5049 &vec_oprnds1, slp_node);
5051 else
5053 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5054 vec_oprnds0.quick_push (vec_oprnd0);
5055 if (op_type == binary_op)
5057 if (code == WIDEN_LSHIFT_EXPR)
5058 vec_oprnd1 = op1;
5059 else
5060 vec_oprnd1
5061 = vect_get_vec_def_for_operand (op1, stmt_info);
5062 vec_oprnds1.quick_push (vec_oprnd1);
5066 else
5068 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5069 vec_oprnds0.truncate (0);
5070 vec_oprnds0.quick_push (vec_oprnd0);
5071 if (op_type == binary_op)
5073 if (code == WIDEN_LSHIFT_EXPR)
5074 vec_oprnd1 = op1;
5075 else
5076 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5077 vec_oprnd1);
5078 vec_oprnds1.truncate (0);
5079 vec_oprnds1.quick_push (vec_oprnd1);
5083 /* Arguments are ready. Create the new vector stmts. */
5084 for (i = multi_step_cvt; i >= 0; i--)
5086 tree this_dest = vec_dsts[i];
5087 enum tree_code c1 = code1, c2 = code2;
5088 if (i == 0 && codecvt2 != ERROR_MARK)
5090 c1 = codecvt1;
5091 c2 = codecvt2;
5093 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5094 &vec_oprnds1, stmt_info,
5095 this_dest, gsi,
5096 c1, c2, decl1, decl2,
5097 op_type);
5100 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5102 stmt_vec_info new_stmt_info;
5103 if (cvt_type)
5105 if (codecvt1 == CALL_EXPR)
5107 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5108 new_temp = make_ssa_name (vec_dest, new_stmt);
5109 gimple_call_set_lhs (new_stmt, new_temp);
5110 new_stmt_info
5111 = vect_finish_stmt_generation (stmt_info, new_stmt,
5112 gsi);
5114 else
5116 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5117 new_temp = make_ssa_name (vec_dest);
5118 gassign *new_stmt
5119 = gimple_build_assign (new_temp, codecvt1, vop0);
5120 new_stmt_info
5121 = vect_finish_stmt_generation (stmt_info, new_stmt,
5122 gsi);
5125 else
5126 new_stmt_info = vinfo->lookup_def (vop0);
5128 if (slp_node)
5129 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5130 else
5132 if (!prev_stmt_info)
5133 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5134 else
5135 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5136 prev_stmt_info = new_stmt_info;
5141 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5142 break;
5144 case NARROW:
5145 /* In case the vectorization factor (VF) is bigger than the number
5146 of elements that we can fit in a vectype (nunits), we have to
5147 generate more than one vector stmt - i.e - we need to "unroll"
5148 the vector stmt by a factor VF/nunits. */
5149 for (j = 0; j < ncopies; j++)
5151 /* Handle uses. */
5152 if (slp_node)
5153 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5154 slp_node);
5155 else
5157 vec_oprnds0.truncate (0);
5158 vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5159 vect_pow2 (multi_step_cvt) - 1);
5162 /* Arguments are ready. Create the new vector stmts. */
5163 if (cvt_type)
5164 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5166 if (codecvt1 == CALL_EXPR)
5168 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5169 new_temp = make_ssa_name (vec_dest, new_stmt);
5170 gimple_call_set_lhs (new_stmt, new_temp);
5171 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5173 else
5175 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5176 new_temp = make_ssa_name (vec_dest);
5177 gassign *new_stmt
5178 = gimple_build_assign (new_temp, codecvt1, vop0);
5179 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5182 vec_oprnds0[i] = new_temp;
5185 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5186 stmt_info, vec_dsts, gsi,
5187 slp_node, code1,
5188 &prev_stmt_info);
5191 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5192 break;
5195 vec_oprnds0.release ();
5196 vec_oprnds1.release ();
5197 interm_types.release ();
5199 return true;
5203 /* Function vectorizable_assignment.
5205 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5206 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5207 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5208 Return true if STMT_INFO is vectorizable in this way. */
5210 static bool
5211 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5212 stmt_vec_info *vec_stmt, slp_tree slp_node,
5213 stmt_vector_for_cost *cost_vec)
5215 tree vec_dest;
5216 tree scalar_dest;
5217 tree op;
5218 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5219 tree new_temp;
5220 enum vect_def_type dt[1] = {vect_unknown_def_type};
5221 int ndts = 1;
5222 int ncopies;
5223 int i, j;
5224 vec<tree> vec_oprnds = vNULL;
5225 tree vop;
5226 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5227 vec_info *vinfo = stmt_info->vinfo;
5228 stmt_vec_info prev_stmt_info = NULL;
5229 enum tree_code code;
5230 tree vectype_in;
5232 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5233 return false;
5235 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5236 && ! vec_stmt)
5237 return false;
5239 /* Is vectorizable assignment? */
5240 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5241 if (!stmt)
5242 return false;
5244 scalar_dest = gimple_assign_lhs (stmt);
5245 if (TREE_CODE (scalar_dest) != SSA_NAME)
5246 return false;
5248 code = gimple_assign_rhs_code (stmt);
5249 if (gimple_assign_single_p (stmt)
5250 || code == PAREN_EXPR
5251 || CONVERT_EXPR_CODE_P (code))
5252 op = gimple_assign_rhs1 (stmt);
5253 else
5254 return false;
5256 if (code == VIEW_CONVERT_EXPR)
5257 op = TREE_OPERAND (op, 0);
5259 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5260 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5262 /* Multiple types in SLP are handled by creating the appropriate number of
5263 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5264 case of SLP. */
5265 if (slp_node)
5266 ncopies = 1;
5267 else
5268 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5270 gcc_assert (ncopies >= 1);
5272 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5274 if (dump_enabled_p ())
5275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5276 "use not simple.\n");
5277 return false;
5280 /* We can handle NOP_EXPR conversions that do not change the number
5281 of elements or the vector size. */
5282 if ((CONVERT_EXPR_CODE_P (code)
5283 || code == VIEW_CONVERT_EXPR)
5284 && (!vectype_in
5285 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5286 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5287 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5288 return false;
5290 /* We do not handle bit-precision changes. */
5291 if ((CONVERT_EXPR_CODE_P (code)
5292 || code == VIEW_CONVERT_EXPR)
5293 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5294 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5295 || !type_has_mode_precision_p (TREE_TYPE (op)))
5296 /* But a conversion that does not change the bit-pattern is ok. */
5297 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5298 > TYPE_PRECISION (TREE_TYPE (op)))
5299 && TYPE_UNSIGNED (TREE_TYPE (op)))
5300 /* Conversion between boolean types of different sizes is
5301 a simple assignment in case their vectypes are same
5302 boolean vectors. */
5303 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5304 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5306 if (dump_enabled_p ())
5307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5308 "type conversion to/from bit-precision "
5309 "unsupported.\n");
5310 return false;
5313 if (!vec_stmt) /* transformation not required. */
5315 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5316 DUMP_VECT_SCOPE ("vectorizable_assignment");
5317 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5318 return true;
5321 /* Transform. */
5322 if (dump_enabled_p ())
5323 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5325 /* Handle def. */
5326 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5328 /* Handle use. */
5329 for (j = 0; j < ncopies; j++)
5331 /* Handle uses. */
5332 if (j == 0)
5333 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5334 else
5335 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5337 /* Arguments are ready. create the new vector stmt. */
5338 stmt_vec_info new_stmt_info = NULL;
5339 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5341 if (CONVERT_EXPR_CODE_P (code)
5342 || code == VIEW_CONVERT_EXPR)
5343 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5344 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5345 new_temp = make_ssa_name (vec_dest, new_stmt);
5346 gimple_assign_set_lhs (new_stmt, new_temp);
5347 new_stmt_info
5348 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5349 if (slp_node)
5350 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5353 if (slp_node)
5354 continue;
5356 if (j == 0)
5357 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5358 else
5359 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5361 prev_stmt_info = new_stmt_info;
5364 vec_oprnds.release ();
5365 return true;
5369 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5370 either as shift by a scalar or by a vector. */
5372 bool
5373 vect_supportable_shift (enum tree_code code, tree scalar_type)
5376 machine_mode vec_mode;
5377 optab optab;
5378 int icode;
5379 tree vectype;
5381 vectype = get_vectype_for_scalar_type (scalar_type);
5382 if (!vectype)
5383 return false;
5385 optab = optab_for_tree_code (code, vectype, optab_scalar);
5386 if (!optab
5387 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5389 optab = optab_for_tree_code (code, vectype, optab_vector);
5390 if (!optab
5391 || (optab_handler (optab, TYPE_MODE (vectype))
5392 == CODE_FOR_nothing))
5393 return false;
5396 vec_mode = TYPE_MODE (vectype);
5397 icode = (int) optab_handler (optab, vec_mode);
5398 if (icode == CODE_FOR_nothing)
5399 return false;
5401 return true;
5405 /* Function vectorizable_shift.
5407 Check if STMT_INFO performs a shift operation that can be vectorized.
5408 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5409 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5410 Return true if STMT_INFO is vectorizable in this way. */
5412 static bool
5413 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5414 stmt_vec_info *vec_stmt, slp_tree slp_node,
5415 stmt_vector_for_cost *cost_vec)
5417 tree vec_dest;
5418 tree scalar_dest;
5419 tree op0, op1 = NULL;
5420 tree vec_oprnd1 = NULL_TREE;
5421 tree vectype;
5422 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5423 enum tree_code code;
5424 machine_mode vec_mode;
5425 tree new_temp;
5426 optab optab;
5427 int icode;
5428 machine_mode optab_op2_mode;
5429 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5430 int ndts = 2;
5431 stmt_vec_info prev_stmt_info;
5432 poly_uint64 nunits_in;
5433 poly_uint64 nunits_out;
5434 tree vectype_out;
5435 tree op1_vectype;
5436 int ncopies;
5437 int j, i;
5438 vec<tree> vec_oprnds0 = vNULL;
5439 vec<tree> vec_oprnds1 = vNULL;
5440 tree vop0, vop1;
5441 unsigned int k;
5442 bool scalar_shift_arg = true;
5443 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5444 vec_info *vinfo = stmt_info->vinfo;
5446 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5447 return false;
5449 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5450 && ! vec_stmt)
5451 return false;
5453 /* Is STMT a vectorizable binary/unary operation? */
5454 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5455 if (!stmt)
5456 return false;
5458 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5459 return false;
5461 code = gimple_assign_rhs_code (stmt);
5463 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5464 || code == RROTATE_EXPR))
5465 return false;
5467 scalar_dest = gimple_assign_lhs (stmt);
5468 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5469 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5471 if (dump_enabled_p ())
5472 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5473 "bit-precision shifts not supported.\n");
5474 return false;
5477 op0 = gimple_assign_rhs1 (stmt);
5478 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5480 if (dump_enabled_p ())
5481 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5482 "use not simple.\n");
5483 return false;
5485 /* If op0 is an external or constant def use a vector type with
5486 the same size as the output vector type. */
5487 if (!vectype)
5488 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5489 if (vec_stmt)
5490 gcc_assert (vectype);
5491 if (!vectype)
5493 if (dump_enabled_p ())
5494 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5495 "no vectype for scalar type\n");
5496 return false;
5499 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5500 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5501 if (maybe_ne (nunits_out, nunits_in))
5502 return false;
5504 op1 = gimple_assign_rhs2 (stmt);
5505 stmt_vec_info op1_def_stmt_info;
5506 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5507 &op1_def_stmt_info))
5509 if (dump_enabled_p ())
5510 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5511 "use not simple.\n");
5512 return false;
5515 /* Multiple types in SLP are handled by creating the appropriate number of
5516 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5517 case of SLP. */
5518 if (slp_node)
5519 ncopies = 1;
5520 else
5521 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5523 gcc_assert (ncopies >= 1);
5525 /* Determine whether the shift amount is a vector, or scalar. If the
5526 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5528 if ((dt[1] == vect_internal_def
5529 || dt[1] == vect_induction_def)
5530 && !slp_node)
5531 scalar_shift_arg = false;
5532 else if (dt[1] == vect_constant_def
5533 || dt[1] == vect_external_def
5534 || dt[1] == vect_internal_def)
5536 /* In SLP, need to check whether the shift count is the same,
5537 in loops if it is a constant or invariant, it is always
5538 a scalar shift. */
5539 if (slp_node)
5541 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5542 stmt_vec_info slpstmt_info;
5544 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5546 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5547 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5548 scalar_shift_arg = false;
5552 /* If the shift amount is computed by a pattern stmt we cannot
5553 use the scalar amount directly thus give up and use a vector
5554 shift. */
5555 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5556 scalar_shift_arg = false;
5558 else
5560 if (dump_enabled_p ())
5561 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5562 "operand mode requires invariant argument.\n");
5563 return false;
5566 /* Vector shifted by vector. */
5567 if (!scalar_shift_arg)
5569 optab = optab_for_tree_code (code, vectype, optab_vector);
5570 if (dump_enabled_p ())
5571 dump_printf_loc (MSG_NOTE, vect_location,
5572 "vector/vector shift/rotate found.\n");
5574 if (!op1_vectype)
5575 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5576 if (op1_vectype == NULL_TREE
5577 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5579 if (dump_enabled_p ())
5580 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5581 "unusable type for last operand in"
5582 " vector/vector shift/rotate.\n");
5583 return false;
5586 /* See if the machine has a vector shifted by scalar insn and if not
5587 then see if it has a vector shifted by vector insn. */
5588 else
5590 optab = optab_for_tree_code (code, vectype, optab_scalar);
5591 if (optab
5592 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5594 if (dump_enabled_p ())
5595 dump_printf_loc (MSG_NOTE, vect_location,
5596 "vector/scalar shift/rotate found.\n");
5598 else
5600 optab = optab_for_tree_code (code, vectype, optab_vector);
5601 if (optab
5602 && (optab_handler (optab, TYPE_MODE (vectype))
5603 != CODE_FOR_nothing))
5605 scalar_shift_arg = false;
5607 if (dump_enabled_p ())
5608 dump_printf_loc (MSG_NOTE, vect_location,
5609 "vector/vector shift/rotate found.\n");
5611 /* Unlike the other binary operators, shifts/rotates have
5612 the rhs being int, instead of the same type as the lhs,
5613 so make sure the scalar is the right type if we are
5614 dealing with vectors of long long/long/short/char. */
5615 if (dt[1] == vect_constant_def)
5616 op1 = fold_convert (TREE_TYPE (vectype), op1);
5617 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5618 TREE_TYPE (op1)))
5620 if (slp_node
5621 && TYPE_MODE (TREE_TYPE (vectype))
5622 != TYPE_MODE (TREE_TYPE (op1)))
5624 if (dump_enabled_p ())
5625 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5626 "unusable type for last operand in"
5627 " vector/vector shift/rotate.\n");
5628 return false;
5630 if (vec_stmt && !slp_node)
5632 op1 = fold_convert (TREE_TYPE (vectype), op1);
5633 op1 = vect_init_vector (stmt_info, op1,
5634 TREE_TYPE (vectype), NULL);
5641 /* Supportable by target? */
5642 if (!optab)
5644 if (dump_enabled_p ())
5645 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5646 "no optab.\n");
5647 return false;
5649 vec_mode = TYPE_MODE (vectype);
5650 icode = (int) optab_handler (optab, vec_mode);
5651 if (icode == CODE_FOR_nothing)
5653 if (dump_enabled_p ())
5654 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5655 "op not supported by target.\n");
5656 /* Check only during analysis. */
5657 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5658 || (!vec_stmt
5659 && !vect_worthwhile_without_simd_p (vinfo, code)))
5660 return false;
5661 if (dump_enabled_p ())
5662 dump_printf_loc (MSG_NOTE, vect_location,
5663 "proceeding using word mode.\n");
5666 /* Worthwhile without SIMD support? Check only during analysis. */
5667 if (!vec_stmt
5668 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5669 && !vect_worthwhile_without_simd_p (vinfo, code))
5671 if (dump_enabled_p ())
5672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5673 "not worthwhile without SIMD support.\n");
5674 return false;
5677 if (!vec_stmt) /* transformation not required. */
5679 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5680 DUMP_VECT_SCOPE ("vectorizable_shift");
5681 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5682 return true;
5685 /* Transform. */
5687 if (dump_enabled_p ())
5688 dump_printf_loc (MSG_NOTE, vect_location,
5689 "transform binary/unary operation.\n");
5691 /* Handle def. */
5692 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5694 prev_stmt_info = NULL;
5695 for (j = 0; j < ncopies; j++)
5697 /* Handle uses. */
5698 if (j == 0)
5700 if (scalar_shift_arg)
5702 /* Vector shl and shr insn patterns can be defined with scalar
5703 operand 2 (shift operand). In this case, use constant or loop
5704 invariant op1 directly, without extending it to vector mode
5705 first. */
5706 optab_op2_mode = insn_data[icode].operand[2].mode;
5707 if (!VECTOR_MODE_P (optab_op2_mode))
5709 if (dump_enabled_p ())
5710 dump_printf_loc (MSG_NOTE, vect_location,
5711 "operand 1 using scalar mode.\n");
5712 vec_oprnd1 = op1;
5713 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5714 vec_oprnds1.quick_push (vec_oprnd1);
5715 if (slp_node)
5717 /* Store vec_oprnd1 for every vector stmt to be created
5718 for SLP_NODE. We check during the analysis that all
5719 the shift arguments are the same.
5720 TODO: Allow different constants for different vector
5721 stmts generated for an SLP instance. */
5722 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5723 vec_oprnds1.quick_push (vec_oprnd1);
5728 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5729 (a special case for certain kind of vector shifts); otherwise,
5730 operand 1 should be of a vector type (the usual case). */
5731 if (vec_oprnd1)
5732 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5733 slp_node);
5734 else
5735 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5736 slp_node);
5738 else
5739 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5741 /* Arguments are ready. Create the new vector stmt. */
5742 stmt_vec_info new_stmt_info = NULL;
5743 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5745 vop1 = vec_oprnds1[i];
5746 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5747 new_temp = make_ssa_name (vec_dest, new_stmt);
5748 gimple_assign_set_lhs (new_stmt, new_temp);
5749 new_stmt_info
5750 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5751 if (slp_node)
5752 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5755 if (slp_node)
5756 continue;
5758 if (j == 0)
5759 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5760 else
5761 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5762 prev_stmt_info = new_stmt_info;
5765 vec_oprnds0.release ();
5766 vec_oprnds1.release ();
5768 return true;
5772 /* Function vectorizable_operation.
5774 Check if STMT_INFO performs a binary, unary or ternary operation that can
5775 be vectorized.
5776 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5777 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5778 Return true if STMT_INFO is vectorizable in this way. */
5780 static bool
5781 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5782 stmt_vec_info *vec_stmt, slp_tree slp_node,
5783 stmt_vector_for_cost *cost_vec)
5785 tree vec_dest;
5786 tree scalar_dest;
5787 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5788 tree vectype;
5789 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5790 enum tree_code code, orig_code;
5791 machine_mode vec_mode;
5792 tree new_temp;
5793 int op_type;
5794 optab optab;
5795 bool target_support_p;
5796 enum vect_def_type dt[3]
5797 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5798 int ndts = 3;
5799 stmt_vec_info prev_stmt_info;
5800 poly_uint64 nunits_in;
5801 poly_uint64 nunits_out;
5802 tree vectype_out;
5803 int ncopies;
5804 int j, i;
5805 vec<tree> vec_oprnds0 = vNULL;
5806 vec<tree> vec_oprnds1 = vNULL;
5807 vec<tree> vec_oprnds2 = vNULL;
5808 tree vop0, vop1, vop2;
5809 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5810 vec_info *vinfo = stmt_info->vinfo;
5812 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5813 return false;
5815 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5816 && ! vec_stmt)
5817 return false;
5819 /* Is STMT a vectorizable binary/unary operation? */
5820 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5821 if (!stmt)
5822 return false;
5824 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5825 return false;
5827 orig_code = code = gimple_assign_rhs_code (stmt);
5829 /* For pointer addition and subtraction, we should use the normal
5830 plus and minus for the vector operation. */
5831 if (code == POINTER_PLUS_EXPR)
5832 code = PLUS_EXPR;
5833 if (code == POINTER_DIFF_EXPR)
5834 code = MINUS_EXPR;
5836 /* Support only unary or binary operations. */
5837 op_type = TREE_CODE_LENGTH (code);
5838 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5840 if (dump_enabled_p ())
5841 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5842 "num. args = %d (not unary/binary/ternary op).\n",
5843 op_type);
5844 return false;
5847 scalar_dest = gimple_assign_lhs (stmt);
5848 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5850 /* Most operations cannot handle bit-precision types without extra
5851 truncations. */
5852 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5853 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5854 /* Exception are bitwise binary operations. */
5855 && code != BIT_IOR_EXPR
5856 && code != BIT_XOR_EXPR
5857 && code != BIT_AND_EXPR)
5859 if (dump_enabled_p ())
5860 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5861 "bit-precision arithmetic not supported.\n");
5862 return false;
5865 op0 = gimple_assign_rhs1 (stmt);
5866 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5868 if (dump_enabled_p ())
5869 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5870 "use not simple.\n");
5871 return false;
5873 /* If op0 is an external or constant def use a vector type with
5874 the same size as the output vector type. */
5875 if (!vectype)
5877 /* For boolean type we cannot determine vectype by
5878 invariant value (don't know whether it is a vector
5879 of booleans or vector of integers). We use output
5880 vectype because operations on boolean don't change
5881 type. */
5882 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5884 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5886 if (dump_enabled_p ())
5887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5888 "not supported operation on bool value.\n");
5889 return false;
5891 vectype = vectype_out;
5893 else
5894 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5896 if (vec_stmt)
5897 gcc_assert (vectype);
5898 if (!vectype)
5900 if (dump_enabled_p ())
5902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5903 "no vectype for scalar type ");
5904 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5905 TREE_TYPE (op0));
5906 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5909 return false;
5912 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5913 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5914 if (maybe_ne (nunits_out, nunits_in))
5915 return false;
5917 if (op_type == binary_op || op_type == ternary_op)
5919 op1 = gimple_assign_rhs2 (stmt);
5920 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
5922 if (dump_enabled_p ())
5923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5924 "use not simple.\n");
5925 return false;
5928 if (op_type == ternary_op)
5930 op2 = gimple_assign_rhs3 (stmt);
5931 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
5933 if (dump_enabled_p ())
5934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5935 "use not simple.\n");
5936 return false;
5940 /* Multiple types in SLP are handled by creating the appropriate number of
5941 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5942 case of SLP. */
5943 if (slp_node)
5944 ncopies = 1;
5945 else
5946 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5948 gcc_assert (ncopies >= 1);
5950 /* Shifts are handled in vectorizable_shift (). */
5951 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5952 || code == RROTATE_EXPR)
5953 return false;
5955 /* Supportable by target? */
5957 vec_mode = TYPE_MODE (vectype);
5958 if (code == MULT_HIGHPART_EXPR)
5959 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5960 else
5962 optab = optab_for_tree_code (code, vectype, optab_default);
5963 if (!optab)
5965 if (dump_enabled_p ())
5966 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5967 "no optab.\n");
5968 return false;
5970 target_support_p = (optab_handler (optab, vec_mode)
5971 != CODE_FOR_nothing);
5974 if (!target_support_p)
5976 if (dump_enabled_p ())
5977 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5978 "op not supported by target.\n");
5979 /* Check only during analysis. */
5980 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5981 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5982 return false;
5983 if (dump_enabled_p ())
5984 dump_printf_loc (MSG_NOTE, vect_location,
5985 "proceeding using word mode.\n");
5988 /* Worthwhile without SIMD support? Check only during analysis. */
5989 if (!VECTOR_MODE_P (vec_mode)
5990 && !vec_stmt
5991 && !vect_worthwhile_without_simd_p (vinfo, code))
5993 if (dump_enabled_p ())
5994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5995 "not worthwhile without SIMD support.\n");
5996 return false;
5999 if (!vec_stmt) /* transformation not required. */
6001 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6002 DUMP_VECT_SCOPE ("vectorizable_operation");
6003 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
6004 return true;
6007 /* Transform. */
6009 if (dump_enabled_p ())
6010 dump_printf_loc (MSG_NOTE, vect_location,
6011 "transform binary/unary operation.\n");
6013 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6014 vectors with unsigned elements, but the result is signed. So, we
6015 need to compute the MINUS_EXPR into vectype temporary and
6016 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6017 tree vec_cvt_dest = NULL_TREE;
6018 if (orig_code == POINTER_DIFF_EXPR)
6020 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6021 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6023 /* Handle def. */
6024 else
6025 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6027 /* In case the vectorization factor (VF) is bigger than the number
6028 of elements that we can fit in a vectype (nunits), we have to generate
6029 more than one vector stmt - i.e - we need to "unroll" the
6030 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6031 from one copy of the vector stmt to the next, in the field
6032 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6033 stages to find the correct vector defs to be used when vectorizing
6034 stmts that use the defs of the current stmt. The example below
6035 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6036 we need to create 4 vectorized stmts):
6038 before vectorization:
6039 RELATED_STMT VEC_STMT
6040 S1: x = memref - -
6041 S2: z = x + 1 - -
6043 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6044 there):
6045 RELATED_STMT VEC_STMT
6046 VS1_0: vx0 = memref0 VS1_1 -
6047 VS1_1: vx1 = memref1 VS1_2 -
6048 VS1_2: vx2 = memref2 VS1_3 -
6049 VS1_3: vx3 = memref3 - -
6050 S1: x = load - VS1_0
6051 S2: z = x + 1 - -
6053 step2: vectorize stmt S2 (done here):
6054 To vectorize stmt S2 we first need to find the relevant vector
6055 def for the first operand 'x'. This is, as usual, obtained from
6056 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6057 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6058 relevant vector def 'vx0'. Having found 'vx0' we can generate
6059 the vector stmt VS2_0, and as usual, record it in the
6060 STMT_VINFO_VEC_STMT of stmt S2.
6061 When creating the second copy (VS2_1), we obtain the relevant vector
6062 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6063 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6064 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6065 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6066 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6067 chain of stmts and pointers:
6068 RELATED_STMT VEC_STMT
6069 VS1_0: vx0 = memref0 VS1_1 -
6070 VS1_1: vx1 = memref1 VS1_2 -
6071 VS1_2: vx2 = memref2 VS1_3 -
6072 VS1_3: vx3 = memref3 - -
6073 S1: x = load - VS1_0
6074 VS2_0: vz0 = vx0 + v1 VS2_1 -
6075 VS2_1: vz1 = vx1 + v1 VS2_2 -
6076 VS2_2: vz2 = vx2 + v1 VS2_3 -
6077 VS2_3: vz3 = vx3 + v1 - -
6078 S2: z = x + 1 - VS2_0 */
6080 prev_stmt_info = NULL;
6081 for (j = 0; j < ncopies; j++)
6083 /* Handle uses. */
6084 if (j == 0)
6086 if (op_type == binary_op)
6087 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6088 slp_node);
6089 else if (op_type == ternary_op)
6091 if (slp_node)
6093 auto_vec<tree> ops(3);
6094 ops.quick_push (op0);
6095 ops.quick_push (op1);
6096 ops.quick_push (op2);
6097 auto_vec<vec<tree> > vec_defs(3);
6098 vect_get_slp_defs (ops, slp_node, &vec_defs);
6099 vec_oprnds0 = vec_defs[0];
6100 vec_oprnds1 = vec_defs[1];
6101 vec_oprnds2 = vec_defs[2];
6103 else
6105 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6106 &vec_oprnds1, NULL);
6107 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6108 NULL, NULL);
6111 else
6112 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6113 slp_node);
6115 else
6117 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6118 if (op_type == ternary_op)
6120 tree vec_oprnd = vec_oprnds2.pop ();
6121 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6122 vec_oprnd));
6126 /* Arguments are ready. Create the new vector stmt. */
6127 stmt_vec_info new_stmt_info = NULL;
6128 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6130 vop1 = ((op_type == binary_op || op_type == ternary_op)
6131 ? vec_oprnds1[i] : NULL_TREE);
6132 vop2 = ((op_type == ternary_op)
6133 ? vec_oprnds2[i] : NULL_TREE);
6134 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6135 vop0, vop1, vop2);
6136 new_temp = make_ssa_name (vec_dest, new_stmt);
6137 gimple_assign_set_lhs (new_stmt, new_temp);
6138 new_stmt_info
6139 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6140 if (vec_cvt_dest)
6142 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6143 gassign *new_stmt
6144 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6145 new_temp);
6146 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6147 gimple_assign_set_lhs (new_stmt, new_temp);
6148 new_stmt_info
6149 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6151 if (slp_node)
6152 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6155 if (slp_node)
6156 continue;
6158 if (j == 0)
6159 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6160 else
6161 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6162 prev_stmt_info = new_stmt_info;
6165 vec_oprnds0.release ();
6166 vec_oprnds1.release ();
6167 vec_oprnds2.release ();
6169 return true;
6172 /* A helper function to ensure data reference DR's base alignment. */
6174 static void
6175 ensure_base_align (struct data_reference *dr)
6177 if (DR_VECT_AUX (dr)->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6178 return;
6180 if (DR_VECT_AUX (dr)->base_misaligned)
6182 tree base_decl = DR_VECT_AUX (dr)->base_decl;
6184 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6186 if (decl_in_symtab_p (base_decl))
6187 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6188 else
6190 SET_DECL_ALIGN (base_decl, align_base_to);
6191 DECL_USER_ALIGN (base_decl) = 1;
6193 DR_VECT_AUX (dr)->base_misaligned = false;
6198 /* Function get_group_alias_ptr_type.
6200 Return the alias type for the group starting at FIRST_STMT_INFO. */
6202 static tree
6203 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6205 struct data_reference *first_dr, *next_dr;
6207 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6208 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6209 while (next_stmt_info)
6211 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6212 if (get_alias_set (DR_REF (first_dr))
6213 != get_alias_set (DR_REF (next_dr)))
6215 if (dump_enabled_p ())
6216 dump_printf_loc (MSG_NOTE, vect_location,
6217 "conflicting alias set types.\n");
6218 return ptr_type_node;
6220 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6222 return reference_alias_ptr_type (DR_REF (first_dr));
6226 /* Function vectorizable_store.
6228 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
6229 that can be vectorized.
6230 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6231 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6232 Return true if STMT_INFO is vectorizable in this way. */
6234 static bool
6235 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6236 stmt_vec_info *vec_stmt, slp_tree slp_node,
6237 stmt_vector_for_cost *cost_vec)
6239 tree data_ref;
6240 tree op;
6241 tree vec_oprnd = NULL_TREE;
6242 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6243 tree elem_type;
6244 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6245 struct loop *loop = NULL;
6246 machine_mode vec_mode;
6247 tree dummy;
6248 enum dr_alignment_support alignment_support_scheme;
6249 enum vect_def_type rhs_dt = vect_unknown_def_type;
6250 enum vect_def_type mask_dt = vect_unknown_def_type;
6251 stmt_vec_info prev_stmt_info = NULL;
6252 tree dataref_ptr = NULL_TREE;
6253 tree dataref_offset = NULL_TREE;
6254 gimple *ptr_incr = NULL;
6255 int ncopies;
6256 int j;
6257 stmt_vec_info first_stmt_info;
6258 bool grouped_store;
6259 unsigned int group_size, i;
6260 vec<tree> oprnds = vNULL;
6261 vec<tree> result_chain = vNULL;
6262 bool inv_p;
6263 tree offset = NULL_TREE;
6264 vec<tree> vec_oprnds = vNULL;
6265 bool slp = (slp_node != NULL);
6266 unsigned int vec_num;
6267 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6268 vec_info *vinfo = stmt_info->vinfo;
6269 tree aggr_type;
6270 gather_scatter_info gs_info;
6271 poly_uint64 vf;
6272 vec_load_store_type vls_type;
6273 tree ref_type;
6275 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6276 return false;
6278 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6279 && ! vec_stmt)
6280 return false;
6282 /* Is vectorizable store? */
6284 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6285 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
6287 tree scalar_dest = gimple_assign_lhs (assign);
6288 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6289 && is_pattern_stmt_p (stmt_info))
6290 scalar_dest = TREE_OPERAND (scalar_dest, 0);
6291 if (TREE_CODE (scalar_dest) != ARRAY_REF
6292 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6293 && TREE_CODE (scalar_dest) != INDIRECT_REF
6294 && TREE_CODE (scalar_dest) != COMPONENT_REF
6295 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6296 && TREE_CODE (scalar_dest) != REALPART_EXPR
6297 && TREE_CODE (scalar_dest) != MEM_REF)
6298 return false;
6300 else
6302 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
6303 if (!call || !gimple_call_internal_p (call))
6304 return false;
6306 internal_fn ifn = gimple_call_internal_fn (call);
6307 if (!internal_store_fn_p (ifn))
6308 return false;
6310 if (slp_node != NULL)
6312 if (dump_enabled_p ())
6313 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6314 "SLP of masked stores not supported.\n");
6315 return false;
6318 int mask_index = internal_fn_mask_index (ifn);
6319 if (mask_index >= 0)
6321 mask = gimple_call_arg (call, mask_index);
6322 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
6323 &mask_vectype))
6324 return false;
6328 op = vect_get_store_rhs (stmt_info);
6330 /* Cannot have hybrid store SLP -- that would mean storing to the
6331 same location twice. */
6332 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6334 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6335 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6337 if (loop_vinfo)
6339 loop = LOOP_VINFO_LOOP (loop_vinfo);
6340 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6342 else
6343 vf = 1;
6345 /* Multiple types in SLP are handled by creating the appropriate number of
6346 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6347 case of SLP. */
6348 if (slp)
6349 ncopies = 1;
6350 else
6351 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6353 gcc_assert (ncopies >= 1);
6355 /* FORNOW. This restriction should be relaxed. */
6356 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
6358 if (dump_enabled_p ())
6359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6360 "multiple types in nested loop.\n");
6361 return false;
6364 if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
6365 return false;
6367 elem_type = TREE_TYPE (vectype);
6368 vec_mode = TYPE_MODE (vectype);
6370 if (!STMT_VINFO_DATA_REF (stmt_info))
6371 return false;
6373 vect_memory_access_type memory_access_type;
6374 if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
6375 &memory_access_type, &gs_info))
6376 return false;
6378 if (mask)
6380 if (memory_access_type == VMAT_CONTIGUOUS)
6382 if (!VECTOR_MODE_P (vec_mode)
6383 || !can_vec_mask_load_store_p (vec_mode,
6384 TYPE_MODE (mask_vectype), false))
6385 return false;
6387 else if (memory_access_type != VMAT_LOAD_STORE_LANES
6388 && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
6390 if (dump_enabled_p ())
6391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6392 "unsupported access type for masked store.\n");
6393 return false;
6396 else
6398 /* FORNOW. In some cases can vectorize even if data-type not supported
6399 (e.g. - array initialization with 0). */
6400 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6401 return false;
6404 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6405 && memory_access_type != VMAT_GATHER_SCATTER
6406 && (slp || memory_access_type != VMAT_CONTIGUOUS));
6407 if (grouped_store)
6409 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
6410 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6411 group_size = DR_GROUP_SIZE (first_stmt_info);
6413 else
6415 first_stmt_info = stmt_info;
6416 first_dr = dr;
6417 group_size = vec_num = 1;
6420 if (!vec_stmt) /* transformation not required. */
6422 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6424 if (loop_vinfo
6425 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6426 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6427 memory_access_type, &gs_info);
6429 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6430 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
6431 vls_type, slp_node, cost_vec);
6432 return true;
6434 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6436 /* Transform. */
6438 ensure_base_align (dr);
6440 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6442 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6443 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6444 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6445 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6446 edge pe = loop_preheader_edge (loop);
6447 gimple_seq seq;
6448 basic_block new_bb;
6449 enum { NARROW, NONE, WIDEN } modifier;
6450 poly_uint64 scatter_off_nunits
6451 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6453 if (known_eq (nunits, scatter_off_nunits))
6454 modifier = NONE;
6455 else if (known_eq (nunits * 2, scatter_off_nunits))
6457 modifier = WIDEN;
6459 /* Currently gathers and scatters are only supported for
6460 fixed-length vectors. */
6461 unsigned int count = scatter_off_nunits.to_constant ();
6462 vec_perm_builder sel (count, count, 1);
6463 for (i = 0; i < (unsigned int) count; ++i)
6464 sel.quick_push (i | (count / 2));
6466 vec_perm_indices indices (sel, 1, count);
6467 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6468 indices);
6469 gcc_assert (perm_mask != NULL_TREE);
6471 else if (known_eq (nunits, scatter_off_nunits * 2))
6473 modifier = NARROW;
6475 /* Currently gathers and scatters are only supported for
6476 fixed-length vectors. */
6477 unsigned int count = nunits.to_constant ();
6478 vec_perm_builder sel (count, count, 1);
6479 for (i = 0; i < (unsigned int) count; ++i)
6480 sel.quick_push (i | (count / 2));
6482 vec_perm_indices indices (sel, 2, count);
6483 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6484 gcc_assert (perm_mask != NULL_TREE);
6485 ncopies *= 2;
6487 else
6488 gcc_unreachable ();
6490 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6491 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6492 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6493 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6494 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6495 scaletype = TREE_VALUE (arglist);
6497 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6498 && TREE_CODE (rettype) == VOID_TYPE);
6500 ptr = fold_convert (ptrtype, gs_info.base);
6501 if (!is_gimple_min_invariant (ptr))
6503 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6504 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6505 gcc_assert (!new_bb);
6508 /* Currently we support only unconditional scatter stores,
6509 so mask should be all ones. */
6510 mask = build_int_cst (masktype, -1);
6511 mask = vect_init_vector (stmt_info, mask, masktype, NULL);
6513 scale = build_int_cst (scaletype, gs_info.scale);
6515 prev_stmt_info = NULL;
6516 for (j = 0; j < ncopies; ++j)
6518 if (j == 0)
6520 src = vec_oprnd1
6521 = vect_get_vec_def_for_operand (op, stmt_info);
6522 op = vec_oprnd0
6523 = vect_get_vec_def_for_operand (gs_info.offset, stmt_info);
6525 else if (modifier != NONE && (j & 1))
6527 if (modifier == WIDEN)
6529 src = vec_oprnd1
6530 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
6531 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6532 stmt_info, gsi);
6534 else if (modifier == NARROW)
6536 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6537 stmt_info, gsi);
6538 op = vec_oprnd0
6539 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
6541 else
6542 gcc_unreachable ();
6544 else
6546 src = vec_oprnd1
6547 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
6548 op = vec_oprnd0
6549 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
6552 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6554 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6555 TYPE_VECTOR_SUBPARTS (srctype)));
6556 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6557 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6558 gassign *new_stmt
6559 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6560 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6561 src = var;
6564 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6566 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6567 TYPE_VECTOR_SUBPARTS (idxtype)));
6568 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6569 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6570 gassign *new_stmt
6571 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6572 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6573 op = var;
6576 gcall *new_stmt
6577 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
6578 stmt_vec_info new_stmt_info
6579 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6581 if (prev_stmt_info == NULL_STMT_VEC_INFO)
6582 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6583 else
6584 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6585 prev_stmt_info = new_stmt_info;
6587 return true;
6590 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6591 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
6593 if (grouped_store)
6595 /* FORNOW */
6596 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
6598 /* We vectorize all the stmts of the interleaving group when we
6599 reach the last stmt in the group. */
6600 if (DR_GROUP_STORE_COUNT (first_stmt_info)
6601 < DR_GROUP_SIZE (first_stmt_info)
6602 && !slp)
6604 *vec_stmt = NULL;
6605 return true;
6608 if (slp)
6610 grouped_store = false;
6611 /* VEC_NUM is the number of vect stmts to be created for this
6612 group. */
6613 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6614 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6615 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
6616 == first_stmt_info);
6617 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6618 op = vect_get_store_rhs (first_stmt_info);
6620 else
6621 /* VEC_NUM is the number of vect stmts to be created for this
6622 group. */
6623 vec_num = group_size;
6625 ref_type = get_group_alias_ptr_type (first_stmt_info);
6627 else
6628 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6630 if (dump_enabled_p ())
6631 dump_printf_loc (MSG_NOTE, vect_location,
6632 "transform store. ncopies = %d\n", ncopies);
6634 if (memory_access_type == VMAT_ELEMENTWISE
6635 || memory_access_type == VMAT_STRIDED_SLP)
6637 gimple_stmt_iterator incr_gsi;
6638 bool insert_after;
6639 gimple *incr;
6640 tree offvar;
6641 tree ivstep;
6642 tree running_off;
6643 tree stride_base, stride_step, alias_off;
6644 tree vec_oprnd;
6645 unsigned int g;
6646 /* Checked by get_load_store_type. */
6647 unsigned int const_nunits = nunits.to_constant ();
6649 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6650 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
6652 stride_base
6653 = fold_build_pointer_plus
6654 (DR_BASE_ADDRESS (first_dr),
6655 size_binop (PLUS_EXPR,
6656 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6657 convert_to_ptrofftype (DR_INIT (first_dr))));
6658 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6660 /* For a store with loop-invariant (but other than power-of-2)
6661 stride (i.e. not a grouped access) like so:
6663 for (i = 0; i < n; i += stride)
6664 array[i] = ...;
6666 we generate a new induction variable and new stores from
6667 the components of the (vectorized) rhs:
6669 for (j = 0; ; j += VF*stride)
6670 vectemp = ...;
6671 tmp1 = vectemp[0];
6672 array[j] = tmp1;
6673 tmp2 = vectemp[1];
6674 array[j + stride] = tmp2;
6678 unsigned nstores = const_nunits;
6679 unsigned lnel = 1;
6680 tree ltype = elem_type;
6681 tree lvectype = vectype;
6682 if (slp)
6684 if (group_size < const_nunits
6685 && const_nunits % group_size == 0)
6687 nstores = const_nunits / group_size;
6688 lnel = group_size;
6689 ltype = build_vector_type (elem_type, group_size);
6690 lvectype = vectype;
6692 /* First check if vec_extract optab doesn't support extraction
6693 of vector elts directly. */
6694 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6695 machine_mode vmode;
6696 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6697 || !VECTOR_MODE_P (vmode)
6698 || !targetm.vector_mode_supported_p (vmode)
6699 || (convert_optab_handler (vec_extract_optab,
6700 TYPE_MODE (vectype), vmode)
6701 == CODE_FOR_nothing))
6703 /* Try to avoid emitting an extract of vector elements
6704 by performing the extracts using an integer type of the
6705 same size, extracting from a vector of those and then
6706 re-interpreting it as the original vector type if
6707 supported. */
6708 unsigned lsize
6709 = group_size * GET_MODE_BITSIZE (elmode);
6710 elmode = int_mode_for_size (lsize, 0).require ();
6711 unsigned int lnunits = const_nunits / group_size;
6712 /* If we can't construct such a vector fall back to
6713 element extracts from the original vector type and
6714 element size stores. */
6715 if (mode_for_vector (elmode, lnunits).exists (&vmode)
6716 && VECTOR_MODE_P (vmode)
6717 && targetm.vector_mode_supported_p (vmode)
6718 && (convert_optab_handler (vec_extract_optab,
6719 vmode, elmode)
6720 != CODE_FOR_nothing))
6722 nstores = lnunits;
6723 lnel = group_size;
6724 ltype = build_nonstandard_integer_type (lsize, 1);
6725 lvectype = build_vector_type (ltype, nstores);
6727 /* Else fall back to vector extraction anyway.
6728 Fewer stores are more important than avoiding spilling
6729 of the vector we extract from. Compared to the
6730 construction case in vectorizable_load no store-forwarding
6731 issue exists here for reasonable archs. */
6734 else if (group_size >= const_nunits
6735 && group_size % const_nunits == 0)
6737 nstores = 1;
6738 lnel = const_nunits;
6739 ltype = vectype;
6740 lvectype = vectype;
6742 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6743 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6746 ivstep = stride_step;
6747 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6748 build_int_cst (TREE_TYPE (ivstep), vf));
6750 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6752 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6753 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6754 create_iv (stride_base, ivstep, NULL,
6755 loop, &incr_gsi, insert_after,
6756 &offvar, NULL);
6757 incr = gsi_stmt (incr_gsi);
6758 loop_vinfo->add_stmt (incr);
6760 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6762 prev_stmt_info = NULL;
6763 alias_off = build_int_cst (ref_type, 0);
6764 stmt_vec_info next_stmt_info = first_stmt_info;
6765 for (g = 0; g < group_size; g++)
6767 running_off = offvar;
6768 if (g)
6770 tree size = TYPE_SIZE_UNIT (ltype);
6771 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6772 size);
6773 tree newoff = copy_ssa_name (running_off, NULL);
6774 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6775 running_off, pos);
6776 vect_finish_stmt_generation (stmt_info, incr, gsi);
6777 running_off = newoff;
6779 unsigned int group_el = 0;
6780 unsigned HOST_WIDE_INT
6781 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6782 for (j = 0; j < ncopies; j++)
6784 /* We've set op and dt above, from vect_get_store_rhs,
6785 and first_stmt_info == stmt_info. */
6786 if (j == 0)
6788 if (slp)
6790 vect_get_vec_defs (op, NULL_TREE, stmt_info,
6791 &vec_oprnds, NULL, slp_node);
6792 vec_oprnd = vec_oprnds[0];
6794 else
6796 op = vect_get_store_rhs (next_stmt_info);
6797 vec_oprnd = vect_get_vec_def_for_operand
6798 (op, next_stmt_info);
6801 else
6803 if (slp)
6804 vec_oprnd = vec_oprnds[j];
6805 else
6806 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
6807 vec_oprnd);
6809 /* Pun the vector to extract from if necessary. */
6810 if (lvectype != vectype)
6812 tree tem = make_ssa_name (lvectype);
6813 gimple *pun
6814 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6815 lvectype, vec_oprnd));
6816 vect_finish_stmt_generation (stmt_info, pun, gsi);
6817 vec_oprnd = tem;
6819 for (i = 0; i < nstores; i++)
6821 tree newref, newoff;
6822 gimple *incr, *assign;
6823 tree size = TYPE_SIZE (ltype);
6824 /* Extract the i'th component. */
6825 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6826 bitsize_int (i), size);
6827 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6828 size, pos);
6830 elem = force_gimple_operand_gsi (gsi, elem, true,
6831 NULL_TREE, true,
6832 GSI_SAME_STMT);
6834 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6835 group_el * elsz);
6836 newref = build2 (MEM_REF, ltype,
6837 running_off, this_off);
6838 vect_copy_ref_info (newref, DR_REF (first_dr));
6840 /* And store it to *running_off. */
6841 assign = gimple_build_assign (newref, elem);
6842 stmt_vec_info assign_info
6843 = vect_finish_stmt_generation (stmt_info, assign, gsi);
6845 group_el += lnel;
6846 if (! slp
6847 || group_el == group_size)
6849 newoff = copy_ssa_name (running_off, NULL);
6850 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6851 running_off, stride_step);
6852 vect_finish_stmt_generation (stmt_info, incr, gsi);
6854 running_off = newoff;
6855 group_el = 0;
6857 if (g == group_size - 1
6858 && !slp)
6860 if (j == 0 && i == 0)
6861 STMT_VINFO_VEC_STMT (stmt_info)
6862 = *vec_stmt = assign_info;
6863 else
6864 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
6865 prev_stmt_info = assign_info;
6869 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6870 if (slp)
6871 break;
6874 vec_oprnds.release ();
6875 return true;
6878 auto_vec<tree> dr_chain (group_size);
6879 oprnds.create (group_size);
6881 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6882 gcc_assert (alignment_support_scheme);
6883 vec_loop_masks *loop_masks
6884 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6885 ? &LOOP_VINFO_MASKS (loop_vinfo)
6886 : NULL);
6887 /* Targets with store-lane instructions must not require explicit
6888 realignment. vect_supportable_dr_alignment always returns either
6889 dr_aligned or dr_unaligned_supported for masked operations. */
6890 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6891 && !mask
6892 && !loop_masks)
6893 || alignment_support_scheme == dr_aligned
6894 || alignment_support_scheme == dr_unaligned_supported);
6896 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6897 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6898 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6900 tree bump;
6901 tree vec_offset = NULL_TREE;
6902 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6904 aggr_type = NULL_TREE;
6905 bump = NULL_TREE;
6907 else if (memory_access_type == VMAT_GATHER_SCATTER)
6909 aggr_type = elem_type;
6910 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
6911 &bump, &vec_offset);
6913 else
6915 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6916 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6917 else
6918 aggr_type = vectype;
6919 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6922 if (mask)
6923 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6925 /* In case the vectorization factor (VF) is bigger than the number
6926 of elements that we can fit in a vectype (nunits), we have to generate
6927 more than one vector stmt - i.e - we need to "unroll" the
6928 vector stmt by a factor VF/nunits. For more details see documentation in
6929 vect_get_vec_def_for_copy_stmt. */
6931 /* In case of interleaving (non-unit grouped access):
6933 S1: &base + 2 = x2
6934 S2: &base = x0
6935 S3: &base + 1 = x1
6936 S4: &base + 3 = x3
6938 We create vectorized stores starting from base address (the access of the
6939 first stmt in the chain (S2 in the above example), when the last store stmt
6940 of the chain (S4) is reached:
6942 VS1: &base = vx2
6943 VS2: &base + vec_size*1 = vx0
6944 VS3: &base + vec_size*2 = vx1
6945 VS4: &base + vec_size*3 = vx3
6947 Then permutation statements are generated:
6949 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6950 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6953 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6954 (the order of the data-refs in the output of vect_permute_store_chain
6955 corresponds to the order of scalar stmts in the interleaving chain - see
6956 the documentation of vect_permute_store_chain()).
6958 In case of both multiple types and interleaving, above vector stores and
6959 permutation stmts are created for every copy. The result vector stmts are
6960 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6961 STMT_VINFO_RELATED_STMT for the next copies.
6964 prev_stmt_info = NULL;
6965 tree vec_mask = NULL_TREE;
6966 for (j = 0; j < ncopies; j++)
6968 stmt_vec_info new_stmt_info;
6969 if (j == 0)
6971 if (slp)
6973 /* Get vectorized arguments for SLP_NODE. */
6974 vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
6975 NULL, slp_node);
6977 vec_oprnd = vec_oprnds[0];
6979 else
6981 /* For interleaved stores we collect vectorized defs for all the
6982 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6983 used as an input to vect_permute_store_chain(), and OPRNDS as
6984 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6986 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6987 OPRNDS are of size 1. */
6988 stmt_vec_info next_stmt_info = first_stmt_info;
6989 for (i = 0; i < group_size; i++)
6991 /* Since gaps are not supported for interleaved stores,
6992 DR_GROUP_SIZE is the exact number of stmts in the chain.
6993 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
6994 that there is no interleaving, DR_GROUP_SIZE is 1,
6995 and only one iteration of the loop will be executed. */
6996 op = vect_get_store_rhs (next_stmt_info);
6997 vec_oprnd = vect_get_vec_def_for_operand
6998 (op, next_stmt_info);
6999 dr_chain.quick_push (vec_oprnd);
7000 oprnds.quick_push (vec_oprnd);
7001 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7003 if (mask)
7004 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
7005 mask_vectype);
7008 /* We should have catched mismatched types earlier. */
7009 gcc_assert (useless_type_conversion_p (vectype,
7010 TREE_TYPE (vec_oprnd)));
7011 bool simd_lane_access_p
7012 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7013 if (simd_lane_access_p
7014 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7015 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7016 && integer_zerop (DR_OFFSET (first_dr))
7017 && integer_zerop (DR_INIT (first_dr))
7018 && alias_sets_conflict_p (get_alias_set (aggr_type),
7019 get_alias_set (TREE_TYPE (ref_type))))
7021 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7022 dataref_offset = build_int_cst (ref_type, 0);
7023 inv_p = false;
7025 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7027 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
7028 &dataref_ptr, &vec_offset);
7029 inv_p = false;
7031 else
7032 dataref_ptr
7033 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
7034 simd_lane_access_p ? loop : NULL,
7035 offset, &dummy, gsi, &ptr_incr,
7036 simd_lane_access_p, &inv_p,
7037 NULL_TREE, bump);
7038 gcc_assert (bb_vinfo || !inv_p);
7040 else
7042 /* For interleaved stores we created vectorized defs for all the
7043 defs stored in OPRNDS in the previous iteration (previous copy).
7044 DR_CHAIN is then used as an input to vect_permute_store_chain(),
7045 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
7046 next copy.
7047 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7048 OPRNDS are of size 1. */
7049 for (i = 0; i < group_size; i++)
7051 op = oprnds[i];
7052 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
7053 dr_chain[i] = vec_oprnd;
7054 oprnds[i] = vec_oprnd;
7056 if (mask)
7057 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
7058 if (dataref_offset)
7059 dataref_offset
7060 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7061 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7062 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
7063 else
7064 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7065 stmt_info, bump);
7068 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7070 tree vec_array;
7072 /* Get an array into which we can store the individual vectors. */
7073 vec_array = create_vector_array (vectype, vec_num);
7075 /* Invalidate the current contents of VEC_ARRAY. This should
7076 become an RTL clobber too, which prevents the vector registers
7077 from being upward-exposed. */
7078 vect_clobber_variable (stmt_info, gsi, vec_array);
7080 /* Store the individual vectors into the array. */
7081 for (i = 0; i < vec_num; i++)
7083 vec_oprnd = dr_chain[i];
7084 write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
7087 tree final_mask = NULL;
7088 if (loop_masks)
7089 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
7090 vectype, j);
7091 if (vec_mask)
7092 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7093 vec_mask, gsi);
7095 gcall *call;
7096 if (final_mask)
7098 /* Emit:
7099 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7100 VEC_ARRAY). */
7101 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7102 tree alias_ptr = build_int_cst (ref_type, align);
7103 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
7104 dataref_ptr, alias_ptr,
7105 final_mask, vec_array);
7107 else
7109 /* Emit:
7110 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7111 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7112 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
7113 vec_array);
7114 gimple_call_set_lhs (call, data_ref);
7116 gimple_call_set_nothrow (call, true);
7117 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
7119 /* Record that VEC_ARRAY is now dead. */
7120 vect_clobber_variable (stmt_info, gsi, vec_array);
7122 else
7124 new_stmt_info = NULL;
7125 if (grouped_store)
7127 if (j == 0)
7128 result_chain.create (group_size);
7129 /* Permute. */
7130 vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
7131 &result_chain);
7134 stmt_vec_info next_stmt_info = first_stmt_info;
7135 for (i = 0; i < vec_num; i++)
7137 unsigned align, misalign;
7139 tree final_mask = NULL_TREE;
7140 if (loop_masks)
7141 final_mask = vect_get_loop_mask (gsi, loop_masks,
7142 vec_num * ncopies,
7143 vectype, vec_num * j + i);
7144 if (vec_mask)
7145 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7146 vec_mask, gsi);
7148 if (memory_access_type == VMAT_GATHER_SCATTER)
7150 tree scale = size_int (gs_info.scale);
7151 gcall *call;
7152 if (loop_masks)
7153 call = gimple_build_call_internal
7154 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
7155 scale, vec_oprnd, final_mask);
7156 else
7157 call = gimple_build_call_internal
7158 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7159 scale, vec_oprnd);
7160 gimple_call_set_nothrow (call, true);
7161 new_stmt_info
7162 = vect_finish_stmt_generation (stmt_info, call, gsi);
7163 break;
7166 if (i > 0)
7167 /* Bump the vector pointer. */
7168 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7169 stmt_info, bump);
7171 if (slp)
7172 vec_oprnd = vec_oprnds[i];
7173 else if (grouped_store)
7174 /* For grouped stores vectorized defs are interleaved in
7175 vect_permute_store_chain(). */
7176 vec_oprnd = result_chain[i];
7178 align = DR_TARGET_ALIGNMENT (first_dr);
7179 if (aligned_access_p (first_dr))
7180 misalign = 0;
7181 else if (DR_MISALIGNMENT (first_dr) == -1)
7183 align = dr_alignment (vect_dr_behavior (first_dr));
7184 misalign = 0;
7186 else
7187 misalign = DR_MISALIGNMENT (first_dr);
7188 if (dataref_offset == NULL_TREE
7189 && TREE_CODE (dataref_ptr) == SSA_NAME)
7190 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7191 misalign);
7193 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7195 tree perm_mask = perm_mask_for_reverse (vectype);
7196 tree perm_dest = vect_create_destination_var
7197 (vect_get_store_rhs (stmt_info), vectype);
7198 tree new_temp = make_ssa_name (perm_dest);
7200 /* Generate the permute statement. */
7201 gimple *perm_stmt
7202 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7203 vec_oprnd, perm_mask);
7204 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7206 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7207 vec_oprnd = new_temp;
7210 /* Arguments are ready. Create the new vector stmt. */
7211 if (final_mask)
7213 align = least_bit_hwi (misalign | align);
7214 tree ptr = build_int_cst (ref_type, align);
7215 gcall *call
7216 = gimple_build_call_internal (IFN_MASK_STORE, 4,
7217 dataref_ptr, ptr,
7218 final_mask, vec_oprnd);
7219 gimple_call_set_nothrow (call, true);
7220 new_stmt_info
7221 = vect_finish_stmt_generation (stmt_info, call, gsi);
7223 else
7225 data_ref = fold_build2 (MEM_REF, vectype,
7226 dataref_ptr,
7227 dataref_offset
7228 ? dataref_offset
7229 : build_int_cst (ref_type, 0));
7230 if (aligned_access_p (first_dr))
7232 else if (DR_MISALIGNMENT (first_dr) == -1)
7233 TREE_TYPE (data_ref)
7234 = build_aligned_type (TREE_TYPE (data_ref),
7235 align * BITS_PER_UNIT);
7236 else
7237 TREE_TYPE (data_ref)
7238 = build_aligned_type (TREE_TYPE (data_ref),
7239 TYPE_ALIGN (elem_type));
7240 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7241 gassign *new_stmt
7242 = gimple_build_assign (data_ref, vec_oprnd);
7243 new_stmt_info
7244 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7247 if (slp)
7248 continue;
7250 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7251 if (!next_stmt_info)
7252 break;
7255 if (!slp)
7257 if (j == 0)
7258 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7259 else
7260 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7261 prev_stmt_info = new_stmt_info;
7265 oprnds.release ();
7266 result_chain.release ();
7267 vec_oprnds.release ();
7269 return true;
7272 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7273 VECTOR_CST mask. No checks are made that the target platform supports the
7274 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7275 vect_gen_perm_mask_checked. */
7277 tree
7278 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7280 tree mask_type;
7282 poly_uint64 nunits = sel.length ();
7283 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7285 mask_type = build_vector_type (ssizetype, nunits);
7286 return vec_perm_indices_to_tree (mask_type, sel);
7289 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7290 i.e. that the target supports the pattern _for arbitrary input vectors_. */
7292 tree
7293 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7295 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7296 return vect_gen_perm_mask_any (vectype, sel);
7299 /* Given a vector variable X and Y, that was generated for the scalar
7300 STMT_INFO, generate instructions to permute the vector elements of X and Y
7301 using permutation mask MASK_VEC, insert them at *GSI and return the
7302 permuted vector variable. */
7304 static tree
7305 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
7306 gimple_stmt_iterator *gsi)
7308 tree vectype = TREE_TYPE (x);
7309 tree perm_dest, data_ref;
7310 gimple *perm_stmt;
7312 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
7313 if (TREE_CODE (scalar_dest) == SSA_NAME)
7314 perm_dest = vect_create_destination_var (scalar_dest, vectype);
7315 else
7316 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7317 data_ref = make_ssa_name (perm_dest);
7319 /* Generate the permute statement. */
7320 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7321 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
7323 return data_ref;
7326 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
7327 inserting them on the loops preheader edge. Returns true if we
7328 were successful in doing so (and thus STMT_INFO can be moved then),
7329 otherwise returns false. */
7331 static bool
7332 hoist_defs_of_uses (stmt_vec_info stmt_info, struct loop *loop)
7334 ssa_op_iter i;
7335 tree op;
7336 bool any = false;
7338 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7340 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7341 if (!gimple_nop_p (def_stmt)
7342 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7344 /* Make sure we don't need to recurse. While we could do
7345 so in simple cases when there are more complex use webs
7346 we don't have an easy way to preserve stmt order to fulfil
7347 dependencies within them. */
7348 tree op2;
7349 ssa_op_iter i2;
7350 if (gimple_code (def_stmt) == GIMPLE_PHI)
7351 return false;
7352 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7354 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7355 if (!gimple_nop_p (def_stmt2)
7356 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7357 return false;
7359 any = true;
7363 if (!any)
7364 return true;
7366 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
7368 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7369 if (!gimple_nop_p (def_stmt)
7370 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7372 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7373 gsi_remove (&gsi, false);
7374 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7378 return true;
7381 /* vectorizable_load.
7383 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
7384 that can be vectorized.
7385 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7386 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7387 Return true if STMT_INFO is vectorizable in this way. */
7389 static bool
7390 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7391 stmt_vec_info *vec_stmt, slp_tree slp_node,
7392 slp_instance slp_node_instance,
7393 stmt_vector_for_cost *cost_vec)
7395 tree scalar_dest;
7396 tree vec_dest = NULL;
7397 tree data_ref = NULL;
7398 stmt_vec_info prev_stmt_info;
7399 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7400 struct loop *loop = NULL;
7401 struct loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
7402 bool nested_in_vect_loop = false;
7403 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
7404 tree elem_type;
7405 tree new_temp;
7406 machine_mode mode;
7407 tree dummy;
7408 enum dr_alignment_support alignment_support_scheme;
7409 tree dataref_ptr = NULL_TREE;
7410 tree dataref_offset = NULL_TREE;
7411 gimple *ptr_incr = NULL;
7412 int ncopies;
7413 int i, j;
7414 unsigned int group_size;
7415 poly_uint64 group_gap_adj;
7416 tree msq = NULL_TREE, lsq;
7417 tree offset = NULL_TREE;
7418 tree byte_offset = NULL_TREE;
7419 tree realignment_token = NULL_TREE;
7420 gphi *phi = NULL;
7421 vec<tree> dr_chain = vNULL;
7422 bool grouped_load = false;
7423 stmt_vec_info first_stmt_info;
7424 stmt_vec_info first_stmt_info_for_drptr = NULL;
7425 bool inv_p;
7426 bool compute_in_loop = false;
7427 struct loop *at_loop;
7428 int vec_num;
7429 bool slp = (slp_node != NULL);
7430 bool slp_perm = false;
7431 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7432 poly_uint64 vf;
7433 tree aggr_type;
7434 gather_scatter_info gs_info;
7435 vec_info *vinfo = stmt_info->vinfo;
7436 tree ref_type;
7437 enum vect_def_type mask_dt = vect_unknown_def_type;
7439 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7440 return false;
7442 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7443 && ! vec_stmt)
7444 return false;
7446 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7447 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7449 scalar_dest = gimple_assign_lhs (assign);
7450 if (TREE_CODE (scalar_dest) != SSA_NAME)
7451 return false;
7453 tree_code code = gimple_assign_rhs_code (assign);
7454 if (code != ARRAY_REF
7455 && code != BIT_FIELD_REF
7456 && code != INDIRECT_REF
7457 && code != COMPONENT_REF
7458 && code != IMAGPART_EXPR
7459 && code != REALPART_EXPR
7460 && code != MEM_REF
7461 && TREE_CODE_CLASS (code) != tcc_declaration)
7462 return false;
7464 else
7466 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7467 if (!call || !gimple_call_internal_p (call))
7468 return false;
7470 internal_fn ifn = gimple_call_internal_fn (call);
7471 if (!internal_load_fn_p (ifn))
7472 return false;
7474 scalar_dest = gimple_call_lhs (call);
7475 if (!scalar_dest)
7476 return false;
7478 if (slp_node != NULL)
7480 if (dump_enabled_p ())
7481 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7482 "SLP of masked loads not supported.\n");
7483 return false;
7486 int mask_index = internal_fn_mask_index (ifn);
7487 if (mask_index >= 0)
7489 mask = gimple_call_arg (call, mask_index);
7490 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
7491 &mask_vectype))
7492 return false;
7496 if (!STMT_VINFO_DATA_REF (stmt_info))
7497 return false;
7499 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7500 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7502 if (loop_vinfo)
7504 loop = LOOP_VINFO_LOOP (loop_vinfo);
7505 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
7506 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7508 else
7509 vf = 1;
7511 /* Multiple types in SLP are handled by creating the appropriate number of
7512 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7513 case of SLP. */
7514 if (slp)
7515 ncopies = 1;
7516 else
7517 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7519 gcc_assert (ncopies >= 1);
7521 /* FORNOW. This restriction should be relaxed. */
7522 if (nested_in_vect_loop && ncopies > 1)
7524 if (dump_enabled_p ())
7525 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7526 "multiple types in nested loop.\n");
7527 return false;
7530 /* Invalidate assumptions made by dependence analysis when vectorization
7531 on the unrolled body effectively re-orders stmts. */
7532 if (ncopies > 1
7533 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7534 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7535 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7537 if (dump_enabled_p ())
7538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7539 "cannot perform implicit CSE when unrolling "
7540 "with negative dependence distance\n");
7541 return false;
7544 elem_type = TREE_TYPE (vectype);
7545 mode = TYPE_MODE (vectype);
7547 /* FORNOW. In some cases can vectorize even if data-type not supported
7548 (e.g. - data copies). */
7549 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7551 if (dump_enabled_p ())
7552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7553 "Aligned load, but unsupported type.\n");
7554 return false;
7557 /* Check if the load is a part of an interleaving chain. */
7558 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7560 grouped_load = true;
7561 /* FORNOW */
7562 gcc_assert (!nested_in_vect_loop);
7563 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7565 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7566 group_size = DR_GROUP_SIZE (first_stmt_info);
7568 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7569 slp_perm = true;
7571 /* Invalidate assumptions made by dependence analysis when vectorization
7572 on the unrolled body effectively re-orders stmts. */
7573 if (!PURE_SLP_STMT (stmt_info)
7574 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7575 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7576 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7578 if (dump_enabled_p ())
7579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7580 "cannot perform implicit CSE when performing "
7581 "group loads with negative dependence distance\n");
7582 return false;
7585 /* Similarly when the stmt is a load that is both part of a SLP
7586 instance and a loop vectorized stmt via the same-dr mechanism
7587 we have to give up. */
7588 if (DR_GROUP_SAME_DR_STMT (stmt_info)
7589 && (STMT_SLP_TYPE (stmt_info)
7590 != STMT_SLP_TYPE (DR_GROUP_SAME_DR_STMT (stmt_info))))
7592 if (dump_enabled_p ())
7593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7594 "conflicting SLP types for CSEd load\n");
7595 return false;
7598 else
7599 group_size = 1;
7601 vect_memory_access_type memory_access_type;
7602 if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
7603 &memory_access_type, &gs_info))
7604 return false;
7606 if (mask)
7608 if (memory_access_type == VMAT_CONTIGUOUS)
7610 machine_mode vec_mode = TYPE_MODE (vectype);
7611 if (!VECTOR_MODE_P (vec_mode)
7612 || !can_vec_mask_load_store_p (vec_mode,
7613 TYPE_MODE (mask_vectype), true))
7614 return false;
7616 else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7618 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7619 tree masktype
7620 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7621 if (TREE_CODE (masktype) == INTEGER_TYPE)
7623 if (dump_enabled_p ())
7624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7625 "masked gather with integer mask not"
7626 " supported.");
7627 return false;
7630 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7631 && memory_access_type != VMAT_GATHER_SCATTER)
7633 if (dump_enabled_p ())
7634 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7635 "unsupported access type for masked load.\n");
7636 return false;
7640 if (!vec_stmt) /* transformation not required. */
7642 if (!slp)
7643 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7645 if (loop_vinfo
7646 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7647 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7648 memory_access_type, &gs_info);
7650 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7651 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7652 slp_node_instance, slp_node, cost_vec);
7653 return true;
7656 if (!slp)
7657 gcc_assert (memory_access_type
7658 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7660 if (dump_enabled_p ())
7661 dump_printf_loc (MSG_NOTE, vect_location,
7662 "transform load. ncopies = %d\n", ncopies);
7664 /* Transform. */
7666 ensure_base_align (dr);
7668 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7670 vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
7671 return true;
7674 if (memory_access_type == VMAT_ELEMENTWISE
7675 || memory_access_type == VMAT_STRIDED_SLP)
7677 gimple_stmt_iterator incr_gsi;
7678 bool insert_after;
7679 gimple *incr;
7680 tree offvar;
7681 tree ivstep;
7682 tree running_off;
7683 vec<constructor_elt, va_gc> *v = NULL;
7684 tree stride_base, stride_step, alias_off;
7685 /* Checked by get_load_store_type. */
7686 unsigned int const_nunits = nunits.to_constant ();
7687 unsigned HOST_WIDE_INT cst_offset = 0;
7689 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7690 gcc_assert (!nested_in_vect_loop);
7692 if (grouped_load)
7694 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7695 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7697 else
7699 first_stmt_info = stmt_info;
7700 first_dr = dr;
7702 if (slp && grouped_load)
7704 group_size = DR_GROUP_SIZE (first_stmt_info);
7705 ref_type = get_group_alias_ptr_type (first_stmt_info);
7707 else
7709 if (grouped_load)
7710 cst_offset
7711 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7712 * vect_get_place_in_interleaving_chain (stmt_info,
7713 first_stmt_info));
7714 group_size = 1;
7715 ref_type = reference_alias_ptr_type (DR_REF (dr));
7718 stride_base
7719 = fold_build_pointer_plus
7720 (DR_BASE_ADDRESS (first_dr),
7721 size_binop (PLUS_EXPR,
7722 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7723 convert_to_ptrofftype (DR_INIT (first_dr))));
7724 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7726 /* For a load with loop-invariant (but other than power-of-2)
7727 stride (i.e. not a grouped access) like so:
7729 for (i = 0; i < n; i += stride)
7730 ... = array[i];
7732 we generate a new induction variable and new accesses to
7733 form a new vector (or vectors, depending on ncopies):
7735 for (j = 0; ; j += VF*stride)
7736 tmp1 = array[j];
7737 tmp2 = array[j + stride];
7739 vectemp = {tmp1, tmp2, ...}
7742 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7743 build_int_cst (TREE_TYPE (stride_step), vf));
7745 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7747 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7748 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7749 create_iv (stride_base, ivstep, NULL,
7750 loop, &incr_gsi, insert_after,
7751 &offvar, NULL);
7752 incr = gsi_stmt (incr_gsi);
7753 loop_vinfo->add_stmt (incr);
7755 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7757 prev_stmt_info = NULL;
7758 running_off = offvar;
7759 alias_off = build_int_cst (ref_type, 0);
7760 int nloads = const_nunits;
7761 int lnel = 1;
7762 tree ltype = TREE_TYPE (vectype);
7763 tree lvectype = vectype;
7764 auto_vec<tree> dr_chain;
7765 if (memory_access_type == VMAT_STRIDED_SLP)
7767 if (group_size < const_nunits)
7769 /* First check if vec_init optab supports construction from
7770 vector elts directly. */
7771 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7772 machine_mode vmode;
7773 if (mode_for_vector (elmode, group_size).exists (&vmode)
7774 && VECTOR_MODE_P (vmode)
7775 && targetm.vector_mode_supported_p (vmode)
7776 && (convert_optab_handler (vec_init_optab,
7777 TYPE_MODE (vectype), vmode)
7778 != CODE_FOR_nothing))
7780 nloads = const_nunits / group_size;
7781 lnel = group_size;
7782 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7784 else
7786 /* Otherwise avoid emitting a constructor of vector elements
7787 by performing the loads using an integer type of the same
7788 size, constructing a vector of those and then
7789 re-interpreting it as the original vector type.
7790 This avoids a huge runtime penalty due to the general
7791 inability to perform store forwarding from smaller stores
7792 to a larger load. */
7793 unsigned lsize
7794 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7795 elmode = int_mode_for_size (lsize, 0).require ();
7796 unsigned int lnunits = const_nunits / group_size;
7797 /* If we can't construct such a vector fall back to
7798 element loads of the original vector type. */
7799 if (mode_for_vector (elmode, lnunits).exists (&vmode)
7800 && VECTOR_MODE_P (vmode)
7801 && targetm.vector_mode_supported_p (vmode)
7802 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7803 != CODE_FOR_nothing))
7805 nloads = lnunits;
7806 lnel = group_size;
7807 ltype = build_nonstandard_integer_type (lsize, 1);
7808 lvectype = build_vector_type (ltype, nloads);
7812 else
7814 nloads = 1;
7815 lnel = const_nunits;
7816 ltype = vectype;
7818 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7820 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
7821 else if (nloads == 1)
7822 ltype = vectype;
7824 if (slp)
7826 /* For SLP permutation support we need to load the whole group,
7827 not only the number of vector stmts the permutation result
7828 fits in. */
7829 if (slp_perm)
7831 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7832 variable VF. */
7833 unsigned int const_vf = vf.to_constant ();
7834 ncopies = CEIL (group_size * const_vf, const_nunits);
7835 dr_chain.create (ncopies);
7837 else
7838 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7840 unsigned int group_el = 0;
7841 unsigned HOST_WIDE_INT
7842 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7843 for (j = 0; j < ncopies; j++)
7845 if (nloads > 1)
7846 vec_alloc (v, nloads);
7847 stmt_vec_info new_stmt_info = NULL;
7848 for (i = 0; i < nloads; i++)
7850 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7851 group_el * elsz + cst_offset);
7852 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7853 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7854 gassign *new_stmt
7855 = gimple_build_assign (make_ssa_name (ltype), data_ref);
7856 new_stmt_info
7857 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7858 if (nloads > 1)
7859 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7860 gimple_assign_lhs (new_stmt));
7862 group_el += lnel;
7863 if (! slp
7864 || group_el == group_size)
7866 tree newoff = copy_ssa_name (running_off);
7867 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7868 running_off, stride_step);
7869 vect_finish_stmt_generation (stmt_info, incr, gsi);
7871 running_off = newoff;
7872 group_el = 0;
7875 if (nloads > 1)
7877 tree vec_inv = build_constructor (lvectype, v);
7878 new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
7879 new_stmt_info = vinfo->lookup_def (new_temp);
7880 if (lvectype != vectype)
7882 gassign *new_stmt
7883 = gimple_build_assign (make_ssa_name (vectype),
7884 VIEW_CONVERT_EXPR,
7885 build1 (VIEW_CONVERT_EXPR,
7886 vectype, new_temp));
7887 new_stmt_info
7888 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7892 if (slp)
7894 if (slp_perm)
7895 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
7896 else
7897 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
7899 else
7901 if (j == 0)
7902 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7903 else
7904 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7905 prev_stmt_info = new_stmt_info;
7908 if (slp_perm)
7910 unsigned n_perms;
7911 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7912 slp_node_instance, false, &n_perms);
7914 return true;
7917 if (memory_access_type == VMAT_GATHER_SCATTER
7918 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
7919 grouped_load = false;
7921 if (grouped_load)
7923 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7924 group_size = DR_GROUP_SIZE (first_stmt_info);
7925 /* For SLP vectorization we directly vectorize a subchain
7926 without permutation. */
7927 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7928 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7929 /* For BB vectorization always use the first stmt to base
7930 the data ref pointer on. */
7931 if (bb_vinfo)
7932 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7934 /* Check if the chain of loads is already vectorized. */
7935 if (STMT_VINFO_VEC_STMT (first_stmt_info)
7936 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7937 ??? But we can only do so if there is exactly one
7938 as we have no way to get at the rest. Leave the CSE
7939 opportunity alone.
7940 ??? With the group load eventually participating
7941 in multiple different permutations (having multiple
7942 slp nodes which refer to the same group) the CSE
7943 is even wrong code. See PR56270. */
7944 && !slp)
7946 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7947 return true;
7949 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7950 group_gap_adj = 0;
7952 /* VEC_NUM is the number of vect stmts to be created for this group. */
7953 if (slp)
7955 grouped_load = false;
7956 /* For SLP permutation support we need to load the whole group,
7957 not only the number of vector stmts the permutation result
7958 fits in. */
7959 if (slp_perm)
7961 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7962 variable VF. */
7963 unsigned int const_vf = vf.to_constant ();
7964 unsigned int const_nunits = nunits.to_constant ();
7965 vec_num = CEIL (group_size * const_vf, const_nunits);
7966 group_gap_adj = vf * group_size - nunits * vec_num;
7968 else
7970 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7971 group_gap_adj
7972 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7975 else
7976 vec_num = group_size;
7978 ref_type = get_group_alias_ptr_type (first_stmt_info);
7980 else
7982 first_stmt_info = stmt_info;
7983 first_dr = dr;
7984 group_size = vec_num = 1;
7985 group_gap_adj = 0;
7986 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7989 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7990 gcc_assert (alignment_support_scheme);
7991 vec_loop_masks *loop_masks
7992 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7993 ? &LOOP_VINFO_MASKS (loop_vinfo)
7994 : NULL);
7995 /* Targets with store-lane instructions must not require explicit
7996 realignment. vect_supportable_dr_alignment always returns either
7997 dr_aligned or dr_unaligned_supported for masked operations. */
7998 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7999 && !mask
8000 && !loop_masks)
8001 || alignment_support_scheme == dr_aligned
8002 || alignment_support_scheme == dr_unaligned_supported);
8004 /* In case the vectorization factor (VF) is bigger than the number
8005 of elements that we can fit in a vectype (nunits), we have to generate
8006 more than one vector stmt - i.e - we need to "unroll" the
8007 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8008 from one copy of the vector stmt to the next, in the field
8009 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8010 stages to find the correct vector defs to be used when vectorizing
8011 stmts that use the defs of the current stmt. The example below
8012 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8013 need to create 4 vectorized stmts):
8015 before vectorization:
8016 RELATED_STMT VEC_STMT
8017 S1: x = memref - -
8018 S2: z = x + 1 - -
8020 step 1: vectorize stmt S1:
8021 We first create the vector stmt VS1_0, and, as usual, record a
8022 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8023 Next, we create the vector stmt VS1_1, and record a pointer to
8024 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8025 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8026 stmts and pointers:
8027 RELATED_STMT VEC_STMT
8028 VS1_0: vx0 = memref0 VS1_1 -
8029 VS1_1: vx1 = memref1 VS1_2 -
8030 VS1_2: vx2 = memref2 VS1_3 -
8031 VS1_3: vx3 = memref3 - -
8032 S1: x = load - VS1_0
8033 S2: z = x + 1 - -
8035 See in documentation in vect_get_vec_def_for_stmt_copy for how the
8036 information we recorded in RELATED_STMT field is used to vectorize
8037 stmt S2. */
8039 /* In case of interleaving (non-unit grouped access):
8041 S1: x2 = &base + 2
8042 S2: x0 = &base
8043 S3: x1 = &base + 1
8044 S4: x3 = &base + 3
8046 Vectorized loads are created in the order of memory accesses
8047 starting from the access of the first stmt of the chain:
8049 VS1: vx0 = &base
8050 VS2: vx1 = &base + vec_size*1
8051 VS3: vx3 = &base + vec_size*2
8052 VS4: vx4 = &base + vec_size*3
8054 Then permutation statements are generated:
8056 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8057 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8060 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8061 (the order of the data-refs in the output of vect_permute_load_chain
8062 corresponds to the order of scalar stmts in the interleaving chain - see
8063 the documentation of vect_permute_load_chain()).
8064 The generation of permutation stmts and recording them in
8065 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8067 In case of both multiple types and interleaving, the vector loads and
8068 permutation stmts above are created for every copy. The result vector
8069 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8070 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
8072 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8073 on a target that supports unaligned accesses (dr_unaligned_supported)
8074 we generate the following code:
8075 p = initial_addr;
8076 indx = 0;
8077 loop {
8078 p = p + indx * vectype_size;
8079 vec_dest = *(p);
8080 indx = indx + 1;
8083 Otherwise, the data reference is potentially unaligned on a target that
8084 does not support unaligned accesses (dr_explicit_realign_optimized) -
8085 then generate the following code, in which the data in each iteration is
8086 obtained by two vector loads, one from the previous iteration, and one
8087 from the current iteration:
8088 p1 = initial_addr;
8089 msq_init = *(floor(p1))
8090 p2 = initial_addr + VS - 1;
8091 realignment_token = call target_builtin;
8092 indx = 0;
8093 loop {
8094 p2 = p2 + indx * vectype_size
8095 lsq = *(floor(p2))
8096 vec_dest = realign_load (msq, lsq, realignment_token)
8097 indx = indx + 1;
8098 msq = lsq;
8099 } */
8101 /* If the misalignment remains the same throughout the execution of the
8102 loop, we can create the init_addr and permutation mask at the loop
8103 preheader. Otherwise, it needs to be created inside the loop.
8104 This can only occur when vectorizing memory accesses in the inner-loop
8105 nested within an outer-loop that is being vectorized. */
8107 if (nested_in_vect_loop
8108 && !multiple_p (DR_STEP_ALIGNMENT (dr),
8109 GET_MODE_SIZE (TYPE_MODE (vectype))))
8111 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
8112 compute_in_loop = true;
8115 if ((alignment_support_scheme == dr_explicit_realign_optimized
8116 || alignment_support_scheme == dr_explicit_realign)
8117 && !compute_in_loop)
8119 msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
8120 alignment_support_scheme, NULL_TREE,
8121 &at_loop);
8122 if (alignment_support_scheme == dr_explicit_realign_optimized)
8124 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
8125 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
8126 size_one_node);
8129 else
8130 at_loop = loop;
8132 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8133 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
8135 tree bump;
8136 tree vec_offset = NULL_TREE;
8137 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8139 aggr_type = NULL_TREE;
8140 bump = NULL_TREE;
8142 else if (memory_access_type == VMAT_GATHER_SCATTER)
8144 aggr_type = elem_type;
8145 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8146 &bump, &vec_offset);
8148 else
8150 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8151 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8152 else
8153 aggr_type = vectype;
8154 bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
8157 tree vec_mask = NULL_TREE;
8158 prev_stmt_info = NULL;
8159 poly_uint64 group_elt = 0;
8160 for (j = 0; j < ncopies; j++)
8162 stmt_vec_info new_stmt_info = NULL;
8163 /* 1. Create the vector or array pointer update chain. */
8164 if (j == 0)
8166 bool simd_lane_access_p
8167 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8168 if (simd_lane_access_p
8169 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8170 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8171 && integer_zerop (DR_OFFSET (first_dr))
8172 && integer_zerop (DR_INIT (first_dr))
8173 && alias_sets_conflict_p (get_alias_set (aggr_type),
8174 get_alias_set (TREE_TYPE (ref_type)))
8175 && (alignment_support_scheme == dr_aligned
8176 || alignment_support_scheme == dr_unaligned_supported))
8178 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
8179 dataref_offset = build_int_cst (ref_type, 0);
8180 inv_p = false;
8182 else if (first_stmt_info_for_drptr
8183 && first_stmt_info != first_stmt_info_for_drptr)
8185 dataref_ptr
8186 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
8187 aggr_type, at_loop, offset, &dummy,
8188 gsi, &ptr_incr, simd_lane_access_p,
8189 &inv_p, byte_offset, bump);
8190 /* Adjust the pointer by the difference to first_stmt. */
8191 data_reference_p ptrdr
8192 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
8193 tree diff = fold_convert (sizetype,
8194 size_binop (MINUS_EXPR,
8195 DR_INIT (first_dr),
8196 DR_INIT (ptrdr)));
8197 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8198 stmt_info, diff);
8200 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8202 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8203 &dataref_ptr, &vec_offset);
8204 inv_p = false;
8206 else
8207 dataref_ptr
8208 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
8209 offset, &dummy, gsi, &ptr_incr,
8210 simd_lane_access_p, &inv_p,
8211 byte_offset, bump);
8212 if (mask)
8213 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8214 mask_vectype);
8216 else
8218 if (dataref_offset)
8219 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8220 bump);
8221 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8222 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8223 else
8224 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8225 stmt_info, bump);
8226 if (mask)
8227 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8230 if (grouped_load || slp_perm)
8231 dr_chain.create (vec_num);
8233 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8235 tree vec_array;
8237 vec_array = create_vector_array (vectype, vec_num);
8239 tree final_mask = NULL_TREE;
8240 if (loop_masks)
8241 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8242 vectype, j);
8243 if (vec_mask)
8244 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8245 vec_mask, gsi);
8247 gcall *call;
8248 if (final_mask)
8250 /* Emit:
8251 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8252 VEC_MASK). */
8253 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8254 tree alias_ptr = build_int_cst (ref_type, align);
8255 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8256 dataref_ptr, alias_ptr,
8257 final_mask);
8259 else
8261 /* Emit:
8262 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8263 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8264 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8266 gimple_call_set_lhs (call, vec_array);
8267 gimple_call_set_nothrow (call, true);
8268 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8270 /* Extract each vector into an SSA_NAME. */
8271 for (i = 0; i < vec_num; i++)
8273 new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
8274 vec_array, i);
8275 dr_chain.quick_push (new_temp);
8278 /* Record the mapping between SSA_NAMEs and statements. */
8279 vect_record_grouped_load_vectors (stmt_info, dr_chain);
8281 /* Record that VEC_ARRAY is now dead. */
8282 vect_clobber_variable (stmt_info, gsi, vec_array);
8284 else
8286 for (i = 0; i < vec_num; i++)
8288 tree final_mask = NULL_TREE;
8289 if (loop_masks
8290 && memory_access_type != VMAT_INVARIANT)
8291 final_mask = vect_get_loop_mask (gsi, loop_masks,
8292 vec_num * ncopies,
8293 vectype, vec_num * j + i);
8294 if (vec_mask)
8295 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8296 vec_mask, gsi);
8298 if (i > 0)
8299 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8300 stmt_info, bump);
8302 /* 2. Create the vector-load in the loop. */
8303 gimple *new_stmt = NULL;
8304 switch (alignment_support_scheme)
8306 case dr_aligned:
8307 case dr_unaligned_supported:
8309 unsigned int align, misalign;
8311 if (memory_access_type == VMAT_GATHER_SCATTER)
8313 tree scale = size_int (gs_info.scale);
8314 gcall *call;
8315 if (loop_masks)
8316 call = gimple_build_call_internal
8317 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8318 vec_offset, scale, final_mask);
8319 else
8320 call = gimple_build_call_internal
8321 (IFN_GATHER_LOAD, 3, dataref_ptr,
8322 vec_offset, scale);
8323 gimple_call_set_nothrow (call, true);
8324 new_stmt = call;
8325 data_ref = NULL_TREE;
8326 break;
8329 align = DR_TARGET_ALIGNMENT (dr);
8330 if (alignment_support_scheme == dr_aligned)
8332 gcc_assert (aligned_access_p (first_dr));
8333 misalign = 0;
8335 else if (DR_MISALIGNMENT (first_dr) == -1)
8337 align = dr_alignment (vect_dr_behavior (first_dr));
8338 misalign = 0;
8340 else
8341 misalign = DR_MISALIGNMENT (first_dr);
8342 if (dataref_offset == NULL_TREE
8343 && TREE_CODE (dataref_ptr) == SSA_NAME)
8344 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8345 align, misalign);
8347 if (final_mask)
8349 align = least_bit_hwi (misalign | align);
8350 tree ptr = build_int_cst (ref_type, align);
8351 gcall *call
8352 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8353 dataref_ptr, ptr,
8354 final_mask);
8355 gimple_call_set_nothrow (call, true);
8356 new_stmt = call;
8357 data_ref = NULL_TREE;
8359 else
8361 data_ref
8362 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8363 dataref_offset
8364 ? dataref_offset
8365 : build_int_cst (ref_type, 0));
8366 if (alignment_support_scheme == dr_aligned)
8368 else if (DR_MISALIGNMENT (first_dr) == -1)
8369 TREE_TYPE (data_ref)
8370 = build_aligned_type (TREE_TYPE (data_ref),
8371 align * BITS_PER_UNIT);
8372 else
8373 TREE_TYPE (data_ref)
8374 = build_aligned_type (TREE_TYPE (data_ref),
8375 TYPE_ALIGN (elem_type));
8377 break;
8379 case dr_explicit_realign:
8381 tree ptr, bump;
8383 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8385 if (compute_in_loop)
8386 msq = vect_setup_realignment (first_stmt_info, gsi,
8387 &realignment_token,
8388 dr_explicit_realign,
8389 dataref_ptr, NULL);
8391 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8392 ptr = copy_ssa_name (dataref_ptr);
8393 else
8394 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8395 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8396 new_stmt = gimple_build_assign
8397 (ptr, BIT_AND_EXPR, dataref_ptr,
8398 build_int_cst
8399 (TREE_TYPE (dataref_ptr),
8400 -(HOST_WIDE_INT) align));
8401 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8402 data_ref
8403 = build2 (MEM_REF, vectype, ptr,
8404 build_int_cst (ref_type, 0));
8405 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8406 vec_dest = vect_create_destination_var (scalar_dest,
8407 vectype);
8408 new_stmt = gimple_build_assign (vec_dest, data_ref);
8409 new_temp = make_ssa_name (vec_dest, new_stmt);
8410 gimple_assign_set_lhs (new_stmt, new_temp);
8411 gimple_set_vdef (new_stmt, gimple_vdef (stmt_info->stmt));
8412 gimple_set_vuse (new_stmt, gimple_vuse (stmt_info->stmt));
8413 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8414 msq = new_temp;
8416 bump = size_binop (MULT_EXPR, vs,
8417 TYPE_SIZE_UNIT (elem_type));
8418 bump = size_binop (MINUS_EXPR, bump, size_one_node);
8419 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
8420 stmt_info, bump);
8421 new_stmt = gimple_build_assign
8422 (NULL_TREE, BIT_AND_EXPR, ptr,
8423 build_int_cst
8424 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8425 ptr = copy_ssa_name (ptr, new_stmt);
8426 gimple_assign_set_lhs (new_stmt, ptr);
8427 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8428 data_ref
8429 = build2 (MEM_REF, vectype, ptr,
8430 build_int_cst (ref_type, 0));
8431 break;
8433 case dr_explicit_realign_optimized:
8435 if (TREE_CODE (dataref_ptr) == SSA_NAME)
8436 new_temp = copy_ssa_name (dataref_ptr);
8437 else
8438 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8439 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8440 new_stmt = gimple_build_assign
8441 (new_temp, BIT_AND_EXPR, dataref_ptr,
8442 build_int_cst (TREE_TYPE (dataref_ptr),
8443 -(HOST_WIDE_INT) align));
8444 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8445 data_ref
8446 = build2 (MEM_REF, vectype, new_temp,
8447 build_int_cst (ref_type, 0));
8448 break;
8450 default:
8451 gcc_unreachable ();
8453 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8454 /* DATA_REF is null if we've already built the statement. */
8455 if (data_ref)
8457 vect_copy_ref_info (data_ref, DR_REF (first_dr));
8458 new_stmt = gimple_build_assign (vec_dest, data_ref);
8460 new_temp = make_ssa_name (vec_dest, new_stmt);
8461 gimple_set_lhs (new_stmt, new_temp);
8462 new_stmt_info
8463 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8465 /* 3. Handle explicit realignment if necessary/supported.
8466 Create in loop:
8467 vec_dest = realign_load (msq, lsq, realignment_token) */
8468 if (alignment_support_scheme == dr_explicit_realign_optimized
8469 || alignment_support_scheme == dr_explicit_realign)
8471 lsq = gimple_assign_lhs (new_stmt);
8472 if (!realignment_token)
8473 realignment_token = dataref_ptr;
8474 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8475 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8476 msq, lsq, realignment_token);
8477 new_temp = make_ssa_name (vec_dest, new_stmt);
8478 gimple_assign_set_lhs (new_stmt, new_temp);
8479 new_stmt_info
8480 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8482 if (alignment_support_scheme == dr_explicit_realign_optimized)
8484 gcc_assert (phi);
8485 if (i == vec_num - 1 && j == ncopies - 1)
8486 add_phi_arg (phi, lsq,
8487 loop_latch_edge (containing_loop),
8488 UNKNOWN_LOCATION);
8489 msq = lsq;
8493 /* 4. Handle invariant-load. */
8494 if (inv_p && !bb_vinfo)
8496 gcc_assert (!grouped_load);
8497 /* If we have versioned for aliasing or the loop doesn't
8498 have any data dependencies that would preclude this,
8499 then we are sure this is a loop invariant load and
8500 thus we can insert it on the preheader edge. */
8501 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8502 && !nested_in_vect_loop
8503 && hoist_defs_of_uses (stmt_info, loop))
8505 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8506 if (dump_enabled_p ())
8508 dump_printf_loc (MSG_NOTE, vect_location,
8509 "hoisting out of the vectorized "
8510 "loop: ");
8511 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8513 tree tem = copy_ssa_name (scalar_dest);
8514 gsi_insert_on_edge_immediate
8515 (loop_preheader_edge (loop),
8516 gimple_build_assign (tem,
8517 unshare_expr
8518 (gimple_assign_rhs1 (stmt))));
8519 new_temp = vect_init_vector (stmt_info, tem,
8520 vectype, NULL);
8521 new_stmt = SSA_NAME_DEF_STMT (new_temp);
8522 new_stmt_info = vinfo->add_stmt (new_stmt);
8524 else
8526 gimple_stmt_iterator gsi2 = *gsi;
8527 gsi_next (&gsi2);
8528 new_temp = vect_init_vector (stmt_info, scalar_dest,
8529 vectype, &gsi2);
8530 new_stmt_info = vinfo->lookup_def (new_temp);
8534 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8536 tree perm_mask = perm_mask_for_reverse (vectype);
8537 new_temp = permute_vec_elements (new_temp, new_temp,
8538 perm_mask, stmt_info, gsi);
8539 new_stmt_info = vinfo->lookup_def (new_temp);
8542 /* Collect vector loads and later create their permutation in
8543 vect_transform_grouped_load (). */
8544 if (grouped_load || slp_perm)
8545 dr_chain.quick_push (new_temp);
8547 /* Store vector loads in the corresponding SLP_NODE. */
8548 if (slp && !slp_perm)
8549 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8551 /* With SLP permutation we load the gaps as well, without
8552 we need to skip the gaps after we manage to fully load
8553 all elements. group_gap_adj is DR_GROUP_SIZE here. */
8554 group_elt += nunits;
8555 if (maybe_ne (group_gap_adj, 0U)
8556 && !slp_perm
8557 && known_eq (group_elt, group_size - group_gap_adj))
8559 poly_wide_int bump_val
8560 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8561 * group_gap_adj);
8562 tree bump = wide_int_to_tree (sizetype, bump_val);
8563 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8564 stmt_info, bump);
8565 group_elt = 0;
8568 /* Bump the vector pointer to account for a gap or for excess
8569 elements loaded for a permuted SLP load. */
8570 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8572 poly_wide_int bump_val
8573 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8574 * group_gap_adj);
8575 tree bump = wide_int_to_tree (sizetype, bump_val);
8576 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8577 stmt_info, bump);
8581 if (slp && !slp_perm)
8582 continue;
8584 if (slp_perm)
8586 unsigned n_perms;
8587 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8588 slp_node_instance, false,
8589 &n_perms))
8591 dr_chain.release ();
8592 return false;
8595 else
8597 if (grouped_load)
8599 if (memory_access_type != VMAT_LOAD_STORE_LANES)
8600 vect_transform_grouped_load (stmt_info, dr_chain,
8601 group_size, gsi);
8602 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8604 else
8606 if (j == 0)
8607 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8608 else
8609 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8610 prev_stmt_info = new_stmt_info;
8613 dr_chain.release ();
8616 return true;
8619 /* Function vect_is_simple_cond.
8621 Input:
8622 LOOP - the loop that is being vectorized.
8623 COND - Condition that is checked for simple use.
8625 Output:
8626 *COMP_VECTYPE - the vector type for the comparison.
8627 *DTS - The def types for the arguments of the comparison
8629 Returns whether a COND can be vectorized. Checks whether
8630 condition operands are supportable using vec_is_simple_use. */
8632 static bool
8633 vect_is_simple_cond (tree cond, vec_info *vinfo,
8634 tree *comp_vectype, enum vect_def_type *dts,
8635 tree vectype)
8637 tree lhs, rhs;
8638 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8640 /* Mask case. */
8641 if (TREE_CODE (cond) == SSA_NAME
8642 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8644 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
8645 || !*comp_vectype
8646 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8647 return false;
8648 return true;
8651 if (!COMPARISON_CLASS_P (cond))
8652 return false;
8654 lhs = TREE_OPERAND (cond, 0);
8655 rhs = TREE_OPERAND (cond, 1);
8657 if (TREE_CODE (lhs) == SSA_NAME)
8659 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
8660 return false;
8662 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8663 || TREE_CODE (lhs) == FIXED_CST)
8664 dts[0] = vect_constant_def;
8665 else
8666 return false;
8668 if (TREE_CODE (rhs) == SSA_NAME)
8670 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
8671 return false;
8673 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8674 || TREE_CODE (rhs) == FIXED_CST)
8675 dts[1] = vect_constant_def;
8676 else
8677 return false;
8679 if (vectype1 && vectype2
8680 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8681 TYPE_VECTOR_SUBPARTS (vectype2)))
8682 return false;
8684 *comp_vectype = vectype1 ? vectype1 : vectype2;
8685 /* Invariant comparison. */
8686 if (! *comp_vectype && vectype)
8688 tree scalar_type = TREE_TYPE (lhs);
8689 /* If we can widen the comparison to match vectype do so. */
8690 if (INTEGRAL_TYPE_P (scalar_type)
8691 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8692 TYPE_SIZE (TREE_TYPE (vectype))))
8693 scalar_type = build_nonstandard_integer_type
8694 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8695 TYPE_UNSIGNED (scalar_type));
8696 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8699 return true;
8702 /* vectorizable_condition.
8704 Check if STMT_INFO is conditional modify expression that can be vectorized.
8705 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8706 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8707 at GSI.
8709 When STMT_INFO is vectorized as a nested cycle, REDUC_DEF is the vector
8710 variable to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1,
8711 and in else clause if it is 2).
8713 Return true if STMT_INFO is vectorizable in this way. */
8715 bool
8716 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8717 stmt_vec_info *vec_stmt, tree reduc_def,
8718 int reduc_index, slp_tree slp_node,
8719 stmt_vector_for_cost *cost_vec)
8721 vec_info *vinfo = stmt_info->vinfo;
8722 tree scalar_dest = NULL_TREE;
8723 tree vec_dest = NULL_TREE;
8724 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8725 tree then_clause, else_clause;
8726 tree comp_vectype = NULL_TREE;
8727 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8728 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8729 tree vec_compare;
8730 tree new_temp;
8731 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8732 enum vect_def_type dts[4]
8733 = {vect_unknown_def_type, vect_unknown_def_type,
8734 vect_unknown_def_type, vect_unknown_def_type};
8735 int ndts = 4;
8736 int ncopies;
8737 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8738 stmt_vec_info prev_stmt_info = NULL;
8739 int i, j;
8740 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8741 vec<tree> vec_oprnds0 = vNULL;
8742 vec<tree> vec_oprnds1 = vNULL;
8743 vec<tree> vec_oprnds2 = vNULL;
8744 vec<tree> vec_oprnds3 = vNULL;
8745 tree vec_cmp_type;
8746 bool masked = false;
8748 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8749 return false;
8751 vect_reduction_type reduction_type
8752 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8753 if (reduction_type == TREE_CODE_REDUCTION)
8755 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8756 return false;
8758 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8759 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8760 && reduc_def))
8761 return false;
8763 /* FORNOW: not yet supported. */
8764 if (STMT_VINFO_LIVE_P (stmt_info))
8766 if (dump_enabled_p ())
8767 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8768 "value used after loop.\n");
8769 return false;
8773 /* Is vectorizable conditional operation? */
8774 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
8775 if (!stmt)
8776 return false;
8778 code = gimple_assign_rhs_code (stmt);
8780 if (code != COND_EXPR)
8781 return false;
8783 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8784 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8786 if (slp_node)
8787 ncopies = 1;
8788 else
8789 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8791 gcc_assert (ncopies >= 1);
8792 if (reduc_index && ncopies > 1)
8793 return false; /* FORNOW */
8795 cond_expr = gimple_assign_rhs1 (stmt);
8796 then_clause = gimple_assign_rhs2 (stmt);
8797 else_clause = gimple_assign_rhs3 (stmt);
8799 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8800 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8801 || !comp_vectype)
8802 return false;
8804 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
8805 return false;
8806 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
8807 return false;
8809 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8810 return false;
8812 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8813 return false;
8815 masked = !COMPARISON_CLASS_P (cond_expr);
8816 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8818 if (vec_cmp_type == NULL_TREE)
8819 return false;
8821 cond_code = TREE_CODE (cond_expr);
8822 if (!masked)
8824 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8825 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8828 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8830 /* Boolean values may have another representation in vectors
8831 and therefore we prefer bit operations over comparison for
8832 them (which also works for scalar masks). We store opcodes
8833 to use in bitop1 and bitop2. Statement is vectorized as
8834 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8835 depending on bitop1 and bitop2 arity. */
8836 switch (cond_code)
8838 case GT_EXPR:
8839 bitop1 = BIT_NOT_EXPR;
8840 bitop2 = BIT_AND_EXPR;
8841 break;
8842 case GE_EXPR:
8843 bitop1 = BIT_NOT_EXPR;
8844 bitop2 = BIT_IOR_EXPR;
8845 break;
8846 case LT_EXPR:
8847 bitop1 = BIT_NOT_EXPR;
8848 bitop2 = BIT_AND_EXPR;
8849 std::swap (cond_expr0, cond_expr1);
8850 break;
8851 case LE_EXPR:
8852 bitop1 = BIT_NOT_EXPR;
8853 bitop2 = BIT_IOR_EXPR;
8854 std::swap (cond_expr0, cond_expr1);
8855 break;
8856 case NE_EXPR:
8857 bitop1 = BIT_XOR_EXPR;
8858 break;
8859 case EQ_EXPR:
8860 bitop1 = BIT_XOR_EXPR;
8861 bitop2 = BIT_NOT_EXPR;
8862 break;
8863 default:
8864 return false;
8866 cond_code = SSA_NAME;
8869 if (!vec_stmt)
8871 if (bitop1 != NOP_EXPR)
8873 machine_mode mode = TYPE_MODE (comp_vectype);
8874 optab optab;
8876 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8877 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8878 return false;
8880 if (bitop2 != NOP_EXPR)
8882 optab = optab_for_tree_code (bitop2, comp_vectype,
8883 optab_default);
8884 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8885 return false;
8888 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8889 cond_code))
8891 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8892 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
8893 cost_vec);
8894 return true;
8896 return false;
8899 /* Transform. */
8901 if (!slp_node)
8903 vec_oprnds0.create (1);
8904 vec_oprnds1.create (1);
8905 vec_oprnds2.create (1);
8906 vec_oprnds3.create (1);
8909 /* Handle def. */
8910 scalar_dest = gimple_assign_lhs (stmt);
8911 if (reduction_type != EXTRACT_LAST_REDUCTION)
8912 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8914 /* Handle cond expr. */
8915 for (j = 0; j < ncopies; j++)
8917 stmt_vec_info new_stmt_info = NULL;
8918 if (j == 0)
8920 if (slp_node)
8922 auto_vec<tree, 4> ops;
8923 auto_vec<vec<tree>, 4> vec_defs;
8925 if (masked)
8926 ops.safe_push (cond_expr);
8927 else
8929 ops.safe_push (cond_expr0);
8930 ops.safe_push (cond_expr1);
8932 ops.safe_push (then_clause);
8933 ops.safe_push (else_clause);
8934 vect_get_slp_defs (ops, slp_node, &vec_defs);
8935 vec_oprnds3 = vec_defs.pop ();
8936 vec_oprnds2 = vec_defs.pop ();
8937 if (!masked)
8938 vec_oprnds1 = vec_defs.pop ();
8939 vec_oprnds0 = vec_defs.pop ();
8941 else
8943 if (masked)
8945 vec_cond_lhs
8946 = vect_get_vec_def_for_operand (cond_expr, stmt_info,
8947 comp_vectype);
8948 vect_is_simple_use (cond_expr, stmt_info->vinfo, &dts[0]);
8950 else
8952 vec_cond_lhs
8953 = vect_get_vec_def_for_operand (cond_expr0,
8954 stmt_info, comp_vectype);
8955 vect_is_simple_use (cond_expr0, loop_vinfo, &dts[0]);
8957 vec_cond_rhs
8958 = vect_get_vec_def_for_operand (cond_expr1,
8959 stmt_info, comp_vectype);
8960 vect_is_simple_use (cond_expr1, loop_vinfo, &dts[1]);
8962 if (reduc_index == 1)
8963 vec_then_clause = reduc_def;
8964 else
8966 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8967 stmt_info);
8968 vect_is_simple_use (then_clause, loop_vinfo, &dts[2]);
8970 if (reduc_index == 2)
8971 vec_else_clause = reduc_def;
8972 else
8974 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8975 stmt_info);
8976 vect_is_simple_use (else_clause, loop_vinfo, &dts[3]);
8980 else
8982 vec_cond_lhs
8983 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
8984 if (!masked)
8985 vec_cond_rhs
8986 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
8988 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
8989 vec_oprnds2.pop ());
8990 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
8991 vec_oprnds3.pop ());
8994 if (!slp_node)
8996 vec_oprnds0.quick_push (vec_cond_lhs);
8997 if (!masked)
8998 vec_oprnds1.quick_push (vec_cond_rhs);
8999 vec_oprnds2.quick_push (vec_then_clause);
9000 vec_oprnds3.quick_push (vec_else_clause);
9003 /* Arguments are ready. Create the new vector stmt. */
9004 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
9006 vec_then_clause = vec_oprnds2[i];
9007 vec_else_clause = vec_oprnds3[i];
9009 if (masked)
9010 vec_compare = vec_cond_lhs;
9011 else
9013 vec_cond_rhs = vec_oprnds1[i];
9014 if (bitop1 == NOP_EXPR)
9015 vec_compare = build2 (cond_code, vec_cmp_type,
9016 vec_cond_lhs, vec_cond_rhs);
9017 else
9019 new_temp = make_ssa_name (vec_cmp_type);
9020 gassign *new_stmt;
9021 if (bitop1 == BIT_NOT_EXPR)
9022 new_stmt = gimple_build_assign (new_temp, bitop1,
9023 vec_cond_rhs);
9024 else
9025 new_stmt
9026 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
9027 vec_cond_rhs);
9028 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9029 if (bitop2 == NOP_EXPR)
9030 vec_compare = new_temp;
9031 else if (bitop2 == BIT_NOT_EXPR)
9033 /* Instead of doing ~x ? y : z do x ? z : y. */
9034 vec_compare = new_temp;
9035 std::swap (vec_then_clause, vec_else_clause);
9037 else
9039 vec_compare = make_ssa_name (vec_cmp_type);
9040 new_stmt
9041 = gimple_build_assign (vec_compare, bitop2,
9042 vec_cond_lhs, new_temp);
9043 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9047 if (reduction_type == EXTRACT_LAST_REDUCTION)
9049 if (!is_gimple_val (vec_compare))
9051 tree vec_compare_name = make_ssa_name (vec_cmp_type);
9052 gassign *new_stmt = gimple_build_assign (vec_compare_name,
9053 vec_compare);
9054 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9055 vec_compare = vec_compare_name;
9057 gcc_assert (reduc_index == 2);
9058 gcall *new_stmt = gimple_build_call_internal
9059 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
9060 vec_then_clause);
9061 gimple_call_set_lhs (new_stmt, scalar_dest);
9062 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
9063 if (stmt_info->stmt == gsi_stmt (*gsi))
9064 new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
9065 else
9067 /* In this case we're moving the definition to later in the
9068 block. That doesn't matter because the only uses of the
9069 lhs are in phi statements. */
9070 gimple_stmt_iterator old_gsi
9071 = gsi_for_stmt (stmt_info->stmt);
9072 gsi_remove (&old_gsi, true);
9073 new_stmt_info
9074 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9077 else
9079 new_temp = make_ssa_name (vec_dest);
9080 gassign *new_stmt
9081 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
9082 vec_then_clause, vec_else_clause);
9083 new_stmt_info
9084 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9086 if (slp_node)
9087 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9090 if (slp_node)
9091 continue;
9093 if (j == 0)
9094 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9095 else
9096 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9098 prev_stmt_info = new_stmt_info;
9101 vec_oprnds0.release ();
9102 vec_oprnds1.release ();
9103 vec_oprnds2.release ();
9104 vec_oprnds3.release ();
9106 return true;
9109 /* vectorizable_comparison.
9111 Check if STMT_INFO is comparison expression that can be vectorized.
9112 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9113 comparison, put it in VEC_STMT, and insert it at GSI.
9115 Return true if STMT_INFO is vectorizable in this way. */
9117 static bool
9118 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9119 stmt_vec_info *vec_stmt, tree reduc_def,
9120 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9122 vec_info *vinfo = stmt_info->vinfo;
9123 tree lhs, rhs1, rhs2;
9124 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9125 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9126 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
9127 tree new_temp;
9128 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9129 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
9130 int ndts = 2;
9131 poly_uint64 nunits;
9132 int ncopies;
9133 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9134 stmt_vec_info prev_stmt_info = NULL;
9135 int i, j;
9136 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9137 vec<tree> vec_oprnds0 = vNULL;
9138 vec<tree> vec_oprnds1 = vNULL;
9139 tree mask_type;
9140 tree mask;
9142 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9143 return false;
9145 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
9146 return false;
9148 mask_type = vectype;
9149 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9151 if (slp_node)
9152 ncopies = 1;
9153 else
9154 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9156 gcc_assert (ncopies >= 1);
9157 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9158 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9159 && reduc_def))
9160 return false;
9162 if (STMT_VINFO_LIVE_P (stmt_info))
9164 if (dump_enabled_p ())
9165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9166 "value used after loop.\n");
9167 return false;
9170 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9171 if (!stmt)
9172 return false;
9174 code = gimple_assign_rhs_code (stmt);
9176 if (TREE_CODE_CLASS (code) != tcc_comparison)
9177 return false;
9179 rhs1 = gimple_assign_rhs1 (stmt);
9180 rhs2 = gimple_assign_rhs2 (stmt);
9182 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
9183 return false;
9185 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
9186 return false;
9188 if (vectype1 && vectype2
9189 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9190 TYPE_VECTOR_SUBPARTS (vectype2)))
9191 return false;
9193 vectype = vectype1 ? vectype1 : vectype2;
9195 /* Invariant comparison. */
9196 if (!vectype)
9198 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9199 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9200 return false;
9202 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9203 return false;
9205 /* Can't compare mask and non-mask types. */
9206 if (vectype1 && vectype2
9207 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9208 return false;
9210 /* Boolean values may have another representation in vectors
9211 and therefore we prefer bit operations over comparison for
9212 them (which also works for scalar masks). We store opcodes
9213 to use in bitop1 and bitop2. Statement is vectorized as
9214 BITOP2 (rhs1 BITOP1 rhs2) or
9215 rhs1 BITOP2 (BITOP1 rhs2)
9216 depending on bitop1 and bitop2 arity. */
9217 if (VECTOR_BOOLEAN_TYPE_P (vectype))
9219 if (code == GT_EXPR)
9221 bitop1 = BIT_NOT_EXPR;
9222 bitop2 = BIT_AND_EXPR;
9224 else if (code == GE_EXPR)
9226 bitop1 = BIT_NOT_EXPR;
9227 bitop2 = BIT_IOR_EXPR;
9229 else if (code == LT_EXPR)
9231 bitop1 = BIT_NOT_EXPR;
9232 bitop2 = BIT_AND_EXPR;
9233 std::swap (rhs1, rhs2);
9234 std::swap (dts[0], dts[1]);
9236 else if (code == LE_EXPR)
9238 bitop1 = BIT_NOT_EXPR;
9239 bitop2 = BIT_IOR_EXPR;
9240 std::swap (rhs1, rhs2);
9241 std::swap (dts[0], dts[1]);
9243 else
9245 bitop1 = BIT_XOR_EXPR;
9246 if (code == EQ_EXPR)
9247 bitop2 = BIT_NOT_EXPR;
9251 if (!vec_stmt)
9253 if (bitop1 == NOP_EXPR)
9255 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
9256 return false;
9258 else
9260 machine_mode mode = TYPE_MODE (vectype);
9261 optab optab;
9263 optab = optab_for_tree_code (bitop1, vectype, optab_default);
9264 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9265 return false;
9267 if (bitop2 != NOP_EXPR)
9269 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9270 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9271 return false;
9275 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9276 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9277 dts, ndts, slp_node, cost_vec);
9278 return true;
9281 /* Transform. */
9282 if (!slp_node)
9284 vec_oprnds0.create (1);
9285 vec_oprnds1.create (1);
9288 /* Handle def. */
9289 lhs = gimple_assign_lhs (stmt);
9290 mask = vect_create_destination_var (lhs, mask_type);
9292 /* Handle cmp expr. */
9293 for (j = 0; j < ncopies; j++)
9295 stmt_vec_info new_stmt_info = NULL;
9296 if (j == 0)
9298 if (slp_node)
9300 auto_vec<tree, 2> ops;
9301 auto_vec<vec<tree>, 2> vec_defs;
9303 ops.safe_push (rhs1);
9304 ops.safe_push (rhs2);
9305 vect_get_slp_defs (ops, slp_node, &vec_defs);
9306 vec_oprnds1 = vec_defs.pop ();
9307 vec_oprnds0 = vec_defs.pop ();
9309 else
9311 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
9312 vectype);
9313 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
9314 vectype);
9317 else
9319 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
9320 vec_oprnds0.pop ());
9321 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
9322 vec_oprnds1.pop ());
9325 if (!slp_node)
9327 vec_oprnds0.quick_push (vec_rhs1);
9328 vec_oprnds1.quick_push (vec_rhs2);
9331 /* Arguments are ready. Create the new vector stmt. */
9332 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9334 vec_rhs2 = vec_oprnds1[i];
9336 new_temp = make_ssa_name (mask);
9337 if (bitop1 == NOP_EXPR)
9339 gassign *new_stmt = gimple_build_assign (new_temp, code,
9340 vec_rhs1, vec_rhs2);
9341 new_stmt_info
9342 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9344 else
9346 gassign *new_stmt;
9347 if (bitop1 == BIT_NOT_EXPR)
9348 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9349 else
9350 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9351 vec_rhs2);
9352 new_stmt_info
9353 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9354 if (bitop2 != NOP_EXPR)
9356 tree res = make_ssa_name (mask);
9357 if (bitop2 == BIT_NOT_EXPR)
9358 new_stmt = gimple_build_assign (res, bitop2, new_temp);
9359 else
9360 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9361 new_temp);
9362 new_stmt_info
9363 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9366 if (slp_node)
9367 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9370 if (slp_node)
9371 continue;
9373 if (j == 0)
9374 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9375 else
9376 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9378 prev_stmt_info = new_stmt_info;
9381 vec_oprnds0.release ();
9382 vec_oprnds1.release ();
9384 return true;
9387 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9388 can handle all live statements in the node. Otherwise return true
9389 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
9390 GSI and VEC_STMT are as for vectorizable_live_operation. */
9392 static bool
9393 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9394 slp_tree slp_node, stmt_vec_info *vec_stmt,
9395 stmt_vector_for_cost *cost_vec)
9397 if (slp_node)
9399 stmt_vec_info slp_stmt_info;
9400 unsigned int i;
9401 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
9403 if (STMT_VINFO_LIVE_P (slp_stmt_info)
9404 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
9405 vec_stmt, cost_vec))
9406 return false;
9409 else if (STMT_VINFO_LIVE_P (stmt_info)
9410 && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
9411 vec_stmt, cost_vec))
9412 return false;
9414 return true;
9417 /* Make sure the statement is vectorizable. */
9419 bool
9420 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
9421 slp_tree node, slp_instance node_instance,
9422 stmt_vector_for_cost *cost_vec)
9424 vec_info *vinfo = stmt_info->vinfo;
9425 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9426 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9427 bool ok;
9428 gimple_seq pattern_def_seq;
9430 if (dump_enabled_p ())
9432 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9433 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
9436 if (gimple_has_volatile_ops (stmt_info->stmt))
9438 if (dump_enabled_p ())
9439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9440 "not vectorized: stmt has volatile operands\n");
9442 return false;
9445 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9446 && node == NULL
9447 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9449 gimple_stmt_iterator si;
9451 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9453 stmt_vec_info pattern_def_stmt_info
9454 = vinfo->lookup_stmt (gsi_stmt (si));
9455 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
9456 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
9458 /* Analyze def stmt of STMT if it's a pattern stmt. */
9459 if (dump_enabled_p ())
9461 dump_printf_loc (MSG_NOTE, vect_location,
9462 "==> examining pattern def statement: ");
9463 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
9464 pattern_def_stmt_info->stmt, 0);
9467 if (!vect_analyze_stmt (pattern_def_stmt_info,
9468 need_to_vectorize, node, node_instance,
9469 cost_vec))
9470 return false;
9475 /* Skip stmts that do not need to be vectorized. In loops this is expected
9476 to include:
9477 - the COND_EXPR which is the loop exit condition
9478 - any LABEL_EXPRs in the loop
9479 - computations that are used only for array indexing or loop control.
9480 In basic blocks we only analyze statements that are a part of some SLP
9481 instance, therefore, all the statements are relevant.
9483 Pattern statement needs to be analyzed instead of the original statement
9484 if the original statement is not relevant. Otherwise, we analyze both
9485 statements. In basic blocks we are called from some SLP instance
9486 traversal, don't analyze pattern stmts instead, the pattern stmts
9487 already will be part of SLP instance. */
9489 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
9490 if (!STMT_VINFO_RELEVANT_P (stmt_info)
9491 && !STMT_VINFO_LIVE_P (stmt_info))
9493 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9494 && pattern_stmt_info
9495 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9496 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9498 /* Analyze PATTERN_STMT instead of the original stmt. */
9499 stmt_info = pattern_stmt_info;
9500 if (dump_enabled_p ())
9502 dump_printf_loc (MSG_NOTE, vect_location,
9503 "==> examining pattern statement: ");
9504 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
9507 else
9509 if (dump_enabled_p ())
9510 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9512 return true;
9515 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9516 && node == NULL
9517 && pattern_stmt_info
9518 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
9519 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
9521 /* Analyze PATTERN_STMT too. */
9522 if (dump_enabled_p ())
9524 dump_printf_loc (MSG_NOTE, vect_location,
9525 "==> examining pattern statement: ");
9526 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt_info->stmt, 0);
9529 if (!vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
9530 node_instance, cost_vec))
9531 return false;
9534 switch (STMT_VINFO_DEF_TYPE (stmt_info))
9536 case vect_internal_def:
9537 break;
9539 case vect_reduction_def:
9540 case vect_nested_cycle:
9541 gcc_assert (!bb_vinfo
9542 && (relevance == vect_used_in_outer
9543 || relevance == vect_used_in_outer_by_reduction
9544 || relevance == vect_used_by_reduction
9545 || relevance == vect_unused_in_scope
9546 || relevance == vect_used_only_live));
9547 break;
9549 case vect_induction_def:
9550 gcc_assert (!bb_vinfo);
9551 break;
9553 case vect_constant_def:
9554 case vect_external_def:
9555 case vect_unknown_def_type:
9556 default:
9557 gcc_unreachable ();
9560 if (STMT_VINFO_RELEVANT_P (stmt_info))
9562 tree type = gimple_expr_type (stmt_info->stmt);
9563 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
9564 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9565 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9566 || (call && gimple_call_lhs (call) == NULL_TREE));
9567 *need_to_vectorize = true;
9570 if (PURE_SLP_STMT (stmt_info) && !node)
9572 dump_printf_loc (MSG_NOTE, vect_location,
9573 "handled only by SLP analysis\n");
9574 return true;
9577 ok = true;
9578 if (!bb_vinfo
9579 && (STMT_VINFO_RELEVANT_P (stmt_info)
9580 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9581 ok = (vectorizable_simd_clone_call (stmt_info, NULL, NULL, node, cost_vec)
9582 || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
9583 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9584 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9585 || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
9586 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9587 cost_vec)
9588 || vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9589 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9590 || vectorizable_reduction (stmt_info, NULL, NULL, node,
9591 node_instance, cost_vec)
9592 || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
9593 || vectorizable_condition (stmt_info, NULL, NULL, NULL, 0, node,
9594 cost_vec)
9595 || vectorizable_comparison (stmt_info, NULL, NULL, NULL, node,
9596 cost_vec));
9597 else
9599 if (bb_vinfo)
9600 ok = (vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
9601 cost_vec)
9602 || vectorizable_conversion (stmt_info, NULL, NULL, node,
9603 cost_vec)
9604 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
9605 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
9606 || vectorizable_assignment (stmt_info, NULL, NULL, node,
9607 cost_vec)
9608 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
9609 cost_vec)
9610 || vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
9611 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
9612 || vectorizable_condition (stmt_info, NULL, NULL, NULL, 0, node,
9613 cost_vec)
9614 || vectorizable_comparison (stmt_info, NULL, NULL, NULL, node,
9615 cost_vec));
9618 if (!ok)
9620 if (dump_enabled_p ())
9622 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9623 "not vectorized: relevant stmt not ");
9624 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9625 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
9626 stmt_info->stmt, 0);
9629 return false;
9632 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9633 need extra handling, except for vectorizable reductions. */
9634 if (!bb_vinfo
9635 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9636 && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
9638 if (dump_enabled_p ())
9640 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9641 "not vectorized: live stmt not supported: ");
9642 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
9643 stmt_info->stmt, 0);
9646 return false;
9649 return true;
9653 /* Function vect_transform_stmt.
9655 Create a vectorized stmt to replace STMT_INFO, and insert it at BSI. */
9657 bool
9658 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9659 bool *grouped_store, slp_tree slp_node,
9660 slp_instance slp_node_instance)
9662 vec_info *vinfo = stmt_info->vinfo;
9663 bool is_store = false;
9664 stmt_vec_info vec_stmt = NULL;
9665 bool done;
9667 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9668 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
9670 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9671 && nested_in_vect_loop_p
9672 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9673 stmt_info));
9675 gimple *stmt = stmt_info->stmt;
9676 switch (STMT_VINFO_TYPE (stmt_info))
9678 case type_demotion_vec_info_type:
9679 case type_promotion_vec_info_type:
9680 case type_conversion_vec_info_type:
9681 done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
9682 NULL);
9683 gcc_assert (done);
9684 break;
9686 case induc_vec_info_type:
9687 done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
9688 NULL);
9689 gcc_assert (done);
9690 break;
9692 case shift_vec_info_type:
9693 done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9694 gcc_assert (done);
9695 break;
9697 case op_vec_info_type:
9698 done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
9699 NULL);
9700 gcc_assert (done);
9701 break;
9703 case assignment_vec_info_type:
9704 done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
9705 NULL);
9706 gcc_assert (done);
9707 break;
9709 case load_vec_info_type:
9710 done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
9711 slp_node_instance, NULL);
9712 gcc_assert (done);
9713 break;
9715 case store_vec_info_type:
9716 done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9717 gcc_assert (done);
9718 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9720 /* In case of interleaving, the whole chain is vectorized when the
9721 last store in the chain is reached. Store stmts before the last
9722 one are skipped, and there vec_stmt_info shouldn't be freed
9723 meanwhile. */
9724 *grouped_store = true;
9725 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9726 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
9727 is_store = true;
9729 else
9730 is_store = true;
9731 break;
9733 case condition_vec_info_type:
9734 done = vectorizable_condition (stmt_info, gsi, &vec_stmt, NULL, 0,
9735 slp_node, NULL);
9736 gcc_assert (done);
9737 break;
9739 case comparison_vec_info_type:
9740 done = vectorizable_comparison (stmt_info, gsi, &vec_stmt, NULL,
9741 slp_node, NULL);
9742 gcc_assert (done);
9743 break;
9745 case call_vec_info_type:
9746 done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
9747 stmt = gsi_stmt (*gsi);
9748 break;
9750 case call_simd_clone_vec_info_type:
9751 done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
9752 slp_node, NULL);
9753 stmt = gsi_stmt (*gsi);
9754 break;
9756 case reduc_vec_info_type:
9757 done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
9758 slp_node_instance, NULL);
9759 gcc_assert (done);
9760 break;
9762 default:
9763 if (!STMT_VINFO_LIVE_P (stmt_info))
9765 if (dump_enabled_p ())
9766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9767 "stmt not supported.\n");
9768 gcc_unreachable ();
9772 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9773 This would break hybrid SLP vectorization. */
9774 if (slp_node)
9775 gcc_assert (!vec_stmt
9776 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
9778 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9779 is being vectorized, but outside the immediately enclosing loop. */
9780 if (vec_stmt
9781 && nested_p
9782 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9783 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9784 || STMT_VINFO_RELEVANT (stmt_info) ==
9785 vect_used_in_outer_by_reduction))
9787 struct loop *innerloop = LOOP_VINFO_LOOP (
9788 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9789 imm_use_iterator imm_iter;
9790 use_operand_p use_p;
9791 tree scalar_dest;
9793 if (dump_enabled_p ())
9794 dump_printf_loc (MSG_NOTE, vect_location,
9795 "Record the vdef for outer-loop vectorization.\n");
9797 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
9798 (to be used when vectorizing outer-loop stmts that use the DEF of
9799 STMT). */
9800 if (gimple_code (stmt) == GIMPLE_PHI)
9801 scalar_dest = PHI_RESULT (stmt);
9802 else
9803 scalar_dest = gimple_assign_lhs (stmt);
9805 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9806 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9808 stmt_vec_info exit_phi_info
9809 = vinfo->lookup_stmt (USE_STMT (use_p));
9810 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
9814 /* Handle stmts whose DEF is used outside the loop-nest that is
9815 being vectorized. */
9816 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9818 done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,
9819 NULL);
9820 gcc_assert (done);
9823 if (vec_stmt)
9824 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9826 return is_store;
9830 /* Remove a group of stores (for SLP or interleaving), free their
9831 stmt_vec_info. */
9833 void
9834 vect_remove_stores (stmt_vec_info first_stmt_info)
9836 stmt_vec_info next_stmt_info = first_stmt_info;
9837 gimple_stmt_iterator next_si;
9839 while (next_stmt_info)
9841 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9842 if (is_pattern_stmt_p (next_stmt_info))
9843 next_stmt_info = STMT_VINFO_RELATED_STMT (next_stmt_info);
9844 /* Free the attached stmt_vec_info and remove the stmt. */
9845 next_si = gsi_for_stmt (next_stmt_info->stmt);
9846 unlink_stmt_vdef (next_stmt_info->stmt);
9847 gsi_remove (&next_si, true);
9848 release_defs (next_stmt_info->stmt);
9849 free_stmt_vec_info (next_stmt_info);
9850 next_stmt_info = tmp;
9855 /* Function new_stmt_vec_info.
9857 Create and initialize a new stmt_vec_info struct for STMT. */
9859 stmt_vec_info
9860 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
9862 stmt_vec_info res;
9863 res = (_stmt_vec_info *) xcalloc (1, sizeof (struct _stmt_vec_info));
9865 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9866 STMT_VINFO_STMT (res) = stmt;
9867 res->vinfo = vinfo;
9868 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
9869 STMT_VINFO_LIVE_P (res) = false;
9870 STMT_VINFO_VECTYPE (res) = NULL;
9871 STMT_VINFO_VEC_STMT (res) = NULL;
9872 STMT_VINFO_VECTORIZABLE (res) = true;
9873 STMT_VINFO_IN_PATTERN_P (res) = false;
9874 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
9875 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
9876 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
9878 if (gimple_code (stmt) == GIMPLE_PHI
9879 && is_loop_header_bb_p (gimple_bb (stmt)))
9880 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9881 else
9882 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9884 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9885 STMT_SLP_TYPE (res) = loop_vect;
9886 STMT_VINFO_NUM_SLP_USES (res) = 0;
9888 res->first_element = NULL; /* GROUP_FIRST_ELEMENT */
9889 res->next_element = NULL; /* GROUP_NEXT_ELEMENT */
9890 res->size = 0; /* GROUP_SIZE */
9891 res->store_count = 0; /* GROUP_STORE_COUNT */
9892 res->gap = 0; /* GROUP_GAP */
9893 res->same_dr_stmt = NULL; /* GROUP_SAME_DR_STMT */
9895 /* This is really "uninitialized" until vect_compute_data_ref_alignment. */
9896 res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED;
9898 return res;
9902 /* Set the current stmt_vec_info vector to V. */
9904 void
9905 set_stmt_vec_info_vec (vec<stmt_vec_info> *v)
9907 stmt_vec_info_vec = v;
9910 /* Free the stmt_vec_info entries in V and release V. */
9912 void
9913 free_stmt_vec_infos (vec<stmt_vec_info> *v)
9915 unsigned int i;
9916 stmt_vec_info info;
9917 FOR_EACH_VEC_ELT (*v, i, info)
9918 if (info != NULL_STMT_VEC_INFO)
9919 free_stmt_vec_info (STMT_VINFO_STMT (info));
9920 if (v == stmt_vec_info_vec)
9921 stmt_vec_info_vec = NULL;
9922 v->release ();
9926 /* Free stmt vectorization related info. */
9928 void
9929 free_stmt_vec_info (gimple *stmt)
9931 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9933 if (!stmt_info)
9934 return;
9936 /* Check if this statement has a related "pattern stmt"
9937 (introduced by the vectorizer during the pattern recognition
9938 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9939 too. */
9940 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9942 if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
9943 for (gimple_stmt_iterator si = gsi_start (seq);
9944 !gsi_end_p (si); gsi_next (&si))
9946 gimple *seq_stmt = gsi_stmt (si);
9947 gimple_set_bb (seq_stmt, NULL);
9948 tree lhs = gimple_get_lhs (seq_stmt);
9949 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9950 release_ssa_name (lhs);
9951 free_stmt_vec_info (seq_stmt);
9953 stmt_vec_info patt_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
9954 if (patt_stmt_info)
9956 gimple_set_bb (patt_stmt_info->stmt, NULL);
9957 tree lhs = gimple_get_lhs (patt_stmt_info->stmt);
9958 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9959 release_ssa_name (lhs);
9960 free_stmt_vec_info (patt_stmt_info);
9964 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9965 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9966 set_vinfo_for_stmt (stmt, NULL);
9967 free (stmt_info);
9971 /* Function get_vectype_for_scalar_type_and_size.
9973 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9974 by the target. */
9976 tree
9977 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9979 tree orig_scalar_type = scalar_type;
9980 scalar_mode inner_mode;
9981 machine_mode simd_mode;
9982 poly_uint64 nunits;
9983 tree vectype;
9985 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9986 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9987 return NULL_TREE;
9989 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9991 /* For vector types of elements whose mode precision doesn't
9992 match their types precision we use a element type of mode
9993 precision. The vectorization routines will have to make sure
9994 they support the proper result truncation/extension.
9995 We also make sure to build vector types with INTEGER_TYPE
9996 component type only. */
9997 if (INTEGRAL_TYPE_P (scalar_type)
9998 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9999 || TREE_CODE (scalar_type) != INTEGER_TYPE))
10000 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
10001 TYPE_UNSIGNED (scalar_type));
10003 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
10004 When the component mode passes the above test simply use a type
10005 corresponding to that mode. The theory is that any use that
10006 would cause problems with this will disable vectorization anyway. */
10007 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
10008 && !INTEGRAL_TYPE_P (scalar_type))
10009 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
10011 /* We can't build a vector type of elements with alignment bigger than
10012 their size. */
10013 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
10014 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
10015 TYPE_UNSIGNED (scalar_type));
10017 /* If we felt back to using the mode fail if there was
10018 no scalar type for it. */
10019 if (scalar_type == NULL_TREE)
10020 return NULL_TREE;
10022 /* If no size was supplied use the mode the target prefers. Otherwise
10023 lookup a vector mode of the specified size. */
10024 if (known_eq (size, 0U))
10025 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
10026 else if (!multiple_p (size, nbytes, &nunits)
10027 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
10028 return NULL_TREE;
10029 /* NOTE: nunits == 1 is allowed to support single element vector types. */
10030 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
10031 return NULL_TREE;
10033 vectype = build_vector_type (scalar_type, nunits);
10035 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
10036 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
10037 return NULL_TREE;
10039 /* Re-attach the address-space qualifier if we canonicalized the scalar
10040 type. */
10041 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
10042 return build_qualified_type
10043 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
10045 return vectype;
10048 poly_uint64 current_vector_size;
10050 /* Function get_vectype_for_scalar_type.
10052 Returns the vector type corresponding to SCALAR_TYPE as supported
10053 by the target. */
10055 tree
10056 get_vectype_for_scalar_type (tree scalar_type)
10058 tree vectype;
10059 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
10060 current_vector_size);
10061 if (vectype
10062 && known_eq (current_vector_size, 0U))
10063 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
10064 return vectype;
10067 /* Function get_mask_type_for_scalar_type.
10069 Returns the mask type corresponding to a result of comparison
10070 of vectors of specified SCALAR_TYPE as supported by target. */
10072 tree
10073 get_mask_type_for_scalar_type (tree scalar_type)
10075 tree vectype = get_vectype_for_scalar_type (scalar_type);
10077 if (!vectype)
10078 return NULL;
10080 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
10081 current_vector_size);
10084 /* Function get_same_sized_vectype
10086 Returns a vector type corresponding to SCALAR_TYPE of size
10087 VECTOR_TYPE if supported by the target. */
10089 tree
10090 get_same_sized_vectype (tree scalar_type, tree vector_type)
10092 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10093 return build_same_sized_truth_vector_type (vector_type);
10095 return get_vectype_for_scalar_type_and_size
10096 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
10099 /* Function vect_is_simple_use.
10101 Input:
10102 VINFO - the vect info of the loop or basic block that is being vectorized.
10103 OPERAND - operand in the loop or bb.
10104 Output:
10105 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
10106 case OPERAND is an SSA_NAME that is defined in the vectorizable region
10107 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
10108 the definition could be anywhere in the function
10109 DT - the type of definition
10111 Returns whether a stmt with OPERAND can be vectorized.
10112 For loops, supportable operands are constants, loop invariants, and operands
10113 that are defined by the current iteration of the loop. Unsupportable
10114 operands are those that are defined by a previous iteration of the loop (as
10115 is the case in reduction/induction computations).
10116 For basic blocks, supportable operands are constants and bb invariants.
10117 For now, operands defined outside the basic block are not supported. */
10119 bool
10120 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10121 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
10123 if (def_stmt_info_out)
10124 *def_stmt_info_out = NULL;
10125 if (def_stmt_out)
10126 *def_stmt_out = NULL;
10127 *dt = vect_unknown_def_type;
10129 if (dump_enabled_p ())
10131 dump_printf_loc (MSG_NOTE, vect_location,
10132 "vect_is_simple_use: operand ");
10133 if (TREE_CODE (operand) == SSA_NAME
10134 && !SSA_NAME_IS_DEFAULT_DEF (operand))
10135 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
10136 else
10137 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
10140 if (CONSTANT_CLASS_P (operand))
10141 *dt = vect_constant_def;
10142 else if (is_gimple_min_invariant (operand))
10143 *dt = vect_external_def;
10144 else if (TREE_CODE (operand) != SSA_NAME)
10145 *dt = vect_unknown_def_type;
10146 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
10147 *dt = vect_external_def;
10148 else
10150 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
10151 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
10152 if (!stmt_vinfo)
10153 *dt = vect_external_def;
10154 else
10156 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
10158 stmt_vinfo = STMT_VINFO_RELATED_STMT (stmt_vinfo);
10159 def_stmt = stmt_vinfo->stmt;
10161 switch (gimple_code (def_stmt))
10163 case GIMPLE_PHI:
10164 case GIMPLE_ASSIGN:
10165 case GIMPLE_CALL:
10166 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
10167 break;
10168 default:
10169 *dt = vect_unknown_def_type;
10170 break;
10172 if (def_stmt_info_out)
10173 *def_stmt_info_out = stmt_vinfo;
10175 if (def_stmt_out)
10176 *def_stmt_out = def_stmt;
10179 if (dump_enabled_p ())
10181 dump_printf (MSG_NOTE, ", type of def: ");
10182 switch (*dt)
10184 case vect_uninitialized_def:
10185 dump_printf (MSG_NOTE, "uninitialized\n");
10186 break;
10187 case vect_constant_def:
10188 dump_printf (MSG_NOTE, "constant\n");
10189 break;
10190 case vect_external_def:
10191 dump_printf (MSG_NOTE, "external\n");
10192 break;
10193 case vect_internal_def:
10194 dump_printf (MSG_NOTE, "internal\n");
10195 break;
10196 case vect_induction_def:
10197 dump_printf (MSG_NOTE, "induction\n");
10198 break;
10199 case vect_reduction_def:
10200 dump_printf (MSG_NOTE, "reduction\n");
10201 break;
10202 case vect_double_reduction_def:
10203 dump_printf (MSG_NOTE, "double reduction\n");
10204 break;
10205 case vect_nested_cycle:
10206 dump_printf (MSG_NOTE, "nested cycle\n");
10207 break;
10208 case vect_unknown_def_type:
10209 dump_printf (MSG_NOTE, "unknown\n");
10210 break;
10214 if (*dt == vect_unknown_def_type)
10216 if (dump_enabled_p ())
10217 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10218 "Unsupported pattern.\n");
10219 return false;
10222 return true;
10225 /* Function vect_is_simple_use.
10227 Same as vect_is_simple_use but also determines the vector operand
10228 type of OPERAND and stores it to *VECTYPE. If the definition of
10229 OPERAND is vect_uninitialized_def, vect_constant_def or
10230 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10231 is responsible to compute the best suited vector type for the
10232 scalar operand. */
10234 bool
10235 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
10236 tree *vectype, stmt_vec_info *def_stmt_info_out,
10237 gimple **def_stmt_out)
10239 stmt_vec_info def_stmt_info;
10240 gimple *def_stmt;
10241 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
10242 return false;
10244 if (def_stmt_out)
10245 *def_stmt_out = def_stmt;
10246 if (def_stmt_info_out)
10247 *def_stmt_info_out = def_stmt_info;
10249 /* Now get a vector type if the def is internal, otherwise supply
10250 NULL_TREE and leave it up to the caller to figure out a proper
10251 type for the use stmt. */
10252 if (*dt == vect_internal_def
10253 || *dt == vect_induction_def
10254 || *dt == vect_reduction_def
10255 || *dt == vect_double_reduction_def
10256 || *dt == vect_nested_cycle)
10258 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
10259 gcc_assert (*vectype != NULL_TREE);
10260 if (dump_enabled_p ())
10262 dump_printf_loc (MSG_NOTE, vect_location,
10263 "vect_is_simple_use: vectype ");
10264 dump_generic_expr (MSG_NOTE, TDF_SLIM, *vectype);
10265 dump_printf (MSG_NOTE, "\n");
10268 else if (*dt == vect_uninitialized_def
10269 || *dt == vect_constant_def
10270 || *dt == vect_external_def)
10271 *vectype = NULL_TREE;
10272 else
10273 gcc_unreachable ();
10275 return true;
10279 /* Function supportable_widening_operation
10281 Check whether an operation represented by the code CODE is a
10282 widening operation that is supported by the target platform in
10283 vector form (i.e., when operating on arguments of type VECTYPE_IN
10284 producing a result of type VECTYPE_OUT).
10286 Widening operations we currently support are NOP (CONVERT), FLOAT,
10287 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10288 are supported by the target platform either directly (via vector
10289 tree-codes), or via target builtins.
10291 Output:
10292 - CODE1 and CODE2 are codes of vector operations to be used when
10293 vectorizing the operation, if available.
10294 - MULTI_STEP_CVT determines the number of required intermediate steps in
10295 case of multi-step conversion (like char->short->int - in that case
10296 MULTI_STEP_CVT will be 1).
10297 - INTERM_TYPES contains the intermediate type required to perform the
10298 widening operation (short in the above example). */
10300 bool
10301 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
10302 tree vectype_out, tree vectype_in,
10303 enum tree_code *code1, enum tree_code *code2,
10304 int *multi_step_cvt,
10305 vec<tree> *interm_types)
10307 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10308 struct loop *vect_loop = NULL;
10309 machine_mode vec_mode;
10310 enum insn_code icode1, icode2;
10311 optab optab1, optab2;
10312 tree vectype = vectype_in;
10313 tree wide_vectype = vectype_out;
10314 enum tree_code c1, c2;
10315 int i;
10316 tree prev_type, intermediate_type;
10317 machine_mode intermediate_mode, prev_mode;
10318 optab optab3, optab4;
10320 *multi_step_cvt = 0;
10321 if (loop_info)
10322 vect_loop = LOOP_VINFO_LOOP (loop_info);
10324 switch (code)
10326 case WIDEN_MULT_EXPR:
10327 /* The result of a vectorized widening operation usually requires
10328 two vectors (because the widened results do not fit into one vector).
10329 The generated vector results would normally be expected to be
10330 generated in the same order as in the original scalar computation,
10331 i.e. if 8 results are generated in each vector iteration, they are
10332 to be organized as follows:
10333 vect1: [res1,res2,res3,res4],
10334 vect2: [res5,res6,res7,res8].
10336 However, in the special case that the result of the widening
10337 operation is used in a reduction computation only, the order doesn't
10338 matter (because when vectorizing a reduction we change the order of
10339 the computation). Some targets can take advantage of this and
10340 generate more efficient code. For example, targets like Altivec,
10341 that support widen_mult using a sequence of {mult_even,mult_odd}
10342 generate the following vectors:
10343 vect1: [res1,res3,res5,res7],
10344 vect2: [res2,res4,res6,res8].
10346 When vectorizing outer-loops, we execute the inner-loop sequentially
10347 (each vectorized inner-loop iteration contributes to VF outer-loop
10348 iterations in parallel). We therefore don't allow to change the
10349 order of the computation in the inner-loop during outer-loop
10350 vectorization. */
10351 /* TODO: Another case in which order doesn't *really* matter is when we
10352 widen and then contract again, e.g. (short)((int)x * y >> 8).
10353 Normally, pack_trunc performs an even/odd permute, whereas the
10354 repack from an even/odd expansion would be an interleave, which
10355 would be significantly simpler for e.g. AVX2. */
10356 /* In any case, in order to avoid duplicating the code below, recurse
10357 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10358 are properly set up for the caller. If we fail, we'll continue with
10359 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10360 if (vect_loop
10361 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10362 && !nested_in_vect_loop_p (vect_loop, stmt_info)
10363 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10364 stmt_info, vectype_out,
10365 vectype_in, code1, code2,
10366 multi_step_cvt, interm_types))
10368 /* Elements in a vector with vect_used_by_reduction property cannot
10369 be reordered if the use chain with this property does not have the
10370 same operation. One such an example is s += a * b, where elements
10371 in a and b cannot be reordered. Here we check if the vector defined
10372 by STMT is only directly used in the reduction statement. */
10373 tree lhs = gimple_assign_lhs (stmt_info->stmt);
10374 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
10375 if (use_stmt_info
10376 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10377 return true;
10379 c1 = VEC_WIDEN_MULT_LO_EXPR;
10380 c2 = VEC_WIDEN_MULT_HI_EXPR;
10381 break;
10383 case DOT_PROD_EXPR:
10384 c1 = DOT_PROD_EXPR;
10385 c2 = DOT_PROD_EXPR;
10386 break;
10388 case SAD_EXPR:
10389 c1 = SAD_EXPR;
10390 c2 = SAD_EXPR;
10391 break;
10393 case VEC_WIDEN_MULT_EVEN_EXPR:
10394 /* Support the recursion induced just above. */
10395 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10396 c2 = VEC_WIDEN_MULT_ODD_EXPR;
10397 break;
10399 case WIDEN_LSHIFT_EXPR:
10400 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10401 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10402 break;
10404 CASE_CONVERT:
10405 c1 = VEC_UNPACK_LO_EXPR;
10406 c2 = VEC_UNPACK_HI_EXPR;
10407 break;
10409 case FLOAT_EXPR:
10410 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10411 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10412 break;
10414 case FIX_TRUNC_EXPR:
10415 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
10416 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
10417 break;
10419 default:
10420 gcc_unreachable ();
10423 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10424 std::swap (c1, c2);
10426 if (code == FIX_TRUNC_EXPR)
10428 /* The signedness is determined from output operand. */
10429 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10430 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10432 else
10434 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10435 optab2 = optab_for_tree_code (c2, vectype, optab_default);
10438 if (!optab1 || !optab2)
10439 return false;
10441 vec_mode = TYPE_MODE (vectype);
10442 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10443 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10444 return false;
10446 *code1 = c1;
10447 *code2 = c2;
10449 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10450 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10451 /* For scalar masks we may have different boolean
10452 vector types having the same QImode. Thus we
10453 add additional check for elements number. */
10454 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10455 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10456 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10458 /* Check if it's a multi-step conversion that can be done using intermediate
10459 types. */
10461 prev_type = vectype;
10462 prev_mode = vec_mode;
10464 if (!CONVERT_EXPR_CODE_P (code))
10465 return false;
10467 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10468 intermediate steps in promotion sequence. We try
10469 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
10470 not. */
10471 interm_types->create (MAX_INTERM_CVT_STEPS);
10472 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10474 intermediate_mode = insn_data[icode1].operand[0].mode;
10475 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10477 intermediate_type = vect_halve_mask_nunits (prev_type);
10478 if (intermediate_mode != TYPE_MODE (intermediate_type))
10479 return false;
10481 else
10482 intermediate_type
10483 = lang_hooks.types.type_for_mode (intermediate_mode,
10484 TYPE_UNSIGNED (prev_type));
10486 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10487 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10489 if (!optab3 || !optab4
10490 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10491 || insn_data[icode1].operand[0].mode != intermediate_mode
10492 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10493 || insn_data[icode2].operand[0].mode != intermediate_mode
10494 || ((icode1 = optab_handler (optab3, intermediate_mode))
10495 == CODE_FOR_nothing)
10496 || ((icode2 = optab_handler (optab4, intermediate_mode))
10497 == CODE_FOR_nothing))
10498 break;
10500 interm_types->quick_push (intermediate_type);
10501 (*multi_step_cvt)++;
10503 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10504 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10505 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10506 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10507 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10509 prev_type = intermediate_type;
10510 prev_mode = intermediate_mode;
10513 interm_types->release ();
10514 return false;
10518 /* Function supportable_narrowing_operation
10520 Check whether an operation represented by the code CODE is a
10521 narrowing operation that is supported by the target platform in
10522 vector form (i.e., when operating on arguments of type VECTYPE_IN
10523 and producing a result of type VECTYPE_OUT).
10525 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10526 and FLOAT. This function checks if these operations are supported by
10527 the target platform directly via vector tree-codes.
10529 Output:
10530 - CODE1 is the code of a vector operation to be used when
10531 vectorizing the operation, if available.
10532 - MULTI_STEP_CVT determines the number of required intermediate steps in
10533 case of multi-step conversion (like int->short->char - in that case
10534 MULTI_STEP_CVT will be 1).
10535 - INTERM_TYPES contains the intermediate type required to perform the
10536 narrowing operation (short in the above example). */
10538 bool
10539 supportable_narrowing_operation (enum tree_code code,
10540 tree vectype_out, tree vectype_in,
10541 enum tree_code *code1, int *multi_step_cvt,
10542 vec<tree> *interm_types)
10544 machine_mode vec_mode;
10545 enum insn_code icode1;
10546 optab optab1, interm_optab;
10547 tree vectype = vectype_in;
10548 tree narrow_vectype = vectype_out;
10549 enum tree_code c1;
10550 tree intermediate_type, prev_type;
10551 machine_mode intermediate_mode, prev_mode;
10552 int i;
10553 bool uns;
10555 *multi_step_cvt = 0;
10556 switch (code)
10558 CASE_CONVERT:
10559 c1 = VEC_PACK_TRUNC_EXPR;
10560 break;
10562 case FIX_TRUNC_EXPR:
10563 c1 = VEC_PACK_FIX_TRUNC_EXPR;
10564 break;
10566 case FLOAT_EXPR:
10567 c1 = VEC_PACK_FLOAT_EXPR;
10568 break;
10570 default:
10571 gcc_unreachable ();
10574 if (code == FIX_TRUNC_EXPR)
10575 /* The signedness is determined from output operand. */
10576 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10577 else
10578 optab1 = optab_for_tree_code (c1, vectype, optab_default);
10580 if (!optab1)
10581 return false;
10583 vec_mode = TYPE_MODE (vectype);
10584 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10585 return false;
10587 *code1 = c1;
10589 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10590 /* For scalar masks we may have different boolean
10591 vector types having the same QImode. Thus we
10592 add additional check for elements number. */
10593 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10594 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10595 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10597 if (code == FLOAT_EXPR)
10598 return false;
10600 /* Check if it's a multi-step conversion that can be done using intermediate
10601 types. */
10602 prev_mode = vec_mode;
10603 prev_type = vectype;
10604 if (code == FIX_TRUNC_EXPR)
10605 uns = TYPE_UNSIGNED (vectype_out);
10606 else
10607 uns = TYPE_UNSIGNED (vectype);
10609 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10610 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10611 costly than signed. */
10612 if (code == FIX_TRUNC_EXPR && uns)
10614 enum insn_code icode2;
10616 intermediate_type
10617 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10618 interm_optab
10619 = optab_for_tree_code (c1, intermediate_type, optab_default);
10620 if (interm_optab != unknown_optab
10621 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10622 && insn_data[icode1].operand[0].mode
10623 == insn_data[icode2].operand[0].mode)
10625 uns = false;
10626 optab1 = interm_optab;
10627 icode1 = icode2;
10631 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10632 intermediate steps in promotion sequence. We try
10633 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
10634 interm_types->create (MAX_INTERM_CVT_STEPS);
10635 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10637 intermediate_mode = insn_data[icode1].operand[0].mode;
10638 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10640 intermediate_type = vect_double_mask_nunits (prev_type);
10641 if (intermediate_mode != TYPE_MODE (intermediate_type))
10642 return false;
10644 else
10645 intermediate_type
10646 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10647 interm_optab
10648 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10649 optab_default);
10650 if (!interm_optab
10651 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10652 || insn_data[icode1].operand[0].mode != intermediate_mode
10653 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10654 == CODE_FOR_nothing))
10655 break;
10657 interm_types->quick_push (intermediate_type);
10658 (*multi_step_cvt)++;
10660 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10661 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10662 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10663 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10665 prev_mode = intermediate_mode;
10666 prev_type = intermediate_type;
10667 optab1 = interm_optab;
10670 interm_types->release ();
10671 return false;
10674 /* Generate and return a statement that sets vector mask MASK such that
10675 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
10677 gcall *
10678 vect_gen_while (tree mask, tree start_index, tree end_index)
10680 tree cmp_type = TREE_TYPE (start_index);
10681 tree mask_type = TREE_TYPE (mask);
10682 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10683 cmp_type, mask_type,
10684 OPTIMIZE_FOR_SPEED));
10685 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10686 start_index, end_index,
10687 build_zero_cst (mask_type));
10688 gimple_call_set_lhs (call, mask);
10689 return call;
10692 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10693 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
10695 tree
10696 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10697 tree end_index)
10699 tree tmp = make_ssa_name (mask_type);
10700 gcall *call = vect_gen_while (tmp, start_index, end_index);
10701 gimple_seq_add_stmt (seq, call);
10702 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10705 /* Try to compute the vector types required to vectorize STMT_INFO,
10706 returning true on success and false if vectorization isn't possible.
10708 On success:
10710 - Set *STMT_VECTYPE_OUT to:
10711 - NULL_TREE if the statement doesn't need to be vectorized;
10712 - boolean_type_node if the statement is a boolean operation whose
10713 vector type can only be determined once all the other vector types
10714 are known; and
10715 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10717 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10718 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10719 statement does not help to determine the overall number of units. */
10721 bool
10722 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
10723 tree *stmt_vectype_out,
10724 tree *nunits_vectype_out)
10726 gimple *stmt = stmt_info->stmt;
10728 *stmt_vectype_out = NULL_TREE;
10729 *nunits_vectype_out = NULL_TREE;
10731 if (gimple_get_lhs (stmt) == NULL_TREE
10732 /* MASK_STORE has no lhs, but is ok. */
10733 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
10735 if (is_a <gcall *> (stmt))
10737 /* Ignore calls with no lhs. These must be calls to
10738 #pragma omp simd functions, and what vectorization factor
10739 it really needs can't be determined until
10740 vectorizable_simd_clone_call. */
10741 if (dump_enabled_p ())
10742 dump_printf_loc (MSG_NOTE, vect_location,
10743 "defer to SIMD clone analysis.\n");
10744 return true;
10747 if (dump_enabled_p ())
10749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10750 "not vectorized: irregular stmt.");
10751 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10753 return false;
10756 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
10758 if (dump_enabled_p ())
10760 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10761 "not vectorized: vector stmt in loop:");
10762 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
10764 return false;
10767 tree vectype;
10768 tree scalar_type = NULL_TREE;
10769 if (STMT_VINFO_VECTYPE (stmt_info))
10770 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
10771 else
10773 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
10774 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
10775 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
10776 else
10777 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
10779 /* Pure bool ops don't participate in number-of-units computation.
10780 For comparisons use the types being compared. */
10781 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
10782 && is_gimple_assign (stmt)
10783 && gimple_assign_rhs_code (stmt) != COND_EXPR)
10785 *stmt_vectype_out = boolean_type_node;
10787 tree rhs1 = gimple_assign_rhs1 (stmt);
10788 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10789 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10790 scalar_type = TREE_TYPE (rhs1);
10791 else
10793 if (dump_enabled_p ())
10794 dump_printf_loc (MSG_NOTE, vect_location,
10795 "pure bool operation.\n");
10796 return true;
10800 if (dump_enabled_p ())
10802 dump_printf_loc (MSG_NOTE, vect_location,
10803 "get vectype for scalar type: ");
10804 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10805 dump_printf (MSG_NOTE, "\n");
10807 vectype = get_vectype_for_scalar_type (scalar_type);
10808 if (!vectype)
10810 if (dump_enabled_p ())
10812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10813 "not vectorized: unsupported data-type ");
10814 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10815 scalar_type);
10816 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10818 return false;
10821 if (!*stmt_vectype_out)
10822 *stmt_vectype_out = vectype;
10824 if (dump_enabled_p ())
10826 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10827 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
10828 dump_printf (MSG_NOTE, "\n");
10832 /* Don't try to compute scalar types if the stmt produces a boolean
10833 vector; use the existing vector type instead. */
10834 tree nunits_vectype;
10835 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10836 nunits_vectype = vectype;
10837 else
10839 /* The number of units is set according to the smallest scalar
10840 type (or the largest vector size, but we only support one
10841 vector size per vectorization). */
10842 if (*stmt_vectype_out != boolean_type_node)
10844 HOST_WIDE_INT dummy;
10845 scalar_type = vect_get_smallest_scalar_type (stmt_info,
10846 &dummy, &dummy);
10848 if (dump_enabled_p ())
10850 dump_printf_loc (MSG_NOTE, vect_location,
10851 "get vectype for scalar type: ");
10852 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
10853 dump_printf (MSG_NOTE, "\n");
10855 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
10857 if (!nunits_vectype)
10859 if (dump_enabled_p ())
10861 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10862 "not vectorized: unsupported data-type ");
10863 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
10864 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10866 return false;
10869 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
10870 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
10872 if (dump_enabled_p ())
10874 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10875 "not vectorized: different sized vector "
10876 "types in statement, ");
10877 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
10878 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10879 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, nunits_vectype);
10880 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10882 return false;
10885 if (dump_enabled_p ())
10887 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
10888 dump_generic_expr (MSG_NOTE, TDF_SLIM, nunits_vectype);
10889 dump_printf (MSG_NOTE, "\n");
10891 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
10892 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
10893 dump_printf (MSG_NOTE, "\n");
10896 *nunits_vectype_out = nunits_vectype;
10897 return true;
10900 /* Try to determine the correct vector type for STMT_INFO, which is a
10901 statement that produces a scalar boolean result. Return the vector
10902 type on success, otherwise return NULL_TREE. */
10904 tree
10905 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
10907 gimple *stmt = stmt_info->stmt;
10908 tree mask_type = NULL;
10909 tree vectype, scalar_type;
10911 if (is_gimple_assign (stmt)
10912 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
10913 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
10915 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
10916 mask_type = get_mask_type_for_scalar_type (scalar_type);
10918 if (!mask_type)
10920 if (dump_enabled_p ())
10921 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10922 "not vectorized: unsupported mask\n");
10923 return NULL_TREE;
10926 else
10928 tree rhs;
10929 ssa_op_iter iter;
10930 enum vect_def_type dt;
10932 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
10934 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
10936 if (dump_enabled_p ())
10938 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10939 "not vectorized: can't compute mask type "
10940 "for statement, ");
10941 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
10944 return NULL_TREE;
10947 /* No vectype probably means external definition.
10948 Allow it in case there is another operand which
10949 allows to determine mask type. */
10950 if (!vectype)
10951 continue;
10953 if (!mask_type)
10954 mask_type = vectype;
10955 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
10956 TYPE_VECTOR_SUBPARTS (vectype)))
10958 if (dump_enabled_p ())
10960 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10961 "not vectorized: different sized masks "
10962 "types in statement, ");
10963 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10964 mask_type);
10965 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10966 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10967 vectype);
10968 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10970 return NULL_TREE;
10972 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
10973 != VECTOR_BOOLEAN_TYPE_P (vectype))
10975 if (dump_enabled_p ())
10977 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10978 "not vectorized: mixed mask and "
10979 "nonmask vector types in statement, ");
10980 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10981 mask_type);
10982 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
10983 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
10984 vectype);
10985 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
10987 return NULL_TREE;
10991 /* We may compare boolean value loaded as vector of integers.
10992 Fix mask_type in such case. */
10993 if (mask_type
10994 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
10995 && gimple_code (stmt) == GIMPLE_ASSIGN
10996 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
10997 mask_type = build_same_sized_truth_vector_type (mask_type);
11000 /* No mask_type should mean loop invariant predicate.
11001 This is probably a subject for optimization in if-conversion. */
11002 if (!mask_type && dump_enabled_p ())
11004 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11005 "not vectorized: can't compute mask type "
11006 "for statement, ");
11007 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
11009 return mask_type;