gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (class _stmt_vec_info *stmt_info)
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
78 class loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 int misalign, enum vect_cost_model_location where)
97 if ((kind == vector_load || kind == unaligned_load)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_gather_load;
100 if ((kind == vector_store || kind == unaligned_store)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
102 kind = vector_scatter_store;
104 stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
105 body_cost_vec->safe_push (si);
107 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
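/* Editorial illustration, not part of the original file: a minimal stand-in
   for the cost bookkeeping above.  The types and target_cost_for_kind below
   are made up; the real record_stmt_cost works on stmt_info_for_cost entries
   and the target's builtin_vectorization_cost hook.  */
#if 0 /* Illustrative sketch only.  */
#include <stdio.h>

struct cost_entry { int count; int kind; int misalign; };

static struct cost_entry body_costs[128];
static int n_body_costs;

/* Hypothetical per-statement cost table indexed by KIND.  */
static int target_cost_for_kind (int kind) { return kind == 1 ? 2 : 1; }

/* Save the cost for later processing and return a preliminary estimate,
   mirroring record_stmt_cost: estimate = per-stmt cost * COUNT.  */
static unsigned
record_cost (int count, int kind, int misalign)
{
  struct cost_entry e = { count, kind, misalign };
  body_costs[n_body_costs++] = e;
  return (unsigned) (target_cost_for_kind (kind) * count);
}

int
main (void)
{
  unsigned c = record_cost (4, 1, 0);
  printf ("preliminary estimate: %u, entries saved: %d\n", c, n_body_costs);
  return 0;
}
#endif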
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
126 static tree
127 read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
128 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
130 tree vect_type, vect, vect_name, array_ref;
131 gimple *new_stmt;
133 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
134 vect_type = TREE_TYPE (TREE_TYPE (array));
135 vect = vect_create_destination_var (scalar_dest, vect_type);
136 array_ref = build4 (ARRAY_REF, vect_type, array,
137 build_int_cst (size_type_node, n),
138 NULL_TREE, NULL_TREE);
140 new_stmt = gimple_build_assign (vect, array_ref);
141 vect_name = make_ssa_name (vect, new_stmt);
142 gimple_assign_set_lhs (new_stmt, vect_name);
143 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
145 return vect_name;
148 /* ARRAY is an array of vectors created by create_vector_array.
149 Emit code to store SSA_NAME VECT in index N of the array.
150 The store is part of the vectorization of STMT_INFO. */
152 static void
153 write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
154 tree vect, tree array, unsigned HOST_WIDE_INT n)
156 tree array_ref;
157 gimple *new_stmt;
159 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
160 build_int_cst (size_type_node, n),
161 NULL_TREE, NULL_TREE);
163 new_stmt = gimple_build_assign (array_ref, vect);
164 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
167 /* PTR is a pointer to an array of type TYPE. Return a representation
168 of *PTR. The memory reference replaces those in FIRST_DR
169 (and its group). */
171 static tree
172 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
174 tree mem_ref;
176 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
177 /* Arrays have the same alignment as their type. */
178 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
179 return mem_ref;
182 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
183 Emit the clobber before *GSI. */
185 static void
186 vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
187 tree var)
189 tree clobber = build_clobber (TREE_TYPE (var));
190 gimple *new_stmt = gimple_build_assign (var, clobber);
191 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
194 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
196 /* Function vect_mark_relevant.
198 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
200 static void
201 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
202 enum vect_relevant relevant, bool live_p)
204 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
205 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
207 if (dump_enabled_p ())
208 dump_printf_loc (MSG_NOTE, vect_location,
209 "mark relevant %d, live %d: %G", relevant, live_p,
210 stmt_info->stmt);
212 /* If this stmt is an original stmt in a pattern, we might need to mark its
213 related pattern stmt instead of the original stmt. However, such stmts
 214 may have their own uses that are not in any pattern; in such cases the
215 stmt itself should be marked. */
216 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
218 /* This is the last stmt in a sequence that was detected as a
219 pattern that can potentially be vectorized. Don't mark the stmt
220 as relevant/live because it's not going to be vectorized.
221 Instead mark the pattern-stmt that replaces it. */
223 if (dump_enabled_p ())
224 dump_printf_loc (MSG_NOTE, vect_location,
225 "last stmt in pattern. don't mark"
226 " relevant/live.\n");
227 stmt_vec_info old_stmt_info = stmt_info;
228 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
229 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
230 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
231 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
235 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
236 STMT_VINFO_RELEVANT (stmt_info) = relevant;
238 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
239 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
241 if (dump_enabled_p ())
242 dump_printf_loc (MSG_NOTE, vect_location,
243 "already marked relevant/live.\n");
244 return;
247 worklist->safe_push (stmt_info);
251 /* Function is_simple_and_all_uses_invariant
253 Return true if STMT_INFO is simple and all uses of it are invariant. */
255 bool
256 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
257 loop_vec_info loop_vinfo)
259 tree op;
260 ssa_op_iter iter;
262 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
263 if (!stmt)
264 return false;
266 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
268 enum vect_def_type dt = vect_uninitialized_def;
270 if (!vect_is_simple_use (op, loop_vinfo, &dt))
272 if (dump_enabled_p ())
273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
274 "use not simple.\n");
275 return false;
278 if (dt != vect_external_def && dt != vect_constant_def)
279 return false;
281 return true;
284 /* Function vect_stmt_relevant_p.
286 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
287 is "relevant for vectorization".
289 A stmt is considered "relevant for vectorization" if:
290 - it has uses outside the loop.
291 - it has vdefs (it alters memory).
 292 - it is a control stmt in the loop (except for the exit condition).
294 CHECKME: what other side effects would the vectorizer allow? */
296 static bool
297 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
298 enum vect_relevant *relevant, bool *live_p)
300 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
301 ssa_op_iter op_iter;
302 imm_use_iterator imm_iter;
303 use_operand_p use_p;
304 def_operand_p def_p;
306 *relevant = vect_unused_in_scope;
307 *live_p = false;
309 /* cond stmt other than loop exit cond. */
310 if (is_ctrl_stmt (stmt_info->stmt)
311 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
312 *relevant = vect_used_in_scope;
314 /* changing memory. */
315 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
316 if (gimple_vdef (stmt_info->stmt)
317 && !gimple_clobber_p (stmt_info->stmt))
319 if (dump_enabled_p ())
320 dump_printf_loc (MSG_NOTE, vect_location,
321 "vec_stmt_relevant_p: stmt has vdefs.\n");
322 *relevant = vect_used_in_scope;
325 /* uses outside the loop. */
326 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
328 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
330 basic_block bb = gimple_bb (USE_STMT (use_p));
331 if (!flow_bb_inside_loop_p (loop, bb))
333 if (dump_enabled_p ())
334 dump_printf_loc (MSG_NOTE, vect_location,
335 "vec_stmt_relevant_p: used out of loop.\n");
337 if (is_gimple_debug (USE_STMT (use_p)))
338 continue;
340 /* We expect all such uses to be in the loop exit phis
341 (because of loop closed form) */
342 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
343 gcc_assert (bb == single_exit (loop)->dest);
345 *live_p = true;
350 if (*live_p && *relevant == vect_unused_in_scope
351 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
353 if (dump_enabled_p ())
354 dump_printf_loc (MSG_NOTE, vect_location,
355 "vec_stmt_relevant_p: stmt live but not relevant.\n");
356 *relevant = vect_used_only_live;
359 return (*live_p || *relevant);
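/* Editorial illustration, not part of the original file: a made-up scalar
   loop showing the relevance criteria described above (a store with a vdef,
   a value used after the loop, and pure address computation).  All names
   here are hypothetical; assume n <= 256.  */
#if 0 /* Illustrative sketch only.  */
int a[512], b[256], s;

void
example (int n)
{
  int last = 0;
  for (int i = 0; i < n; i++)
    {
      int t = i + 1;    /* used only for address computation: not relevant  */
      a[t] = b[i] * 2;  /* has a vdef (alters memory): relevant             */
      last = b[i];      /* defines a value used after the loop: live        */
    }
  s = last;             /* the use outside the loop                         */
}
#endif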
363 /* Function exist_non_indexing_operands_for_use_p
365 USE is one of the uses attached to STMT_INFO. Check if USE is
366 used in STMT_INFO for anything other than indexing an array. */
368 static bool
369 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
371 tree operand;
373 /* USE corresponds to some operand in STMT. If there is no data
374 reference in STMT, then any operand that corresponds to USE
375 is not indexing an array. */
376 if (!STMT_VINFO_DATA_REF (stmt_info))
377 return true;
 379 /* STMT has a data_ref. FORNOW this means that it's of one of
380 the following forms:
381 -1- ARRAY_REF = var
382 -2- var = ARRAY_REF
383 (This should have been verified in analyze_data_refs).
385 'var' in the second case corresponds to a def, not a use,
386 so USE cannot correspond to any operands that are not used
387 for array indexing.
389 Therefore, all we need to check is if STMT falls into the
390 first case, and whether var corresponds to USE. */
392 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
393 if (!assign || !gimple_assign_copy_p (assign))
395 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
396 if (call && gimple_call_internal_p (call))
398 internal_fn ifn = gimple_call_internal_fn (call);
399 int mask_index = internal_fn_mask_index (ifn);
400 if (mask_index >= 0
401 && use == gimple_call_arg (call, mask_index))
402 return true;
403 int stored_value_index = internal_fn_stored_value_index (ifn);
404 if (stored_value_index >= 0
405 && use == gimple_call_arg (call, stored_value_index))
406 return true;
407 if (internal_gather_scatter_fn_p (ifn)
408 && use == gimple_call_arg (call, 1))
409 return true;
411 return false;
414 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
415 return false;
416 operand = gimple_assign_rhs1 (assign);
417 if (TREE_CODE (operand) != SSA_NAME)
418 return false;
420 if (operand == use)
421 return true;
423 return false;
 427 /*
 428 Function process_use.
430 Inputs:
431 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
432 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
433 that defined USE. This is done by calling mark_relevant and passing it
434 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
435 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
436 be performed.
438 Outputs:
439 Generally, LIVE_P and RELEVANT are used to define the liveness and
440 relevance info of the DEF_STMT of this USE:
441 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
442 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
443 Exceptions:
444 - case 1: If USE is used only for address computations (e.g. array indexing),
445 which does not need to be directly vectorized, then the liveness/relevance
446 of the respective DEF_STMT is left unchanged.
447 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
 448 we skip DEF_STMT because it has already been processed.
449 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
450 "relevant" will be modified accordingly.
452 Return true if everything is as expected. Return false otherwise. */
454 static opt_result
455 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
456 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
457 bool force)
459 stmt_vec_info dstmt_vinfo;
460 basic_block bb, def_bb;
461 enum vect_def_type dt;
463 /* case 1: we are only interested in uses that need to be vectorized. Uses
464 that are used for address computation are not considered relevant. */
465 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
466 return opt_result::success ();
468 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
469 return opt_result::failure_at (stmt_vinfo->stmt,
470 "not vectorized:"
471 " unsupported use in stmt.\n");
473 if (!dstmt_vinfo)
474 return opt_result::success ();
476 def_bb = gimple_bb (dstmt_vinfo->stmt);
478 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
479 DSTMT_VINFO must have already been processed, because this should be the
480 only way that STMT, which is a reduction-phi, was put in the worklist,
481 as there should be no other uses for DSTMT_VINFO in the loop. So we just
482 check that everything is as expected, and we are done. */
483 bb = gimple_bb (stmt_vinfo->stmt);
484 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
485 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
486 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
487 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
488 && bb->loop_father == def_bb->loop_father)
490 if (dump_enabled_p ())
491 dump_printf_loc (MSG_NOTE, vect_location,
492 "reduc-stmt defining reduc-phi in the same nest.\n");
493 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
494 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
495 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
496 return opt_result::success ();
499 /* case 3a: outer-loop stmt defining an inner-loop stmt:
500 outer-loop-header-bb:
501 d = dstmt_vinfo
502 inner-loop:
503 stmt # use (d)
504 outer-loop-tail-bb:
505 ... */
506 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
508 if (dump_enabled_p ())
509 dump_printf_loc (MSG_NOTE, vect_location,
510 "outer-loop def-stmt defining inner-loop stmt.\n");
512 switch (relevant)
514 case vect_unused_in_scope:
515 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
516 vect_used_in_scope : vect_unused_in_scope;
517 break;
519 case vect_used_in_outer_by_reduction:
520 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
521 relevant = vect_used_by_reduction;
522 break;
524 case vect_used_in_outer:
525 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
526 relevant = vect_used_in_scope;
527 break;
529 case vect_used_in_scope:
530 break;
532 default:
533 gcc_unreachable ();
537 /* case 3b: inner-loop stmt defining an outer-loop stmt:
538 outer-loop-header-bb:
540 inner-loop:
541 d = dstmt_vinfo
542 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
543 stmt # use (d) */
544 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
546 if (dump_enabled_p ())
547 dump_printf_loc (MSG_NOTE, vect_location,
548 "inner-loop def-stmt defining outer-loop stmt.\n");
550 switch (relevant)
552 case vect_unused_in_scope:
553 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
554 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
555 vect_used_in_outer_by_reduction : vect_unused_in_scope;
556 break;
558 case vect_used_by_reduction:
559 case vect_used_only_live:
560 relevant = vect_used_in_outer_by_reduction;
561 break;
563 case vect_used_in_scope:
564 relevant = vect_used_in_outer;
565 break;
567 default:
568 gcc_unreachable ();
571 /* We are also not interested in uses on loop PHI backedges that are
572 inductions. Otherwise we'll needlessly vectorize the IV increment
573 and cause hybrid SLP for SLP inductions. Unless the PHI is live
574 of course. */
575 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
576 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
577 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
578 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
579 loop_latch_edge (bb->loop_father))
580 == use))
582 if (dump_enabled_p ())
583 dump_printf_loc (MSG_NOTE, vect_location,
584 "induction value on backedge.\n");
585 return opt_result::success ();
589 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
590 return opt_result::success ();
594 /* Function vect_mark_stmts_to_be_vectorized.
596 Not all stmts in the loop need to be vectorized. For example:
598 for i...
599 for j...
600 1. T0 = i + j
601 2. T1 = a[T0]
603 3. j = j + 1
 605 Stmts 1 and 3 do not need to be vectorized, because loop control and
606 addressing of vectorized data-refs are handled differently.
608 This pass detects such stmts. */
610 opt_result
611 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
613 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
614 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
615 unsigned int nbbs = loop->num_nodes;
616 gimple_stmt_iterator si;
617 unsigned int i;
618 basic_block bb;
619 bool live_p;
620 enum vect_relevant relevant;
622 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
624 auto_vec<stmt_vec_info, 64> worklist;
626 /* 1. Init worklist. */
627 for (i = 0; i < nbbs; i++)
629 bb = bbs[i];
630 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
632 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
633 if (dump_enabled_p ())
634 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
635 phi_info->stmt);
637 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
638 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
640 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
642 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
643 if (dump_enabled_p ())
644 dump_printf_loc (MSG_NOTE, vect_location,
645 "init: stmt relevant? %G", stmt_info->stmt);
647 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
648 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
652 /* 2. Process_worklist */
653 while (worklist.length () > 0)
655 use_operand_p use_p;
656 ssa_op_iter iter;
658 stmt_vec_info stmt_vinfo = worklist.pop ();
659 if (dump_enabled_p ())
660 dump_printf_loc (MSG_NOTE, vect_location,
661 "worklist: examine stmt: %G", stmt_vinfo->stmt);
663 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
664 (DEF_STMT) as relevant/irrelevant according to the relevance property
665 of STMT. */
666 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
668 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
669 propagated as is to the DEF_STMTs of its USEs.
671 One exception is when STMT has been identified as defining a reduction
672 variable; in this case we set the relevance to vect_used_by_reduction.
673 This is because we distinguish between two kinds of relevant stmts -
674 those that are used by a reduction computation, and those that are
675 (also) used by a regular computation. This allows us later on to
676 identify stmts that are used solely by a reduction, and therefore the
677 order of the results that they produce does not have to be kept. */
679 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
681 case vect_reduction_def:
682 gcc_assert (relevant != vect_unused_in_scope);
683 if (relevant != vect_unused_in_scope
684 && relevant != vect_used_in_scope
685 && relevant != vect_used_by_reduction
686 && relevant != vect_used_only_live)
687 return opt_result::failure_at
688 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
689 break;
691 case vect_nested_cycle:
692 if (relevant != vect_unused_in_scope
693 && relevant != vect_used_in_outer_by_reduction
694 && relevant != vect_used_in_outer)
695 return opt_result::failure_at
696 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
697 break;
699 case vect_double_reduction_def:
700 if (relevant != vect_unused_in_scope
701 && relevant != vect_used_by_reduction
702 && relevant != vect_used_only_live)
703 return opt_result::failure_at
704 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
705 break;
707 default:
708 break;
711 if (is_pattern_stmt_p (stmt_vinfo))
713 /* Pattern statements are not inserted into the code, so
714 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
715 have to scan the RHS or function arguments instead. */
716 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
718 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
719 tree op = gimple_assign_rhs1 (assign);
721 i = 1;
722 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
724 opt_result res
725 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
726 loop_vinfo, relevant, &worklist, false);
727 if (!res)
728 return res;
729 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
730 loop_vinfo, relevant, &worklist, false);
731 if (!res)
732 return res;
733 i = 2;
735 for (; i < gimple_num_ops (assign); i++)
737 op = gimple_op (assign, i);
738 if (TREE_CODE (op) == SSA_NAME)
740 opt_result res
741 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
742 &worklist, false);
743 if (!res)
744 return res;
748 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
750 for (i = 0; i < gimple_call_num_args (call); i++)
752 tree arg = gimple_call_arg (call, i);
753 opt_result res
754 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
755 &worklist, false);
756 if (!res)
757 return res;
761 else
762 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
764 tree op = USE_FROM_PTR (use_p);
765 opt_result res
766 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
767 &worklist, false);
768 if (!res)
769 return res;
772 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
774 gather_scatter_info gs_info;
775 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
776 gcc_unreachable ();
777 opt_result res
778 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
779 &worklist, true);
780 if (!res)
782 if (fatal)
783 *fatal = false;
784 return res;
787 } /* while worklist */
789 return opt_result::success ();
792 /* Compute the prologue cost for invariant or constant operands. */
794 static unsigned
795 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
796 unsigned opno, enum vect_def_type dt,
797 stmt_vector_for_cost *cost_vec)
799 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
800 tree op = gimple_op (stmt, opno);
801 unsigned prologue_cost = 0;
803 /* Without looking at the actual initializer a vector of
804 constants can be implemented as load from the constant pool.
805 When all elements are the same we can use a splat. */
806 tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
807 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
808 unsigned num_vects_to_check;
809 unsigned HOST_WIDE_INT const_nunits;
810 unsigned nelt_limit;
811 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
812 && ! multiple_p (const_nunits, group_size))
814 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
815 nelt_limit = const_nunits;
817 else
819 /* If either the vector has variable length or the vectors
820 are composed of repeated whole groups we only need to
821 cost construction once. All vectors will be the same. */
822 num_vects_to_check = 1;
823 nelt_limit = group_size;
825 tree elt = NULL_TREE;
826 unsigned nelt = 0;
827 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
829 unsigned si = j % group_size;
830 if (nelt == 0)
831 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
832 /* ??? We're just tracking whether all operands of a single
833 vector initializer are the same, ideally we'd check if
834 we emitted the same one already. */
835 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
836 opno))
837 elt = NULL_TREE;
838 nelt++;
839 if (nelt == nelt_limit)
841 /* ??? We need to pass down stmt_info for a vector type
842 even if it points to the wrong stmt. */
843 prologue_cost += record_stmt_cost
844 (cost_vec, 1,
845 dt == vect_external_def
846 ? (elt ? scalar_to_vec : vec_construct)
847 : vector_load,
848 stmt_info, 0, vect_prologue);
849 nelt = 0;
853 return prologue_cost;
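/* Editorial illustration, not part of the original file: the
   splat-vs-construct decision above re-expressed over a plain int array.
   cost_invariant_operands and its parameters are hypothetical names.  */
#if 0 /* Illustrative sketch only.  */
#include <stdio.h>
#include <stdbool.h>

/* Decide, per vector of NELT_LIMIT elements built from the group OPS
   (GROUP_SIZE scalars, reused modulo the group), whether a cheap splat
   suffices (all elements equal) or a full vector construction is needed.  */
static void
cost_invariant_operands (const int *ops, unsigned group_size,
			 unsigned num_vects, unsigned nelt_limit)
{
  int elt = 0;
  bool all_same = false;
  unsigned nelt = 0;
  for (unsigned j = 0; j < num_vects * nelt_limit; ++j)
    {
      unsigned si = j % group_size;
      if (nelt == 0)
	{
	  elt = ops[si];
	  all_same = true;
	}
      else if (ops[si] != elt)
	all_same = false;
      if (++nelt == nelt_limit)
	{
	  printf ("vector: %s\n", all_same ? "splat" : "construct");
	  nelt = 0;
	}
    }
}

int
main (void)
{
  int same[4] = { 7, 7, 7, 7 };
  int mixed[4] = { 1, 2, 3, 4 };
  cost_invariant_operands (same, 4, 1, 4);   /* splat */
  cost_invariant_operands (mixed, 4, 1, 4);  /* construct */
  return 0;
}
#endif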
856 /* Function vect_model_simple_cost.
858 Models cost for simple operations, i.e. those that only emit ncopies of a
859 single op. Right now, this does not account for multiple insns that could
860 be generated for the single vector op. We will handle that shortly. */
862 static void
863 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
864 enum vect_def_type *dt,
865 int ndts,
866 slp_tree node,
867 stmt_vector_for_cost *cost_vec)
869 int inside_cost = 0, prologue_cost = 0;
871 gcc_assert (cost_vec != NULL);
873 /* ??? Somehow we need to fix this at the callers. */
874 if (node)
875 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
877 if (node)
879 /* Scan operands and account for prologue cost of constants/externals.
880 ??? This over-estimates cost for multiple uses and should be
881 re-engineered. */
882 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
883 tree lhs = gimple_get_lhs (stmt);
884 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
886 tree op = gimple_op (stmt, i);
887 enum vect_def_type dt;
888 if (!op || op == lhs)
889 continue;
890 if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
891 && (dt == vect_constant_def || dt == vect_external_def))
892 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
893 i, dt, cost_vec);
896 else
897 /* Cost the "broadcast" of a scalar operand in to a vector operand.
898 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
899 cost model. */
900 for (int i = 0; i < ndts; i++)
901 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
902 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
903 stmt_info, 0, vect_prologue);
905 /* Adjust for two-operator SLP nodes. */
906 if (node && SLP_TREE_TWO_OPERATORS (node))
908 ncopies *= 2;
909 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
910 stmt_info, 0, vect_body);
913 /* Pass the inside-of-loop statements to the target-specific cost model. */
914 inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
915 stmt_info, 0, vect_body);
917 if (dump_enabled_p ())
918 dump_printf_loc (MSG_NOTE, vect_location,
919 "vect_model_simple_cost: inside_cost = %d, "
920 "prologue_cost = %d .\n", inside_cost, prologue_cost);
924 /* Model cost for type demotion and promotion operations. PWR is normally
925 zero for single-step promotions and demotions. It will be one if
926 two-step promotion/demotion is required, and so on. Each additional
927 step doubles the number of instructions required. */
929 static void
930 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
931 enum vect_def_type *dt, int pwr,
932 stmt_vector_for_cost *cost_vec)
934 int i, tmp;
935 int inside_cost = 0, prologue_cost = 0;
937 for (i = 0; i < pwr + 1; i++)
939 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
940 (i + 1) : i;
941 inside_cost += record_stmt_cost (cost_vec, vect_pow2 (tmp),
942 vec_promote_demote, stmt_info, 0,
943 vect_body);
946 /* FORNOW: Assuming maximum 2 args per stmts. */
947 for (i = 0; i < 2; i++)
948 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
949 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
950 stmt_info, 0, vect_prologue);
952 if (dump_enabled_p ())
953 dump_printf_loc (MSG_NOTE, vect_location,
954 "vect_model_promotion_demotion_cost: inside_cost = %d, "
955 "prologue_cost = %d .\n", inside_cost, prologue_cost);
958 /* Returns true if the current function returns DECL. */
960 static bool
961 cfun_returns (tree decl)
963 edge_iterator ei;
964 edge e;
965 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
967 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
968 if (!ret)
969 continue;
970 if (gimple_return_retval (ret) == decl)
971 return true;
972 /* We often end up with an aggregate copy to the result decl,
973 handle that case as well. First skip intermediate clobbers
974 though. */
 975 gimple *def = ret;
 976 do
 978 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
 980 while (gimple_clobber_p (def));
981 if (is_a <gassign *> (def)
982 && gimple_assign_lhs (def) == gimple_return_retval (ret)
983 && gimple_assign_rhs1 (def) == decl)
984 return true;
986 return false;
989 /* Function vect_model_store_cost
991 Models cost for stores. In the case of grouped accesses, one access
992 has the overhead of the grouped access attributed to it. */
994 static void
995 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
996 enum vect_def_type dt,
997 vect_memory_access_type memory_access_type,
998 vec_load_store_type vls_type, slp_tree slp_node,
999 stmt_vector_for_cost *cost_vec)
1001 unsigned int inside_cost = 0, prologue_cost = 0;
1002 stmt_vec_info first_stmt_info = stmt_info;
1003 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1005 /* ??? Somehow we need to fix this at the callers. */
1006 if (slp_node)
1007 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1009 if (vls_type == VLS_STORE_INVARIANT)
1011 if (slp_node)
1012 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
1013 1, dt, cost_vec);
1014 else
1015 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
1016 stmt_info, 0, vect_prologue);
1019 /* Grouped stores update all elements in the group at once,
1020 so we want the DR for the first statement. */
1021 if (!slp_node && grouped_access_p)
1022 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1024 /* True if we should include any once-per-group costs as well as
1025 the cost of the statement itself. For SLP we only get called
1026 once per group anyhow. */
1027 bool first_stmt_p = (first_stmt_info == stmt_info);
1029 /* We assume that the cost of a single store-lanes instruction is
1030 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1031 access is instead being provided by a permute-and-store operation,
1032 include the cost of the permutes. */
1033 if (first_stmt_p
1034 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
 1036 /* Uses high/low interleave or shuffle operations for each
1037 needed permute. */
1038 int group_size = DR_GROUP_SIZE (first_stmt_info);
1039 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1040 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1041 stmt_info, 0, vect_body);
1043 if (dump_enabled_p ())
1044 dump_printf_loc (MSG_NOTE, vect_location,
1045 "vect_model_store_cost: strided group_size = %d .\n",
1046 group_size);
1049 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1050 /* Costs of the stores. */
1051 if (memory_access_type == VMAT_ELEMENTWISE
1052 || memory_access_type == VMAT_GATHER_SCATTER)
1054 /* N scalar stores plus extracting the elements. */
1055 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1056 inside_cost += record_stmt_cost (cost_vec,
1057 ncopies * assumed_nunits,
1058 scalar_store, stmt_info, 0, vect_body);
1060 else
1061 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1063 if (memory_access_type == VMAT_ELEMENTWISE
1064 || memory_access_type == VMAT_STRIDED_SLP)
1066 /* N scalar stores plus extracting the elements. */
1067 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1068 inside_cost += record_stmt_cost (cost_vec,
1069 ncopies * assumed_nunits,
1070 vec_to_scalar, stmt_info, 0, vect_body);
1073 /* When vectorizing a store into the function result assign
1074 a penalty if the function returns in a multi-register location.
1075 In this case we assume we'll end up with having to spill the
1076 vector result and do piecewise loads as a conservative estimate. */
1077 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1078 if (base
1079 && (TREE_CODE (base) == RESULT_DECL
1080 || (DECL_P (base) && cfun_returns (base)))
1081 && !aggregate_value_p (base, cfun->decl))
1083 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1084 /* ??? Handle PARALLEL in some way. */
1085 if (REG_P (reg))
1087 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1088 /* Assume that a single reg-reg move is possible and cheap,
1089 do not account for vector to gp register move cost. */
1090 if (nregs > 1)
1092 /* Spill. */
1093 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1094 vector_store,
1095 stmt_info, 0, vect_epilogue);
1096 /* Loads. */
1097 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1098 scalar_load,
1099 stmt_info, 0, vect_epilogue);
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_NOTE, vect_location,
1106 "vect_model_store_cost: inside_cost = %d, "
1107 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1111 /* Calculate cost of DR's memory access. */
1112 void
1113 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1114 unsigned int *inside_cost,
1115 stmt_vector_for_cost *body_cost_vec)
1117 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1118 int alignment_support_scheme
1119 = vect_supportable_dr_alignment (dr_info, false);
1121 switch (alignment_support_scheme)
1123 case dr_aligned:
1125 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1126 vector_store, stmt_info, 0,
1127 vect_body);
1129 if (dump_enabled_p ())
1130 dump_printf_loc (MSG_NOTE, vect_location,
1131 "vect_model_store_cost: aligned.\n");
1132 break;
1135 case dr_unaligned_supported:
1137 /* Here, we assign an additional cost for the unaligned store. */
1138 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1139 unaligned_store, stmt_info,
1140 DR_MISALIGNMENT (dr_info),
1141 vect_body);
1142 if (dump_enabled_p ())
1143 dump_printf_loc (MSG_NOTE, vect_location,
1144 "vect_model_store_cost: unaligned supported by "
1145 "hardware.\n");
1146 break;
1149 case dr_unaligned_unsupported:
1151 *inside_cost = VECT_MAX_COST;
1153 if (dump_enabled_p ())
1154 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1155 "vect_model_store_cost: unsupported access.\n");
1156 break;
1159 default:
1160 gcc_unreachable ();
1165 /* Function vect_model_load_cost
1167 Models cost for loads. In the case of grouped accesses, one access has
1168 the overhead of the grouped access attributed to it. Since unaligned
1169 accesses are supported for loads, we also account for the costs of the
1170 access scheme chosen. */
1172 static void
1173 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1174 vect_memory_access_type memory_access_type,
1175 slp_instance instance,
1176 slp_tree slp_node,
1177 stmt_vector_for_cost *cost_vec)
1179 unsigned int inside_cost = 0, prologue_cost = 0;
1180 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1182 gcc_assert (cost_vec);
1184 /* ??? Somehow we need to fix this at the callers. */
1185 if (slp_node)
1186 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1188 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1190 /* If the load is permuted then the alignment is determined by
1191 the first group element not by the first scalar stmt DR. */
1192 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1193 /* Record the cost for the permutation. */
1194 unsigned n_perms;
1195 unsigned assumed_nunits
1196 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1197 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1198 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1199 slp_vf, instance, true,
1200 &n_perms);
1201 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1202 first_stmt_info, 0, vect_body);
1203 /* And adjust the number of loads performed. This handles
1204 redundancies as well as loads that are later dead. */
1205 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1206 bitmap_clear (perm);
1207 for (unsigned i = 0;
1208 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1209 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1210 ncopies = 0;
1211 bool load_seen = false;
1212 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1214 if (i % assumed_nunits == 0)
1216 if (load_seen)
1217 ncopies++;
1218 load_seen = false;
1220 if (bitmap_bit_p (perm, i))
1221 load_seen = true;
1223 if (load_seen)
1224 ncopies++;
1225 gcc_assert (ncopies
1226 <= (DR_GROUP_SIZE (first_stmt_info)
1227 - DR_GROUP_GAP (first_stmt_info)
1228 + assumed_nunits - 1) / assumed_nunits);
1231 /* Grouped loads read all elements in the group at once,
1232 so we want the DR for the first statement. */
1233 stmt_vec_info first_stmt_info = stmt_info;
1234 if (!slp_node && grouped_access_p)
1235 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1237 /* True if we should include any once-per-group costs as well as
1238 the cost of the statement itself. For SLP we only get called
1239 once per group anyhow. */
1240 bool first_stmt_p = (first_stmt_info == stmt_info);
1242 /* We assume that the cost of a single load-lanes instruction is
1243 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1244 access is instead being provided by a load-and-permute operation,
1245 include the cost of the permutes. */
1246 if (first_stmt_p
1247 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
 1249 /* Uses even/odd extract operations or shuffle operations
1250 for each needed permute. */
1251 int group_size = DR_GROUP_SIZE (first_stmt_info);
1252 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1253 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1254 stmt_info, 0, vect_body);
1256 if (dump_enabled_p ())
1257 dump_printf_loc (MSG_NOTE, vect_location,
1258 "vect_model_load_cost: strided group_size = %d .\n",
1259 group_size);
1262 /* The loads themselves. */
1263 if (memory_access_type == VMAT_ELEMENTWISE
1264 || memory_access_type == VMAT_GATHER_SCATTER)
1266 /* N scalar loads plus gathering them into a vector. */
1267 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1268 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1269 inside_cost += record_stmt_cost (cost_vec,
1270 ncopies * assumed_nunits,
1271 scalar_load, stmt_info, 0, vect_body);
1273 else
1274 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1275 &inside_cost, &prologue_cost,
1276 cost_vec, cost_vec, true);
1277 if (memory_access_type == VMAT_ELEMENTWISE
1278 || memory_access_type == VMAT_STRIDED_SLP)
1279 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1280 stmt_info, 0, vect_body);
1282 if (dump_enabled_p ())
1283 dump_printf_loc (MSG_NOTE, vect_location,
1284 "vect_model_load_cost: inside_cost = %d, "
1285 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1289 /* Calculate cost of DR's memory access. */
1290 void
1291 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1292 bool add_realign_cost, unsigned int *inside_cost,
1293 unsigned int *prologue_cost,
1294 stmt_vector_for_cost *prologue_cost_vec,
1295 stmt_vector_for_cost *body_cost_vec,
1296 bool record_prologue_costs)
1298 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1299 int alignment_support_scheme
1300 = vect_supportable_dr_alignment (dr_info, false);
1302 switch (alignment_support_scheme)
1304 case dr_aligned:
1306 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1307 stmt_info, 0, vect_body);
1309 if (dump_enabled_p ())
1310 dump_printf_loc (MSG_NOTE, vect_location,
1311 "vect_model_load_cost: aligned.\n");
1313 break;
1315 case dr_unaligned_supported:
1317 /* Here, we assign an additional cost for the unaligned load. */
1318 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1319 unaligned_load, stmt_info,
1320 DR_MISALIGNMENT (dr_info),
1321 vect_body);
1323 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_NOTE, vect_location,
1325 "vect_model_load_cost: unaligned supported by "
1326 "hardware.\n");
1328 break;
1330 case dr_explicit_realign:
1332 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1333 vector_load, stmt_info, 0, vect_body);
1334 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1335 vec_perm, stmt_info, 0, vect_body);
1337 /* FIXME: If the misalignment remains fixed across the iterations of
1338 the containing loop, the following cost should be added to the
1339 prologue costs. */
1340 if (targetm.vectorize.builtin_mask_for_load)
1341 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1342 stmt_info, 0, vect_body);
1344 if (dump_enabled_p ())
1345 dump_printf_loc (MSG_NOTE, vect_location,
1346 "vect_model_load_cost: explicit realign\n");
1348 break;
1350 case dr_explicit_realign_optimized:
1352 if (dump_enabled_p ())
1353 dump_printf_loc (MSG_NOTE, vect_location,
1354 "vect_model_load_cost: unaligned software "
1355 "pipelined.\n");
1357 /* Unaligned software pipeline has a load of an address, an initial
1358 load, and possibly a mask operation to "prime" the loop. However,
1359 if this is an access in a group of loads, which provide grouped
1360 access, then the above cost should only be considered for one
1361 access in the group. Inside the loop, there is a load op
1362 and a realignment op. */
1364 if (add_realign_cost && record_prologue_costs)
1366 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1367 vector_stmt, stmt_info,
1368 0, vect_prologue);
1369 if (targetm.vectorize.builtin_mask_for_load)
1370 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1371 vector_stmt, stmt_info,
1372 0, vect_prologue);
1375 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1376 stmt_info, 0, vect_body);
1377 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1378 stmt_info, 0, vect_body);
1380 if (dump_enabled_p ())
1381 dump_printf_loc (MSG_NOTE, vect_location,
1382 "vect_model_load_cost: explicit realign optimized"
1383 "\n");
1385 break;
1388 case dr_unaligned_unsupported:
1390 *inside_cost = VECT_MAX_COST;
1392 if (dump_enabled_p ())
1393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1394 "vect_model_load_cost: unsupported access.\n");
1395 break;
1398 default:
1399 gcc_unreachable ();
1403 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1404 the loop preheader for the vectorized stmt STMT_VINFO. */
1406 static void
1407 vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
1408 gimple_stmt_iterator *gsi)
1410 if (gsi)
1411 vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
1412 else
1414 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1416 if (loop_vinfo)
1418 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1419 basic_block new_bb;
1420 edge pe;
1422 if (nested_in_vect_loop_p (loop, stmt_vinfo))
1423 loop = loop->inner;
1425 pe = loop_preheader_edge (loop);
1426 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1427 gcc_assert (!new_bb);
1429 else
1431 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1432 basic_block bb;
1433 gimple_stmt_iterator gsi_bb_start;
1435 gcc_assert (bb_vinfo);
1436 bb = BB_VINFO_BB (bb_vinfo);
1437 gsi_bb_start = gsi_after_labels (bb);
1438 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1442 if (dump_enabled_p ())
1443 dump_printf_loc (MSG_NOTE, vect_location,
1444 "created new init_stmt: %G", new_stmt);
1447 /* Function vect_init_vector.
1449 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1450 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
 1451 vector type, a vector with all elements equal to VAL is created first.
1452 Place the initialization at BSI if it is not NULL. Otherwise, place the
1453 initialization at the loop preheader.
1454 Return the DEF of INIT_STMT.
1455 It will be used in the vectorization of STMT_INFO. */
1457 tree
1458 vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
1459 gimple_stmt_iterator *gsi)
1461 gimple *init_stmt;
1462 tree new_temp;
 1464 /* We abuse this function to push something to an SSA name with initial 'val'. */
1465 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1467 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1468 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1470 /* Scalar boolean value should be transformed into
1471 all zeros or all ones value before building a vector. */
1472 if (VECTOR_BOOLEAN_TYPE_P (type))
1474 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1475 tree false_val = build_zero_cst (TREE_TYPE (type));
1477 if (CONSTANT_CLASS_P (val))
1478 val = integer_zerop (val) ? false_val : true_val;
1479 else
1481 new_temp = make_ssa_name (TREE_TYPE (type));
1482 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1483 val, true_val, false_val);
1484 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1485 val = new_temp;
1488 else
1490 gimple_seq stmts = NULL;
1491 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1492 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1493 TREE_TYPE (type), val);
1494 else
1495 /* ??? Condition vectorization expects us to do
1496 promotion of invariant/external defs. */
1497 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1498 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1499 !gsi_end_p (gsi2); )
1501 init_stmt = gsi_stmt (gsi2);
1502 gsi_remove (&gsi2, false);
1503 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1507 val = build_vector_from_val (type, val);
1510 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1511 init_stmt = gimple_build_assign (new_temp, val);
1512 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1513 return new_temp;
1516 /* Function vect_get_vec_def_for_operand_1.
1518 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1519 with type DT that will be used in the vectorized stmt. */
1521 tree
1522 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1523 enum vect_def_type dt)
1525 tree vec_oprnd;
1526 stmt_vec_info vec_stmt_info;
1528 switch (dt)
1530 /* operand is a constant or a loop invariant. */
1531 case vect_constant_def:
1532 case vect_external_def:
1533 /* Code should use vect_get_vec_def_for_operand. */
1534 gcc_unreachable ();
1536 /* Operand is defined by a loop header phi. In case of nested
1537 cycles we also may have uses of the backedge def. */
1538 case vect_reduction_def:
1539 case vect_double_reduction_def:
1540 case vect_nested_cycle:
1541 case vect_induction_def:
1542 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1543 || dt == vect_nested_cycle);
1544 /* Fallthru. */
1546 /* operand is defined inside the loop. */
1547 case vect_internal_def:
1549 /* Get the def from the vectorized stmt. */
1550 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1551 /* Get vectorized pattern statement. */
1552 if (!vec_stmt_info
1553 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1554 && !STMT_VINFO_RELEVANT (def_stmt_info))
1555 vec_stmt_info = (STMT_VINFO_VEC_STMT
1556 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1557 gcc_assert (vec_stmt_info);
1558 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1559 vec_oprnd = PHI_RESULT (phi);
1560 else
1561 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1562 return vec_oprnd;
1565 default:
1566 gcc_unreachable ();
1571 /* Function vect_get_vec_def_for_operand.
1573 OP is an operand in STMT_VINFO. This function returns a (vector) def
1574 that will be used in the vectorized stmt for STMT_VINFO.
1576 In the case that OP is an SSA_NAME which is defined in the loop, then
1577 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1579 In case OP is an invariant or constant, a new stmt that creates a vector def
1580 needs to be introduced. VECTYPE may be used to specify a required type for
1581 vector invariant. */
1583 tree
1584 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1586 gimple *def_stmt;
1587 enum vect_def_type dt;
1588 bool is_simple_use;
1589 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1591 if (dump_enabled_p ())
1592 dump_printf_loc (MSG_NOTE, vect_location,
1593 "vect_get_vec_def_for_operand: %T\n", op);
1595 stmt_vec_info def_stmt_info;
1596 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1597 &def_stmt_info, &def_stmt);
1598 gcc_assert (is_simple_use);
1599 if (def_stmt && dump_enabled_p ())
1600 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1602 if (dt == vect_constant_def || dt == vect_external_def)
1604 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1605 tree vector_type;
1607 if (vectype)
1608 vector_type = vectype;
1609 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1610 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1611 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1612 else
1613 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1615 gcc_assert (vector_type);
1616 return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1618 else
1619 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1623 /* Function vect_get_vec_def_for_stmt_copy
1625 Return a vector-def for an operand. This function is used when the
1626 vectorized stmt to be created (by the caller to this function) is a "copy"
1627 created in case the vectorized result cannot fit in one vector, and several
1628 copies of the vector-stmt are required. In this case the vector-def is
1629 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1630 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1632 Context:
1633 In case the vectorization factor (VF) is bigger than the number
1634 of elements that can fit in a vectype (nunits), we have to generate
1635 more than one vector stmt to vectorize the scalar stmt. This situation
1636 arises when there are multiple data-types operated upon in the loop; the
1637 smallest data-type determines the VF, and as a result, when vectorizing
1638 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1639 vector stmt (each computing a vector of 'nunits' results, and together
1640 computing 'VF' results in each iteration). This function is called when
1641 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1642 which VF=16 and nunits=4, so the number of copies required is 4):
1644 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1646 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1647 VS1.1: vx.1 = memref1 VS1.2
1648 VS1.2: vx.2 = memref2 VS1.3
1649 VS1.3: vx.3 = memref3
1651 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1652 VSnew.1: vz1 = vx.1 + ... VSnew.2
1653 VSnew.2: vz2 = vx.2 + ... VSnew.3
1654 VSnew.3: vz3 = vx.3 + ...
1656 The vectorization of S1 is explained in vectorizable_load.
1657 The vectorization of S2:
1658 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1659 the function 'vect_get_vec_def_for_operand' is called to
1660 get the relevant vector-def for each operand of S2. For operand x it
1661 returns the vector-def 'vx.0'.
1663 To create the remaining copies of the vector-stmt (VSnew.j), this
1664 function is called to get the relevant vector-def for each operand. It is
1665 obtained from the respective VS1.j stmt, which is recorded in the
1666 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1668 For example, to obtain the vector-def 'vx.1' in order to create the
1669 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1670 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1671 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1672 and return its def ('vx.1').
1673 Overall, to create the above sequence this function will be called 3 times:
1674 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1675 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1676 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1678 tree
1679 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1681 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1682 if (!def_stmt_info)
1683 /* Do nothing; can reuse same def. */
1684 return vec_oprnd;
1686 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1687 gcc_assert (def_stmt_info);
1688 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1689 vec_oprnd = PHI_RESULT (phi);
1690 else
1691 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1692 return vec_oprnd;
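/* Editorial illustration, not part of the original file: the VF / nunits
   "copies" arithmetic from the comment above, plus a linked-list analogy for
   following STMT_VINFO_RELATED_STMT from one copy to the next.  struct
   vec_copy and its fields are made up.  */
#if 0 /* Illustrative sketch only.  */
#include <stdio.h>

struct vec_copy { const char *name; struct vec_copy *related; };

int
main (void)
{
  /* With VF = 16 and nunits = 4, each scalar stmt needs 4 vector copies.  */
  int vf = 16, nunits = 4;
  printf ("copies: %d\n", vf / nunits);

  /* The def for copy j+1 is found from the stmt that produced copy j,
     analogous to following STMT_VINFO_RELATED_STMT.  */
  struct vec_copy vs3 = { "vx.3", 0 };
  struct vec_copy vs2 = { "vx.2", &vs3 };
  struct vec_copy vs1 = { "vx.1", &vs2 };
  struct vec_copy vs0 = { "vx.0", &vs1 };
  for (struct vec_copy *p = &vs0; p; p = p->related)
    printf ("%s\n", p->name);
  return 0;
}
#endif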
1696 /* Get vectorized definitions for the operands to create a copy of an original
1697 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1699 void
1700 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1701 vec<tree> *vec_oprnds0,
1702 vec<tree> *vec_oprnds1)
1704 tree vec_oprnd = vec_oprnds0->pop ();
1706 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1707 vec_oprnds0->quick_push (vec_oprnd);
1709 if (vec_oprnds1 && vec_oprnds1->length ())
1711 vec_oprnd = vec_oprnds1->pop ();
1712 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1713 vec_oprnds1->quick_push (vec_oprnd);
1718 /* Get vectorized definitions for OP0 and OP1. */
1720 void
1721 vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
1722 vec<tree> *vec_oprnds0,
1723 vec<tree> *vec_oprnds1,
1724 slp_tree slp_node)
1726 if (slp_node)
1728 int nops = (op1 == NULL_TREE) ? 1 : 2;
1729 auto_vec<tree> ops (nops);
1730 auto_vec<vec<tree> > vec_defs (nops);
1732 ops.quick_push (op0);
1733 if (op1)
1734 ops.quick_push (op1);
1736 vect_get_slp_defs (ops, slp_node, &vec_defs);
1738 *vec_oprnds0 = vec_defs[0];
1739 if (op1)
1740 *vec_oprnds1 = vec_defs[1];
1742 else
1744 tree vec_oprnd;
1746 vec_oprnds0->create (1);
1747 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
1748 vec_oprnds0->quick_push (vec_oprnd);
1750 if (op1)
1752 vec_oprnds1->create (1);
1753 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
1754 vec_oprnds1->quick_push (vec_oprnd);
1759 /* Helper function called by vect_finish_replace_stmt and
1760 vect_finish_stmt_generation. Set the location of the new
1761 statement and create and return a stmt_vec_info for it. */
1763 static stmt_vec_info
1764 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
1766 vec_info *vinfo = stmt_info->vinfo;
1768 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1770 if (dump_enabled_p ())
1771 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1773 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1775 /* While EH edges will generally prevent vectorization, stmt might
1776 e.g. be in a must-not-throw region. Ensure newly created stmts
1777 that could throw are part of the same region. */
1778 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1779 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1780 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1782 return vec_stmt_info;
1785 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1786 which sets the same scalar result as STMT_INFO did. Create and return a
1787 stmt_vec_info for VEC_STMT. */
1789 stmt_vec_info
1790 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1792 gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
1794 gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
1795 gsi_replace (&gsi, vec_stmt, true);
1797 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1800 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1801 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1803 stmt_vec_info
1804 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1805 gimple_stmt_iterator *gsi)
1807 gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1809 if (!gsi_end_p (*gsi)
1810 && gimple_has_mem_ops (vec_stmt))
1812 gimple *at_stmt = gsi_stmt (*gsi);
1813 tree vuse = gimple_vuse (at_stmt);
1814 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1816 tree vdef = gimple_vdef (at_stmt);
1817 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1818 /* If we have an SSA vuse and insert a store, update virtual
1819 SSA form to avoid triggering the renamer. Do so only
1820 if we can easily see all uses - which is what almost always
1821 happens with the way vectorized stmts are inserted. */
1822 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1823 && ((is_gimple_assign (vec_stmt)
1824 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1825 || (is_gimple_call (vec_stmt)
1826 && !(gimple_call_flags (vec_stmt)
1827 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1829 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1830 gimple_set_vdef (vec_stmt, new_vdef);
1831 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1835 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1836 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1839 /* We want to vectorize a call to combined function CFN with function
1840 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1841 as the types of all inputs. Check whether this is possible using
1842 an internal function, returning its code if so or IFN_LAST if not. */
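/* For example (a sketch, not specific to any target): for a call whose
   combined function is CFN_BUILT_IN_SQRTF, internal_fn_p is false and
   associated_internal_fn maps the decl to IFN_SQRT; if
   direct_internal_fn_supported_p reports support for the given vector
   types, IFN_SQRT is returned, otherwise IFN_LAST. */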
1844 static internal_fn
1845 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1846 tree vectype_out, tree vectype_in)
1848 internal_fn ifn;
1849 if (internal_fn_p (cfn))
1850 ifn = as_internal_fn (cfn);
1851 else
1852 ifn = associated_internal_fn (fndecl);
1853 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1855 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1856 if (info.vectorizable)
1858 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1859 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1860 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1861 OPTIMIZE_FOR_SPEED))
1862 return ifn;
1865 return IFN_LAST;
1869 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1870 gimple_stmt_iterator *);
1872 /* Check whether a load or store statement in the loop described by
1873 LOOP_VINFO is possible in a fully-masked loop. This is testing
1874 whether the vectorizer pass has the appropriate support, as well as
1875 whether the target does.
1877 VLS_TYPE says whether the statement is a load or store and VECTYPE
1878 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1879 says how the load or store is going to be implemented and GROUP_SIZE
1880 is the number of load or store statements in the containing group.
1881 If the access is a gather load or scatter store, GS_INFO describes
1882 its arguments.
1884 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1885 supported, otherwise record the required mask types. */
1887 static void
1888 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1889 vec_load_store_type vls_type, int group_size,
1890 vect_memory_access_type memory_access_type,
1891 gather_scatter_info *gs_info)
1893 /* Invariant loads need no special support. */
1894 if (memory_access_type == VMAT_INVARIANT)
1895 return;
1897 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1898 machine_mode vecmode = TYPE_MODE (vectype);
1899 bool is_load = (vls_type == VLS_LOAD);
1900 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1902 if (is_load
1903 ? !vect_load_lanes_supported (vectype, group_size, true)
1904 : !vect_store_lanes_supported (vectype, group_size, true))
1906 if (dump_enabled_p ())
1907 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1908 "can't use a fully-masked loop because the"
1909 " target doesn't have an appropriate masked"
1910 " load/store-lanes instruction.\n");
1911 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1912 return;
1914 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1915 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1916 return;
1919 if (memory_access_type == VMAT_GATHER_SCATTER)
1921 internal_fn ifn = (is_load
1922 ? IFN_MASK_GATHER_LOAD
1923 : IFN_MASK_SCATTER_STORE);
1924 tree offset_type = TREE_TYPE (gs_info->offset);
1925 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1926 gs_info->memory_type,
1927 TYPE_SIGN (offset_type),
1928 gs_info->scale))
1930 if (dump_enabled_p ())
1931 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1932 "can't use a fully-masked loop because the"
1933 " target doesn't have an appropriate masked"
1934 " gather load or scatter store instruction.\n");
1935 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1936 return;
1938 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1939 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1940 return;
1943 if (memory_access_type != VMAT_CONTIGUOUS
1944 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1946 /* Element X of the data must come from iteration i * VF + X of the
1947 scalar loop. We need more work to support other mappings. */
1948 if (dump_enabled_p ())
1949 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1950 "can't use a fully-masked loop because an access"
1951 " isn't contiguous.\n");
1952 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1953 return;
1956 machine_mode mask_mode;
1957 if (!(targetm.vectorize.get_mask_mode
1958 (GET_MODE_NUNITS (vecmode),
1959 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1960 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1962 if (dump_enabled_p ())
1963 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1964 "can't use a fully-masked loop because the target"
1965 " doesn't have the appropriate masked load or"
1966 " store.\n");
1967 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1968 return;
1970 /* We might load more scalars than we need for permuting SLP loads.
1971 We checked in get_group_load_store_type that the extra elements
1972 don't leak into a new vector. */
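/* For example, with GROUP_SIZE == 3, a vectorization factor of 4 and
   8-element vectors, group_size * vf == 12 scalars span
   ceil (12 / 8) == 2 vectors, so NVECTORS == 2 masks of this vector
   type are recorded. */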
1973 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1974 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1975 unsigned int nvectors;
1976 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1977 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1978 else
1979 gcc_unreachable ();
1982 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1983 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1984 that needs to be applied to all loads and stores in a vectorized loop.
1985 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1987 MASK_TYPE is the type of both masks. If new statements are needed,
1988 insert them before GSI. */
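/* For example, in a fully-masked loop with 4-lane vectors, a final
   iteration that covers only 3 scalar iterations has a LOOP_MASK of
   { 1, 1, 1, 0 }; combined with a vectorized condition VEC_MASK of
   { 1, 0, 1, 1 } the result is { 1, 0, 1, 0 }, so only lanes 0 and 2
   access memory. */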
1990 static tree
1991 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1992 gimple_stmt_iterator *gsi)
1994 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1995 if (!loop_mask)
1996 return vec_mask;
1998 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1999 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
2000 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
2001 vec_mask, loop_mask);
2002 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
2003 return and_res;
2006 /* Determine whether we can use a gather load or scatter store to vectorize
2007 strided load or store STMT_INFO by truncating the current offset to a
2008 smaller width. We need to be able to construct an offset vector:
2010 { 0, X, X*2, X*3, ... }
2012 without loss of precision, where X is STMT_INFO's DR_STEP.
2014 Return true if this is possible, describing the gather load or scatter
2015 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
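/* A sketch of the kind of access this targets:

     for (int i = 0; i < n; ++i)
       sum += a[i * 5];

   Here DR_STEP is 5 * sizeof (*a). If COUNT * DR_STEP / SCALE still
   fits in the element width for SCALE == 1 or SCALE == sizeof (*a),
   the offset vector { 0, X, X*2, ... } can be built in that narrower
   type and a gather with that scale used instead of separate strided
   accesses. */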
2017 static bool
2018 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
2019 loop_vec_info loop_vinfo, bool masked_p,
2020 gather_scatter_info *gs_info)
2022 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2023 data_reference *dr = dr_info->dr;
2024 tree step = DR_STEP (dr);
2025 if (TREE_CODE (step) != INTEGER_CST)
2027 /* ??? Perhaps we could use range information here? */
2028 if (dump_enabled_p ())
2029 dump_printf_loc (MSG_NOTE, vect_location,
2030 "cannot truncate variable step.\n");
2031 return false;
2034 /* Get the number of bits in an element. */
2035 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2036 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2037 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2039 /* Set COUNT to the upper limit on the number of elements - 1.
2040 Start with the maximum vectorization factor. */
2041 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2043 /* Try lowering COUNT to the number of scalar latch iterations. */
2044 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2045 widest_int max_iters;
2046 if (max_loop_iterations (loop, &max_iters)
2047 && max_iters < count)
2048 count = max_iters.to_shwi ();
2050 /* Try scales of 1 and the element size. */
2051 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
2052 wi::overflow_type overflow = wi::OVF_NONE;
2053 for (int i = 0; i < 2; ++i)
2055 int scale = scales[i];
2056 widest_int factor;
2057 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2058 continue;
2060 /* See whether we can calculate COUNT * STEP / SCALE without
2061 overflowing ELEMENT_BITS bits. */
2062 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2063 if (overflow)
2064 continue;
2065 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2066 if (wi::min_precision (range, sign) > element_bits)
2068 overflow = wi::OVF_UNKNOWN;
2069 continue;
2072 /* See whether the target supports the operation. */
2073 tree memory_type = TREE_TYPE (DR_REF (dr));
2074 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
2075 memory_type, element_bits, sign, scale,
2076 &gs_info->ifn, &gs_info->element_type))
2077 continue;
2079 tree offset_type = build_nonstandard_integer_type (element_bits,
2080 sign == UNSIGNED);
2082 gs_info->decl = NULL_TREE;
2083 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2084 but we don't need to store that here. */
2085 gs_info->base = NULL_TREE;
2086 gs_info->offset = fold_convert (offset_type, step);
2087 gs_info->offset_dt = vect_constant_def;
2088 gs_info->offset_vectype = NULL_TREE;
2089 gs_info->scale = scale;
2090 gs_info->memory_type = memory_type;
2091 return true;
2094 if (overflow && dump_enabled_p ())
2095 dump_printf_loc (MSG_NOTE, vect_location,
2096 "truncating gather/scatter offset to %d bits"
2097 " might change its value.\n", element_bits);
2099 return false;
2102 /* Return true if we can use gather/scatter internal functions to
2103 vectorize STMT_INFO, which is a grouped or strided load or store.
2104 MASKED_P is true if load or store is conditional. When returning
2105 true, fill in GS_INFO with the information required to perform the
2106 operation. */
2108 static bool
2109 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2110 loop_vec_info loop_vinfo, bool masked_p,
2111 gather_scatter_info *gs_info)
2113 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2114 || gs_info->decl)
2115 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2116 masked_p, gs_info);
2118 scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
2119 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2120 tree offset_type = TREE_TYPE (gs_info->offset);
2121 unsigned int offset_bits = TYPE_PRECISION (offset_type);
2123 /* Enforced by vect_check_gather_scatter. */
2124 gcc_assert (element_bits >= offset_bits);
2126 /* If the elements are wider than the offset, convert the offset to the
2127 same width, without changing its sign. */
2128 if (element_bits > offset_bits)
2130 bool unsigned_p = TYPE_UNSIGNED (offset_type);
2131 offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
2132 gs_info->offset = fold_convert (offset_type, gs_info->offset);
2135 if (dump_enabled_p ())
2136 dump_printf_loc (MSG_NOTE, vect_location,
2137 "using gather/scatter for strided/grouped access,"
2138 " scale = %d\n", gs_info->scale);
2140 return true;
2143 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2144 elements with a known constant step. Return -1 if that step
2145 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2147 static int
2148 compare_step_with_zero (stmt_vec_info stmt_info)
2150 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2151 return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2152 size_zero_node);
2155 /* If the target supports a permute mask that reverses the elements in
2156 a vector of type VECTYPE, return that mask, otherwise return null. */
2158 static tree
2159 perm_mask_for_reverse (tree vectype)
2161 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2163 /* The encoding has a single stepped pattern. */
2164 vec_perm_builder sel (nunits, 1, 3);
2165 for (int i = 0; i < 3; ++i)
2166 sel.quick_push (nunits - 1 - i);
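/* For example, with nunits == 8 the three encoded elements are
   { 7, 6, 5 }; the single stepped pattern extends this to the full
   reversal { 7, 6, 5, 4, 3, 2, 1, 0 }. */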
2168 vec_perm_indices indices (sel, 1, nunits);
2169 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2170 return NULL_TREE;
2171 return vect_gen_perm_mask_checked (vectype, indices);
2174 /* STMT_INFO is either a masked or unconditional store. Return the value
2175 being stored. */
2177 tree
2178 vect_get_store_rhs (stmt_vec_info stmt_info)
2180 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2182 gcc_assert (gimple_assign_single_p (assign));
2183 return gimple_assign_rhs1 (assign);
2185 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2187 internal_fn ifn = gimple_call_internal_fn (call);
2188 int index = internal_fn_stored_value_index (ifn);
2189 gcc_assert (index >= 0);
2190 return gimple_call_arg (call, index);
2192 gcc_unreachable ();
2195 /* A subroutine of get_load_store_type, with a subset of the same
2196 arguments. Handle the case where STMT_INFO is part of a grouped load
2197 or store.
2199 For stores, the statements in the group are all consecutive
2200 and there is no gap at the end. For loads, the statements in the
2201 group might not be consecutive; there can be gaps between statements
2202 as well as at the end. */
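/* For instance (illustrative only), a loop that reads a[3*i] and
   a[3*i+1] but never a[3*i+2] forms a load group with a one-element
   gap at the end of each group, while one that reads only a[3*i] and
   a[3*i+2] also has a gap in the middle. */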
2204 static bool
2205 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2206 bool masked_p, vec_load_store_type vls_type,
2207 vect_memory_access_type *memory_access_type,
2208 gather_scatter_info *gs_info)
2210 vec_info *vinfo = stmt_info->vinfo;
2211 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2212 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2213 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2214 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2215 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2216 bool single_element_p = (stmt_info == first_stmt_info
2217 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2218 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2219 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2221 /* True if the vectorized statements would access beyond the last
2222 statement in the group. */
2223 bool overrun_p = false;
2225 /* True if we can cope with such overrun by peeling for gaps, so that
2226 there is at least one final scalar iteration after the vector loop. */
2227 bool can_overrun_p = (!masked_p
2228 && vls_type == VLS_LOAD
2229 && loop_vinfo
2230 && !loop->inner);
2232 /* There can only be a gap at the end of the group if the stride is
2233 known at compile time. */
2234 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2236 /* Stores can't yet have gaps. */
2237 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2239 if (slp)
2241 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2243 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2244 separated by the stride, until we have a complete vector.
2245 Fall back to scalar accesses if that isn't possible. */
2246 if (multiple_p (nunits, group_size))
2247 *memory_access_type = VMAT_STRIDED_SLP;
2248 else
2249 *memory_access_type = VMAT_ELEMENTWISE;
2251 else
2253 overrun_p = loop_vinfo && gap != 0;
2254 if (overrun_p && vls_type != VLS_LOAD)
2256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2257 "Grouped store with gaps requires"
2258 " non-consecutive accesses\n");
2259 return false;
2261 /* An overrun is fine if the trailing elements are smaller
2262 than the alignment boundary B. Every vector access will
2263 be a multiple of B and so we are guaranteed to access a
2264 non-gap element in the same B-sized block. */
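/* E.g. with a known 16-byte alignment and 4-byte elements, B covers
   four elements, so a trailing gap of up to three elements still
   leaves a non-gap element in every 16-byte block that is accessed. */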
2265 if (overrun_p
2266 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2267 / vect_get_scalar_dr_size (first_dr_info)))
2268 overrun_p = false;
2270 /* If the gap splits the vector in half and the target
2271 can do half-vector operations, avoid the epilogue peeling
2272 by loading only half of the vector. Usually the
2273 construction with an upper zero half will be elided. */
2274 dr_alignment_support alignment_support_scheme;
2275 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2276 machine_mode vmode;
2277 if (overrun_p
2278 && !masked_p
2279 && (((alignment_support_scheme
2280 = vect_supportable_dr_alignment (first_dr_info, false)))
2281 == dr_aligned
2282 || alignment_support_scheme == dr_unaligned_supported)
2283 && known_eq (nunits, (group_size - gap) * 2)
2284 && known_eq (nunits, group_size)
2285 && mode_for_vector (elmode, (group_size - gap)).exists (&vmode)
2286 && VECTOR_MODE_P (vmode)
2287 && targetm.vector_mode_supported_p (vmode)
2288 && (convert_optab_handler (vec_init_optab,
2289 TYPE_MODE (vectype), vmode)
2290 != CODE_FOR_nothing))
2291 overrun_p = false;
2293 if (overrun_p && !can_overrun_p)
2295 if (dump_enabled_p ())
2296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2297 "Peeling for outer loop is not supported\n");
2298 return false;
2300 *memory_access_type = VMAT_CONTIGUOUS;
2303 else
2305 /* We can always handle this case using elementwise accesses,
2306 but see if something more efficient is available. */
2307 *memory_access_type = VMAT_ELEMENTWISE;
2309 /* If there is a gap at the end of the group then these optimizations
2310 would access excess elements in the last iteration. */
2311 bool would_overrun_p = (gap != 0);
2312 /* An overrun is fine if the trailing elements are smaller than the
2313 alignment boundary B. Every vector access will be a multiple of B
2314 and so we are guaranteed to access a non-gap element in the
2315 same B-sized block. */
2316 if (would_overrun_p
2317 && !masked_p
2318 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2319 / vect_get_scalar_dr_size (first_dr_info)))
2320 would_overrun_p = false;
2322 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2323 && (can_overrun_p || !would_overrun_p)
2324 && compare_step_with_zero (stmt_info) > 0)
2326 /* First cope with the degenerate case of a single-element
2327 vector. */
2328 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2329 *memory_access_type = VMAT_CONTIGUOUS;
2331 /* Otherwise try using LOAD/STORE_LANES. */
2332 if (*memory_access_type == VMAT_ELEMENTWISE
2333 && (vls_type == VLS_LOAD
2334 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2335 : vect_store_lanes_supported (vectype, group_size,
2336 masked_p)))
2338 *memory_access_type = VMAT_LOAD_STORE_LANES;
2339 overrun_p = would_overrun_p;
2342 /* If that fails, try using permuting loads. */
2343 if (*memory_access_type == VMAT_ELEMENTWISE
2344 && (vls_type == VLS_LOAD
2345 ? vect_grouped_load_supported (vectype, single_element_p,
2346 group_size)
2347 : vect_grouped_store_supported (vectype, group_size)))
2349 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2350 overrun_p = would_overrun_p;
2354 /* As a last resort, try using a gather load or scatter store.
2356 ??? Although the code can handle all group sizes correctly,
2357 it probably isn't a win to use separate strided accesses based
2358 on nearby locations. Or, even if it's a win over scalar code,
2359 it might not be a win over vectorizing at a lower VF, if that
2360 allows us to use contiguous accesses. */
2361 if (*memory_access_type == VMAT_ELEMENTWISE
2362 && single_element_p
2363 && loop_vinfo
2364 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2365 masked_p, gs_info))
2366 *memory_access_type = VMAT_GATHER_SCATTER;
2369 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2371 /* STMT_INFO is the leader of the group. Check the operands of all the
2372 stmts of the group. */
2373 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2374 while (next_stmt_info)
2376 tree op = vect_get_store_rhs (next_stmt_info);
2377 enum vect_def_type dt;
2378 if (!vect_is_simple_use (op, vinfo, &dt))
2380 if (dump_enabled_p ())
2381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2382 "use not simple.\n");
2383 return false;
2385 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2389 if (overrun_p)
2391 gcc_assert (can_overrun_p);
2392 if (dump_enabled_p ())
2393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2394 "Data access with gaps requires scalar "
2395 "epilogue loop\n");
2396 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2399 return true;
2402 /* A subroutine of get_load_store_type, with a subset of the same
2403 arguments. Handle the case where STMT_INFO is a load or store that
2404 accesses consecutive elements with a negative step. */
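/* A typical source of such accesses (illustrative):

     for (int i = n - 1; i >= 0; --i)
       x[i] = y[i];

   Both data references step backwards by sizeof (*x) per iteration;
   when a reverse permute is available the accesses stay contiguous
   and the vectors are simply reversed (VMAT_CONTIGUOUS_REVERSE). */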
2406 static vect_memory_access_type
2407 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2408 vec_load_store_type vls_type,
2409 unsigned int ncopies)
2411 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2412 dr_alignment_support alignment_support_scheme;
2414 if (ncopies > 1)
2416 if (dump_enabled_p ())
2417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2418 "multiple types with negative step.\n");
2419 return VMAT_ELEMENTWISE;
2422 alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2423 if (alignment_support_scheme != dr_aligned
2424 && alignment_support_scheme != dr_unaligned_supported)
2426 if (dump_enabled_p ())
2427 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2428 "negative step but alignment required.\n");
2429 return VMAT_ELEMENTWISE;
2432 if (vls_type == VLS_STORE_INVARIANT)
2434 if (dump_enabled_p ())
2435 dump_printf_loc (MSG_NOTE, vect_location,
2436 "negative step with invariant source;"
2437 " no permute needed.\n");
2438 return VMAT_CONTIGUOUS_DOWN;
2441 if (!perm_mask_for_reverse (vectype))
2443 if (dump_enabled_p ())
2444 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2445 "negative step and reversing not supported.\n");
2446 return VMAT_ELEMENTWISE;
2449 return VMAT_CONTIGUOUS_REVERSE;
2452 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2453 if there is a memory access type that the vectorized form can use,
2454 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2455 or scatters, fill in GS_INFO accordingly.
2457 SLP says whether we're performing SLP rather than loop vectorization.
2458 MASKED_P is true if the statement is conditional on a vectorized mask.
2459 VECTYPE is the vector type that the vectorized statements will use.
2460 NCOPIES is the number of vector statements that will be needed. */
2462 static bool
2463 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2464 bool masked_p, vec_load_store_type vls_type,
2465 unsigned int ncopies,
2466 vect_memory_access_type *memory_access_type,
2467 gather_scatter_info *gs_info)
2469 vec_info *vinfo = stmt_info->vinfo;
2470 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2471 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2472 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2474 *memory_access_type = VMAT_GATHER_SCATTER;
2475 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2476 gcc_unreachable ();
2477 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2478 &gs_info->offset_dt,
2479 &gs_info->offset_vectype))
2481 if (dump_enabled_p ())
2482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2483 "%s index use not simple.\n",
2484 vls_type == VLS_LOAD ? "gather" : "scatter");
2485 return false;
2488 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2490 if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2491 vls_type, memory_access_type, gs_info))
2492 return false;
2494 else if (STMT_VINFO_STRIDED_P (stmt_info))
2496 gcc_assert (!slp);
2497 if (loop_vinfo
2498 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2499 masked_p, gs_info))
2500 *memory_access_type = VMAT_GATHER_SCATTER;
2501 else
2502 *memory_access_type = VMAT_ELEMENTWISE;
2504 else
2506 int cmp = compare_step_with_zero (stmt_info);
2507 if (cmp < 0)
2508 *memory_access_type = get_negative_load_store_type
2509 (stmt_info, vectype, vls_type, ncopies);
2510 else if (cmp == 0)
2512 gcc_assert (vls_type == VLS_LOAD);
2513 *memory_access_type = VMAT_INVARIANT;
2515 else
2516 *memory_access_type = VMAT_CONTIGUOUS;
2519 if ((*memory_access_type == VMAT_ELEMENTWISE
2520 || *memory_access_type == VMAT_STRIDED_SLP)
2521 && !nunits.is_constant ())
2523 if (dump_enabled_p ())
2524 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2525 "Not using elementwise accesses due to variable "
2526 "vectorization factor.\n");
2527 return false;
2530 /* FIXME: At the moment the cost model seems to underestimate the
2531 cost of using elementwise accesses. This check preserves the
2532 traditional behavior until that can be fixed. */
2533 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2534 if (!first_stmt_info)
2535 first_stmt_info = stmt_info;
2536 if (*memory_access_type == VMAT_ELEMENTWISE
2537 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2538 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2539 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2540 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2542 if (dump_enabled_p ())
2543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2544 "not falling back to elementwise accesses\n");
2545 return false;
2547 return true;
2550 /* Return true if boolean argument MASK is suitable for vectorizing
2551 conditional load or store STMT_INFO. When returning true, store the type
2552 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2553 in *MASK_VECTYPE_OUT. */
2555 static bool
2556 vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
2557 vect_def_type *mask_dt_out,
2558 tree *mask_vectype_out)
2560 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2562 if (dump_enabled_p ())
2563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2564 "mask argument is not a boolean.\n");
2565 return false;
2568 if (TREE_CODE (mask) != SSA_NAME)
2570 if (dump_enabled_p ())
2571 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2572 "mask argument is not an SSA name.\n");
2573 return false;
2576 enum vect_def_type mask_dt;
2577 tree mask_vectype;
2578 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2580 if (dump_enabled_p ())
2581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2582 "mask use not simple.\n");
2583 return false;
2586 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2587 if (!mask_vectype)
2588 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2590 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2592 if (dump_enabled_p ())
2593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2594 "could not find an appropriate vector mask type.\n");
2595 return false;
2598 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2599 TYPE_VECTOR_SUBPARTS (vectype)))
2601 if (dump_enabled_p ())
2602 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2603 "vector mask type %T"
2604 " does not match vector data type %T.\n",
2605 mask_vectype, vectype);
2607 return false;
2610 *mask_dt_out = mask_dt;
2611 *mask_vectype_out = mask_vectype;
2612 return true;
2615 /* Return true if stored value RHS is suitable for vectorizing store
2616 statement STMT_INFO. When returning true, store the type of the
2617 definition in *RHS_DT_OUT, the type of the vectorized store value in
2618 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2620 static bool
2621 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2622 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2623 vec_load_store_type *vls_type_out)
2625 /* In case this is a store from a constant, make sure
2626 native_encode_expr can handle it. */
2627 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2629 if (dump_enabled_p ())
2630 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2631 "cannot encode constant as a byte sequence.\n");
2632 return false;
2635 enum vect_def_type rhs_dt;
2636 tree rhs_vectype;
2637 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2639 if (dump_enabled_p ())
2640 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2641 "use not simple.\n");
2642 return false;
2645 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2646 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2648 if (dump_enabled_p ())
2649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2650 "incompatible vector types.\n");
2651 return false;
2654 *rhs_dt_out = rhs_dt;
2655 *rhs_vectype_out = rhs_vectype;
2656 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2657 *vls_type_out = VLS_STORE_INVARIANT;
2658 else
2659 *vls_type_out = VLS_STORE;
2660 return true;
2663 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2664 Note that we support masks with floating-point type, in which case the
2665 floats are interpreted as a bitmask. */
2667 static tree
2668 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2670 if (TREE_CODE (masktype) == INTEGER_TYPE)
2671 return build_int_cst (masktype, -1);
2672 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2674 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2675 mask = build_vector_from_val (masktype, mask);
2676 return vect_init_vector (stmt_info, mask, masktype, NULL);
2678 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2680 REAL_VALUE_TYPE r;
2681 long tmp[6];
2682 for (int j = 0; j < 6; ++j)
2683 tmp[j] = -1;
2684 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2685 tree mask = build_real (TREE_TYPE (masktype), r);
2686 mask = build_vector_from_val (masktype, mask);
2687 return vect_init_vector (stmt_info, mask, masktype, NULL);
2689 gcc_unreachable ();
2692 /* Build an all-zero merge value of type VECTYPE while vectorizing
2693 STMT_INFO as a gather load. */
2695 static tree
2696 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2698 tree merge;
2699 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2700 merge = build_int_cst (TREE_TYPE (vectype), 0);
2701 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2703 REAL_VALUE_TYPE r;
2704 long tmp[6];
2705 for (int j = 0; j < 6; ++j)
2706 tmp[j] = 0;
2707 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2708 merge = build_real (TREE_TYPE (vectype), r);
2710 else
2711 gcc_unreachable ();
2712 merge = build_vector_from_val (vectype, merge);
2713 return vect_init_vector (stmt_info, merge, vectype, NULL);
2716 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2717 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2718 the gather load operation. If the load is conditional, MASK is the
2719 unvectorized condition and MASK_DT is its definition type, otherwise
2720 MASK is null. */
2722 static void
2723 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2724 gimple_stmt_iterator *gsi,
2725 stmt_vec_info *vec_stmt,
2726 gather_scatter_info *gs_info,
2727 tree mask)
2729 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2730 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2731 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2732 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2733 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2734 edge pe = loop_preheader_edge (loop);
2735 enum { NARROW, NONE, WIDEN } modifier;
2736 poly_uint64 gather_off_nunits
2737 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2739 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2740 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2741 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2742 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2743 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2744 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2745 tree scaletype = TREE_VALUE (arglist);
2746 tree real_masktype = masktype;
2747 gcc_checking_assert (types_compatible_p (srctype, rettype)
2748 && (!mask
2749 || TREE_CODE (masktype) == INTEGER_TYPE
2750 || types_compatible_p (srctype, masktype)));
2751 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2752 masktype = build_same_sized_truth_vector_type (srctype);
2754 tree mask_halftype = masktype;
2755 tree perm_mask = NULL_TREE;
2756 tree mask_perm_mask = NULL_TREE;
2757 if (known_eq (nunits, gather_off_nunits))
2758 modifier = NONE;
2759 else if (known_eq (nunits * 2, gather_off_nunits))
2761 modifier = WIDEN;
2763 /* Currently widening gathers and scatters are only supported for
2764 fixed-length vectors. */
2765 int count = gather_off_nunits.to_constant ();
2766 vec_perm_builder sel (count, count, 1);
2767 for (int i = 0; i < count; ++i)
2768 sel.quick_push (i | (count / 2));
2770 vec_perm_indices indices (sel, 1, count);
2771 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2772 indices);
2774 else if (known_eq (nunits, gather_off_nunits * 2))
2776 modifier = NARROW;
2778 /* Currently narrowing gathers and scatters are only supported for
2779 fixed-length vectors. */
2780 int count = nunits.to_constant ();
2781 vec_perm_builder sel (count, count, 1);
2782 sel.quick_grow (count);
2783 for (int i = 0; i < count; ++i)
2784 sel[i] = i < count / 2 ? i : i + count / 2;
2785 vec_perm_indices indices (sel, 2, count);
2786 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2788 ncopies *= 2;
2790 if (mask && masktype == real_masktype)
2792 for (int i = 0; i < count; ++i)
2793 sel[i] = i | (count / 2);
2794 indices.new_vector (sel, 2, count);
2795 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2797 else if (mask)
2798 mask_halftype
2799 = build_same_sized_truth_vector_type (gs_info->offset_vectype);
2801 else
2802 gcc_unreachable ();
2804 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2805 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2807 tree ptr = fold_convert (ptrtype, gs_info->base);
2808 if (!is_gimple_min_invariant (ptr))
2810 gimple_seq seq;
2811 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2812 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2813 gcc_assert (!new_bb);
2816 tree scale = build_int_cst (scaletype, gs_info->scale);
2818 tree vec_oprnd0 = NULL_TREE;
2819 tree vec_mask = NULL_TREE;
2820 tree src_op = NULL_TREE;
2821 tree mask_op = NULL_TREE;
2822 tree prev_res = NULL_TREE;
2823 stmt_vec_info prev_stmt_info = NULL;
2825 if (!mask)
2827 src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2828 mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2831 for (int j = 0; j < ncopies; ++j)
2833 tree op, var;
2834 if (modifier == WIDEN && (j & 1))
2835 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2836 perm_mask, stmt_info, gsi);
2837 else if (j == 0)
2838 op = vec_oprnd0
2839 = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2840 else
2841 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2842 vec_oprnd0);
2844 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2846 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2847 TYPE_VECTOR_SUBPARTS (idxtype)));
2848 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2849 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2850 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2851 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2852 op = var;
2855 if (mask)
2857 if (mask_perm_mask && (j & 1))
2858 mask_op = permute_vec_elements (mask_op, mask_op,
2859 mask_perm_mask, stmt_info, gsi);
2860 else
2862 if (j == 0)
2863 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2864 else if (modifier != NARROW || (j & 1) == 0)
2865 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2866 vec_mask);
2868 mask_op = vec_mask;
2869 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2871 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2872 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2873 gcc_assert (known_eq (sub1, sub2));
2874 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2875 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2876 gassign *new_stmt
2877 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2878 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2879 mask_op = var;
2882 if (modifier == NARROW && masktype != real_masktype)
2884 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2885 gassign *new_stmt
2886 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2887 : VEC_UNPACK_LO_EXPR,
2888 mask_op);
2889 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2890 mask_op = var;
2892 src_op = mask_op;
2895 tree mask_arg = mask_op;
2896 if (masktype != real_masktype)
2898 tree utype, optype = TREE_TYPE (mask_op);
2899 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2900 utype = real_masktype;
2901 else
2902 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2903 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2904 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2905 gassign *new_stmt
2906 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2907 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2908 mask_arg = var;
2909 if (!useless_type_conversion_p (real_masktype, utype))
2911 gcc_assert (TYPE_PRECISION (utype)
2912 <= TYPE_PRECISION (real_masktype));
2913 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2914 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2915 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2916 mask_arg = var;
2918 src_op = build_zero_cst (srctype);
2920 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2921 mask_arg, scale);
2923 stmt_vec_info new_stmt_info;
2924 if (!useless_type_conversion_p (vectype, rettype))
2926 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2927 TYPE_VECTOR_SUBPARTS (rettype)));
2928 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2929 gimple_call_set_lhs (new_call, op);
2930 vect_finish_stmt_generation (stmt_info, new_call, gsi);
2931 var = make_ssa_name (vec_dest);
2932 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2933 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2934 new_stmt_info
2935 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2937 else
2939 var = make_ssa_name (vec_dest, new_call);
2940 gimple_call_set_lhs (new_call, var);
2941 new_stmt_info
2942 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2945 if (modifier == NARROW)
2947 if ((j & 1) == 0)
2949 prev_res = var;
2950 continue;
2952 var = permute_vec_elements (prev_res, var, perm_mask,
2953 stmt_info, gsi);
2954 new_stmt_info = loop_vinfo->lookup_def (var);
2957 if (prev_stmt_info == NULL)
2958 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2959 else
2960 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2961 prev_stmt_info = new_stmt_info;
2965 /* Prepare the base and offset in GS_INFO for vectorization.
2966 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2967 to the vectorized offset argument for the first copy of STMT_INFO.
2968 STMT_INFO is the statement described by GS_INFO and LOOP is the
2969 containing loop. */
2971 static void
2972 vect_get_gather_scatter_ops (class loop *loop, stmt_vec_info stmt_info,
2973 gather_scatter_info *gs_info,
2974 tree *dataref_ptr, tree *vec_offset)
2976 gimple_seq stmts = NULL;
2977 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2978 if (stmts != NULL)
2980 basic_block new_bb;
2981 edge pe = loop_preheader_edge (loop);
2982 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2983 gcc_assert (!new_bb);
2985 tree offset_type = TREE_TYPE (gs_info->offset);
2986 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2987 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
2988 offset_vectype);
2991 /* Prepare to implement a grouped or strided load or store using
2992 the gather load or scatter store operation described by GS_INFO.
2993 STMT_INFO is the load or store statement.
2995 Set *DATAREF_BUMP to the amount that should be added to the base
2996 address after each copy of the vectorized statement. Set *VEC_OFFSET
2997 to an invariant offset vector in which element I has the value
2998 I * DR_STEP / SCALE. */
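/* For example, with DR_STEP == 8, four elements per vector and
   SCALE == 4, *DATAREF_BUMP is 32 bytes and *VEC_OFFSET is
   { 0, 2, 4, 6 }. */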
3000 static void
3001 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3002 loop_vec_info loop_vinfo,
3003 gather_scatter_info *gs_info,
3004 tree *dataref_bump, tree *vec_offset)
3006 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3007 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3008 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3009 gimple_seq stmts;
3011 tree bump = size_binop (MULT_EXPR,
3012 fold_convert (sizetype, DR_STEP (dr)),
3013 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3014 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
3015 if (stmts)
3016 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3018 /* The offset given in GS_INFO can have pointer type, so use the element
3019 type of the vector instead. */
3020 tree offset_type = TREE_TYPE (gs_info->offset);
3021 tree offset_vectype = get_vectype_for_scalar_type (offset_type);
3022 offset_type = TREE_TYPE (offset_vectype);
3024 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3025 tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
3026 ssize_int (gs_info->scale));
3027 step = fold_convert (offset_type, step);
3028 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
3030 /* Create {0, X, X*2, X*3, ...}. */
3031 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
3032 build_zero_cst (offset_type), step);
3033 if (stmts)
3034 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3037 /* Return the amount that should be added to a vector pointer to move
3038 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3039 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3040 vectorization. */
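/* For a contiguous access with a negative step this yields minus the
   size of AGGR_TYPE, so the pointer walks backwards through memory;
   invariant accesses get a zero increment. */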
3042 static tree
3043 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
3044 vect_memory_access_type memory_access_type)
3046 if (memory_access_type == VMAT_INVARIANT)
3047 return size_zero_node;
3049 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3050 tree step = vect_dr_behavior (dr_info)->step;
3051 if (tree_int_cst_sgn (step) == -1)
3052 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3053 return iv_step;
3056 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
3058 static bool
3059 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3060 stmt_vec_info *vec_stmt, slp_tree slp_node,
3061 tree vectype_in, stmt_vector_for_cost *cost_vec)
3063 tree op, vectype;
3064 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3065 vec_info *vinfo = stmt_info->vinfo;
3066 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3067 unsigned ncopies;
3069 op = gimple_call_arg (stmt, 0);
3070 vectype = STMT_VINFO_VECTYPE (stmt_info);
3071 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3073 /* Multiple types in SLP are handled by creating the appropriate number of
3074 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3075 case of SLP. */
3076 if (slp_node)
3077 ncopies = 1;
3078 else
3079 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3081 gcc_assert (ncopies >= 1);
3083 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3084 if (! char_vectype)
3085 return false;
3087 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3088 unsigned word_bytes;
3089 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3090 return false;
3092 /* The encoding uses one stepped pattern for each byte in the word. */
3093 vec_perm_builder elts (num_bytes, word_bytes, 3);
3094 for (unsigned i = 0; i < 3; ++i)
3095 for (unsigned j = 0; j < word_bytes; ++j)
3096 elts.quick_push ((i + 1) * word_bytes - j - 1);
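/* For a 32-bit bswap on a 16-byte vector (word_bytes == 4) this
   builds the selector { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8,
   15, 14, 13, 12 }, i.e. a byte reversal within each word. */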
3098 vec_perm_indices indices (elts, 1, num_bytes);
3099 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3100 return false;
3102 if (! vec_stmt)
3104 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3105 DUMP_VECT_SCOPE ("vectorizable_bswap");
3106 if (! slp_node)
3108 record_stmt_cost (cost_vec,
3109 1, vector_stmt, stmt_info, 0, vect_prologue);
3110 record_stmt_cost (cost_vec,
3111 ncopies, vec_perm, stmt_info, 0, vect_body);
3113 return true;
3116 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3118 /* Transform. */
3119 vec<tree> vec_oprnds = vNULL;
3120 stmt_vec_info new_stmt_info = NULL;
3121 stmt_vec_info prev_stmt_info = NULL;
3122 for (unsigned j = 0; j < ncopies; j++)
3124 /* Handle uses. */
3125 if (j == 0)
3126 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
3127 else
3128 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3130 /* Arguments are ready. Create the new vector stmt. */
3131 unsigned i;
3132 tree vop;
3133 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3135 gimple *new_stmt;
3136 tree tem = make_ssa_name (char_vectype);
3137 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3138 char_vectype, vop));
3139 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3140 tree tem2 = make_ssa_name (char_vectype);
3141 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3142 tem, tem, bswap_vconst);
3143 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3144 tem = make_ssa_name (vectype);
3145 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3146 vectype, tem2));
3147 new_stmt_info
3148 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3149 if (slp_node)
3150 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3153 if (slp_node)
3154 continue;
3156 if (j == 0)
3157 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3158 else
3159 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3161 prev_stmt_info = new_stmt_info;
3164 vec_oprnds.release ();
3165 return true;
3168 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3169 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3170 in a single step. On success, store the binary pack code in
3171 *CONVERT_CODE. */
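/* For example, narrowing a vector of 32-bit integers to a vector of
   16-bit integers can typically be done in a single step with
   VEC_PACK_TRUNC_EXPR, in which case that tree code is returned in
   *CONVERT_CODE. */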
3173 static bool
3174 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3175 tree_code *convert_code)
3177 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3178 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3179 return false;
3181 tree_code code;
3182 int multi_step_cvt = 0;
3183 auto_vec <tree, 8> interm_types;
3184 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3185 &code, &multi_step_cvt,
3186 &interm_types)
3187 || multi_step_cvt)
3188 return false;
3190 *convert_code = code;
3191 return true;
3194 /* Function vectorizable_call.
3196 Check if STMT_INFO performs a function call that can be vectorized.
3197 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3198 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3199 Return true if STMT_INFO is vectorizable in this way. */
3201 static bool
3202 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3203 stmt_vec_info *vec_stmt, slp_tree slp_node,
3204 stmt_vector_for_cost *cost_vec)
3206 gcall *stmt;
3207 tree vec_dest;
3208 tree scalar_dest;
3209 tree op;
3210 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3211 stmt_vec_info prev_stmt_info;
3212 tree vectype_out, vectype_in;
3213 poly_uint64 nunits_in;
3214 poly_uint64 nunits_out;
3215 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3216 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3217 vec_info *vinfo = stmt_info->vinfo;
3218 tree fndecl, new_temp, rhs_type;
3219 enum vect_def_type dt[4]
3220 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3221 vect_unknown_def_type };
3222 tree vectypes[ARRAY_SIZE (dt)] = {};
3223 int ndts = ARRAY_SIZE (dt);
3224 int ncopies, j;
3225 auto_vec<tree, 8> vargs;
3226 auto_vec<tree, 8> orig_vargs;
3227 enum { NARROW, NONE, WIDEN } modifier;
3228 size_t i, nargs;
3229 tree lhs;
3231 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3232 return false;
3234 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3235 && ! vec_stmt)
3236 return false;
3238 /* Is STMT_INFO a vectorizable call? */
3239 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3240 if (!stmt)
3241 return false;
3243 if (gimple_call_internal_p (stmt)
3244 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3245 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3246 /* Handled by vectorizable_load and vectorizable_store. */
3247 return false;
3249 if (gimple_call_lhs (stmt) == NULL_TREE
3250 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3251 return false;
3253 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3255 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3257 /* Process function arguments. */
3258 rhs_type = NULL_TREE;
3259 vectype_in = NULL_TREE;
3260 nargs = gimple_call_num_args (stmt);
3262 /* Bail out if the function has more than four arguments; we do not have
3263 interesting builtin functions to vectorize with more than two arguments
3264 except for fma. Calls with no arguments are not handled either. */
3265 if (nargs == 0 || nargs > 4)
3266 return false;
3268 /* Ignore the arguments of IFN_GOMP_SIMD_LANE; they are magic. */
3269 combined_fn cfn = gimple_call_combined_fn (stmt);
3270 if (cfn == CFN_GOMP_SIMD_LANE)
3272 nargs = 0;
3273 rhs_type = unsigned_type_node;
3276 int mask_opno = -1;
3277 if (internal_fn_p (cfn))
3278 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3280 for (i = 0; i < nargs; i++)
3282 op = gimple_call_arg (stmt, i);
3283 if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i]))
3285 if (dump_enabled_p ())
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3287 "use not simple.\n");
3288 return false;
3291 /* Skip the mask argument to an internal function. This operand
3292 has been converted via a pattern if necessary. */
3293 if ((int) i == mask_opno)
3294 continue;
3296 /* We can only handle calls with arguments of the same type. */
3297 if (rhs_type
3298 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3300 if (dump_enabled_p ())
3301 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3302 "argument types differ.\n");
3303 return false;
3305 if (!rhs_type)
3306 rhs_type = TREE_TYPE (op);
3308 if (!vectype_in)
3309 vectype_in = vectypes[i];
3310 else if (vectypes[i]
3311 && vectypes[i] != vectype_in)
3313 if (dump_enabled_p ())
3314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3315 "argument vector types differ.\n");
3316 return false;
3319 /* If all arguments are external or constant defs, use a vector type with
3320 the same size as the output vector type. */
3321 if (!vectype_in)
3322 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3323 if (vec_stmt)
3324 gcc_assert (vectype_in);
3325 if (!vectype_in)
3327 if (dump_enabled_p ())
3328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3329 "no vectype for scalar type %T\n", rhs_type);
3331 return false;
3334 /* FORNOW */
3335 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3336 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3337 if (known_eq (nunits_in * 2, nunits_out))
3338 modifier = NARROW;
3339 else if (known_eq (nunits_out, nunits_in))
3340 modifier = NONE;
3341 else if (known_eq (nunits_out * 2, nunits_in))
3342 modifier = WIDEN;
3343 else
3344 return false;
3346 /* We only handle functions that do not read or clobber memory. */
3347 if (gimple_vuse (stmt))
3349 if (dump_enabled_p ())
3350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3351 "function reads from or writes to memory.\n");
3352 return false;
3355 /* For now, we only vectorize functions if a target specific builtin
3356 is available. TODO -- in some cases, it might be profitable to
3357 insert the calls for pieces of the vector, in order to be able
3358 to vectorize other operations in the loop. */
3359 fndecl = NULL_TREE;
3360 internal_fn ifn = IFN_LAST;
3361 tree callee = gimple_call_fndecl (stmt);
3363 /* First try using an internal function. */
3364 tree_code convert_code = ERROR_MARK;
3365 if (cfn != CFN_LAST
3366 && (modifier == NONE
3367 || (modifier == NARROW
3368 && simple_integer_narrowing (vectype_out, vectype_in,
3369 &convert_code))))
3370 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3371 vectype_in);
3373 /* If that fails, try asking for a target-specific built-in function. */
3374 if (ifn == IFN_LAST)
3376 if (cfn != CFN_LAST)
3377 fndecl = targetm.vectorize.builtin_vectorized_function
3378 (cfn, vectype_out, vectype_in);
3379 else if (callee)
3380 fndecl = targetm.vectorize.builtin_md_vectorized_function
3381 (callee, vectype_out, vectype_in);
3384 if (ifn == IFN_LAST && !fndecl)
3386 if (cfn == CFN_GOMP_SIMD_LANE
3387 && !slp_node
3388 && loop_vinfo
3389 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3390 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3391 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3392 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3394 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3395 { 0, 1, 2, ... vf - 1 } vector. */
3396 gcc_assert (nargs == 0);
3398 else if (modifier == NONE
3399 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3400 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3401 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3402 return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3403 vectype_in, cost_vec);
3404 else
3406 if (dump_enabled_p ())
3407 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3408 "function is not vectorizable.\n");
3409 return false;
3413 if (slp_node)
3414 ncopies = 1;
3415 else if (modifier == NARROW && ifn == IFN_LAST)
3416 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3417 else
3418 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3420 /* Sanity check: make sure that at least one copy of the vectorized stmt
3421 needs to be generated. */
3422 gcc_assert (ncopies >= 1);
3424 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3425 if (!vec_stmt) /* transformation not required. */
3427 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3428 DUMP_VECT_SCOPE ("vectorizable_call");
3429 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3430 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3431 record_stmt_cost (cost_vec, ncopies / 2,
3432 vec_promote_demote, stmt_info, 0, vect_body);
3434 if (loop_vinfo && mask_opno >= 0)
3436 unsigned int nvectors = (slp_node
3437 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3438 : ncopies);
3439 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
3441 return true;
3444 /* Transform. */
3446 if (dump_enabled_p ())
3447 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3449 /* Handle def. */
3450 scalar_dest = gimple_call_lhs (stmt);
3451 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3453 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3455 stmt_vec_info new_stmt_info = NULL;
3456 prev_stmt_info = NULL;
3457 if (modifier == NONE || ifn != IFN_LAST)
3459 tree prev_res = NULL_TREE;
3460 vargs.safe_grow (nargs);
3461 orig_vargs.safe_grow (nargs);
3462 for (j = 0; j < ncopies; ++j)
3464 /* Build argument list for the vectorized call. */
3465 if (slp_node)
3467 auto_vec<vec<tree> > vec_defs (nargs);
3468 vec<tree> vec_oprnds0;
3470 for (i = 0; i < nargs; i++)
3471 vargs[i] = gimple_call_arg (stmt, i);
3472 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3473 vec_oprnds0 = vec_defs[0];
3475 /* Arguments are ready. Create the new vector stmt. */
3476 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3478 size_t k;
3479 for (k = 0; k < nargs; k++)
3481 vec<tree> vec_oprndsk = vec_defs[k];
3482 vargs[k] = vec_oprndsk[i];
3484 if (modifier == NARROW)
3486 /* We don't define any narrowing conditional functions
3487 at present. */
3488 gcc_assert (mask_opno < 0);
3489 tree half_res = make_ssa_name (vectype_in);
3490 gcall *call
3491 = gimple_build_call_internal_vec (ifn, vargs);
3492 gimple_call_set_lhs (call, half_res);
3493 gimple_call_set_nothrow (call, true);
3494 vect_finish_stmt_generation (stmt_info, call, gsi);
3495 if ((i & 1) == 0)
3497 prev_res = half_res;
3498 continue;
3500 new_temp = make_ssa_name (vec_dest);
3501 gimple *new_stmt
3502 = gimple_build_assign (new_temp, convert_code,
3503 prev_res, half_res);
3504 new_stmt_info
3505 = vect_finish_stmt_generation (stmt_info, new_stmt,
3506 gsi);
3508 else
3510 if (mask_opno >= 0 && masked_loop_p)
3512 unsigned int vec_num = vec_oprnds0.length ();
3513 /* Always true for SLP. */
3514 gcc_assert (ncopies == 1);
3515 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3516 vectype_out, i);
3517 vargs[mask_opno] = prepare_load_store_mask
3518 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3521 gcall *call;
3522 if (ifn != IFN_LAST)
3523 call = gimple_build_call_internal_vec (ifn, vargs);
3524 else
3525 call = gimple_build_call_vec (fndecl, vargs);
3526 new_temp = make_ssa_name (vec_dest, call);
3527 gimple_call_set_lhs (call, new_temp);
3528 gimple_call_set_nothrow (call, true);
3529 new_stmt_info
3530 = vect_finish_stmt_generation (stmt_info, call, gsi);
3532 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3535 for (i = 0; i < nargs; i++)
3537 vec<tree> vec_oprndsi = vec_defs[i];
3538 vec_oprndsi.release ();
3540 continue;
3543 if (mask_opno >= 0 && !vectypes[mask_opno])
3545 gcc_assert (modifier != WIDEN);
3546 vectypes[mask_opno]
3547 = build_same_sized_truth_vector_type (vectype_in);
3550 for (i = 0; i < nargs; i++)
3552 op = gimple_call_arg (stmt, i);
3553 if (j == 0)
3554 vec_oprnd0
3555 = vect_get_vec_def_for_operand (op, stmt_info, vectypes[i]);
3556 else
3557 vec_oprnd0
3558 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3560 orig_vargs[i] = vargs[i] = vec_oprnd0;
3563 if (mask_opno >= 0 && masked_loop_p)
3565 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3566 vectype_out, j);
3567 vargs[mask_opno]
3568 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3569 vargs[mask_opno], gsi);
3572 if (cfn == CFN_GOMP_SIMD_LANE)
3574 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3575 tree new_var
3576 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3577 gimple *init_stmt = gimple_build_assign (new_var, cst);
3578 vect_init_vector_1 (stmt_info, init_stmt, NULL);
3579 new_temp = make_ssa_name (vec_dest);
3580 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3581 new_stmt_info
3582 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3584 else if (modifier == NARROW)
3586 /* We don't define any narrowing conditional functions at
3587 present. */
3588 gcc_assert (mask_opno < 0);
3589 tree half_res = make_ssa_name (vectype_in);
3590 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3591 gimple_call_set_lhs (call, half_res);
3592 gimple_call_set_nothrow (call, true);
3593 vect_finish_stmt_generation (stmt_info, call, gsi);
3594 if ((j & 1) == 0)
3596 prev_res = half_res;
3597 continue;
3599 new_temp = make_ssa_name (vec_dest);
3600 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3601 prev_res, half_res);
3602 new_stmt_info
3603 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3605 else
3607 gcall *call;
3608 if (ifn != IFN_LAST)
3609 call = gimple_build_call_internal_vec (ifn, vargs);
3610 else
3611 call = gimple_build_call_vec (fndecl, vargs);
3612 new_temp = make_ssa_name (vec_dest, call);
3613 gimple_call_set_lhs (call, new_temp);
3614 gimple_call_set_nothrow (call, true);
3615 new_stmt_info
3616 = vect_finish_stmt_generation (stmt_info, call, gsi);
3619 if (j == (modifier == NARROW ? 1 : 0))
3620 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3621 else
3622 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3624 prev_stmt_info = new_stmt_info;
3627 else if (modifier == NARROW)
3629 /* We don't define any narrowing conditional functions at present. */
3630 gcc_assert (mask_opno < 0);
3631 for (j = 0; j < ncopies; ++j)
3633 /* Build argument list for the vectorized call. */
3634 if (j == 0)
3635 vargs.create (nargs * 2);
3636 else
3637 vargs.truncate (0);
3639 if (slp_node)
3641 auto_vec<vec<tree> > vec_defs (nargs);
3642 vec<tree> vec_oprnds0;
3644 for (i = 0; i < nargs; i++)
3645 vargs.quick_push (gimple_call_arg (stmt, i));
3646 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3647 vec_oprnds0 = vec_defs[0];
3649 /* Arguments are ready. Create the new vector stmt. */
3650 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3652 size_t k;
3653 vargs.truncate (0);
3654 for (k = 0; k < nargs; k++)
3656 vec<tree> vec_oprndsk = vec_defs[k];
3657 vargs.quick_push (vec_oprndsk[i]);
3658 vargs.quick_push (vec_oprndsk[i + 1]);
3660 gcall *call;
3661 if (ifn != IFN_LAST)
3662 call = gimple_build_call_internal_vec (ifn, vargs);
3663 else
3664 call = gimple_build_call_vec (fndecl, vargs);
3665 new_temp = make_ssa_name (vec_dest, call);
3666 gimple_call_set_lhs (call, new_temp);
3667 gimple_call_set_nothrow (call, true);
3668 new_stmt_info
3669 = vect_finish_stmt_generation (stmt_info, call, gsi);
3670 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3673 for (i = 0; i < nargs; i++)
3675 vec<tree> vec_oprndsi = vec_defs[i];
3676 vec_oprndsi.release ();
3678 continue;
3681 for (i = 0; i < nargs; i++)
3683 op = gimple_call_arg (stmt, i);
3684 if (j == 0)
3686 vec_oprnd0
3687 = vect_get_vec_def_for_operand (op, stmt_info,
3688 vectypes[i]);
3689 vec_oprnd1
3690 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3692 else
3694 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3695 2 * i + 1);
3696 vec_oprnd0
3697 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3698 vec_oprnd1
3699 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3702 vargs.quick_push (vec_oprnd0);
3703 vargs.quick_push (vec_oprnd1);
3706 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3707 new_temp = make_ssa_name (vec_dest, new_stmt);
3708 gimple_call_set_lhs (new_stmt, new_temp);
3709 new_stmt_info
3710 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3712 if (j == 0)
3713 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3714 else
3715 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3717 prev_stmt_info = new_stmt_info;
3720 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3722 else
3723 /* No current target implements this case. */
3724 return false;
3726 vargs.release ();
3728 /* The call in STMT might prevent it from being removed in DCE.
3729 However, we cannot remove it here, due to the way the SSA name
3730 it defines is mapped to the new definition. So just replace the
3731 RHS of the statement with something harmless. */
3733 if (slp_node)
3734 return true;
3736 stmt_info = vect_orig_stmt (stmt_info);
3737 lhs = gimple_get_lhs (stmt_info->stmt);
3739 gassign *new_stmt
3740 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3741 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3743 return true;
3747 struct simd_call_arg_info
3749 tree vectype;
3750 tree op;
3751 HOST_WIDE_INT linear_step;
3752 enum vect_def_type dt;
3753 unsigned int align;
3754 bool simd_lane_linear;
3757 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3758 is linear within a SIMD lane (but not within the whole loop), note it in
3759 *ARGINFO. */
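/* Illustrative sketch (hypothetical GIMPLE, names invented): in a simd loop,

     _1 = .GOMP_SIMD_LANE (simduid.0_5(D));
     _2 = (sizetype) _1;
     _3 = _2 * 4;
     p_4 = &a + _3;

   makes p_4 linear within a SIMD lane, with base &a and linear_step 4, even
   though p_4 is not a simple induction over the whole loop.  */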
3761 static void
3762 vect_simd_lane_linear (tree op, class loop *loop,
3763 struct simd_call_arg_info *arginfo)
3765 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3767 if (!is_gimple_assign (def_stmt)
3768 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3769 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3770 return;
3772 tree base = gimple_assign_rhs1 (def_stmt);
3773 HOST_WIDE_INT linear_step = 0;
3774 tree v = gimple_assign_rhs2 (def_stmt);
3775 while (TREE_CODE (v) == SSA_NAME)
3777 tree t;
3778 def_stmt = SSA_NAME_DEF_STMT (v);
3779 if (is_gimple_assign (def_stmt))
3780 switch (gimple_assign_rhs_code (def_stmt))
3782 case PLUS_EXPR:
3783 t = gimple_assign_rhs2 (def_stmt);
3784 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3785 return;
3786 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3787 v = gimple_assign_rhs1 (def_stmt);
3788 continue;
3789 case MULT_EXPR:
3790 t = gimple_assign_rhs2 (def_stmt);
3791 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3792 return;
3793 linear_step = tree_to_shwi (t);
3794 v = gimple_assign_rhs1 (def_stmt);
3795 continue;
3796 CASE_CONVERT:
3797 t = gimple_assign_rhs1 (def_stmt);
3798 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3799 || (TYPE_PRECISION (TREE_TYPE (v))
3800 < TYPE_PRECISION (TREE_TYPE (t))))
3801 return;
3802 if (!linear_step)
3803 linear_step = 1;
3804 v = t;
3805 continue;
3806 default:
3807 return;
3809 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3810 && loop->simduid
3811 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3812 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3813 == loop->simduid))
3815 if (!linear_step)
3816 linear_step = 1;
3817 arginfo->linear_step = linear_step;
3818 arginfo->op = base;
3819 arginfo->simd_lane_linear = true;
3820 return;
3825 /* Return the number of elements in vector type VECTYPE, which is associated
3826 with a SIMD clone. At present these vectors always have a constant
3827 length. */
3829 static unsigned HOST_WIDE_INT
3830 simd_clone_subparts (tree vectype)
3832 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3835 /* Function vectorizable_simd_clone_call.
3837 Check if STMT_INFO performs a function call that can be vectorized
3838 by calling a simd clone of the function.
3839 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3840 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3841 Return true if STMT_INFO is vectorizable in this way. */
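/* Illustrative sketch (hypothetical example): given

     #pragma omp declare simd simdlen(4) notinbranch
     float foo (float x);

   a loop that calls foo element-wise can be vectorized by replacing four
   scalar calls with a single call to the V4SF clone of foo; the code below
   picks the clone whose simdlen, argument kinds and alignment best match
   the loop's vectorization factor and the actual arguments.  */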
3843 static bool
3844 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3845 gimple_stmt_iterator *gsi,
3846 stmt_vec_info *vec_stmt, slp_tree slp_node,
3847 stmt_vector_for_cost *)
3849 tree vec_dest;
3850 tree scalar_dest;
3851 tree op, type;
3852 tree vec_oprnd0 = NULL_TREE;
3853 stmt_vec_info prev_stmt_info;
3854 tree vectype;
3855 unsigned int nunits;
3856 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3857 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3858 vec_info *vinfo = stmt_info->vinfo;
3859 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3860 tree fndecl, new_temp;
3861 int ncopies, j;
3862 auto_vec<simd_call_arg_info> arginfo;
3863 vec<tree> vargs = vNULL;
3864 size_t i, nargs;
3865 tree lhs, rtype, ratype;
3866 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3868 /* Is STMT a vectorizable call? */
3869 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3870 if (!stmt)
3871 return false;
3873 fndecl = gimple_call_fndecl (stmt);
3874 if (fndecl == NULL_TREE)
3875 return false;
3877 struct cgraph_node *node = cgraph_node::get (fndecl);
3878 if (node == NULL || node->simd_clones == NULL)
3879 return false;
3881 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3882 return false;
3884 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3885 && ! vec_stmt)
3886 return false;
3888 if (gimple_call_lhs (stmt)
3889 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3890 return false;
3892 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3894 vectype = STMT_VINFO_VECTYPE (stmt_info);
3896 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3897 return false;
3899 /* FORNOW */
3900 if (slp_node)
3901 return false;
3903 /* Process function arguments. */
3904 nargs = gimple_call_num_args (stmt);
3906 /* Bail out if the function has zero arguments. */
3907 if (nargs == 0)
3908 return false;
3910 arginfo.reserve (nargs, true);
3912 for (i = 0; i < nargs; i++)
3914 simd_call_arg_info thisarginfo;
3915 affine_iv iv;
3917 thisarginfo.linear_step = 0;
3918 thisarginfo.align = 0;
3919 thisarginfo.op = NULL_TREE;
3920 thisarginfo.simd_lane_linear = false;
3922 op = gimple_call_arg (stmt, i);
3923 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3924 &thisarginfo.vectype)
3925 || thisarginfo.dt == vect_uninitialized_def)
3927 if (dump_enabled_p ())
3928 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3929 "use not simple.\n");
3930 return false;
3933 if (thisarginfo.dt == vect_constant_def
3934 || thisarginfo.dt == vect_external_def)
3935 gcc_assert (thisarginfo.vectype == NULL_TREE);
3936 else
3937 gcc_assert (thisarginfo.vectype != NULL_TREE);
3939 /* For linear arguments, the analysis phase should have saved
3940 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
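/* Sketch of the layout used here (as recorded during analysis further down
   in this function): entry 0 holds the selected clone's decl, and for a
   linear argument I the entries I*3+1, I*3+2 and I*3+3 hold its base
   operand, linear step and simd-lane-linear flag, respectively.  */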
3941 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3942 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3944 gcc_assert (vec_stmt);
3945 thisarginfo.linear_step
3946 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3947 thisarginfo.op
3948 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3949 thisarginfo.simd_lane_linear
3950 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3951 == boolean_true_node);
3952 /* If the loop has been peeled for alignment, we need to adjust it. */
3953 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3954 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3955 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3957 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3958 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3959 tree opt = TREE_TYPE (thisarginfo.op);
3960 bias = fold_convert (TREE_TYPE (step), bias);
3961 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3962 thisarginfo.op
3963 = fold_build2 (POINTER_TYPE_P (opt)
3964 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3965 thisarginfo.op, bias);
3968 else if (!vec_stmt
3969 && thisarginfo.dt != vect_constant_def
3970 && thisarginfo.dt != vect_external_def
3971 && loop_vinfo
3972 && TREE_CODE (op) == SSA_NAME
3973 && simple_iv (loop, loop_containing_stmt (stmt), op,
3974 &iv, false)
3975 && tree_fits_shwi_p (iv.step))
3977 thisarginfo.linear_step = tree_to_shwi (iv.step);
3978 thisarginfo.op = iv.base;
3980 else if ((thisarginfo.dt == vect_constant_def
3981 || thisarginfo.dt == vect_external_def)
3982 && POINTER_TYPE_P (TREE_TYPE (op)))
3983 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3984 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3985 linear too. */
3986 if (POINTER_TYPE_P (TREE_TYPE (op))
3987 && !thisarginfo.linear_step
3988 && !vec_stmt
3989 && thisarginfo.dt != vect_constant_def
3990 && thisarginfo.dt != vect_external_def
3991 && loop_vinfo
3992 && !slp_node
3993 && TREE_CODE (op) == SSA_NAME)
3994 vect_simd_lane_linear (op, loop, &thisarginfo);
3996 arginfo.quick_push (thisarginfo);
3999 unsigned HOST_WIDE_INT vf;
4000 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
4002 if (dump_enabled_p ())
4003 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4004 "not considering SIMD clones; not yet supported"
4005 " for variable-width vectors.\n");
4006 return false;
4009 unsigned int badness = 0;
4010 struct cgraph_node *bestn = NULL;
4011 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4012 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4013 else
4014 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4015 n = n->simdclone->next_clone)
4017 unsigned int this_badness = 0;
4018 if (n->simdclone->simdlen > vf
4019 || n->simdclone->nargs != nargs)
4020 continue;
4021 if (n->simdclone->simdlen < vf)
4022 this_badness += (exact_log2 (vf)
4023 - exact_log2 (n->simdclone->simdlen)) * 1024;
4024 if (n->simdclone->inbranch)
4025 this_badness += 2048;
4026 int target_badness = targetm.simd_clone.usable (n);
4027 if (target_badness < 0)
4028 continue;
4029 this_badness += target_badness * 512;
4030 /* FORNOW: Have to add code to add the mask argument. */
4031 if (n->simdclone->inbranch)
4032 continue;
4033 for (i = 0; i < nargs; i++)
4035 switch (n->simdclone->args[i].arg_type)
4037 case SIMD_CLONE_ARG_TYPE_VECTOR:
4038 if (!useless_type_conversion_p
4039 (n->simdclone->args[i].orig_type,
4040 TREE_TYPE (gimple_call_arg (stmt, i))))
4041 i = -1;
4042 else if (arginfo[i].dt == vect_constant_def
4043 || arginfo[i].dt == vect_external_def
4044 || arginfo[i].linear_step)
4045 this_badness += 64;
4046 break;
4047 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4048 if (arginfo[i].dt != vect_constant_def
4049 && arginfo[i].dt != vect_external_def)
4050 i = -1;
4051 break;
4052 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4053 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4054 if (arginfo[i].dt == vect_constant_def
4055 || arginfo[i].dt == vect_external_def
4056 || (arginfo[i].linear_step
4057 != n->simdclone->args[i].linear_step))
4058 i = -1;
4059 break;
4060 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4061 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4062 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4063 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4064 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4065 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4066 /* FORNOW */
4067 i = -1;
4068 break;
4069 case SIMD_CLONE_ARG_TYPE_MASK:
4070 gcc_unreachable ();
4072 if (i == (size_t) -1)
4073 break;
4074 if (n->simdclone->args[i].alignment > arginfo[i].align)
4076 i = -1;
4077 break;
4079 if (arginfo[i].align)
4080 this_badness += (exact_log2 (arginfo[i].align)
4081 - exact_log2 (n->simdclone->args[i].alignment));
4083 if (i == (size_t) -1)
4084 continue;
4085 if (bestn == NULL || this_badness < badness)
4087 bestn = n;
4088 badness = this_badness;
4092 if (bestn == NULL)
4093 return false;
4095 for (i = 0; i < nargs; i++)
4096 if ((arginfo[i].dt == vect_constant_def
4097 || arginfo[i].dt == vect_external_def)
4098 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4100 arginfo[i].vectype
4101 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
4102 i)));
4103 if (arginfo[i].vectype == NULL
4104 || (simd_clone_subparts (arginfo[i].vectype)
4105 > bestn->simdclone->simdlen))
4106 return false;
4109 fndecl = bestn->decl;
4110 nunits = bestn->simdclone->simdlen;
4111 ncopies = vf / nunits;
4113 /* If the function isn't const, only allow it in simd loops where the user
4114 has asserted that at least nunits consecutive iterations can be
4115 performed using SIMD instructions. */
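/* For example (illustrative), with

     #pragma omp simd safelen(8)

   the user asserts that 8 consecutive iterations are independent, so a
   clone with simdlen of 8 or less can still be used even though the
   original function reads or writes memory.  */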
4116 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4117 && gimple_vuse (stmt))
4118 return false;
4120 /* Sanity check: make sure that at least one copy of the vectorized stmt
4121 needs to be generated. */
4122 gcc_assert (ncopies >= 1);
4124 if (!vec_stmt) /* transformation not required. */
4126 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4127 for (i = 0; i < nargs; i++)
4128 if ((bestn->simdclone->args[i].arg_type
4129 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4130 || (bestn->simdclone->args[i].arg_type
4131 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4133 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4134 + 1);
4135 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4136 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4137 ? size_type_node : TREE_TYPE (arginfo[i].op);
4138 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4139 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4140 tree sll = arginfo[i].simd_lane_linear
4141 ? boolean_true_node : boolean_false_node;
4142 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4144 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4145 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4146 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4147 return true;
4150 /* Transform. */
4152 if (dump_enabled_p ())
4153 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4155 /* Handle def. */
4156 scalar_dest = gimple_call_lhs (stmt);
4157 vec_dest = NULL_TREE;
4158 rtype = NULL_TREE;
4159 ratype = NULL_TREE;
4160 if (scalar_dest)
4162 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4163 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4164 if (TREE_CODE (rtype) == ARRAY_TYPE)
4166 ratype = rtype;
4167 rtype = TREE_TYPE (ratype);
4171 prev_stmt_info = NULL;
4172 for (j = 0; j < ncopies; ++j)
4174 /* Build argument list for the vectorized call. */
4175 if (j == 0)
4176 vargs.create (nargs);
4177 else
4178 vargs.truncate (0);
4180 for (i = 0; i < nargs; i++)
4182 unsigned int k, l, m, o;
4183 tree atype;
4184 op = gimple_call_arg (stmt, i);
4185 switch (bestn->simdclone->args[i].arg_type)
4187 case SIMD_CLONE_ARG_TYPE_VECTOR:
4188 atype = bestn->simdclone->args[i].vector_type;
4189 o = nunits / simd_clone_subparts (atype);
4190 for (m = j * o; m < (j + 1) * o; m++)
4192 if (simd_clone_subparts (atype)
4193 < simd_clone_subparts (arginfo[i].vectype))
4195 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4196 k = (simd_clone_subparts (arginfo[i].vectype)
4197 / simd_clone_subparts (atype));
4198 gcc_assert ((k & (k - 1)) == 0);
4199 if (m == 0)
4200 vec_oprnd0
4201 = vect_get_vec_def_for_operand (op, stmt_info);
4202 else
4204 vec_oprnd0 = arginfo[i].op;
4205 if ((m & (k - 1)) == 0)
4206 vec_oprnd0
4207 = vect_get_vec_def_for_stmt_copy (vinfo,
4208 vec_oprnd0);
4210 arginfo[i].op = vec_oprnd0;
4211 vec_oprnd0
4212 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4213 bitsize_int (prec),
4214 bitsize_int ((m & (k - 1)) * prec));
4215 gassign *new_stmt
4216 = gimple_build_assign (make_ssa_name (atype),
4217 vec_oprnd0);
4218 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4219 vargs.safe_push (gimple_assign_lhs (new_stmt));
4221 else
4223 k = (simd_clone_subparts (atype)
4224 / simd_clone_subparts (arginfo[i].vectype));
4225 gcc_assert ((k & (k - 1)) == 0);
4226 vec<constructor_elt, va_gc> *ctor_elts;
4227 if (k != 1)
4228 vec_alloc (ctor_elts, k);
4229 else
4230 ctor_elts = NULL;
4231 for (l = 0; l < k; l++)
4233 if (m == 0 && l == 0)
4234 vec_oprnd0
4235 = vect_get_vec_def_for_operand (op, stmt_info);
4236 else
4237 vec_oprnd0
4238 = vect_get_vec_def_for_stmt_copy (vinfo,
4239 arginfo[i].op);
4240 arginfo[i].op = vec_oprnd0;
4241 if (k == 1)
4242 break;
4243 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4244 vec_oprnd0);
4246 if (k == 1)
4247 vargs.safe_push (vec_oprnd0);
4248 else
4250 vec_oprnd0 = build_constructor (atype, ctor_elts);
4251 gassign *new_stmt
4252 = gimple_build_assign (make_ssa_name (atype),
4253 vec_oprnd0);
4254 vect_finish_stmt_generation (stmt_info, new_stmt,
4255 gsi);
4256 vargs.safe_push (gimple_assign_lhs (new_stmt));
4260 break;
4261 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4262 vargs.safe_push (op);
4263 break;
4264 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4265 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4266 if (j == 0)
4268 gimple_seq stmts;
4269 arginfo[i].op
4270 = force_gimple_operand (arginfo[i].op, &stmts, true,
4271 NULL_TREE);
4272 if (stmts != NULL)
4274 basic_block new_bb;
4275 edge pe = loop_preheader_edge (loop);
4276 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4277 gcc_assert (!new_bb);
4279 if (arginfo[i].simd_lane_linear)
4281 vargs.safe_push (arginfo[i].op);
4282 break;
4284 tree phi_res = copy_ssa_name (op);
4285 gphi *new_phi = create_phi_node (phi_res, loop->header);
4286 loop_vinfo->add_stmt (new_phi);
4287 add_phi_arg (new_phi, arginfo[i].op,
4288 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4289 enum tree_code code
4290 = POINTER_TYPE_P (TREE_TYPE (op))
4291 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4292 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4293 ? sizetype : TREE_TYPE (op);
4294 widest_int cst
4295 = wi::mul (bestn->simdclone->args[i].linear_step,
4296 ncopies * nunits);
4297 tree tcst = wide_int_to_tree (type, cst);
4298 tree phi_arg = copy_ssa_name (op);
4299 gassign *new_stmt
4300 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4301 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4302 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4303 loop_vinfo->add_stmt (new_stmt);
4304 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4305 UNKNOWN_LOCATION);
4306 arginfo[i].op = phi_res;
4307 vargs.safe_push (phi_res);
4309 else
4311 enum tree_code code
4312 = POINTER_TYPE_P (TREE_TYPE (op))
4313 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4314 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4315 ? sizetype : TREE_TYPE (op);
4316 widest_int cst
4317 = wi::mul (bestn->simdclone->args[i].linear_step,
4318 j * nunits);
4319 tree tcst = wide_int_to_tree (type, cst);
4320 new_temp = make_ssa_name (TREE_TYPE (op));
4321 gassign *new_stmt
4322 = gimple_build_assign (new_temp, code,
4323 arginfo[i].op, tcst);
4324 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4325 vargs.safe_push (new_temp);
4327 break;
4328 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4329 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4330 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4331 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4332 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4333 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4334 default:
4335 gcc_unreachable ();
4339 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4340 if (vec_dest)
4342 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4343 if (ratype)
4344 new_temp = create_tmp_var (ratype);
4345 else if (simd_clone_subparts (vectype)
4346 == simd_clone_subparts (rtype))
4347 new_temp = make_ssa_name (vec_dest, new_call);
4348 else
4349 new_temp = make_ssa_name (rtype, new_call);
4350 gimple_call_set_lhs (new_call, new_temp);
4352 stmt_vec_info new_stmt_info
4353 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
4355 if (vec_dest)
4357 if (simd_clone_subparts (vectype) < nunits)
4359 unsigned int k, l;
4360 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4361 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4362 k = nunits / simd_clone_subparts (vectype);
4363 gcc_assert ((k & (k - 1)) == 0);
4364 for (l = 0; l < k; l++)
4366 tree t;
4367 if (ratype)
4369 t = build_fold_addr_expr (new_temp);
4370 t = build2 (MEM_REF, vectype, t,
4371 build_int_cst (TREE_TYPE (t), l * bytes));
4373 else
4374 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4375 bitsize_int (prec), bitsize_int (l * prec));
4376 gimple *new_stmt
4377 = gimple_build_assign (make_ssa_name (vectype), t);
4378 new_stmt_info
4379 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4381 if (j == 0 && l == 0)
4382 STMT_VINFO_VEC_STMT (stmt_info)
4383 = *vec_stmt = new_stmt_info;
4384 else
4385 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4387 prev_stmt_info = new_stmt_info;
4390 if (ratype)
4391 vect_clobber_variable (stmt_info, gsi, new_temp);
4392 continue;
4394 else if (simd_clone_subparts (vectype) > nunits)
4396 unsigned int k = (simd_clone_subparts (vectype)
4397 / simd_clone_subparts (rtype));
4398 gcc_assert ((k & (k - 1)) == 0);
4399 if ((j & (k - 1)) == 0)
4400 vec_alloc (ret_ctor_elts, k);
4401 if (ratype)
4403 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4404 for (m = 0; m < o; m++)
4406 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4407 size_int (m), NULL_TREE, NULL_TREE);
4408 gimple *new_stmt
4409 = gimple_build_assign (make_ssa_name (rtype), tem);
4410 new_stmt_info
4411 = vect_finish_stmt_generation (stmt_info, new_stmt,
4412 gsi);
4413 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4414 gimple_assign_lhs (new_stmt));
4416 vect_clobber_variable (stmt_info, gsi, new_temp);
4418 else
4419 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4420 if ((j & (k - 1)) != k - 1)
4421 continue;
4422 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4423 gimple *new_stmt
4424 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4425 new_stmt_info
4426 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4428 if ((unsigned) j == k - 1)
4429 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4430 else
4431 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4433 prev_stmt_info = new_stmt_info;
4434 continue;
4436 else if (ratype)
4438 tree t = build_fold_addr_expr (new_temp);
4439 t = build2 (MEM_REF, vectype, t,
4440 build_int_cst (TREE_TYPE (t), 0));
4441 gimple *new_stmt
4442 = gimple_build_assign (make_ssa_name (vec_dest), t);
4443 new_stmt_info
4444 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4445 vect_clobber_variable (stmt_info, gsi, new_temp);
4449 if (j == 0)
4450 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4451 else
4452 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4454 prev_stmt_info = new_stmt_info;
4457 vargs.release ();
4459 /* The call in STMT might prevent it from being removed in DCE.
4460 However, we cannot remove it here, due to the way the SSA name
4461 it defines is mapped to the new definition. So just replace the
4462 RHS of the statement with something harmless. */
4464 if (slp_node)
4465 return true;
4467 gimple *new_stmt;
4468 if (scalar_dest)
4470 type = TREE_TYPE (scalar_dest);
4471 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4472 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4474 else
4475 new_stmt = gimple_build_nop ();
4476 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4477 unlink_stmt_vdef (stmt);
4479 return true;
4483 /* Function vect_gen_widened_results_half
4485 Create a vector stmt whose code, number of operands, and result
4486 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
4487 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4488 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4489 needs to be created (DECL is a function-decl of a target-builtin).
4490 STMT_INFO is the original scalar stmt that we are vectorizing. */
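/* Illustrative sketch: widening a V8HI multiplication into V4SI results
   takes two statements, one using e.g. VEC_WIDEN_MULT_LO_EXPR for the low
   half and one using VEC_WIDEN_MULT_HI_EXPR for the high half; each call to
   this helper emits one such half, or a call to the target builtin DECL
   when CODE is CALL_EXPR.  */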
4492 static gimple *
4493 vect_gen_widened_results_half (enum tree_code code,
4494 tree decl,
4495 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4496 tree vec_dest, gimple_stmt_iterator *gsi,
4497 stmt_vec_info stmt_info)
4499 gimple *new_stmt;
4500 tree new_temp;
4502 /* Generate half of the widened result: */
4503 if (code == CALL_EXPR)
4505 /* Target specific support */
4506 if (op_type == binary_op)
4507 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4508 else
4509 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4510 new_temp = make_ssa_name (vec_dest, new_stmt);
4511 gimple_call_set_lhs (new_stmt, new_temp);
4513 else
4515 /* Generic support */
4516 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4517 if (op_type != binary_op)
4518 vec_oprnd1 = NULL;
4519 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4520 new_temp = make_ssa_name (vec_dest, new_stmt);
4521 gimple_assign_set_lhs (new_stmt, new_temp);
4523 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4525 return new_stmt;
4529 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4530 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4531 containing the scalar operand), and for the rest we get a copy with
4532 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4533 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4534 The vectors are collected into VEC_OPRNDS. */
4536 static void
4537 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4538 vec<tree> *vec_oprnds, int multi_step_cvt)
4540 vec_info *vinfo = stmt_info->vinfo;
4541 tree vec_oprnd;
4543 /* Get the first vector operand. All the vector operands except the
4544 very first one (that is the scalar operand) are stmt copies. */
4546 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4547 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4548 else
4549 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4551 vec_oprnds->quick_push (vec_oprnd);
4553 /* Get second vector operand. */
4554 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4555 vec_oprnds->quick_push (vec_oprnd);
4557 *oprnd = vec_oprnd;
4559 /* For conversion in multiple steps, continue to get operands
4560 recursively. */
4561 if (multi_step_cvt)
4562 vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4563 multi_step_cvt - 1);
4567 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4568 For multi-step conversions store the resulting vectors and call the function
4569 recursively. */
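/* Illustrative sketch (hypothetical types): demoting V4SI operands to a
   V16QI result typically goes through an intermediate V8HI step, packing
   pairs of vectors at each level, e.g.

     v8hi_1  = VEC_PACK_TRUNC_EXPR <v4si_a, v4si_b>;
     v8hi_2  = VEC_PACK_TRUNC_EXPR <v4si_c, v4si_d>;
     v16qi_3 = VEC_PACK_TRUNC_EXPR <v8hi_1, v8hi_2>;

   which is why each level of recursion below halves the number of
   operands.  */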
4571 static void
4572 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4573 int multi_step_cvt,
4574 stmt_vec_info stmt_info,
4575 vec<tree> vec_dsts,
4576 gimple_stmt_iterator *gsi,
4577 slp_tree slp_node, enum tree_code code,
4578 stmt_vec_info *prev_stmt_info)
4580 unsigned int i;
4581 tree vop0, vop1, new_tmp, vec_dest;
4583 vec_dest = vec_dsts.pop ();
4585 for (i = 0; i < vec_oprnds->length (); i += 2)
4587 /* Create demotion operation. */
4588 vop0 = (*vec_oprnds)[i];
4589 vop1 = (*vec_oprnds)[i + 1];
4590 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4591 new_tmp = make_ssa_name (vec_dest, new_stmt);
4592 gimple_assign_set_lhs (new_stmt, new_tmp);
4593 stmt_vec_info new_stmt_info
4594 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4596 if (multi_step_cvt)
4597 /* Store the resulting vector for the next recursive call. */
4598 (*vec_oprnds)[i/2] = new_tmp;
4599 else
4601 /* This is the last step of the conversion sequence. Store the
4602 vectors in SLP_NODE or in the vector info of the scalar statement
4603 (or in the STMT_VINFO_RELATED_STMT chain). */
4604 if (slp_node)
4605 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4606 else
4608 if (!*prev_stmt_info)
4609 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4610 else
4611 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4613 *prev_stmt_info = new_stmt_info;
4618 /* For multi-step demotion operations we first generate demotion operations
4619 from the source type to the intermediate types, and then combine the
4620 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4621 type. */
4622 if (multi_step_cvt)
4624 /* At each level of recursion we have half of the operands we had at the
4625 previous level. */
4626 vec_oprnds->truncate ((i+1)/2);
4627 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4628 stmt_info, vec_dsts, gsi,
4629 slp_node, VEC_PACK_TRUNC_EXPR,
4630 prev_stmt_info);
4633 vec_dsts.quick_push (vec_dest);
4637 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4638 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4639 STMT_INFO. For multi-step conversions store the resulting vectors and
4640 call the function recursively. */
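/* Illustrative sketch: promoting V8HI operands to V4SI results produces two
   output vectors per input vector, one from the low half and one from the
   high half (e.g. via VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR), which is
   why the result vector built below has twice as many elements as
   VEC_OPRNDS0.  */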
4642 static void
4643 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4644 vec<tree> *vec_oprnds1,
4645 stmt_vec_info stmt_info, tree vec_dest,
4646 gimple_stmt_iterator *gsi,
4647 enum tree_code code1,
4648 enum tree_code code2, tree decl1,
4649 tree decl2, int op_type)
4651 int i;
4652 tree vop0, vop1, new_tmp1, new_tmp2;
4653 gimple *new_stmt1, *new_stmt2;
4654 vec<tree> vec_tmp = vNULL;
4656 vec_tmp.create (vec_oprnds0->length () * 2);
4657 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4659 if (op_type == binary_op)
4660 vop1 = (*vec_oprnds1)[i];
4661 else
4662 vop1 = NULL_TREE;
4664 /* Generate the two halves of the promotion operation. */
4665 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4666 op_type, vec_dest, gsi,
4667 stmt_info);
4668 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4669 op_type, vec_dest, gsi,
4670 stmt_info);
4671 if (is_gimple_call (new_stmt1))
4673 new_tmp1 = gimple_call_lhs (new_stmt1);
4674 new_tmp2 = gimple_call_lhs (new_stmt2);
4676 else
4678 new_tmp1 = gimple_assign_lhs (new_stmt1);
4679 new_tmp2 = gimple_assign_lhs (new_stmt2);
4682 /* Store the results for the next step. */
4683 vec_tmp.quick_push (new_tmp1);
4684 vec_tmp.quick_push (new_tmp2);
4687 vec_oprnds0->release ();
4688 *vec_oprnds0 = vec_tmp;
4692 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4693 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4694 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4695 Return true if STMT_INFO is vectorizable in this way. */
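/* Illustrative sketch (hypothetical vector types): an int -> double
   conversion with V4SI inputs produces V2DF outputs, so nunits_out is half
   of nunits_in and the modifier computed below is WIDEN; short -> char
   would be NARROW, and float -> int with equal subparts would be NONE.  */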
4697 static bool
4698 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4699 stmt_vec_info *vec_stmt, slp_tree slp_node,
4700 stmt_vector_for_cost *cost_vec)
4702 tree vec_dest;
4703 tree scalar_dest;
4704 tree op0, op1 = NULL_TREE;
4705 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4706 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4707 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4708 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4709 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4710 tree new_temp;
4711 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4712 int ndts = 2;
4713 stmt_vec_info prev_stmt_info;
4714 poly_uint64 nunits_in;
4715 poly_uint64 nunits_out;
4716 tree vectype_out, vectype_in;
4717 int ncopies, i, j;
4718 tree lhs_type, rhs_type;
4719 enum { NARROW, NONE, WIDEN } modifier;
4720 vec<tree> vec_oprnds0 = vNULL;
4721 vec<tree> vec_oprnds1 = vNULL;
4722 tree vop0;
4723 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4724 vec_info *vinfo = stmt_info->vinfo;
4725 int multi_step_cvt = 0;
4726 vec<tree> interm_types = vNULL;
4727 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4728 int op_type;
4729 unsigned short fltsz;
4731 /* Is STMT a vectorizable conversion? */
4733 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4734 return false;
4736 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4737 && ! vec_stmt)
4738 return false;
4740 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4741 if (!stmt)
4742 return false;
4744 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4745 return false;
4747 code = gimple_assign_rhs_code (stmt);
4748 if (!CONVERT_EXPR_CODE_P (code)
4749 && code != FIX_TRUNC_EXPR
4750 && code != FLOAT_EXPR
4751 && code != WIDEN_MULT_EXPR
4752 && code != WIDEN_LSHIFT_EXPR)
4753 return false;
4755 op_type = TREE_CODE_LENGTH (code);
4757 /* Check types of lhs and rhs. */
4758 scalar_dest = gimple_assign_lhs (stmt);
4759 lhs_type = TREE_TYPE (scalar_dest);
4760 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4762 op0 = gimple_assign_rhs1 (stmt);
4763 rhs_type = TREE_TYPE (op0);
4765 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4766 && !((INTEGRAL_TYPE_P (lhs_type)
4767 && INTEGRAL_TYPE_P (rhs_type))
4768 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4769 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4770 return false;
4772 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4773 && ((INTEGRAL_TYPE_P (lhs_type)
4774 && !type_has_mode_precision_p (lhs_type))
4775 || (INTEGRAL_TYPE_P (rhs_type)
4776 && !type_has_mode_precision_p (rhs_type))))
4778 if (dump_enabled_p ())
4779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4780 "type conversion to/from bit-precision unsupported."
4781 "\n");
4782 return false;
4785 /* Check the operands of the operation. */
4786 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4788 if (dump_enabled_p ())
4789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4790 "use not simple.\n");
4791 return false;
4793 if (op_type == binary_op)
4795 bool ok;
4797 op1 = gimple_assign_rhs2 (stmt);
4798 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4799 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4800 OP1. */
4801 if (CONSTANT_CLASS_P (op0))
4802 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4803 else
4804 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4806 if (!ok)
4808 if (dump_enabled_p ())
4809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4810 "use not simple.\n");
4811 return false;
4815 /* If op0 is an external or constant def, use a vector type of
4816 the same size as the output vector type. */
4817 if (!vectype_in)
4818 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4819 if (vec_stmt)
4820 gcc_assert (vectype_in);
4821 if (!vectype_in)
4823 if (dump_enabled_p ())
4824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4825 "no vectype for scalar type %T\n", rhs_type);
4827 return false;
4830 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4831 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4833 if (dump_enabled_p ())
4834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4835 "can't convert between boolean and non "
4836 "boolean vectors %T\n", rhs_type);
4838 return false;
4841 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4842 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4843 if (known_eq (nunits_out, nunits_in))
4844 modifier = NONE;
4845 else if (multiple_p (nunits_out, nunits_in))
4846 modifier = NARROW;
4847 else
4849 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4850 modifier = WIDEN;
4853 /* Multiple types in SLP are handled by creating the appropriate number of
4854 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4855 case of SLP. */
4856 if (slp_node)
4857 ncopies = 1;
4858 else if (modifier == NARROW)
4859 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4860 else
4861 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4863 /* Sanity check: make sure that at least one copy of the vectorized stmt
4864 needs to be generated. */
4865 gcc_assert (ncopies >= 1);
4867 bool found_mode = false;
4868 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4869 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4870 opt_scalar_mode rhs_mode_iter;
4872 /* Supportable by target? */
4873 switch (modifier)
4875 case NONE:
4876 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4877 return false;
4878 if (supportable_convert_operation (code, vectype_out, vectype_in,
4879 &decl1, &code1))
4880 break;
4881 /* FALLTHRU */
4882 unsupported:
4883 if (dump_enabled_p ())
4884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4885 "conversion not supported by target.\n");
4886 return false;
4888 case WIDEN:
4889 if (supportable_widening_operation (code, stmt_info, vectype_out,
4890 vectype_in, &code1, &code2,
4891 &multi_step_cvt, &interm_types))
4893 /* Binary widening operation can only be supported directly by the
4894 architecture. */
4895 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4896 break;
4899 if (code != FLOAT_EXPR
4900 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4901 goto unsupported;
4903 fltsz = GET_MODE_SIZE (lhs_mode);
4904 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4906 rhs_mode = rhs_mode_iter.require ();
4907 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4908 break;
4910 cvt_type
4911 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4912 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4913 if (cvt_type == NULL_TREE)
4914 goto unsupported;
4916 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4918 if (!supportable_convert_operation (code, vectype_out,
4919 cvt_type, &decl1, &codecvt1))
4920 goto unsupported;
4922 else if (!supportable_widening_operation (code, stmt_info,
4923 vectype_out, cvt_type,
4924 &codecvt1, &codecvt2,
4925 &multi_step_cvt,
4926 &interm_types))
4927 continue;
4928 else
4929 gcc_assert (multi_step_cvt == 0);
4931 if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4932 vectype_in, &code1, &code2,
4933 &multi_step_cvt, &interm_types))
4935 found_mode = true;
4936 break;
4940 if (!found_mode)
4941 goto unsupported;
4943 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4944 codecvt2 = ERROR_MARK;
4945 else
4947 multi_step_cvt++;
4948 interm_types.safe_push (cvt_type);
4949 cvt_type = NULL_TREE;
4951 break;
4953 case NARROW:
4954 gcc_assert (op_type == unary_op);
4955 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4956 &code1, &multi_step_cvt,
4957 &interm_types))
4958 break;
4960 if (code != FIX_TRUNC_EXPR
4961 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4962 goto unsupported;
4964 cvt_type
4965 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4966 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4967 if (cvt_type == NULL_TREE)
4968 goto unsupported;
4969 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4970 &decl1, &codecvt1))
4971 goto unsupported;
4972 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4973 &code1, &multi_step_cvt,
4974 &interm_types))
4975 break;
4976 goto unsupported;
4978 default:
4979 gcc_unreachable ();
4982 if (!vec_stmt) /* transformation not required. */
4984 DUMP_VECT_SCOPE ("vectorizable_conversion");
4985 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4987 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4988 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4989 cost_vec);
4991 else if (modifier == NARROW)
4993 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4994 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
4995 cost_vec);
4997 else
4999 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5000 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt,
5001 cost_vec);
5003 interm_types.release ();
5004 return true;
5007 /* Transform. */
5008 if (dump_enabled_p ())
5009 dump_printf_loc (MSG_NOTE, vect_location,
5010 "transform conversion. ncopies = %d.\n", ncopies);
5012 if (op_type == binary_op)
5014 if (CONSTANT_CLASS_P (op0))
5015 op0 = fold_convert (TREE_TYPE (op1), op0);
5016 else if (CONSTANT_CLASS_P (op1))
5017 op1 = fold_convert (TREE_TYPE (op0), op1);
5020 /* In case of multi-step conversion, we first generate conversion operations
5021 to the intermediate types, and then from those types to the final one.
5022 We create vector destinations for the intermediate types (TYPES) received
5023 from supportable_*_operation, and store them in the correct order
5024 for future use in vect_create_vectorized_*_stmts (). */
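/* Illustrative sketch (hypothetical types): a float -> signed char
   conversion might first convert V4SF to V4SI (CVT_TYPE) and then narrow
   V4SI -> V8HI -> V16QI through the types in INTERM_TYPES, so one
   destination variable is created per intermediate type here.  */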
5025 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5026 vec_dest = vect_create_destination_var (scalar_dest,
5027 (cvt_type && modifier == WIDEN)
5028 ? cvt_type : vectype_out);
5029 vec_dsts.quick_push (vec_dest);
5031 if (multi_step_cvt)
5033 for (i = interm_types.length () - 1;
5034 interm_types.iterate (i, &intermediate_type); i--)
5036 vec_dest = vect_create_destination_var (scalar_dest,
5037 intermediate_type);
5038 vec_dsts.quick_push (vec_dest);
5042 if (cvt_type)
5043 vec_dest = vect_create_destination_var (scalar_dest,
5044 modifier == WIDEN
5045 ? vectype_out : cvt_type);
5047 if (!slp_node)
5049 if (modifier == WIDEN)
5051 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
5052 if (op_type == binary_op)
5053 vec_oprnds1.create (1);
5055 else if (modifier == NARROW)
5056 vec_oprnds0.create (
5057 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
5059 else if (code == WIDEN_LSHIFT_EXPR)
5060 vec_oprnds1.create (slp_node->vec_stmts_size);
5062 last_oprnd = op0;
5063 prev_stmt_info = NULL;
5064 switch (modifier)
5066 case NONE:
5067 for (j = 0; j < ncopies; j++)
5069 if (j == 0)
5070 vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
5071 NULL, slp_node);
5072 else
5073 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
5075 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5077 stmt_vec_info new_stmt_info;
5078 /* Arguments are ready. Create the new vector stmt. */
5079 if (code1 == CALL_EXPR)
5081 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5082 new_temp = make_ssa_name (vec_dest, new_stmt);
5083 gimple_call_set_lhs (new_stmt, new_temp);
5084 new_stmt_info
5085 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5087 else
5089 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5090 gassign *new_stmt
5091 = gimple_build_assign (vec_dest, code1, vop0);
5092 new_temp = make_ssa_name (vec_dest, new_stmt);
5093 gimple_assign_set_lhs (new_stmt, new_temp);
5094 new_stmt_info
5095 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5098 if (slp_node)
5099 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5100 else
5102 if (!prev_stmt_info)
5103 STMT_VINFO_VEC_STMT (stmt_info)
5104 = *vec_stmt = new_stmt_info;
5105 else
5106 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5107 prev_stmt_info = new_stmt_info;
5111 break;
5113 case WIDEN:
5114 /* In case the vectorization factor (VF) is bigger than the number
5115 of elements that we can fit in a vectype (nunits), we have to
5116 generate more than one vector stmt, i.e., we need to "unroll"
5117 the vector stmt by a factor of VF/nunits. */
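/* E.g. (illustrative), with a vectorization factor of 8 and V4SI input
   vectors, ncopies is 2 and the loop below emits the promotion sequence
   twice.  */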
5118 for (j = 0; j < ncopies; j++)
5120 /* Handle uses. */
5121 if (j == 0)
5123 if (slp_node)
5125 if (code == WIDEN_LSHIFT_EXPR)
5127 unsigned int k;
5129 vec_oprnd1 = op1;
5130 /* Store vec_oprnd1 for every vector stmt to be created
5131 for SLP_NODE. We check during the analysis that all
5132 the shift arguments are the same. */
5133 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5134 vec_oprnds1.quick_push (vec_oprnd1);
5136 vect_get_vec_defs (op0, NULL_TREE, stmt_info,
5137 &vec_oprnds0, NULL, slp_node);
5139 else
5140 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
5141 &vec_oprnds1, slp_node);
5143 else
5145 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5146 vec_oprnds0.quick_push (vec_oprnd0);
5147 if (op_type == binary_op)
5149 if (code == WIDEN_LSHIFT_EXPR)
5150 vec_oprnd1 = op1;
5151 else
5152 vec_oprnd1
5153 = vect_get_vec_def_for_operand (op1, stmt_info);
5154 vec_oprnds1.quick_push (vec_oprnd1);
5158 else
5160 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5161 vec_oprnds0.truncate (0);
5162 vec_oprnds0.quick_push (vec_oprnd0);
5163 if (op_type == binary_op)
5165 if (code == WIDEN_LSHIFT_EXPR)
5166 vec_oprnd1 = op1;
5167 else
5168 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5169 vec_oprnd1);
5170 vec_oprnds1.truncate (0);
5171 vec_oprnds1.quick_push (vec_oprnd1);
5175 /* Arguments are ready. Create the new vector stmts. */
5176 for (i = multi_step_cvt; i >= 0; i--)
5178 tree this_dest = vec_dsts[i];
5179 enum tree_code c1 = code1, c2 = code2;
5180 if (i == 0 && codecvt2 != ERROR_MARK)
5182 c1 = codecvt1;
5183 c2 = codecvt2;
5185 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5186 &vec_oprnds1, stmt_info,
5187 this_dest, gsi,
5188 c1, c2, decl1, decl2,
5189 op_type);
5192 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5194 stmt_vec_info new_stmt_info;
5195 if (cvt_type)
5197 if (codecvt1 == CALL_EXPR)
5199 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5200 new_temp = make_ssa_name (vec_dest, new_stmt);
5201 gimple_call_set_lhs (new_stmt, new_temp);
5202 new_stmt_info
5203 = vect_finish_stmt_generation (stmt_info, new_stmt,
5204 gsi);
5206 else
5208 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5209 new_temp = make_ssa_name (vec_dest);
5210 gassign *new_stmt
5211 = gimple_build_assign (new_temp, codecvt1, vop0);
5212 new_stmt_info
5213 = vect_finish_stmt_generation (stmt_info, new_stmt,
5214 gsi);
5217 else
5218 new_stmt_info = vinfo->lookup_def (vop0);
5220 if (slp_node)
5221 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5222 else
5224 if (!prev_stmt_info)
5225 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5226 else
5227 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5228 prev_stmt_info = new_stmt_info;
5233 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5234 break;
5236 case NARROW:
5237 /* In case the vectorization factor (VF) is bigger than the number
5238 of elements that we can fit in a vectype (nunits), we have to
5239 generate more than one vector stmt, i.e., we need to "unroll"
5240 the vector stmt by a factor of VF/nunits. */
5241 for (j = 0; j < ncopies; j++)
5243 /* Handle uses. */
5244 if (slp_node)
5245 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5246 slp_node);
5247 else
5249 vec_oprnds0.truncate (0);
5250 vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5251 vect_pow2 (multi_step_cvt) - 1);
5254 /* Arguments are ready. Create the new vector stmts. */
5255 if (cvt_type)
5256 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5258 if (codecvt1 == CALL_EXPR)
5260 gcall *new_stmt = gimple_build_call (decl1, 1, vop0);
5261 new_temp = make_ssa_name (vec_dest, new_stmt);
5262 gimple_call_set_lhs (new_stmt, new_temp);
5263 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5265 else
5267 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5268 new_temp = make_ssa_name (vec_dest);
5269 gassign *new_stmt
5270 = gimple_build_assign (new_temp, codecvt1, vop0);
5271 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5274 vec_oprnds0[i] = new_temp;
5277 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5278 stmt_info, vec_dsts, gsi,
5279 slp_node, code1,
5280 &prev_stmt_info);
5283 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5284 break;
5287 vec_oprnds0.release ();
5288 vec_oprnds1.release ();
5289 interm_types.release ();
5291 return true;
5295 /* Function vectorizable_assignment.
5297 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5298 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5299 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5300 Return true if STMT_INFO is vectorizable in this way. */
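/* Illustrative sketch (hypothetical types): besides plain copies this also
   covers conversions that only reinterpret the bits, e.g. a
   VIEW_CONVERT_EXPR from an int value to a float value, which with V4SI
   and V4SF vectypes of equal size becomes a single vector
   VIEW_CONVERT_EXPR assignment below.  */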
5302 static bool
5303 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5304 stmt_vec_info *vec_stmt, slp_tree slp_node,
5305 stmt_vector_for_cost *cost_vec)
5307 tree vec_dest;
5308 tree scalar_dest;
5309 tree op;
5310 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5311 tree new_temp;
5312 enum vect_def_type dt[1] = {vect_unknown_def_type};
5313 int ndts = 1;
5314 int ncopies;
5315 int i, j;
5316 vec<tree> vec_oprnds = vNULL;
5317 tree vop;
5318 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5319 vec_info *vinfo = stmt_info->vinfo;
5320 stmt_vec_info prev_stmt_info = NULL;
5321 enum tree_code code;
5322 tree vectype_in;
5324 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5325 return false;
5327 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5328 && ! vec_stmt)
5329 return false;
5331 /* Is this a vectorizable assignment? */
5332 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5333 if (!stmt)
5334 return false;
5336 scalar_dest = gimple_assign_lhs (stmt);
5337 if (TREE_CODE (scalar_dest) != SSA_NAME)
5338 return false;
5340 code = gimple_assign_rhs_code (stmt);
5341 if (gimple_assign_single_p (stmt)
5342 || code == PAREN_EXPR
5343 || CONVERT_EXPR_CODE_P (code))
5344 op = gimple_assign_rhs1 (stmt);
5345 else
5346 return false;
5348 if (code == VIEW_CONVERT_EXPR)
5349 op = TREE_OPERAND (op, 0);
5351 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5352 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5354 /* Multiple types in SLP are handled by creating the appropriate number of
5355 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5356 case of SLP. */
5357 if (slp_node)
5358 ncopies = 1;
5359 else
5360 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5362 gcc_assert (ncopies >= 1);
5364 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5366 if (dump_enabled_p ())
5367 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5368 "use not simple.\n");
5369 return false;
5372 /* We can handle NOP_EXPR conversions that do not change the number
5373 of elements or the vector size. */
5374 if ((CONVERT_EXPR_CODE_P (code)
5375 || code == VIEW_CONVERT_EXPR)
5376 && (!vectype_in
5377 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5378 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5379 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5380 return false;
5382 /* We do not handle bit-precision changes. */
5383 if ((CONVERT_EXPR_CODE_P (code)
5384 || code == VIEW_CONVERT_EXPR)
5385 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5386 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5387 || !type_has_mode_precision_p (TREE_TYPE (op)))
5388 /* But a conversion that does not change the bit-pattern is ok. */
5389 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5390 > TYPE_PRECISION (TREE_TYPE (op)))
5391 && TYPE_UNSIGNED (TREE_TYPE (op)))
5392 /* Conversion between boolean types of different sizes is
5393 a simple assignment in case their vectypes are the same
5394 boolean vectors. */
5395 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5396 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5398 if (dump_enabled_p ())
5399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5400 "type conversion to/from bit-precision "
5401 "unsupported.\n");
5402 return false;
5405 if (!vec_stmt) /* transformation not required. */
5407 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5408 DUMP_VECT_SCOPE ("vectorizable_assignment");
5409 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5410 return true;
5413 /* Transform. */
5414 if (dump_enabled_p ())
5415 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5417 /* Handle def. */
5418 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5420 /* Handle use. */
5421 for (j = 0; j < ncopies; j++)
5423 /* Handle uses. */
5424 if (j == 0)
5425 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5426 else
5427 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5429 /* Arguments are ready.  Create the new vector stmt. */
5430 stmt_vec_info new_stmt_info = NULL;
5431 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5433 if (CONVERT_EXPR_CODE_P (code)
5434 || code == VIEW_CONVERT_EXPR)
5435 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5436 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5437 new_temp = make_ssa_name (vec_dest, new_stmt);
5438 gimple_assign_set_lhs (new_stmt, new_temp);
5439 new_stmt_info
5440 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5441 if (slp_node)
5442 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5445 if (slp_node)
5446 continue;
5448 if (j == 0)
5449 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5450 else
5451 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5453 prev_stmt_info = new_stmt_info;
5456 vec_oprnds.release ();
5457 return true;
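/* A minimal illustrative example (hypothetical function, not part of this
   file): an assignment with a sign-changing conversion that the checks in
   vectorizable_assignment above accept, because the conversion changes
   neither the number of vector elements nor the vector size and so can be
   emitted as a simple VIEW_CONVERT_EXPR copy.

void
copy_with_sign_change (const int *a, unsigned int *b, int n)
{
  for (int i = 0; i < n; i++)
    b[i] = (unsigned int) a[i];
}
*/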
5461 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5462 either as shift by a scalar or by a vector. */
5464 bool
5465 vect_supportable_shift (enum tree_code code, tree scalar_type)
5468 machine_mode vec_mode;
5469 optab optab;
5470 int icode;
5471 tree vectype;
5473 vectype = get_vectype_for_scalar_type (scalar_type);
5474 if (!vectype)
5475 return false;
5477 optab = optab_for_tree_code (code, vectype, optab_scalar);
5478 if (!optab
5479 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5481 optab = optab_for_tree_code (code, vectype, optab_vector);
5482 if (!optab
5483 || (optab_handler (optab, TYPE_MODE (vectype))
5484 == CODE_FOR_nothing))
5485 return false;
5488 vec_mode = TYPE_MODE (vectype);
5489 icode = (int) optab_handler (optab, vec_mode);
5490 if (icode == CODE_FOR_nothing)
5491 return false;
5493 return true;
5497 /* Function vectorizable_shift.
5499 Check if STMT_INFO performs a shift operation that can be vectorized.
5500 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5501 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5502 Return true if STMT_INFO is vectorizable in this way. */
5504 bool
5505 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5506 stmt_vec_info *vec_stmt, slp_tree slp_node,
5507 stmt_vector_for_cost *cost_vec)
5509 tree vec_dest;
5510 tree scalar_dest;
5511 tree op0, op1 = NULL;
5512 tree vec_oprnd1 = NULL_TREE;
5513 tree vectype;
5514 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5515 enum tree_code code;
5516 machine_mode vec_mode;
5517 tree new_temp;
5518 optab optab;
5519 int icode;
5520 machine_mode optab_op2_mode;
5521 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5522 int ndts = 2;
5523 stmt_vec_info prev_stmt_info;
5524 poly_uint64 nunits_in;
5525 poly_uint64 nunits_out;
5526 tree vectype_out;
5527 tree op1_vectype;
5528 int ncopies;
5529 int j, i;
5530 vec<tree> vec_oprnds0 = vNULL;
5531 vec<tree> vec_oprnds1 = vNULL;
5532 tree vop0, vop1;
5533 unsigned int k;
5534 bool scalar_shift_arg = true;
5535 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5536 vec_info *vinfo = stmt_info->vinfo;
5538 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5539 return false;
5541 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5542 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5543 && ! vec_stmt)
5544 return false;
5546 /* Is STMT a vectorizable shift/rotate operation? */
5547 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5548 if (!stmt)
5549 return false;
5551 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5552 return false;
5554 code = gimple_assign_rhs_code (stmt);
5556 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5557 || code == RROTATE_EXPR))
5558 return false;
5560 scalar_dest = gimple_assign_lhs (stmt);
5561 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5562 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5564 if (dump_enabled_p ())
5565 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5566 "bit-precision shifts not supported.\n");
5567 return false;
5570 op0 = gimple_assign_rhs1 (stmt);
5571 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5573 if (dump_enabled_p ())
5574 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5575 "use not simple.\n");
5576 return false;
5578 /* If op0 is an external or constant def use a vector type with
5579 the same size as the output vector type. */
5580 if (!vectype)
5581 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5582 if (vec_stmt)
5583 gcc_assert (vectype);
5584 if (!vectype)
5586 if (dump_enabled_p ())
5587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5588 "no vectype for scalar type\n");
5589 return false;
5592 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5593 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5594 if (maybe_ne (nunits_out, nunits_in))
5595 return false;
5597 op1 = gimple_assign_rhs2 (stmt);
5598 stmt_vec_info op1_def_stmt_info;
5599 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5600 &op1_def_stmt_info))
5602 if (dump_enabled_p ())
5603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5604 "use not simple.\n");
5605 return false;
5608 /* Multiple types in SLP are handled by creating the appropriate number of
5609 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5610 case of SLP. */
5611 if (slp_node)
5612 ncopies = 1;
5613 else
5614 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5616 gcc_assert (ncopies >= 1);
5618 /* Determine whether the shift amount is a vector or a scalar. If the
5619 shift/rotate amount is a vector, use the vector/vector shift optabs. */
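/* For example (hypothetical statements): for 'a[i] <<= n' with n loop
   invariant, dt[1] is vect_external_def (or vect_constant_def for
   'a[i] <<= 3') and the scalar amount can be used directly, whereas for
   'a[i] <<= b[i]' dt[1] is vect_internal_def and the vector/vector shift
   form is required.  */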
5621 if ((dt[1] == vect_internal_def
5622 || dt[1] == vect_induction_def
5623 || dt[1] == vect_nested_cycle)
5624 && !slp_node)
5625 scalar_shift_arg = false;
5626 else if (dt[1] == vect_constant_def
5627 || dt[1] == vect_external_def
5628 || dt[1] == vect_internal_def)
5630 /* In SLP we need to check whether the shift count is the same for
5631 all statements; in loops, if it is a constant or invariant, it is
5632 always a scalar shift. */
5633 if (slp_node)
5635 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5636 stmt_vec_info slpstmt_info;
5638 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5640 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5641 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5642 scalar_shift_arg = false;
5645 /* For internal SLP defs we have to make sure we see scalar stmts
5646 for all vector elements.
5647 ??? For different vectors we could resort to a different
5648 scalar shift operand but code-generation below simply always
5649 takes the first. */
5650 if (dt[1] == vect_internal_def
5651 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5652 stmts.length ()))
5653 scalar_shift_arg = false;
5656 /* If the shift amount is computed by a pattern stmt we cannot
5657 use the scalar amount directly, so give up and use a vector
5658 shift. */
5659 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5660 scalar_shift_arg = false;
5662 else
5664 if (dump_enabled_p ())
5665 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5666 "operand mode requires invariant argument.\n");
5667 return false;
5670 /* Vector shifted by vector. */
5671 if (!scalar_shift_arg)
5673 optab = optab_for_tree_code (code, vectype, optab_vector);
5674 if (dump_enabled_p ())
5675 dump_printf_loc (MSG_NOTE, vect_location,
5676 "vector/vector shift/rotate found.\n");
5678 if (!op1_vectype)
5679 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5680 if (op1_vectype == NULL_TREE
5681 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5683 if (dump_enabled_p ())
5684 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5685 "unusable type for last operand in"
5686 " vector/vector shift/rotate.\n");
5687 return false;
5690 /* See if the machine has a vector shifted by scalar insn and if not
5691 then see if it has a vector shifted by vector insn. */
5692 else
5694 optab = optab_for_tree_code (code, vectype, optab_scalar);
5695 if (optab
5696 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5698 if (dump_enabled_p ())
5699 dump_printf_loc (MSG_NOTE, vect_location,
5700 "vector/scalar shift/rotate found.\n");
5702 else
5704 optab = optab_for_tree_code (code, vectype, optab_vector);
5705 if (optab
5706 && (optab_handler (optab, TYPE_MODE (vectype))
5707 != CODE_FOR_nothing))
5709 scalar_shift_arg = false;
5711 if (dump_enabled_p ())
5712 dump_printf_loc (MSG_NOTE, vect_location,
5713 "vector/vector shift/rotate found.\n");
5715 /* Unlike the other binary operators, shifts/rotates have
5716 the rhs being int, instead of the same type as the lhs,
5717 so make sure the scalar is the right type if we are
5718 dealing with vectors of long long/long/short/char. */
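/* E.g. for a vector of shorts shifted left by the int constant 3, the
   constant is folded to (short) 3 below so that its type matches the
   vector element type (an illustrative case, not an exhaustive one).  */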
5719 if (dt[1] == vect_constant_def)
5720 op1 = fold_convert (TREE_TYPE (vectype), op1);
5721 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5722 TREE_TYPE (op1)))
5724 if (slp_node
5725 && TYPE_MODE (TREE_TYPE (vectype))
5726 != TYPE_MODE (TREE_TYPE (op1)))
5728 if (dump_enabled_p ())
5729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5730 "unusable type for last operand in"
5731 " vector/vector shift/rotate.\n");
5732 return false;
5734 if (vec_stmt && !slp_node)
5736 op1 = fold_convert (TREE_TYPE (vectype), op1);
5737 op1 = vect_init_vector (stmt_info, op1,
5738 TREE_TYPE (vectype), NULL);
5745 /* Supportable by target? */
5746 if (!optab)
5748 if (dump_enabled_p ())
5749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5750 "no optab.\n");
5751 return false;
5753 vec_mode = TYPE_MODE (vectype);
5754 icode = (int) optab_handler (optab, vec_mode);
5755 if (icode == CODE_FOR_nothing)
5757 if (dump_enabled_p ())
5758 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5759 "op not supported by target.\n");
5760 /* Check only during analysis. */
5761 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5762 || (!vec_stmt
5763 && !vect_worthwhile_without_simd_p (vinfo, code)))
5764 return false;
5765 if (dump_enabled_p ())
5766 dump_printf_loc (MSG_NOTE, vect_location,
5767 "proceeding using word mode.\n");
5770 /* Worthwhile without SIMD support? Check only during analysis. */
5771 if (!vec_stmt
5772 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5773 && !vect_worthwhile_without_simd_p (vinfo, code))
5775 if (dump_enabled_p ())
5776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5777 "not worthwhile without SIMD support.\n");
5778 return false;
5781 if (!vec_stmt) /* transformation not required. */
5783 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5784 DUMP_VECT_SCOPE ("vectorizable_shift");
5785 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
5786 return true;
5789 /* Transform. */
5791 if (dump_enabled_p ())
5792 dump_printf_loc (MSG_NOTE, vect_location,
5793 "transform binary/unary operation.\n");
5795 /* Handle def. */
5796 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5798 prev_stmt_info = NULL;
5799 for (j = 0; j < ncopies; j++)
5801 /* Handle uses. */
5802 if (j == 0)
5804 if (scalar_shift_arg)
5806 /* Vector shl and shr insn patterns can be defined with scalar
5807 operand 2 (shift operand). In this case, use constant or loop
5808 invariant op1 directly, without extending it to vector mode
5809 first. */
5810 optab_op2_mode = insn_data[icode].operand[2].mode;
5811 if (!VECTOR_MODE_P (optab_op2_mode))
5813 if (dump_enabled_p ())
5814 dump_printf_loc (MSG_NOTE, vect_location,
5815 "operand 1 using scalar mode.\n");
5816 vec_oprnd1 = op1;
5817 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5818 vec_oprnds1.quick_push (vec_oprnd1);
5819 if (slp_node)
5821 /* Store vec_oprnd1 for every vector stmt to be created
5822 for SLP_NODE. We check during the analysis that all
5823 the shift arguments are the same.
5824 TODO: Allow different constants for different vector
5825 stmts generated for an SLP instance. */
5826 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5827 vec_oprnds1.quick_push (vec_oprnd1);
5832 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5833 (a special case for certain kinds of vector shifts); otherwise,
5834 operand 1 should be of a vector type (the usual case). */
5835 if (vec_oprnd1)
5836 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5837 slp_node);
5838 else
5839 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5840 slp_node);
5842 else
5843 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5845 /* Arguments are ready. Create the new vector stmt. */
5846 stmt_vec_info new_stmt_info = NULL;
5847 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5849 vop1 = vec_oprnds1[i];
5850 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5851 new_temp = make_ssa_name (vec_dest, new_stmt);
5852 gimple_assign_set_lhs (new_stmt, new_temp);
5853 new_stmt_info
5854 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5855 if (slp_node)
5856 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5859 if (slp_node)
5860 continue;
5862 if (j == 0)
5863 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5864 else
5865 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5866 prev_stmt_info = new_stmt_info;
5869 vec_oprnds0.release ();
5870 vec_oprnds1.release ();
5872 return true;
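/* Illustrative source-level inputs for the two cases handled above
   (hypothetical functions, not taken from any testsuite): the first loop
   shifts every element by a loop-invariant amount and can use the
   vector-shifted-by-scalar optab if the target provides it; the second
   shifts each element by a per-element amount and needs the vector/vector
   shift optab.

void
shift_by_invariant (int *a, int n, int amount)
{
  for (int i = 0; i < n; i++)
    a[i] <<= amount;
}

void
shift_by_vector (int *a, const int *b, int n)
{
  for (int i = 0; i < n; i++)
    a[i] <<= b[i];
}
*/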
5876 /* Function vectorizable_operation.
5878 Check if STMT_INFO performs a binary, unary or ternary operation that can
5879 be vectorized.
5880 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5881 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5882 Return true if STMT_INFO is vectorizable in this way. */
5884 static bool
5885 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5886 stmt_vec_info *vec_stmt, slp_tree slp_node,
5887 stmt_vector_for_cost *cost_vec)
5889 tree vec_dest;
5890 tree scalar_dest;
5891 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5892 tree vectype;
5893 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5894 enum tree_code code, orig_code;
5895 machine_mode vec_mode;
5896 tree new_temp;
5897 int op_type;
5898 optab optab;
5899 bool target_support_p;
5900 enum vect_def_type dt[3]
5901 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5902 int ndts = 3;
5903 stmt_vec_info prev_stmt_info;
5904 poly_uint64 nunits_in;
5905 poly_uint64 nunits_out;
5906 tree vectype_out;
5907 int ncopies;
5908 int j, i;
5909 vec<tree> vec_oprnds0 = vNULL;
5910 vec<tree> vec_oprnds1 = vNULL;
5911 vec<tree> vec_oprnds2 = vNULL;
5912 tree vop0, vop1, vop2;
5913 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5914 vec_info *vinfo = stmt_info->vinfo;
5916 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5917 return false;
5919 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5920 && ! vec_stmt)
5921 return false;
5923 /* Is STMT a vectorizable unary/binary/ternary operation? */
5924 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5925 if (!stmt)
5926 return false;
5928 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5929 return false;
5931 orig_code = code = gimple_assign_rhs_code (stmt);
5933 /* For pointer addition and subtraction, we should use the normal
5934 plus and minus for the vector operation. */
5935 if (code == POINTER_PLUS_EXPR)
5936 code = PLUS_EXPR;
5937 if (code == POINTER_DIFF_EXPR)
5938 code = MINUS_EXPR;
5940 /* Support only unary, binary, or ternary operations. */
5941 op_type = TREE_CODE_LENGTH (code);
5942 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5944 if (dump_enabled_p ())
5945 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5946 "num. args = %d (not unary/binary/ternary op).\n",
5947 op_type);
5948 return false;
5951 scalar_dest = gimple_assign_lhs (stmt);
5952 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5954 /* Most operations cannot handle bit-precision types without extra
5955 truncations. */
5956 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5957 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5958 /* Exception are bitwise binary operations. */
5959 && code != BIT_IOR_EXPR
5960 && code != BIT_XOR_EXPR
5961 && code != BIT_AND_EXPR)
5963 if (dump_enabled_p ())
5964 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5965 "bit-precision arithmetic not supported.\n");
5966 return false;
5969 op0 = gimple_assign_rhs1 (stmt);
5970 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5972 if (dump_enabled_p ())
5973 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5974 "use not simple.\n");
5975 return false;
5977 /* If op0 is an external or constant def use a vector type with
5978 the same size as the output vector type. */
5979 if (!vectype)
5981 /* For a boolean type we cannot determine the vectype from an
5982 invariant value (we don't know whether it is a vector
5983 of booleans or a vector of integers). Use the output
5984 vectype, because operations on booleans don't change
5985 the type. */
5986 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5988 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5990 if (dump_enabled_p ())
5991 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5992 "not supported operation on bool value.\n");
5993 return false;
5995 vectype = vectype_out;
5997 else
5998 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
6000 if (vec_stmt)
6001 gcc_assert (vectype);
6002 if (!vectype)
6004 if (dump_enabled_p ())
6005 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6006 "no vectype for scalar type %T\n",
6007 TREE_TYPE (op0));
6009 return false;
6012 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6013 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6014 if (maybe_ne (nunits_out, nunits_in))
6015 return false;
6017 if (op_type == binary_op || op_type == ternary_op)
6019 op1 = gimple_assign_rhs2 (stmt);
6020 if (!vect_is_simple_use (op1, vinfo, &dt[1]))
6022 if (dump_enabled_p ())
6023 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6024 "use not simple.\n");
6025 return false;
6028 if (op_type == ternary_op)
6030 op2 = gimple_assign_rhs3 (stmt);
6031 if (!vect_is_simple_use (op2, vinfo, &dt[2]))
6033 if (dump_enabled_p ())
6034 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6035 "use not simple.\n");
6036 return false;
6040 /* Multiple types in SLP are handled by creating the appropriate number of
6041 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6042 case of SLP. */
6043 if (slp_node)
6044 ncopies = 1;
6045 else
6046 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6048 gcc_assert (ncopies >= 1);
6050 /* Shifts are handled in vectorizable_shift (). */
6051 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6052 || code == RROTATE_EXPR)
6053 return false;
6055 /* Supportable by target? */
6057 vec_mode = TYPE_MODE (vectype);
6058 if (code == MULT_HIGHPART_EXPR)
6059 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6060 else
6062 optab = optab_for_tree_code (code, vectype, optab_default);
6063 if (!optab)
6065 if (dump_enabled_p ())
6066 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6067 "no optab.\n");
6068 return false;
6070 target_support_p = (optab_handler (optab, vec_mode)
6071 != CODE_FOR_nothing);
6074 if (!target_support_p)
6076 if (dump_enabled_p ())
6077 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6078 "op not supported by target.\n");
6079 /* Check only during analysis. */
6080 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6081 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6082 return false;
6083 if (dump_enabled_p ())
6084 dump_printf_loc (MSG_NOTE, vect_location,
6085 "proceeding using word mode.\n");
6088 /* Worthwhile without SIMD support? Check only during analysis. */
6089 if (!VECTOR_MODE_P (vec_mode)
6090 && !vec_stmt
6091 && !vect_worthwhile_without_simd_p (vinfo, code))
6093 if (dump_enabled_p ())
6094 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6095 "not worthwhile without SIMD support.\n");
6096 return false;
6099 if (!vec_stmt) /* transformation not required. */
6101 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6102 DUMP_VECT_SCOPE ("vectorizable_operation");
6103 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
6104 return true;
6107 /* Transform. */
6109 if (dump_enabled_p ())
6110 dump_printf_loc (MSG_NOTE, vect_location,
6111 "transform binary/unary operation.\n");
6113 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6114 vectors with unsigned elements, but the result is signed. So, we
6115 need to compute the MINUS_EXPR into a vectype temporary and
6116 VIEW_CONVERT_EXPR it into the final vectype_out result. */
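/* For instance (assuming 64-bit pointers; illustrative only), for
   'ptrdiff_t d = p - q' the subtraction is carried out on vectors of
   unsigned 64-bit elements and the result is then VIEW_CONVERT_EXPRed
   to the signed vectype of the ptrdiff result.  */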
6117 tree vec_cvt_dest = NULL_TREE;
6118 if (orig_code == POINTER_DIFF_EXPR)
6120 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6121 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6123 /* Handle def. */
6124 else
6125 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6127 /* In case the vectorization factor (VF) is bigger than the number
6128 of elements that we can fit in a vectype (nunits), we have to generate
6129 more than one vector stmt - i.e., we need to "unroll" the
6130 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6131 from one copy of the vector stmt to the next, in the field
6132 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6133 stages to find the correct vector defs to be used when vectorizing
6134 stmts that use the defs of the current stmt. The example below
6135 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6136 we need to create 4 vectorized stmts):
6138 before vectorization:
6139 RELATED_STMT VEC_STMT
6140 S1: x = memref - -
6141 S2: z = x + 1 - -
6143 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6144 there):
6145 RELATED_STMT VEC_STMT
6146 VS1_0: vx0 = memref0 VS1_1 -
6147 VS1_1: vx1 = memref1 VS1_2 -
6148 VS1_2: vx2 = memref2 VS1_3 -
6149 VS1_3: vx3 = memref3 - -
6150 S1: x = load - VS1_0
6151 S2: z = x + 1 - -
6153 step 2: vectorize stmt S2 (done here):
6154 To vectorize stmt S2 we first need to find the relevant vector
6155 def for the first operand 'x'. This is, as usual, obtained from
6156 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6157 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6158 relevant vector def 'vx0'. Having found 'vx0' we can generate
6159 the vector stmt VS2_0, and as usual, record it in the
6160 STMT_VINFO_VEC_STMT of stmt S2.
6161 When creating the second copy (VS2_1), we obtain the relevant vector
6162 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6163 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6164 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6165 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6166 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6167 chain of stmts and pointers:
6168 RELATED_STMT VEC_STMT
6169 VS1_0: vx0 = memref0 VS1_1 -
6170 VS1_1: vx1 = memref1 VS1_2 -
6171 VS1_2: vx2 = memref2 VS1_3 -
6172 VS1_3: vx3 = memref3 - -
6173 S1: x = load - VS1_0
6174 VS2_0: vz0 = vx0 + v1 VS2_1 -
6175 VS2_1: vz1 = vx1 + v1 VS2_2 -
6176 VS2_2: vz2 = vx2 + v1 VS2_3 -
6177 VS2_3: vz3 = vx3 + v1 - -
6178 S2: z = x + 1 - VS2_0 */
6180 prev_stmt_info = NULL;
6181 for (j = 0; j < ncopies; j++)
6183 /* Handle uses. */
6184 if (j == 0)
6186 if (op_type == binary_op)
6187 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6188 slp_node);
6189 else if (op_type == ternary_op)
6191 if (slp_node)
6193 auto_vec<tree> ops(3);
6194 ops.quick_push (op0);
6195 ops.quick_push (op1);
6196 ops.quick_push (op2);
6197 auto_vec<vec<tree> > vec_defs(3);
6198 vect_get_slp_defs (ops, slp_node, &vec_defs);
6199 vec_oprnds0 = vec_defs[0];
6200 vec_oprnds1 = vec_defs[1];
6201 vec_oprnds2 = vec_defs[2];
6203 else
6205 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6206 &vec_oprnds1, NULL);
6207 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6208 NULL, NULL);
6211 else
6212 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6213 slp_node);
6215 else
6217 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6218 if (op_type == ternary_op)
6220 tree vec_oprnd = vec_oprnds2.pop ();
6221 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6222 vec_oprnd));
6226 /* Arguments are ready. Create the new vector stmt. */
6227 stmt_vec_info new_stmt_info = NULL;
6228 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6230 vop1 = ((op_type == binary_op || op_type == ternary_op)
6231 ? vec_oprnds1[i] : NULL_TREE);
6232 vop2 = ((op_type == ternary_op)
6233 ? vec_oprnds2[i] : NULL_TREE);
6234 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6235 vop0, vop1, vop2);
6236 new_temp = make_ssa_name (vec_dest, new_stmt);
6237 gimple_assign_set_lhs (new_stmt, new_temp);
6238 new_stmt_info
6239 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6240 if (vec_cvt_dest)
6242 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6243 gassign *new_stmt
6244 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6245 new_temp);
6246 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6247 gimple_assign_set_lhs (new_stmt, new_temp);
6248 new_stmt_info
6249 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6251 if (slp_node)
6252 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6255 if (slp_node)
6256 continue;
6258 if (j == 0)
6259 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6260 else
6261 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6262 prev_stmt_info = new_stmt_info;
6265 vec_oprnds0.release ();
6266 vec_oprnds1.release ();
6267 vec_oprnds2.release ();
6269 return true;
6272 /* A helper function to ensure data reference DR_INFO's base alignment. */
6274 static void
6275 ensure_base_align (dr_vec_info *dr_info)
6277 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6278 return;
6280 if (dr_info->base_misaligned)
6282 tree base_decl = dr_info->base_decl;
6284 // We should only be able to increase the alignment of a base object if
6285 // we know what its new alignment should be at compile time.
6286 unsigned HOST_WIDE_INT align_base_to =
6287 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6289 if (decl_in_symtab_p (base_decl))
6290 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6291 else
6293 SET_DECL_ALIGN (base_decl, align_base_to);
6294 DECL_USER_ALIGN (base_decl) = 1;
6296 dr_info->base_misaligned = false;
6301 /* Function get_group_alias_ptr_type.
6303 Return the alias type for the group starting at FIRST_STMT_INFO. */
6305 static tree
6306 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6308 struct data_reference *first_dr, *next_dr;
6310 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6311 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6312 while (next_stmt_info)
6314 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6315 if (get_alias_set (DR_REF (first_dr))
6316 != get_alias_set (DR_REF (next_dr)))
6318 if (dump_enabled_p ())
6319 dump_printf_loc (MSG_NOTE, vect_location,
6320 "conflicting alias set types.\n");
6321 return ptr_type_node;
6323 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6325 return reference_alias_ptr_type (DR_REF (first_dr));
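/* For example (hypothetical scenario): if one store in an interleaved group
   is written through an 'int' lvalue and another through a 'float' lvalue,
   their alias sets differ, so get_group_alias_ptr_type returns
   ptr_type_node and the group's accesses are treated conservatively
   (alias set 0, which conflicts with everything).  */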
6329 /* Function scan_operand_equal_p.
6331 Helper function for check_scan_store. Compare two references
6332 with .GOMP_SIMD_LANE bases. */
6334 static bool
6335 scan_operand_equal_p (tree ref1, tree ref2)
6337 tree ref[2] = { ref1, ref2 };
6338 poly_int64 bitsize[2], bitpos[2];
6339 tree offset[2], base[2];
6340 for (int i = 0; i < 2; ++i)
6342 machine_mode mode;
6343 int unsignedp, reversep, volatilep = 0;
6344 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6345 &offset[i], &mode, &unsignedp,
6346 &reversep, &volatilep);
6347 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6348 return false;
6349 if (TREE_CODE (base[i]) == MEM_REF
6350 && offset[i] == NULL_TREE
6351 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6353 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6354 if (is_gimple_assign (def_stmt)
6355 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6356 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6357 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6359 if (maybe_ne (mem_ref_offset (base[i]), 0))
6360 return false;
6361 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6362 offset[i] = gimple_assign_rhs2 (def_stmt);
6367 if (!operand_equal_p (base[0], base[1], 0))
6368 return false;
6369 if (maybe_ne (bitsize[0], bitsize[1]))
6370 return false;
6371 if (offset[0] != offset[1])
6373 if (!offset[0] || !offset[1])
6374 return false;
6375 if (!operand_equal_p (offset[0], offset[1], 0))
6377 tree step[2];
6378 for (int i = 0; i < 2; ++i)
6380 step[i] = integer_one_node;
6381 if (TREE_CODE (offset[i]) == SSA_NAME)
6383 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6384 if (is_gimple_assign (def_stmt)
6385 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6386 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6387 == INTEGER_CST))
6389 step[i] = gimple_assign_rhs2 (def_stmt);
6390 offset[i] = gimple_assign_rhs1 (def_stmt);
6393 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6395 step[i] = TREE_OPERAND (offset[i], 1);
6396 offset[i] = TREE_OPERAND (offset[i], 0);
6398 tree rhs1 = NULL_TREE;
6399 if (TREE_CODE (offset[i]) == SSA_NAME)
6401 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6402 if (gimple_assign_cast_p (def_stmt))
6403 rhs1 = gimple_assign_rhs1 (def_stmt);
6405 else if (CONVERT_EXPR_P (offset[i]))
6406 rhs1 = TREE_OPERAND (offset[i], 0);
6407 if (rhs1
6408 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6409 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6410 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6411 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6412 offset[i] = rhs1;
6414 if (!operand_equal_p (offset[0], offset[1], 0)
6415 || !operand_equal_p (step[0], step[1], 0))
6416 return false;
6419 return true;
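/* In effect, scan_operand_equal_p treats two references to the same
   "omp simd array" element as equal even when one of them has been
   rewritten into a MEM_REF through '&array p+ offset': both are decomposed
   into the same base, the same lane offset and the same step (stripping a
   MULT_EXPR by the element size and widening casts) before being compared.
   (Descriptive note; the array/lane names follow the examples in
   check_scan_store below.)  */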
6423 enum scan_store_kind {
6424 /* Normal permutation. */
6425 scan_store_kind_perm,
6427 /* Whole vector left shift permutation with zero init. */
6428 scan_store_kind_lshift_zero,
6430 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6431 scan_store_kind_lshift_cond
6434 /* Function scan_store_can_perm_p.
6436 Verify if we can perform the needed permutations or whole vector shifts.
6437 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6438 USE_WHOLE_VECTOR is a vector of enum scan_store_kind values describing
6439 which operation to do at each step. */
6441 static int
6442 scan_store_can_perm_p (tree vectype, tree init,
6443 vec<enum scan_store_kind> *use_whole_vector = NULL)
6445 enum machine_mode vec_mode = TYPE_MODE (vectype);
6446 unsigned HOST_WIDE_INT nunits;
6447 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6448 return -1;
6449 int units_log2 = exact_log2 (nunits);
6450 if (units_log2 <= 0)
6451 return -1;
6453 int i;
6454 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6455 for (i = 0; i <= units_log2; ++i)
6457 unsigned HOST_WIDE_INT j, k;
6458 enum scan_store_kind kind = scan_store_kind_perm;
6459 vec_perm_builder sel (nunits, nunits, 1);
6460 sel.quick_grow (nunits);
6461 if (i == units_log2)
6463 for (j = 0; j < nunits; ++j)
6464 sel[j] = nunits - 1;
6466 else
6468 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6469 sel[j] = j;
6470 for (k = 0; j < nunits; ++j, ++k)
6471 sel[j] = nunits + k;
6473 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6474 if (!can_vec_perm_const_p (vec_mode, indices))
6476 if (i == units_log2)
6477 return -1;
6479 if (whole_vector_shift_kind == scan_store_kind_perm)
6481 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6482 return -1;
6483 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6484 /* Whole vector shifts shift in zeros, so if init is an all-zero
6485 constant, there is no need to do anything further. */
6486 if ((TREE_CODE (init) != INTEGER_CST
6487 && TREE_CODE (init) != REAL_CST)
6488 || !initializer_zerop (init))
6490 tree masktype = build_same_sized_truth_vector_type (vectype);
6491 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6492 return -1;
6493 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6496 kind = whole_vector_shift_kind;
6498 if (use_whole_vector)
6500 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6501 use_whole_vector->safe_grow_cleared (i);
6502 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6503 use_whole_vector->safe_push (kind);
6507 return units_log2;
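/* Worked illustration for nunits == 8 (matching the masks in the comments
   further below): the selectors tried by scan_store_can_perm_p are
     i == 0:  { 0, 8, 9, 10, 11, 12, 13, 14 }
     i == 1:  { 0, 1, 8, 9, 10, 11, 12, 13 }
     i == 2:  { 0, 1, 2, 3, 8, 9, 10, 11 }
     i == 3:  { 7, 7, 7, 7, 7, 7, 7, 7 }   (single-input broadcast of the
                                             last element)
   where the two-input permutations take the initializer vector as the first
   operand and the running partial sums as the second.  */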
6511 /* Function check_scan_store.
6513 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6515 static bool
6516 check_scan_store (stmt_vec_info stmt_info, tree vectype,
6517 enum vect_def_type rhs_dt, bool slp, tree mask,
6518 vect_memory_access_type memory_access_type)
6520 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6521 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6522 tree ref_type;
6524 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6525 if (slp
6526 || mask
6527 || memory_access_type != VMAT_CONTIGUOUS
6528 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6529 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6530 || loop_vinfo == NULL
6531 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6532 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6533 || !integer_zerop (DR_OFFSET (dr_info->dr))
6534 || !integer_zerop (DR_INIT (dr_info->dr))
6535 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6536 || !alias_sets_conflict_p (get_alias_set (vectype),
6537 get_alias_set (TREE_TYPE (ref_type))))
6539 if (dump_enabled_p ())
6540 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6541 "unsupported OpenMP scan store.\n");
6542 return false;
6545 /* We need to pattern match code built by OpenMP lowering and simplified
6546 by subsequent optimizations into something we can handle.
6547 #pragma omp simd reduction(inscan,+:r)
6548 for (...)
6550 r += something ();
6551 #pragma omp scan inclusive (r)
6552 use (r);
6554 shall have body with:
6555 // Initialization for input phase, store the reduction initializer:
6556 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6557 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6558 D.2042[_21] = 0;
6559 // Actual input phase:
6561 r.0_5 = D.2042[_20];
6562 _6 = _4 + r.0_5;
6563 D.2042[_20] = _6;
6564 // Initialization for scan phase:
6565 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6566 _26 = D.2043[_25];
6567 _27 = D.2042[_25];
6568 _28 = _26 + _27;
6569 D.2043[_25] = _28;
6570 D.2042[_25] = _28;
6571 // Actual scan phase:
6573 r.1_8 = D.2042[_20];
6575 The "omp simd array" variable D.2042 holds the privatized copy used
6576 inside of the loop and D.2043 is another one that holds copies of
6577 the current original list item. The separate GOMP_SIMD_LANE ifn
6578 kinds are there in order to allow optimizing the initializer store
6579 and combiner sequence, e.g. if it is originally some C++ish user
6580 defined reduction, but allow the vectorizer to pattern recognize it
6581 and turn into the appropriate vectorized scan.
6583 For exclusive scan, this is slightly different:
6584 #pragma omp simd reduction(inscan,+:r)
6585 for (...)
6587 use (r);
6588 #pragma omp scan exclusive (r)
6589 r += something ();
6591 shall have body with:
6592 // Initialization for input phase, store the reduction initializer:
6593 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6594 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6595 D.2042[_21] = 0;
6596 // Actual input phase:
6598 r.0_5 = D.2042[_20];
6599 _6 = _4 + r.0_5;
6600 D.2042[_20] = _6;
6601 // Initialization for scan phase:
6602 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6603 _26 = D.2043[_25];
6604 D.2044[_25] = _26;
6605 _27 = D.2042[_25];
6606 _28 = _26 + _27;
6607 D.2043[_25] = _28;
6608 // Actual scan phase:
6610 r.1_8 = D.2044[_20];
6611 ... */
6613 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6615 /* Match the D.2042[_21] = 0; store above. Just require that
6616 it is a constant or external definition store. */
6617 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6619 fail_init:
6620 if (dump_enabled_p ())
6621 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6622 "unsupported OpenMP scan initializer store.\n");
6623 return false;
6626 if (! loop_vinfo->scan_map)
6627 loop_vinfo->scan_map = new hash_map<tree, tree>;
6628 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6629 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6630 if (cached)
6631 goto fail_init;
6632 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6634 /* These stores can be vectorized normally. */
6635 return true;
6638 if (rhs_dt != vect_internal_def)
6640 fail:
6641 if (dump_enabled_p ())
6642 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6643 "unsupported OpenMP scan combiner pattern.\n");
6644 return false;
6647 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6648 tree rhs = gimple_assign_rhs1 (stmt);
6649 if (TREE_CODE (rhs) != SSA_NAME)
6650 goto fail;
6652 gimple *other_store_stmt = NULL;
6653 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6654 bool inscan_var_store
6655 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6657 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6659 if (!inscan_var_store)
6661 use_operand_p use_p;
6662 imm_use_iterator iter;
6663 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6665 gimple *use_stmt = USE_STMT (use_p);
6666 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6667 continue;
6668 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6669 || !is_gimple_assign (use_stmt)
6670 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6671 || other_store_stmt
6672 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6673 goto fail;
6674 other_store_stmt = use_stmt;
6676 if (other_store_stmt == NULL)
6677 goto fail;
6678 rhs = gimple_assign_lhs (other_store_stmt);
6679 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6680 goto fail;
6683 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6685 use_operand_p use_p;
6686 imm_use_iterator iter;
6687 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6689 gimple *use_stmt = USE_STMT (use_p);
6690 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6691 continue;
6692 if (other_store_stmt)
6693 goto fail;
6694 other_store_stmt = use_stmt;
6697 else
6698 goto fail;
6700 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6701 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6702 || !is_gimple_assign (def_stmt)
6703 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6704 goto fail;
6706 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6707 /* For pointer addition, we should use the normal plus for the vector
6708 operation. */
6709 switch (code)
6711 case POINTER_PLUS_EXPR:
6712 code = PLUS_EXPR;
6713 break;
6714 case MULT_HIGHPART_EXPR:
6715 goto fail;
6716 default:
6717 break;
6719 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6720 goto fail;
6722 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6723 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6724 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6725 goto fail;
6727 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6728 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6729 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6730 || !gimple_assign_load_p (load1_stmt)
6731 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6732 || !gimple_assign_load_p (load2_stmt))
6733 goto fail;
6735 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6736 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6737 if (load1_stmt_info == NULL
6738 || load2_stmt_info == NULL
6739 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6740 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6741 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6742 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6743 goto fail;
6745 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6747 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6748 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6749 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6750 goto fail;
6751 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6752 tree lrhs;
6753 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6754 lrhs = rhs1;
6755 else
6756 lrhs = rhs2;
6757 use_operand_p use_p;
6758 imm_use_iterator iter;
6759 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6761 gimple *use_stmt = USE_STMT (use_p);
6762 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6763 continue;
6764 if (other_store_stmt)
6765 goto fail;
6766 other_store_stmt = use_stmt;
6770 if (other_store_stmt == NULL)
6771 goto fail;
6772 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6773 || !gimple_store_p (other_store_stmt))
6774 goto fail;
6776 stmt_vec_info other_store_stmt_info
6777 = loop_vinfo->lookup_stmt (other_store_stmt);
6778 if (other_store_stmt_info == NULL
6779 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6780 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6781 goto fail;
6783 gimple *stmt1 = stmt;
6784 gimple *stmt2 = other_store_stmt;
6785 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6786 std::swap (stmt1, stmt2);
6787 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6788 gimple_assign_rhs1 (load2_stmt)))
6790 std::swap (rhs1, rhs2);
6791 std::swap (load1_stmt, load2_stmt);
6792 std::swap (load1_stmt_info, load2_stmt_info);
6794 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6795 gimple_assign_rhs1 (load1_stmt)))
6796 goto fail;
6798 tree var3 = NULL_TREE;
6799 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6800 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6801 gimple_assign_rhs1 (load2_stmt)))
6802 goto fail;
6803 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6805 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6806 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6807 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6808 goto fail;
6809 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6810 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6811 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6812 || lookup_attribute ("omp simd inscan exclusive",
6813 DECL_ATTRIBUTES (var3)))
6814 goto fail;
6817 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6818 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6819 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6820 goto fail;
6822 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6823 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6824 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6825 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6826 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6827 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6828 goto fail;
6830 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6831 std::swap (var1, var2);
6833 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6835 if (!lookup_attribute ("omp simd inscan exclusive",
6836 DECL_ATTRIBUTES (var1)))
6837 goto fail;
6838 var1 = var3;
6841 if (loop_vinfo->scan_map == NULL)
6842 goto fail;
6843 tree *init = loop_vinfo->scan_map->get (var1);
6844 if (init == NULL)
6845 goto fail;
6847 /* The IL is as expected; now check if we can actually vectorize it.
6848 Inclusive scan:
6849 _26 = D.2043[_25];
6850 _27 = D.2042[_25];
6851 _28 = _26 + _27;
6852 D.2043[_25] = _28;
6853 D.2042[_25] = _28;
6854 should be vectorized as (where _40 is the vectorized rhs
6855 from the D.2042[_21] = 0; store):
6856 _30 = MEM <vector(8) int> [(int *)&D.2043];
6857 _31 = MEM <vector(8) int> [(int *)&D.2042];
6858 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6859 _33 = _31 + _32;
6860 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6861 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6862 _35 = _33 + _34;
6863 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6864 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6865 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6866 _37 = _35 + _36;
6867 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6868 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6869 _38 = _30 + _37;
6870 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6871 MEM <vector(8) int> [(int *)&D.2043] = _39;
6872 MEM <vector(8) int> [(int *)&D.2042] = _38;
6873 Exclusive scan:
6874 _26 = D.2043[_25];
6875 D.2044[_25] = _26;
6876 _27 = D.2042[_25];
6877 _28 = _26 + _27;
6878 D.2043[_25] = _28;
6879 should be vectorized as (where _40 is the vectorized rhs
6880 from the D.2042[_21] = 0; store):
6881 _30 = MEM <vector(8) int> [(int *)&D.2043];
6882 _31 = MEM <vector(8) int> [(int *)&D.2042];
6883 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6884 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6885 _34 = _32 + _33;
6886 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6887 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6888 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6889 _36 = _34 + _35;
6890 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6891 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6892 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6893 _38 = _36 + _37;
6894 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6895 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6896 _39 = _30 + _38;
6897 _50 = _31 + _39;
6898 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6899 MEM <vector(8) int> [(int *)&D.2044] = _39;
6900 MEM <vector(8) int> [(int *)&D.2042] = _51; */
6901 enum machine_mode vec_mode = TYPE_MODE (vectype);
6902 optab optab = optab_for_tree_code (code, vectype, optab_default);
6903 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6904 goto fail;
6906 int units_log2 = scan_store_can_perm_p (vectype, *init);
6907 if (units_log2 == -1)
6908 goto fail;
6910 return true;
6914 /* Function vectorizable_scan_store.
6916 Helper of vectorizable_store; arguments are as for vectorizable_store.
6917 Handle only the transformation; the checking is done in check_scan_store. */
6919 static bool
6920 vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6921 stmt_vec_info *vec_stmt, int ncopies)
6923 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6924 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6925 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
6926 vec_info *vinfo = stmt_info->vinfo;
6927 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6929 if (dump_enabled_p ())
6930 dump_printf_loc (MSG_NOTE, vect_location,
6931 "transform scan store. ncopies = %d\n", ncopies);
6933 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6934 tree rhs = gimple_assign_rhs1 (stmt);
6935 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
6937 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6938 bool inscan_var_store
6939 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6941 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6943 use_operand_p use_p;
6944 imm_use_iterator iter;
6945 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6947 gimple *use_stmt = USE_STMT (use_p);
6948 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6949 continue;
6950 rhs = gimple_assign_lhs (use_stmt);
6951 break;
6955 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6956 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6957 if (code == POINTER_PLUS_EXPR)
6958 code = PLUS_EXPR;
6959 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
6960 && commutative_tree_code (code));
6961 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6962 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6963 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
6964 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6965 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6966 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6967 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6968 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6969 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6970 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6971 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6973 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6975 std::swap (rhs1, rhs2);
6976 std::swap (var1, var2);
6977 std::swap (load1_dr_info, load2_dr_info);
6980 tree *init = loop_vinfo->scan_map->get (var1);
6981 gcc_assert (init);
6983 unsigned HOST_WIDE_INT nunits;
6984 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6985 gcc_unreachable ();
6986 auto_vec<enum scan_store_kind, 16> use_whole_vector;
6987 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
6988 gcc_assert (units_log2 > 0);
6989 auto_vec<tree, 16> perms;
6990 perms.quick_grow (units_log2 + 1);
6991 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
6992 for (int i = 0; i <= units_log2; ++i)
6994 unsigned HOST_WIDE_INT j, k;
6995 vec_perm_builder sel (nunits, nunits, 1);
6996 sel.quick_grow (nunits);
6997 if (i == units_log2)
6998 for (j = 0; j < nunits; ++j)
6999 sel[j] = nunits - 1;
7000 else
7002 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7003 sel[j] = j;
7004 for (k = 0; j < nunits; ++j, ++k)
7005 sel[j] = nunits + k;
7007 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7008 if (!use_whole_vector.is_empty ()
7009 && use_whole_vector[i] != scan_store_kind_perm)
7011 if (zero_vec == NULL_TREE)
7012 zero_vec = build_zero_cst (vectype);
7013 if (masktype == NULL_TREE
7014 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7015 masktype = build_same_sized_truth_vector_type (vectype);
7016 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7018 else
7019 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7022 stmt_vec_info prev_stmt_info = NULL;
7023 tree vec_oprnd1 = NULL_TREE;
7024 tree vec_oprnd2 = NULL_TREE;
7025 tree vec_oprnd3 = NULL_TREE;
7026 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7027 tree dataref_offset = build_int_cst (ref_type, 0);
7028 tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
7029 tree ldataref_ptr = NULL_TREE;
7030 tree orig = NULL_TREE;
7031 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7032 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7033 for (int j = 0; j < ncopies; j++)
7035 stmt_vec_info new_stmt_info;
7036 if (j == 0)
7038 vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
7039 if (ldataref_ptr == NULL)
7040 vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
7041 vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
7042 orig = vec_oprnd3;
7044 else
7046 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
7047 if (ldataref_ptr == NULL)
7048 vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
7049 vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
7050 if (!inscan_var_store)
7051 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7054 if (ldataref_ptr)
7056 vec_oprnd2 = make_ssa_name (vectype);
7057 tree data_ref = fold_build2 (MEM_REF, vectype,
7058 unshare_expr (ldataref_ptr),
7059 dataref_offset);
7060 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7061 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7062 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7063 if (prev_stmt_info == NULL)
7064 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7065 else
7066 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7067 prev_stmt_info = new_stmt_info;
7070 tree v = vec_oprnd2;
7071 for (int i = 0; i < units_log2; ++i)
7073 tree new_temp = make_ssa_name (vectype);
7074 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7075 (zero_vec
7076 && (use_whole_vector[i]
7077 != scan_store_kind_perm))
7078 ? zero_vec : vec_oprnd1, v,
7079 perms[i]);
7080 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7081 if (prev_stmt_info == NULL)
7082 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7083 else
7084 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7085 prev_stmt_info = new_stmt_info;
7087 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7089 /* The whole-vector shift shifted in zero bits, but if *init
7090 is not initializer_zerop, we need to replace those elements
7091 with elements from vec_oprnd1. */
7092 tree_vector_builder vb (masktype, nunits, 1);
7093 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7094 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7095 ? boolean_false_node : boolean_true_node);
7097 tree new_temp2 = make_ssa_name (vectype);
7098 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7099 new_temp, vec_oprnd1);
7100 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7101 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7102 prev_stmt_info = new_stmt_info;
7103 new_temp = new_temp2;
7106 /* For exclusive scan, perform the perms[i] permutation once
7107 more. */
7108 if (i == 0
7109 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7110 && v == vec_oprnd2)
7112 v = new_temp;
7113 --i;
7114 continue;
7117 tree new_temp2 = make_ssa_name (vectype);
7118 g = gimple_build_assign (new_temp2, code, v, new_temp);
7119 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7120 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7121 prev_stmt_info = new_stmt_info;
7123 v = new_temp2;
7126 tree new_temp = make_ssa_name (vectype);
7127 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7128 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7129 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7130 prev_stmt_info = new_stmt_info;
7132 tree last_perm_arg = new_temp;
7133 /* For exclusive scan, new_temp computed above is the exclusive scan
7134 prefix sum. Turn it into an inclusive prefix sum for the broadcast
7135 of the last element into orig. */
7136 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7138 last_perm_arg = make_ssa_name (vectype);
7139 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7140 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7141 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7142 prev_stmt_info = new_stmt_info;
7145 orig = make_ssa_name (vectype);
7146 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7147 last_perm_arg, perms[units_log2]);
7148 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7149 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7150 prev_stmt_info = new_stmt_info;
7152 if (!inscan_var_store)
7154 tree data_ref = fold_build2 (MEM_REF, vectype,
7155 unshare_expr (dataref_ptr),
7156 dataref_offset);
7157 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7158 g = gimple_build_assign (data_ref, new_temp);
7159 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7160 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7161 prev_stmt_info = new_stmt_info;
7165 if (inscan_var_store)
7166 for (int j = 0; j < ncopies; j++)
7168 if (j != 0)
7169 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7171 tree data_ref = fold_build2 (MEM_REF, vectype,
7172 unshare_expr (dataref_ptr),
7173 dataref_offset);
7174 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7175 gimple *g = gimple_build_assign (data_ref, orig);
7176 stmt_vec_info new_stmt_info
7177 = vect_finish_stmt_generation (stmt_info, g, gsi);
7178 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7179 prev_stmt_info = new_stmt_info;
7181 return true;
7185 /* Function vectorizable_store.
7187 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7188 that can be vectorized.
7189 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7190 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7191 Return true if STMT_INFO is vectorizable in this way. */
7193 static bool
7194 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7195 stmt_vec_info *vec_stmt, slp_tree slp_node,
7196 stmt_vector_for_cost *cost_vec)
7198 tree data_ref;
7199 tree op;
7200 tree vec_oprnd = NULL_TREE;
7201 tree elem_type;
7202 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7203 class loop *loop = NULL;
7204 machine_mode vec_mode;
7205 tree dummy;
7206 enum dr_alignment_support alignment_support_scheme;
7207 enum vect_def_type rhs_dt = vect_unknown_def_type;
7208 enum vect_def_type mask_dt = vect_unknown_def_type;
7209 stmt_vec_info prev_stmt_info = NULL;
7210 tree dataref_ptr = NULL_TREE;
7211 tree dataref_offset = NULL_TREE;
7212 gimple *ptr_incr = NULL;
7213 int ncopies;
7214 int j;
7215 stmt_vec_info first_stmt_info;
7216 bool grouped_store;
7217 unsigned int group_size, i;
7218 vec<tree> oprnds = vNULL;
7219 vec<tree> result_chain = vNULL;
7220 tree offset = NULL_TREE;
7221 vec<tree> vec_oprnds = vNULL;
7222 bool slp = (slp_node != NULL);
7223 unsigned int vec_num;
7224 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7225 vec_info *vinfo = stmt_info->vinfo;
7226 tree aggr_type;
7227 gather_scatter_info gs_info;
7228 poly_uint64 vf;
7229 vec_load_store_type vls_type;
7230 tree ref_type;
7232 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7233 return false;
7235 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7236 && ! vec_stmt)
7237 return false;
7239 /* Is vectorizable store? */
7241 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7242 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7244 tree scalar_dest = gimple_assign_lhs (assign);
7245 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7246 && is_pattern_stmt_p (stmt_info))
7247 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7248 if (TREE_CODE (scalar_dest) != ARRAY_REF
7249 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7250 && TREE_CODE (scalar_dest) != INDIRECT_REF
7251 && TREE_CODE (scalar_dest) != COMPONENT_REF
7252 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7253 && TREE_CODE (scalar_dest) != REALPART_EXPR
7254 && TREE_CODE (scalar_dest) != MEM_REF)
7255 return false;
7257 else
7259 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7260 if (!call || !gimple_call_internal_p (call))
7261 return false;
7263 internal_fn ifn = gimple_call_internal_fn (call);
7264 if (!internal_store_fn_p (ifn))
7265 return false;
7267 if (slp_node != NULL)
7269 if (dump_enabled_p ())
7270 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7271 "SLP of masked stores not supported.\n");
7272 return false;
7275 int mask_index = internal_fn_mask_index (ifn);
7276 if (mask_index >= 0)
7278 mask = gimple_call_arg (call, mask_index);
7279 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
7280 &mask_vectype))
7281 return false;
7285 op = vect_get_store_rhs (stmt_info);
7287 /* Cannot have hybrid store SLP -- that would mean storing to the
7288 same location twice. */
7289 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7291 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7292 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7294 if (loop_vinfo)
7296 loop = LOOP_VINFO_LOOP (loop_vinfo);
7297 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7299 else
7300 vf = 1;
7302 /* Multiple types in SLP are handled by creating the appropriate number of
7303 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7304 case of SLP. */
7305 if (slp)
7306 ncopies = 1;
7307 else
7308 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7310 gcc_assert (ncopies >= 1);
7312 /* FORNOW. This restriction should be relaxed. */
7313 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7315 if (dump_enabled_p ())
7316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7317 "multiple types in nested loop.\n");
7318 return false;
7321 if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
7322 return false;
7324 elem_type = TREE_TYPE (vectype);
7325 vec_mode = TYPE_MODE (vectype);
7327 if (!STMT_VINFO_DATA_REF (stmt_info))
7328 return false;
7330 vect_memory_access_type memory_access_type;
7331 if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
7332 &memory_access_type, &gs_info))
7333 return false;
7335 if (mask)
7337 if (memory_access_type == VMAT_CONTIGUOUS)
7339 if (!VECTOR_MODE_P (vec_mode)
7340 || !can_vec_mask_load_store_p (vec_mode,
7341 TYPE_MODE (mask_vectype), false))
7342 return false;
7344 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7345 && (memory_access_type != VMAT_GATHER_SCATTER
7346 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7348 if (dump_enabled_p ())
7349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7350 "unsupported access type for masked store.\n");
7351 return false;
7354 else
7356 /* FORNOW. In some cases we can vectorize even if the data-type is not
7357 supported (e.g., array initialization with 0). */
7358 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7359 return false;
7362 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7363 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7364 && memory_access_type != VMAT_GATHER_SCATTER
7365 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7366 if (grouped_store)
7368 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7369 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7370 group_size = DR_GROUP_SIZE (first_stmt_info);
7372 else
7374 first_stmt_info = stmt_info;
7375 first_dr_info = dr_info;
7376 group_size = vec_num = 1;
7379 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7381 if (!check_scan_store (stmt_info, vectype, rhs_dt, slp, mask,
7382 memory_access_type))
7383 return false;
7386 if (!vec_stmt) /* transformation not required. */
7388 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7390 if (loop_vinfo
7391 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7392 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
7393 memory_access_type, &gs_info);
7395 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7396 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
7397 vls_type, slp_node, cost_vec);
7398 return true;
7400 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7402 /* Transform. */
7404 ensure_base_align (dr_info);
7406 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7408 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7409 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7410 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7411 tree ptr, var, scale, vec_mask;
7412 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7413 tree mask_halfvectype = mask_vectype;
7414 edge pe = loop_preheader_edge (loop);
7415 gimple_seq seq;
7416 basic_block new_bb;
7417 enum { NARROW, NONE, WIDEN } modifier;
7418 poly_uint64 scatter_off_nunits
7419 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7421 if (known_eq (nunits, scatter_off_nunits))
7422 modifier = NONE;
7423 else if (known_eq (nunits * 2, scatter_off_nunits))
7425 modifier = WIDEN;
7427 /* Currently gathers and scatters are only supported for
7428 fixed-length vectors. */
7429 unsigned int count = scatter_off_nunits.to_constant ();
7430 vec_perm_builder sel (count, count, 1);
7431 for (i = 0; i < (unsigned int) count; ++i)
7432 sel.quick_push (i | (count / 2));
7434 vec_perm_indices indices (sel, 1, count);
7435 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7436 indices);
7437 gcc_assert (perm_mask != NULL_TREE);
7439 else if (known_eq (nunits, scatter_off_nunits * 2))
7441 modifier = NARROW;
7443 /* Currently gathers and scatters are only supported for
7444 fixed-length vectors. */
7445 unsigned int count = nunits.to_constant ();
7446 vec_perm_builder sel (count, count, 1);
7447 for (i = 0; i < (unsigned int) count; ++i)
7448 sel.quick_push (i | (count / 2));
7450 vec_perm_indices indices (sel, 2, count);
7451 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7452 gcc_assert (perm_mask != NULL_TREE);
7453 ncopies *= 2;
7455 if (mask)
7456 mask_halfvectype
7457 = build_same_sized_truth_vector_type (gs_info.offset_vectype);
7459 else
7460 gcc_unreachable ();
7462 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7463 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7464 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7465 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7466 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7467 scaletype = TREE_VALUE (arglist);
7469 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7470 && TREE_CODE (rettype) == VOID_TYPE);
7472 ptr = fold_convert (ptrtype, gs_info.base);
7473 if (!is_gimple_min_invariant (ptr))
7475 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7476 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7477 gcc_assert (!new_bb);
7480 if (mask == NULL_TREE)
7482 mask_arg = build_int_cst (masktype, -1);
7483 mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
7486 scale = build_int_cst (scaletype, gs_info.scale);
7488 prev_stmt_info = NULL;
7489 for (j = 0; j < ncopies; ++j)
7491 if (j == 0)
7493 src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
7494 op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
7495 stmt_info);
7496 if (mask)
7497 mask_op = vec_mask = vect_get_vec_def_for_operand (mask,
7498 stmt_info);
7500 else if (modifier != NONE && (j & 1))
7502 if (modifier == WIDEN)
7505 = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7506 vec_oprnd1);
7507 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
7508 stmt_info, gsi);
7509 if (mask)
7510 mask_op
7511 = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7512 vec_mask);
7514 else if (modifier == NARROW)
7516 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
7517 stmt_info, gsi);
7518 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7519 vec_oprnd0);
7521 else
7522 gcc_unreachable ();
7524 else
7526 src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7527 vec_oprnd1);
7528 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7529 vec_oprnd0);
7530 if (mask)
7531 mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7532 vec_mask);
7535 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7537 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7538 TYPE_VECTOR_SUBPARTS (srctype)));
7539 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7540 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7541 gassign *new_stmt
7542 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7543 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7544 src = var;
7547 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7549 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7550 TYPE_VECTOR_SUBPARTS (idxtype)));
7551 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7552 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7553 gassign *new_stmt
7554 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7555 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7556 op = var;
7559 if (mask)
7561 tree utype;
7562 mask_arg = mask_op;
7563 if (modifier == NARROW)
7565 var = vect_get_new_ssa_name (mask_halfvectype,
7566 vect_simple_var);
7567 gassign *new_stmt
7568 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7569 : VEC_UNPACK_LO_EXPR,
7570 mask_op);
7571 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7572 mask_arg = var;
7574 tree optype = TREE_TYPE (mask_arg);
7575 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7576 utype = masktype;
7577 else
7578 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7579 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7580 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7581 gassign *new_stmt
7582 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7583 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7584 mask_arg = var;
7585 if (!useless_type_conversion_p (masktype, utype))
7587 gcc_assert (TYPE_PRECISION (utype)
7588 <= TYPE_PRECISION (masktype));
7589 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7590 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7591 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7592 mask_arg = var;
7596 gcall *new_stmt
7597 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7598 stmt_vec_info new_stmt_info
7599 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7601 if (prev_stmt_info == NULL)
7602 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7603 else
7604 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7605 prev_stmt_info = new_stmt_info;
7607 return true;
7609 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7610 return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
7612 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7613 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7615 if (grouped_store)
7617 /* FORNOW */
7618 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7620 /* We vectorize all the stmts of the interleaving group when we
7621 reach the last stmt in the group. */
7622 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7623 < DR_GROUP_SIZE (first_stmt_info)
7624 && !slp)
7626 *vec_stmt = NULL;
7627 return true;
7630 if (slp)
7632 grouped_store = false;
7633 /* VEC_NUM is the number of vect stmts to be created for this
7634 group. */
7635 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7636 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7637 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7638 == first_stmt_info);
7639 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7640 op = vect_get_store_rhs (first_stmt_info);
7642 else
7643 /* VEC_NUM is the number of vect stmts to be created for this
7644 group. */
7645 vec_num = group_size;
7647 ref_type = get_group_alias_ptr_type (first_stmt_info);
7649 else
7650 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7652 if (dump_enabled_p ())
7653 dump_printf_loc (MSG_NOTE, vect_location,
7654 "transform store. ncopies = %d\n", ncopies);
7656 if (memory_access_type == VMAT_ELEMENTWISE
7657 || memory_access_type == VMAT_STRIDED_SLP)
7659 gimple_stmt_iterator incr_gsi;
7660 bool insert_after;
7661 gimple *incr;
7662 tree offvar;
7663 tree ivstep;
7664 tree running_off;
7665 tree stride_base, stride_step, alias_off;
7666 tree vec_oprnd;
7667 unsigned int g;
7668 /* Checked by get_load_store_type. */
7669 unsigned int const_nunits = nunits.to_constant ();
7671 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7672 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7674 stride_base
7675 = fold_build_pointer_plus
7676 (DR_BASE_ADDRESS (first_dr_info->dr),
7677 size_binop (PLUS_EXPR,
7678 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
7679 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7680 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7682 /* For a store with loop-invariant (but other than power-of-2)
7683 stride (i.e. not a grouped access) like so:
7685 for (i = 0; i < n; i += stride)
7686 array[i] = ...;
7688 we generate a new induction variable and new stores from
7689 the components of the (vectorized) rhs:
7691 for (j = 0; ; j += VF*stride)
7692 vectemp = ...;
7693 tmp1 = vectemp[0];
7694 array[j] = tmp1;
7695 tmp2 = vectemp[1];
7696 array[j + stride] = tmp2;
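/* A concrete (hypothetical) instance of the above, assuming a
   four-element vectype and stride 3:

   for (i = 0; i < n; i += 3)
     array[i] = f (i);

   becomes, per vector iteration,

   for (j = 0; ; j += 4 * 3)
     vectemp = { f (j), f (j + 3), f (j + 6), f (j + 9) };
     array[j] = vectemp[0];
     array[j + 3] = vectemp[1];
     array[j + 6] = vectemp[2];
     array[j + 9] = vectemp[3];  */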
7700 unsigned nstores = const_nunits;
7701 unsigned lnel = 1;
7702 tree ltype = elem_type;
7703 tree lvectype = vectype;
7704 if (slp)
7706 if (group_size < const_nunits
7707 && const_nunits % group_size == 0)
7709 nstores = const_nunits / group_size;
7710 lnel = group_size;
7711 ltype = build_vector_type (elem_type, group_size);
7712 lvectype = vectype;
7714 /* First check whether the vec_extract optab supports extraction
7715 of vector elts directly; if it does not, fall back below. */
7716 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7717 machine_mode vmode;
7718 if (!mode_for_vector (elmode, group_size).exists (&vmode)
7719 || !VECTOR_MODE_P (vmode)
7720 || !targetm.vector_mode_supported_p (vmode)
7721 || (convert_optab_handler (vec_extract_optab,
7722 TYPE_MODE (vectype), vmode)
7723 == CODE_FOR_nothing))
7725 /* Try to avoid emitting an extract of vector elements
7726 by performing the extracts using an integer type of the
7727 same size, extracting from a vector of those and then
7728 re-interpreting it as the original vector type if
7729 supported. */
7730 unsigned lsize
7731 = group_size * GET_MODE_BITSIZE (elmode);
7732 unsigned int lnunits = const_nunits / group_size;
7733 /* If we can't construct such a vector fall back to
7734 element extracts from the original vector type and
7735 element size stores. */
7736 if (int_mode_for_size (lsize, 0).exists (&elmode)
7737 && mode_for_vector (elmode, lnunits).exists (&vmode)
7738 && VECTOR_MODE_P (vmode)
7739 && targetm.vector_mode_supported_p (vmode)
7740 && (convert_optab_handler (vec_extract_optab,
7741 vmode, elmode)
7742 != CODE_FOR_nothing))
7744 nstores = lnunits;
7745 lnel = group_size;
7746 ltype = build_nonstandard_integer_type (lsize, 1);
7747 lvectype = build_vector_type (ltype, nstores);
7749 /* Else fall back to vector extraction anyway:
7750 fewer stores are more important than avoiding spilling
7751 of the vector we extract from. Compared to the
7752 construction case in vectorizable_load, no store-forwarding
7753 issue exists here on reasonable archs. */
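/* As a hypothetical example of the fallback: for group_size == 2,
   SFmode elements and a V8SFmode vectype (const_nunits == 8), lsize
   is 2 * 32 == 64, so on a target without direct V2SFmode extraction
   but with V4DImode support the vector is punned to V4DImode and
   each of its four DImode lanes is extracted and stored as one
   64-bit chunk covering two adjacent floats, i.e. four stores
   instead of eight element stores.  */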
7756 else if (group_size >= const_nunits
7757 && group_size % const_nunits == 0)
7759 nstores = 1;
7760 lnel = const_nunits;
7761 ltype = vectype;
7762 lvectype = vectype;
7764 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7765 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7768 ivstep = stride_step;
7769 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7770 build_int_cst (TREE_TYPE (ivstep), vf));
7772 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7774 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7775 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7776 create_iv (stride_base, ivstep, NULL,
7777 loop, &incr_gsi, insert_after,
7778 &offvar, NULL);
7779 incr = gsi_stmt (incr_gsi);
7780 loop_vinfo->add_stmt (incr);
7782 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7784 prev_stmt_info = NULL;
7785 alias_off = build_int_cst (ref_type, 0);
7786 stmt_vec_info next_stmt_info = first_stmt_info;
7787 for (g = 0; g < group_size; g++)
7789 running_off = offvar;
7790 if (g)
7792 tree size = TYPE_SIZE_UNIT (ltype);
7793 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7794 size);
7795 tree newoff = copy_ssa_name (running_off, NULL);
7796 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7797 running_off, pos);
7798 vect_finish_stmt_generation (stmt_info, incr, gsi);
7799 running_off = newoff;
7801 unsigned int group_el = 0;
7802 unsigned HOST_WIDE_INT
7803 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7804 for (j = 0; j < ncopies; j++)
7806 /* We've set op and dt above, from vect_get_store_rhs,
7807 and first_stmt_info == stmt_info. */
7808 if (j == 0)
7810 if (slp)
7812 vect_get_vec_defs (op, NULL_TREE, stmt_info,
7813 &vec_oprnds, NULL, slp_node);
7814 vec_oprnd = vec_oprnds[0];
7816 else
7818 op = vect_get_store_rhs (next_stmt_info);
7819 vec_oprnd = vect_get_vec_def_for_operand
7820 (op, next_stmt_info);
7823 else
7825 if (slp)
7826 vec_oprnd = vec_oprnds[j];
7827 else
7828 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
7829 vec_oprnd);
7831 /* Pun the vector to extract from if necessary. */
7832 if (lvectype != vectype)
7834 tree tem = make_ssa_name (lvectype);
7835 gimple *pun
7836 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7837 lvectype, vec_oprnd));
7838 vect_finish_stmt_generation (stmt_info, pun, gsi);
7839 vec_oprnd = tem;
7841 for (i = 0; i < nstores; i++)
7843 tree newref, newoff;
7844 gimple *incr, *assign;
7845 tree size = TYPE_SIZE (ltype);
7846 /* Extract the i'th component. */
7847 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7848 bitsize_int (i), size);
7849 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7850 size, pos);
7852 elem = force_gimple_operand_gsi (gsi, elem, true,
7853 NULL_TREE, true,
7854 GSI_SAME_STMT);
7856 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7857 group_el * elsz);
7858 newref = build2 (MEM_REF, ltype,
7859 running_off, this_off);
7860 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7862 /* And store it to *running_off. */
7863 assign = gimple_build_assign (newref, elem);
7864 stmt_vec_info assign_info
7865 = vect_finish_stmt_generation (stmt_info, assign, gsi);
7867 group_el += lnel;
7868 if (! slp
7869 || group_el == group_size)
7871 newoff = copy_ssa_name (running_off, NULL);
7872 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7873 running_off, stride_step);
7874 vect_finish_stmt_generation (stmt_info, incr, gsi);
7876 running_off = newoff;
7877 group_el = 0;
7879 if (g == group_size - 1
7880 && !slp)
7882 if (j == 0 && i == 0)
7883 STMT_VINFO_VEC_STMT (stmt_info)
7884 = *vec_stmt = assign_info;
7885 else
7886 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
7887 prev_stmt_info = assign_info;
7891 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7892 if (slp)
7893 break;
7896 vec_oprnds.release ();
7897 return true;
7900 auto_vec<tree> dr_chain (group_size);
7901 oprnds.create (group_size);
7903 alignment_support_scheme
7904 = vect_supportable_dr_alignment (first_dr_info, false);
7905 gcc_assert (alignment_support_scheme);
7906 vec_loop_masks *loop_masks
7907 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7908 ? &LOOP_VINFO_MASKS (loop_vinfo)
7909 : NULL);
7910 /* Targets with store-lane instructions must not require explicit
7911 realignment. vect_supportable_dr_alignment always returns either
7912 dr_aligned or dr_unaligned_supported for masked operations. */
7913 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7914 && !mask
7915 && !loop_masks)
7916 || alignment_support_scheme == dr_aligned
7917 || alignment_support_scheme == dr_unaligned_supported);
7919 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7920 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7921 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7923 tree bump;
7924 tree vec_offset = NULL_TREE;
7925 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7927 aggr_type = NULL_TREE;
7928 bump = NULL_TREE;
7930 else if (memory_access_type == VMAT_GATHER_SCATTER)
7932 aggr_type = elem_type;
7933 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7934 &bump, &vec_offset);
7936 else
7938 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7939 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7940 else
7941 aggr_type = vectype;
7942 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
7943 memory_access_type);
7946 if (mask)
7947 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7949 /* In case the vectorization factor (VF) is bigger than the number
7950 of elements that we can fit in a vectype (nunits), we have to generate
7951 more than one vector stmt, i.e., we need to "unroll" the
7952 vector stmt by a factor of VF/nunits. For more details see the
7953 documentation of vect_get_vec_def_for_stmt_copy. */
7955 /* In case of interleaving (non-unit grouped access):
7957 S1: &base + 2 = x2
7958 S2: &base = x0
7959 S3: &base + 1 = x1
7960 S4: &base + 3 = x3
7962 We create vectorized stores starting from base address (the access of the
7963 first stmt in the chain (S2 in the above example), when the last store stmt
7964 of the chain (S4) is reached:
7966 VS1: &base = vx2
7967 VS2: &base + vec_size*1 = vx0
7968 VS3: &base + vec_size*2 = vx1
7969 VS4: &base + vec_size*3 = vx3
7971 Then permutation statements are generated:
7973 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7974 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7977 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7978 (the order of the data-refs in the output of vect_permute_store_chain
7979 corresponds to the order of scalar stmts in the interleaving chain - see
7980 the documentation of vect_permute_store_chain()).
7982 In case of both multiple types and interleaving, above vector stores and
7983 permutation stmts are created for every copy. The result vector stmts are
7984 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7985 STMT_VINFO_RELATED_STMT for the next copies.
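/* A small worked instance with hypothetical numbers: for a
   vectorization factor of 8 and a V4SImode vectype (nunits == 4),
   ncopies is 8 / 4 == 2, so the stores and permutations above are
   emitted twice; the statements of the first copy are recorded in
   STMT_VINFO_VEC_STMT and those of the second copy are linked to
   them through STMT_VINFO_RELATED_STMT.  */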
7988 prev_stmt_info = NULL;
7989 tree vec_mask = NULL_TREE;
7990 for (j = 0; j < ncopies; j++)
7992 stmt_vec_info new_stmt_info;
7993 if (j == 0)
7995 if (slp)
7997 /* Get vectorized arguments for SLP_NODE. */
7998 vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
7999 NULL, slp_node);
8001 vec_oprnd = vec_oprnds[0];
8003 else
8005 /* For interleaved stores we collect vectorized defs for all the
8006 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8007 used as an input to vect_permute_store_chain(), and OPRNDS as
8008 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8010 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8011 OPRNDS are of size 1. */
8012 stmt_vec_info next_stmt_info = first_stmt_info;
8013 for (i = 0; i < group_size; i++)
8015 /* Since gaps are not supported for interleaved stores,
8016 DR_GROUP_SIZE is the exact number of stmts in the chain.
8017 Therefore, NEXT_STMT_INFO can't be NULL. In case
8018 that there is no interleaving, DR_GROUP_SIZE is 1,
8019 and only one iteration of the loop will be executed. */
8020 op = vect_get_store_rhs (next_stmt_info);
8021 vec_oprnd = vect_get_vec_def_for_operand
8022 (op, next_stmt_info);
8023 dr_chain.quick_push (vec_oprnd);
8024 oprnds.quick_push (vec_oprnd);
8025 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8027 if (mask)
8028 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8029 mask_vectype);
8032 /* We should have caught mismatched types earlier. */
8033 gcc_assert (useless_type_conversion_p (vectype,
8034 TREE_TYPE (vec_oprnd)));
8035 bool simd_lane_access_p
8036 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8037 if (simd_lane_access_p
8038 && !loop_masks
8039 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8040 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8041 && integer_zerop (DR_OFFSET (first_dr_info->dr))
8042 && integer_zerop (DR_INIT (first_dr_info->dr))
8043 && alias_sets_conflict_p (get_alias_set (aggr_type),
8044 get_alias_set (TREE_TYPE (ref_type))))
8046 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8047 dataref_offset = build_int_cst (ref_type, 0);
8049 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8050 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8051 &dataref_ptr, &vec_offset);
8052 else
8053 dataref_ptr
8054 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
8055 simd_lane_access_p ? loop : NULL,
8056 offset, &dummy, gsi, &ptr_incr,
8057 simd_lane_access_p, NULL_TREE, bump);
8059 else
8061 /* For interleaved stores we created vectorized defs for all the
8062 defs stored in OPRNDS in the previous iteration (previous copy).
8063 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8064 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8065 next copy.
8066 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8067 OPRNDS are of size 1. */
8068 for (i = 0; i < group_size; i++)
8070 op = oprnds[i];
8071 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
8072 dr_chain[i] = vec_oprnd;
8073 oprnds[i] = vec_oprnd;
8075 if (mask)
8076 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8077 if (dataref_offset)
8078 dataref_offset
8079 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8080 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8081 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8082 else
8083 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8084 stmt_info, bump);
8087 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8089 tree vec_array;
8091 /* Get an array into which we can store the individual vectors. */
8092 vec_array = create_vector_array (vectype, vec_num);
8094 /* Invalidate the current contents of VEC_ARRAY. This should
8095 become an RTL clobber too, which prevents the vector registers
8096 from being upward-exposed. */
8097 vect_clobber_variable (stmt_info, gsi, vec_array);
8099 /* Store the individual vectors into the array. */
8100 for (i = 0; i < vec_num; i++)
8102 vec_oprnd = dr_chain[i];
8103 write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
8106 tree final_mask = NULL;
8107 if (loop_masks)
8108 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8109 vectype, j);
8110 if (vec_mask)
8111 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8112 vec_mask, gsi);
8114 gcall *call;
8115 if (final_mask)
8117 /* Emit:
8118 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8119 VEC_ARRAY). */
8120 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8121 tree alias_ptr = build_int_cst (ref_type, align);
8122 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8123 dataref_ptr, alias_ptr,
8124 final_mask, vec_array);
8126 else
8128 /* Emit:
8129 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8130 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8131 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8132 vec_array);
8133 gimple_call_set_lhs (call, data_ref);
8135 gimple_call_set_nothrow (call, true);
8136 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8138 /* Record that VEC_ARRAY is now dead. */
8139 vect_clobber_variable (stmt_info, gsi, vec_array);
8141 else
8143 new_stmt_info = NULL;
8144 if (grouped_store)
8146 if (j == 0)
8147 result_chain.create (group_size);
8148 /* Permute. */
8149 vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
8150 &result_chain);
8153 stmt_vec_info next_stmt_info = first_stmt_info;
8154 for (i = 0; i < vec_num; i++)
8156 unsigned misalign;
8157 unsigned HOST_WIDE_INT align;
8159 tree final_mask = NULL_TREE;
8160 if (loop_masks)
8161 final_mask = vect_get_loop_mask (gsi, loop_masks,
8162 vec_num * ncopies,
8163 vectype, vec_num * j + i);
8164 if (vec_mask)
8165 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8166 vec_mask, gsi);
8168 if (memory_access_type == VMAT_GATHER_SCATTER)
8170 tree scale = size_int (gs_info.scale);
8171 gcall *call;
8172 if (loop_masks)
8173 call = gimple_build_call_internal
8174 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8175 scale, vec_oprnd, final_mask);
8176 else
8177 call = gimple_build_call_internal
8178 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8179 scale, vec_oprnd);
8180 gimple_call_set_nothrow (call, true);
8181 new_stmt_info
8182 = vect_finish_stmt_generation (stmt_info, call, gsi);
8183 break;
8186 if (i > 0)
8187 /* Bump the vector pointer. */
8188 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8189 stmt_info, bump);
8191 if (slp)
8192 vec_oprnd = vec_oprnds[i];
8193 else if (grouped_store)
8194 /* For grouped stores vectorized defs are interleaved in
8195 vect_permute_store_chain(). */
8196 vec_oprnd = result_chain[i];
8198 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8199 if (aligned_access_p (first_dr_info))
8200 misalign = 0;
8201 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8203 align = dr_alignment (vect_dr_behavior (first_dr_info));
8204 misalign = 0;
8206 else
8207 misalign = DR_MISALIGNMENT (first_dr_info);
8208 if (dataref_offset == NULL_TREE
8209 && TREE_CODE (dataref_ptr) == SSA_NAME)
8210 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8211 misalign);
8213 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8215 tree perm_mask = perm_mask_for_reverse (vectype);
8216 tree perm_dest = vect_create_destination_var
8217 (vect_get_store_rhs (stmt_info), vectype);
8218 tree new_temp = make_ssa_name (perm_dest);
8220 /* Generate the permute statement. */
8221 gimple *perm_stmt
8222 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8223 vec_oprnd, perm_mask);
8224 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
8226 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8227 vec_oprnd = new_temp;
8230 /* Arguments are ready. Create the new vector stmt. */
8231 if (final_mask)
8233 align = least_bit_hwi (misalign | align);
8234 tree ptr = build_int_cst (ref_type, align);
8235 gcall *call
8236 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8237 dataref_ptr, ptr,
8238 final_mask, vec_oprnd);
8239 gimple_call_set_nothrow (call, true);
8240 new_stmt_info
8241 = vect_finish_stmt_generation (stmt_info, call, gsi);
8243 else
8245 data_ref = fold_build2 (MEM_REF, vectype,
8246 dataref_ptr,
8247 dataref_offset
8248 ? dataref_offset
8249 : build_int_cst (ref_type, 0));
8250 if (aligned_access_p (first_dr_info))
8252 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8253 TREE_TYPE (data_ref)
8254 = build_aligned_type (TREE_TYPE (data_ref),
8255 align * BITS_PER_UNIT);
8256 else
8257 TREE_TYPE (data_ref)
8258 = build_aligned_type (TREE_TYPE (data_ref),
8259 TYPE_ALIGN (elem_type));
8260 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8261 gassign *new_stmt
8262 = gimple_build_assign (data_ref, vec_oprnd);
8263 new_stmt_info
8264 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8267 if (slp)
8268 continue;
8270 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8271 if (!next_stmt_info)
8272 break;
8275 if (!slp)
8277 if (j == 0)
8278 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8279 else
8280 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8281 prev_stmt_info = new_stmt_info;
8285 oprnds.release ();
8286 result_chain.release ();
8287 vec_oprnds.release ();
8289 return true;
8292 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8293 VECTOR_CST mask. No checks are made that the target platform supports the
8294 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8295 vect_gen_perm_mask_checked. */
8297 tree
8298 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8300 tree mask_type;
8302 poly_uint64 nunits = sel.length ();
8303 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8305 mask_type = build_vector_type (ssizetype, nunits);
8306 return vec_perm_indices_to_tree (mask_type, sel);
8309 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8310 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8312 tree
8313 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8315 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8316 return vect_gen_perm_mask_any (vectype, sel);
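/* A sketch of typical usage (the element-reversal indices are just an
   assumed example; this mirrors what perm_mask_for_reverse, used by
   the VMAT_CONTIGUOUS_REVERSE code above, needs to build):

     poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
     vec_perm_builder sel (nunits, 1, 3);
     for (int i = 0; i < 3; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     tree mask = NULL_TREE;
     if (can_vec_perm_const_p (TYPE_MODE (vectype), indices))
       mask = vect_gen_perm_mask_checked (vectype, indices);  */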
8319 /* Given vector variables X and Y that were generated for the scalar
8320 STMT_INFO, generate instructions to permute the vector elements of X and Y
8321 using the permutation mask MASK_VEC, insert them at *GSI and return the
8322 permuted vector variable. */
8324 static tree
8325 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8326 gimple_stmt_iterator *gsi)
8328 tree vectype = TREE_TYPE (x);
8329 tree perm_dest, data_ref;
8330 gimple *perm_stmt;
8332 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8333 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8334 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8335 else
8336 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8337 data_ref = make_ssa_name (perm_dest);
8339 /* Generate the permute statement. */
8340 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8341 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
8343 return data_ref;
8346 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8347 inserting them on the loop's preheader edge. Returns true if we
8348 were successful in doing so (and thus STMT_INFO can then be moved),
8349 otherwise returns false. */
8351 static bool
8352 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8354 ssa_op_iter i;
8355 tree op;
8356 bool any = false;
8358 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8360 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8361 if (!gimple_nop_p (def_stmt)
8362 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8364 /* Make sure we don't need to recurse. While we could do
8365 so in simple cases, for more complex use webs we don't
8366 have an easy way to preserve stmt order to fulfil
8367 dependencies within them. */
8368 tree op2;
8369 ssa_op_iter i2;
8370 if (gimple_code (def_stmt) == GIMPLE_PHI)
8371 return false;
8372 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8374 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8375 if (!gimple_nop_p (def_stmt2)
8376 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8377 return false;
8379 any = true;
8383 if (!any)
8384 return true;
8386 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8388 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8389 if (!gimple_nop_p (def_stmt)
8390 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8392 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8393 gsi_remove (&gsi, false);
8394 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8398 return true;
8401 /* vectorizable_load.
8403 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8404 that can be vectorized.
8405 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8406 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8407 Return true if STMT_INFO is vectorizable in this way. */
8409 static bool
8410 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8411 stmt_vec_info *vec_stmt, slp_tree slp_node,
8412 slp_instance slp_node_instance,
8413 stmt_vector_for_cost *cost_vec)
8415 tree scalar_dest;
8416 tree vec_dest = NULL;
8417 tree data_ref = NULL;
8418 stmt_vec_info prev_stmt_info;
8419 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8420 class loop *loop = NULL;
8421 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8422 bool nested_in_vect_loop = false;
8423 tree elem_type;
8424 tree new_temp;
8425 machine_mode mode;
8426 tree dummy;
8427 enum dr_alignment_support alignment_support_scheme;
8428 tree dataref_ptr = NULL_TREE;
8429 tree dataref_offset = NULL_TREE;
8430 gimple *ptr_incr = NULL;
8431 int ncopies;
8432 int i, j;
8433 unsigned int group_size;
8434 poly_uint64 group_gap_adj;
8435 tree msq = NULL_TREE, lsq;
8436 tree offset = NULL_TREE;
8437 tree byte_offset = NULL_TREE;
8438 tree realignment_token = NULL_TREE;
8439 gphi *phi = NULL;
8440 vec<tree> dr_chain = vNULL;
8441 bool grouped_load = false;
8442 stmt_vec_info first_stmt_info;
8443 stmt_vec_info first_stmt_info_for_drptr = NULL;
8444 bool compute_in_loop = false;
8445 class loop *at_loop;
8446 int vec_num;
8447 bool slp = (slp_node != NULL);
8448 bool slp_perm = false;
8449 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8450 poly_uint64 vf;
8451 tree aggr_type;
8452 gather_scatter_info gs_info;
8453 vec_info *vinfo = stmt_info->vinfo;
8454 tree ref_type;
8455 enum vect_def_type mask_dt = vect_unknown_def_type;
8457 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8458 return false;
8460 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8461 && ! vec_stmt)
8462 return false;
8464 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8465 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8467 scalar_dest = gimple_assign_lhs (assign);
8468 if (TREE_CODE (scalar_dest) != SSA_NAME)
8469 return false;
8471 tree_code code = gimple_assign_rhs_code (assign);
8472 if (code != ARRAY_REF
8473 && code != BIT_FIELD_REF
8474 && code != INDIRECT_REF
8475 && code != COMPONENT_REF
8476 && code != IMAGPART_EXPR
8477 && code != REALPART_EXPR
8478 && code != MEM_REF
8479 && TREE_CODE_CLASS (code) != tcc_declaration)
8480 return false;
8482 else
8484 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8485 if (!call || !gimple_call_internal_p (call))
8486 return false;
8488 internal_fn ifn = gimple_call_internal_fn (call);
8489 if (!internal_load_fn_p (ifn))
8490 return false;
8492 scalar_dest = gimple_call_lhs (call);
8493 if (!scalar_dest)
8494 return false;
8496 int mask_index = internal_fn_mask_index (ifn);
8497 if (mask_index >= 0)
8499 mask = gimple_call_arg (call, mask_index);
8500 if (!vect_check_load_store_mask (stmt_info, mask, &mask_dt,
8501 &mask_vectype))
8502 return false;
8506 if (!STMT_VINFO_DATA_REF (stmt_info))
8507 return false;
8509 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8510 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8512 if (loop_vinfo)
8514 loop = LOOP_VINFO_LOOP (loop_vinfo);
8515 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8516 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8518 else
8519 vf = 1;
8521 /* Multiple types in SLP are handled by creating the appropriate number of
8522 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8523 case of SLP. */
8524 if (slp)
8525 ncopies = 1;
8526 else
8527 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8529 gcc_assert (ncopies >= 1);
8531 /* FORNOW. This restriction should be relaxed. */
8532 if (nested_in_vect_loop && ncopies > 1)
8534 if (dump_enabled_p ())
8535 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8536 "multiple types in nested loop.\n");
8537 return false;
8540 /* Invalidate assumptions made by dependence analysis when vectorization
8541 on the unrolled body effectively re-orders stmts. */
8542 if (ncopies > 1
8543 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8544 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8545 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8547 if (dump_enabled_p ())
8548 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8549 "cannot perform implicit CSE when unrolling "
8550 "with negative dependence distance\n");
8551 return false;
8554 elem_type = TREE_TYPE (vectype);
8555 mode = TYPE_MODE (vectype);
8557 /* FORNOW. In some cases we can vectorize even if the data-type is not
8558 supported (e.g., data copies). */
8559 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8561 if (dump_enabled_p ())
8562 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8563 "Aligned load, but unsupported type.\n");
8564 return false;
8567 /* Check if the load is a part of an interleaving chain. */
8568 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8570 grouped_load = true;
8571 /* FORNOW */
8572 gcc_assert (!nested_in_vect_loop);
8573 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8575 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8576 group_size = DR_GROUP_SIZE (first_stmt_info);
8578 /* Refuse non-SLP vectorization of SLP-only groups. */
8579 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8581 if (dump_enabled_p ())
8582 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8583 "cannot vectorize load in non-SLP mode.\n");
8584 return false;
8587 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8588 slp_perm = true;
8590 /* Invalidate assumptions made by dependence analysis when vectorization
8591 on the unrolled body effectively re-orders stmts. */
8592 if (!PURE_SLP_STMT (stmt_info)
8593 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8594 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8595 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8597 if (dump_enabled_p ())
8598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8599 "cannot perform implicit CSE when performing "
8600 "group loads with negative dependence distance\n");
8601 return false;
8604 else
8605 group_size = 1;
8607 vect_memory_access_type memory_access_type;
8608 if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
8609 &memory_access_type, &gs_info))
8610 return false;
8612 if (mask)
8614 if (memory_access_type == VMAT_CONTIGUOUS)
8616 machine_mode vec_mode = TYPE_MODE (vectype);
8617 if (!VECTOR_MODE_P (vec_mode)
8618 || !can_vec_mask_load_store_p (vec_mode,
8619 TYPE_MODE (mask_vectype), true))
8620 return false;
8622 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8623 && memory_access_type != VMAT_GATHER_SCATTER)
8625 if (dump_enabled_p ())
8626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8627 "unsupported access type for masked load.\n");
8628 return false;
8632 if (!vec_stmt) /* transformation not required. */
8634 if (!slp)
8635 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8637 if (loop_vinfo
8638 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
8639 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
8640 memory_access_type, &gs_info);
8642 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8643 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
8644 slp_node_instance, slp_node, cost_vec);
8645 return true;
8648 if (!slp)
8649 gcc_assert (memory_access_type
8650 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8652 if (dump_enabled_p ())
8653 dump_printf_loc (MSG_NOTE, vect_location,
8654 "transform load. ncopies = %d\n", ncopies);
8656 /* Transform. */
8658 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8659 ensure_base_align (dr_info);
8661 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8663 vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
8664 return true;
8667 if (memory_access_type == VMAT_INVARIANT)
8669 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8670 /* If we have versioned for aliasing or the loop doesn't
8671 have any data dependencies that would preclude this,
8672 then we are sure this is a loop invariant load and
8673 thus we can insert it on the preheader edge. */
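/* For example (hypothetical scalar source):

     for (i = 0; i < n; i++)
       a[i] = *q;

   when *q is known not to be clobbered inside the loop, the scalar
   load of *q is emitted once on the preheader edge and the vector
   statements in the loop body merely splat that scalar value.  */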
8674 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8675 && !nested_in_vect_loop
8676 && hoist_defs_of_uses (stmt_info, loop));
8677 if (hoist_p)
8679 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8680 if (dump_enabled_p ())
8681 dump_printf_loc (MSG_NOTE, vect_location,
8682 "hoisting out of the vectorized loop: %G", stmt);
8683 scalar_dest = copy_ssa_name (scalar_dest);
8684 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8685 gsi_insert_on_edge_immediate
8686 (loop_preheader_edge (loop),
8687 gimple_build_assign (scalar_dest, rhs));
8689 /* These copies are all equivalent, but currently the representation
8690 requires a separate STMT_VINFO_VEC_STMT for each one. */
8691 prev_stmt_info = NULL;
8692 gimple_stmt_iterator gsi2 = *gsi;
8693 gsi_next (&gsi2);
8694 for (j = 0; j < ncopies; j++)
8696 stmt_vec_info new_stmt_info;
8697 if (hoist_p)
8699 new_temp = vect_init_vector (stmt_info, scalar_dest,
8700 vectype, NULL);
8701 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8702 new_stmt_info = vinfo->add_stmt (new_stmt);
8704 else
8706 new_temp = vect_init_vector (stmt_info, scalar_dest,
8707 vectype, &gsi2);
8708 new_stmt_info = vinfo->lookup_def (new_temp);
8710 if (slp)
8711 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8712 else if (j == 0)
8713 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8714 else
8715 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8716 prev_stmt_info = new_stmt_info;
8718 return true;
8721 if (memory_access_type == VMAT_ELEMENTWISE
8722 || memory_access_type == VMAT_STRIDED_SLP)
8724 gimple_stmt_iterator incr_gsi;
8725 bool insert_after;
8726 gimple *incr;
8727 tree offvar;
8728 tree ivstep;
8729 tree running_off;
8730 vec<constructor_elt, va_gc> *v = NULL;
8731 tree stride_base, stride_step, alias_off;
8732 /* Checked by get_load_store_type. */
8733 unsigned int const_nunits = nunits.to_constant ();
8734 unsigned HOST_WIDE_INT cst_offset = 0;
8736 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8737 gcc_assert (!nested_in_vect_loop);
8739 if (grouped_load)
8741 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8742 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8744 else
8746 first_stmt_info = stmt_info;
8747 first_dr_info = dr_info;
8749 if (slp && grouped_load)
8751 group_size = DR_GROUP_SIZE (first_stmt_info);
8752 ref_type = get_group_alias_ptr_type (first_stmt_info);
8754 else
8756 if (grouped_load)
8757 cst_offset
8758 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8759 * vect_get_place_in_interleaving_chain (stmt_info,
8760 first_stmt_info));
8761 group_size = 1;
8762 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8765 stride_base
8766 = fold_build_pointer_plus
8767 (DR_BASE_ADDRESS (first_dr_info->dr),
8768 size_binop (PLUS_EXPR,
8769 convert_to_ptrofftype (DR_OFFSET (first_dr_info->dr)),
8770 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8771 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8773 /* For a load with loop-invariant (but other than power-of-2)
8774 stride (i.e. not a grouped access) like so:
8776 for (i = 0; i < n; i += stride)
8777 ... = array[i];
8779 we generate a new induction variable and new accesses to
8780 form a new vector (or vectors, depending on ncopies):
8782 for (j = 0; ; j += VF*stride)
8783 tmp1 = array[j];
8784 tmp2 = array[j + stride];
8786 vectemp = {tmp1, tmp2, ...}
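/* A concrete (hypothetical) instance of the above, assuming a
   four-element vectype and stride 3:

   for (j = 0; ; j += 4 * 3)
     tmp1 = array[j];
     tmp2 = array[j + 3];
     tmp3 = array[j + 6];
     tmp4 = array[j + 9];
     vectemp = { tmp1, tmp2, tmp3, tmp4 };  */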
8789 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8790 build_int_cst (TREE_TYPE (stride_step), vf));
8792 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8794 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8795 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8796 create_iv (stride_base, ivstep, NULL,
8797 loop, &incr_gsi, insert_after,
8798 &offvar, NULL);
8799 incr = gsi_stmt (incr_gsi);
8800 loop_vinfo->add_stmt (incr);
8802 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8804 prev_stmt_info = NULL;
8805 running_off = offvar;
8806 alias_off = build_int_cst (ref_type, 0);
8807 int nloads = const_nunits;
8808 int lnel = 1;
8809 tree ltype = TREE_TYPE (vectype);
8810 tree lvectype = vectype;
8811 auto_vec<tree> dr_chain;
8812 if (memory_access_type == VMAT_STRIDED_SLP)
8814 if (group_size < const_nunits)
8816 /* First check if vec_init optab supports construction from
8817 vector elts directly. */
8818 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
8819 machine_mode vmode;
8820 if (mode_for_vector (elmode, group_size).exists (&vmode)
8821 && VECTOR_MODE_P (vmode)
8822 && targetm.vector_mode_supported_p (vmode)
8823 && (convert_optab_handler (vec_init_optab,
8824 TYPE_MODE (vectype), vmode)
8825 != CODE_FOR_nothing))
8827 nloads = const_nunits / group_size;
8828 lnel = group_size;
8829 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
8831 else
8833 /* Otherwise avoid emitting a constructor of vector elements
8834 by performing the loads using an integer type of the same
8835 size, constructing a vector of those and then
8836 re-interpreting it as the original vector type.
8837 This avoids a huge runtime penalty due to the general
8838 inability to perform store forwarding from smaller stores
8839 to a larger load. */
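/* E.g. (hypothetically), for group_size == 2, SFmode elements and a
   V8SFmode vectype on a target supporting V4DImode: lsize is 64, so
   each pair of adjacent floats is loaded as one 64-bit integer, the
   four integers are combined into a V4DImode vector, and that vector
   is VIEW_CONVERTed back to V8SFmode, so four loads feed the
   construction instead of eight element loads.  */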
8840 unsigned lsize
8841 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
8842 unsigned int lnunits = const_nunits / group_size;
8843 /* If we can't construct such a vector fall back to
8844 element loads of the original vector type. */
8845 if (int_mode_for_size (lsize, 0).exists (&elmode)
8846 && mode_for_vector (elmode, lnunits).exists (&vmode)
8847 && VECTOR_MODE_P (vmode)
8848 && targetm.vector_mode_supported_p (vmode)
8849 && (convert_optab_handler (vec_init_optab, vmode, elmode)
8850 != CODE_FOR_nothing))
8852 nloads = lnunits;
8853 lnel = group_size;
8854 ltype = build_nonstandard_integer_type (lsize, 1);
8855 lvectype = build_vector_type (ltype, nloads);
8859 else
8861 nloads = 1;
8862 lnel = const_nunits;
8863 ltype = vectype;
8865 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8867 /* If the vectype is a one-element vector, load it directly as vector(1) scalar_type. */
8868 else if (nloads == 1)
8869 ltype = vectype;
8871 if (slp)
8873 /* For SLP permutation support we need to load the whole group,
8874 not only the number of vector stmts the permutation result
8875 fits in. */
8876 if (slp_perm)
8878 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8879 variable VF. */
8880 unsigned int const_vf = vf.to_constant ();
8881 ncopies = CEIL (group_size * const_vf, const_nunits);
8882 dr_chain.create (ncopies);
8884 else
8885 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8887 unsigned int group_el = 0;
8888 unsigned HOST_WIDE_INT
8889 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8890 for (j = 0; j < ncopies; j++)
8892 if (nloads > 1)
8893 vec_alloc (v, nloads);
8894 stmt_vec_info new_stmt_info = NULL;
8895 for (i = 0; i < nloads; i++)
8897 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8898 group_el * elsz + cst_offset);
8899 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8900 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8901 gassign *new_stmt
8902 = gimple_build_assign (make_ssa_name (ltype), data_ref);
8903 new_stmt_info
8904 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8905 if (nloads > 1)
8906 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
8907 gimple_assign_lhs (new_stmt));
8909 group_el += lnel;
8910 if (! slp
8911 || group_el == group_size)
8913 tree newoff = copy_ssa_name (running_off);
8914 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8915 running_off, stride_step);
8916 vect_finish_stmt_generation (stmt_info, incr, gsi);
8918 running_off = newoff;
8919 group_el = 0;
8922 if (nloads > 1)
8924 tree vec_inv = build_constructor (lvectype, v);
8925 new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
8926 new_stmt_info = vinfo->lookup_def (new_temp);
8927 if (lvectype != vectype)
8929 gassign *new_stmt
8930 = gimple_build_assign (make_ssa_name (vectype),
8931 VIEW_CONVERT_EXPR,
8932 build1 (VIEW_CONVERT_EXPR,
8933 vectype, new_temp));
8934 new_stmt_info
8935 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8939 if (slp)
8941 if (slp_perm)
8942 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
8943 else
8944 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8946 else
8948 if (j == 0)
8949 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8950 else
8951 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8952 prev_stmt_info = new_stmt_info;
8955 if (slp_perm)
8957 unsigned n_perms;
8958 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8959 slp_node_instance, false, &n_perms);
8961 return true;
8964 if (memory_access_type == VMAT_GATHER_SCATTER
8965 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
8966 grouped_load = false;
8968 if (grouped_load)
8970 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8971 group_size = DR_GROUP_SIZE (first_stmt_info);
8972 /* For SLP vectorization we directly vectorize a subchain
8973 without permutation. */
8974 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8975 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8976 /* For BB vectorization always use the first stmt to base
8977 the data ref pointer on. */
8978 if (bb_vinfo)
8979 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8981 /* Check if the chain of loads is already vectorized. */
8982 if (STMT_VINFO_VEC_STMT (first_stmt_info)
8983 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8984 ??? But we can only do so if there is exactly one
8985 as we have no way to get at the rest. Leave the CSE
8986 opportunity alone.
8987 ??? With the group load eventually participating
8988 in multiple different permutations (having multiple
8989 slp nodes which refer to the same group) the CSE
8990 would even produce wrong code. See PR56270. */
8991 && !slp)
8993 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8994 return true;
8996 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8997 group_gap_adj = 0;
8999 /* VEC_NUM is the number of vect stmts to be created for this group. */
9000 if (slp)
9002 grouped_load = false;
9003 /* If an SLP permutation is from N elements to N elements,
9004 and if one vector holds a whole number of N-element groups, we can load
9005 the inputs to the permutation in the same way as an
9006 unpermuted sequence. In other cases we need to load the
9007 whole group, not only the number of vector stmts the
9008 permutation result fits in. */
9009 if (slp_perm
9010 && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
9011 || !multiple_p (nunits, group_size)))
9013 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9014 variable VF; see vect_transform_slp_perm_load. */
9015 unsigned int const_vf = vf.to_constant ();
9016 unsigned int const_nunits = nunits.to_constant ();
9017 vec_num = CEIL (group_size * const_vf, const_nunits);
9018 group_gap_adj = vf * group_size - nunits * vec_num;
9020 else
9022 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9023 group_gap_adj
9024 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
9027 else
9028 vec_num = group_size;
9030 ref_type = get_group_alias_ptr_type (first_stmt_info);
9032 else
9034 first_stmt_info = stmt_info;
9035 first_dr_info = dr_info;
9036 group_size = vec_num = 1;
9037 group_gap_adj = 0;
9038 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9041 alignment_support_scheme
9042 = vect_supportable_dr_alignment (first_dr_info, false);
9043 gcc_assert (alignment_support_scheme);
9044 vec_loop_masks *loop_masks
9045 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9046 ? &LOOP_VINFO_MASKS (loop_vinfo)
9047 : NULL);
9048 /* Targets with load-lanes instructions must not require explicit
9049 realignment. vect_supportable_dr_alignment always returns either
9050 dr_aligned or dr_unaligned_supported for masked operations. */
9051 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9052 && !mask
9053 && !loop_masks)
9054 || alignment_support_scheme == dr_aligned
9055 || alignment_support_scheme == dr_unaligned_supported);
9057 /* In case the vectorization factor (VF) is bigger than the number
9058 of elements that we can fit in a vectype (nunits), we have to generate
9059 more than one vector stmt, i.e., we need to "unroll" the
9060 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9061 from one copy of the vector stmt to the next, in the field
9062 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9063 stages to find the correct vector defs to be used when vectorizing
9064 stmts that use the defs of the current stmt. The example below
9065 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9066 need to create 4 vectorized stmts):
9068 before vectorization:
9069 RELATED_STMT VEC_STMT
9070 S1: x = memref - -
9071 S2: z = x + 1 - -
9073 step 1: vectorize stmt S1:
9074 We first create the vector stmt VS1_0, and, as usual, record a
9075 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9076 Next, we create the vector stmt VS1_1, and record a pointer to
9077 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9078 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9079 stmts and pointers:
9080 RELATED_STMT VEC_STMT
9081 VS1_0: vx0 = memref0 VS1_1 -
9082 VS1_1: vx1 = memref1 VS1_2 -
9083 VS1_2: vx2 = memref2 VS1_3 -
9084 VS1_3: vx3 = memref3 - -
9085 S1: x = load - VS1_0
9086 S2: z = x + 1 - -
9088 See the documentation of vect_get_vec_def_for_stmt_copy for how the
9089 information recorded in the RELATED_STMT field is used to vectorize
9090 stmt S2. */
9092 /* In case of interleaving (non-unit grouped access):
9094 S1: x2 = &base + 2
9095 S2: x0 = &base
9096 S3: x1 = &base + 1
9097 S4: x3 = &base + 3
9099 Vectorized loads are created in the order of memory accesses
9100 starting from the access of the first stmt of the chain:
9102 VS1: vx0 = &base
9103 VS2: vx1 = &base + vec_size*1
9104 VS3: vx3 = &base + vec_size*2
9105 VS4: vx4 = &base + vec_size*3
9107 Then permutation statements are generated:
9109 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9110 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9113 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9114 (the order of the data-refs in the output of vect_permute_load_chain
9115 corresponds to the order of scalar stmts in the interleaving chain - see
9116 the documentation of vect_permute_load_chain()).
9117 The generation of permutation stmts and recording them in
9118 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9120 In case of both multiple types and interleaving, the vector loads and
9121 permutation stmts above are created for every copy. The result vector
9122 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9123 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9125 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9126 on a target that supports unaligned accesses (dr_unaligned_supported)
9127 we generate the following code:
9128 p = initial_addr;
9129 indx = 0;
9130 loop {
9131 p = p + indx * vectype_size;
9132 vec_dest = *(p);
9133 indx = indx + 1;
9136 Otherwise, the data reference is potentially unaligned on a target that
9137 does not support unaligned accesses (dr_explicit_realign_optimized);
9138 in that case we generate the following code, in which the data for each
9139 iteration is obtained by two vector loads, one from the previous iteration
9140 and one from the current iteration:
9141 p1 = initial_addr;
9142 msq_init = *(floor(p1))
9143 p2 = initial_addr + VS - 1;
9144 realignment_token = call target_builtin;
9145 indx = 0;
9146 loop {
9147 p2 = p2 + indx * vectype_size
9148 lsq = *(floor(p2))
9149 vec_dest = realign_load (msq, lsq, realignment_token)
9150 indx = indx + 1;
9151 msq = lsq;
9152 } */
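/* Worked example (illustrative only, assuming 16-byte vectors): for
   an access at address 0x1003, msq is loaded from floor(0x1003) ==
   0x1000 and lsq from floor(0x1003 + 15) == 0x1010; realign_load
   then combines the two using the realignment token so that the 16
   bytes starting at 0x1003 are obtained.  */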
9154 /* If the misalignment remains the same throughout the execution of the
9155 loop, we can create the init_addr and permutation mask at the loop
9156 preheader. Otherwise, they need to be created inside the loop.
9157 This can only occur when vectorizing memory accesses in the inner-loop
9158 nested within an outer-loop that is being vectorized. */
9160 if (nested_in_vect_loop
9161 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9162 GET_MODE_SIZE (TYPE_MODE (vectype))))
9164 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9165 compute_in_loop = true;
9168 if ((alignment_support_scheme == dr_explicit_realign_optimized
9169 || alignment_support_scheme == dr_explicit_realign)
9170 && !compute_in_loop)
9172 msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
9173 alignment_support_scheme, NULL_TREE,
9174 &at_loop);
9175 if (alignment_support_scheme == dr_explicit_realign_optimized)
9177 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9178 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9179 size_one_node);
9182 else
9183 at_loop = loop;
9185 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9186 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9188 tree bump;
9189 tree vec_offset = NULL_TREE;
9190 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9192 aggr_type = NULL_TREE;
9193 bump = NULL_TREE;
9195 else if (memory_access_type == VMAT_GATHER_SCATTER)
9197 aggr_type = elem_type;
9198 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9199 &bump, &vec_offset);
9201 else
9203 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9204 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9205 else
9206 aggr_type = vectype;
9207 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
9208 memory_access_type);
9211 tree vec_mask = NULL_TREE;
9212 prev_stmt_info = NULL;
9213 poly_uint64 group_elt = 0;
9214 for (j = 0; j < ncopies; j++)
9216 stmt_vec_info new_stmt_info = NULL;
9217 /* 1. Create the vector or array pointer update chain. */
9218 if (j == 0)
9220 bool simd_lane_access_p
9221 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9222 if (simd_lane_access_p
9223 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9224 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9225 && integer_zerop (DR_OFFSET (first_dr_info->dr))
9226 && integer_zerop (DR_INIT (first_dr_info->dr))
9227 && alias_sets_conflict_p (get_alias_set (aggr_type),
9228 get_alias_set (TREE_TYPE (ref_type)))
9229 && (alignment_support_scheme == dr_aligned
9230 || alignment_support_scheme == dr_unaligned_supported))
9232 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9233 dataref_offset = build_int_cst (ref_type, 0);
9235 else if (first_stmt_info_for_drptr
9236 && first_stmt_info != first_stmt_info_for_drptr)
9238 dataref_ptr
9239 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
9240 aggr_type, at_loop, offset, &dummy,
9241 gsi, &ptr_incr, simd_lane_access_p,
9242 byte_offset, bump);
9243 /* Adjust the pointer by the difference to first_stmt. */
9244 data_reference_p ptrdr
9245 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9246 tree diff
9247 = fold_convert (sizetype,
9248 size_binop (MINUS_EXPR,
9249 DR_INIT (first_dr_info->dr),
9250 DR_INIT (ptrdr)));
9251 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9252 stmt_info, diff);
9254 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9255 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
9256 &dataref_ptr, &vec_offset);
9257 else
9258 dataref_ptr
9259 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
9260 offset, &dummy, gsi, &ptr_incr,
9261 simd_lane_access_p,
9262 byte_offset, bump);
9263 if (mask)
9265 if (slp_node)
9267 auto_vec<tree> ops (1);
9268 auto_vec<vec<tree> > vec_defs (1);
9269 ops.quick_push (mask);
9270 vect_get_slp_defs (ops, slp_node, &vec_defs);
9271 vec_mask = vec_defs[0][0];
9273 else
9274 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
9275 mask_vectype);
9278 else
9280 if (dataref_offset)
9281 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9282 bump);
9283 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9284 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
9285 else
9286 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9287 stmt_info, bump);
9288 if (mask)
9289 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
9292 if (grouped_load || slp_perm)
9293 dr_chain.create (vec_num);
9295 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9297 tree vec_array;
9299 vec_array = create_vector_array (vectype, vec_num);
9301 tree final_mask = NULL_TREE;
9302 if (loop_masks)
9303 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9304 vectype, j);
9305 if (vec_mask)
9306 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9307 vec_mask, gsi);
9309 gcall *call;
9310 if (final_mask)
9312 /* Emit:
9313 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9314 VEC_MASK). */
9315 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9316 tree alias_ptr = build_int_cst (ref_type, align);
9317 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9318 dataref_ptr, alias_ptr,
9319 final_mask);
9321 else
9323 /* Emit:
9324 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9325 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9326 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9328 gimple_call_set_lhs (call, vec_array);
9329 gimple_call_set_nothrow (call, true);
9330 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
9332 /* Extract each vector into an SSA_NAME. */
9333 for (i = 0; i < vec_num; i++)
9335 new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
9336 vec_array, i);
9337 dr_chain.quick_push (new_temp);
9340 /* Record the mapping between SSA_NAMEs and statements. */
9341 vect_record_grouped_load_vectors (stmt_info, dr_chain);
9343 /* Record that VEC_ARRAY is now dead. */
9344 vect_clobber_variable (stmt_info, gsi, vec_array);
9346 else
9348 for (i = 0; i < vec_num; i++)
9350 tree final_mask = NULL_TREE;
9351 if (loop_masks
9352 && memory_access_type != VMAT_INVARIANT)
9353 final_mask = vect_get_loop_mask (gsi, loop_masks,
9354 vec_num * ncopies,
9355 vectype, vec_num * j + i);
9356 if (vec_mask)
9357 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9358 vec_mask, gsi);
9360 if (i > 0)
9361 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9362 stmt_info, bump);
9364 /* 2. Create the vector-load in the loop. */
9365 gimple *new_stmt = NULL;
9366 switch (alignment_support_scheme)
9368 case dr_aligned:
9369 case dr_unaligned_supported:
9371 unsigned int misalign;
9372 unsigned HOST_WIDE_INT align;
9374 if (memory_access_type == VMAT_GATHER_SCATTER)
9376 tree scale = size_int (gs_info.scale);
9377 gcall *call;
9378 if (loop_masks)
9379 call = gimple_build_call_internal
9380 (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
9381 vec_offset, scale, final_mask);
9382 else
9383 call = gimple_build_call_internal
9384 (IFN_GATHER_LOAD, 3, dataref_ptr,
9385 vec_offset, scale);
9386 gimple_call_set_nothrow (call, true);
9387 new_stmt = call;
9388 data_ref = NULL_TREE;
9389 break;
9392 align =
9393 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9394 if (alignment_support_scheme == dr_aligned)
9396 gcc_assert (aligned_access_p (first_dr_info));
9397 misalign = 0;
9399 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9401 align = dr_alignment
9402 (vect_dr_behavior (first_dr_info));
9403 misalign = 0;
9405 else
9406 misalign = DR_MISALIGNMENT (first_dr_info);
9407 if (dataref_offset == NULL_TREE
9408 && TREE_CODE (dataref_ptr) == SSA_NAME)
9409 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9410 align, misalign);
9412 if (final_mask)
9414 align = least_bit_hwi (misalign | align);
9415 tree ptr = build_int_cst (ref_type, align);
9416 gcall *call
9417 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9418 dataref_ptr, ptr,
9419 final_mask);
9420 gimple_call_set_nothrow (call, true);
9421 new_stmt = call;
9422 data_ref = NULL_TREE;
9424 else
9426 tree ltype = vectype;
9427 /* If there's no peeling for gaps but we have a gap
9428 with slp loads then load the lower half of the
9429 vector only. See get_group_load_store_type for
9430 when we apply this optimization. */
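/* Illustrative example (not from the original sources): with
   group_size == 4, DR_GROUP_GAP == 2 and nunits == 4, only the first
   two elements of each group are needed, so ltype becomes a
   two-element vector; the MEM_REF below reads just those elements
   and the constructor pads the upper half of the result with
   zeros.  */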
9431 if (slp
9432 && loop_vinfo
9433 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9434 && DR_GROUP_GAP (first_stmt_info) != 0
9435 && known_eq (nunits,
9436 (group_size
9437 - DR_GROUP_GAP (first_stmt_info)) * 2)
9438 && known_eq (nunits, group_size))
9439 ltype = build_vector_type (TREE_TYPE (vectype),
9440 (group_size
9441 - DR_GROUP_GAP
9442 (first_stmt_info)));
9443 data_ref
9444 = fold_build2 (MEM_REF, ltype, dataref_ptr,
9445 dataref_offset
9446 ? dataref_offset
9447 : build_int_cst (ref_type, 0));
9448 if (alignment_support_scheme == dr_aligned)
9450 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9451 TREE_TYPE (data_ref)
9452 = build_aligned_type (TREE_TYPE (data_ref),
9453 align * BITS_PER_UNIT);
9454 else
9455 TREE_TYPE (data_ref)
9456 = build_aligned_type (TREE_TYPE (data_ref),
9457 TYPE_ALIGN (elem_type));
9458 if (ltype != vectype)
9460 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9461 tree tem = make_ssa_name (ltype);
9462 new_stmt = gimple_build_assign (tem, data_ref);
9463 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9464 data_ref = NULL;
9465 vec<constructor_elt, va_gc> *v;
9466 vec_alloc (v, 2);
9467 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9468 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9469 build_zero_cst (ltype));
9470 new_stmt
9471 = gimple_build_assign (vec_dest,
9472 build_constructor
9473 (vectype, v));
9476 break;
9478 case dr_explicit_realign:
9480 tree ptr, bump;
9482 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9484 if (compute_in_loop)
9485 msq = vect_setup_realignment (first_stmt_info, gsi,
9486 &realignment_token,
9487 dr_explicit_realign,
9488 dataref_ptr, NULL);
9490 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9491 ptr = copy_ssa_name (dataref_ptr);
9492 else
9493 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9494 // For explicit realign the target alignment should be
9495 // known at compile time.
9496 unsigned HOST_WIDE_INT align =
9497 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9498 new_stmt = gimple_build_assign
9499 (ptr, BIT_AND_EXPR, dataref_ptr,
9500 build_int_cst
9501 (TREE_TYPE (dataref_ptr),
9502 -(HOST_WIDE_INT) align));
9503 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9504 data_ref
9505 = build2 (MEM_REF, vectype, ptr,
9506 build_int_cst (ref_type, 0));
9507 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9508 vec_dest = vect_create_destination_var (scalar_dest,
9509 vectype);
9510 new_stmt = gimple_build_assign (vec_dest, data_ref);
9511 new_temp = make_ssa_name (vec_dest, new_stmt);
9512 gimple_assign_set_lhs (new_stmt, new_temp);
9513 gimple_move_vops (new_stmt, stmt_info->stmt);
9514 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9515 msq = new_temp;
9517 bump = size_binop (MULT_EXPR, vs,
9518 TYPE_SIZE_UNIT (elem_type));
9519 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9520 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
9521 stmt_info, bump);
9522 new_stmt = gimple_build_assign
9523 (NULL_TREE, BIT_AND_EXPR, ptr,
9524 build_int_cst
9525 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9526 ptr = copy_ssa_name (ptr, new_stmt);
9527 gimple_assign_set_lhs (new_stmt, ptr);
9528 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9529 data_ref
9530 = build2 (MEM_REF, vectype, ptr,
9531 build_int_cst (ref_type, 0));
9532 break;
9534 case dr_explicit_realign_optimized:
9536 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9537 new_temp = copy_ssa_name (dataref_ptr);
9538 else
9539 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9540 // We should only be doing this if we know the target
9541 // alignment at compile time.
9542 unsigned HOST_WIDE_INT align =
9543 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9544 new_stmt = gimple_build_assign
9545 (new_temp, BIT_AND_EXPR, dataref_ptr,
9546 build_int_cst (TREE_TYPE (dataref_ptr),
9547 -(HOST_WIDE_INT) align));
9548 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9549 data_ref
9550 = build2 (MEM_REF, vectype, new_temp,
9551 build_int_cst (ref_type, 0));
9552 break;
9554 default:
9555 gcc_unreachable ();
9557 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9558 /* DATA_REF is null if we've already built the statement. */
9559 if (data_ref)
9561 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9562 new_stmt = gimple_build_assign (vec_dest, data_ref);
9564 new_temp = make_ssa_name (vec_dest, new_stmt);
9565 gimple_set_lhs (new_stmt, new_temp);
9566 new_stmt_info
9567 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9569 /* 3. Handle explicit realignment if necessary/supported.
9570 Create in loop:
9571 vec_dest = realign_load (msq, lsq, realignment_token) */
9572 if (alignment_support_scheme == dr_explicit_realign_optimized
9573 || alignment_support_scheme == dr_explicit_realign)
9575 lsq = gimple_assign_lhs (new_stmt);
9576 if (!realignment_token)
9577 realignment_token = dataref_ptr;
9578 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9579 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9580 msq, lsq, realignment_token);
9581 new_temp = make_ssa_name (vec_dest, new_stmt);
9582 gimple_assign_set_lhs (new_stmt, new_temp);
9583 new_stmt_info
9584 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9586 if (alignment_support_scheme == dr_explicit_realign_optimized)
9588 gcc_assert (phi);
9589 if (i == vec_num - 1 && j == ncopies - 1)
9590 add_phi_arg (phi, lsq,
9591 loop_latch_edge (containing_loop),
9592 UNKNOWN_LOCATION);
9593 msq = lsq;
9597 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9599 tree perm_mask = perm_mask_for_reverse (vectype);
9600 new_temp = permute_vec_elements (new_temp, new_temp,
9601 perm_mask, stmt_info, gsi);
9602 new_stmt_info = vinfo->lookup_def (new_temp);
9605 /* Collect vector loads and later create their permutation in
9606 vect_transform_grouped_load (). */
9607 if (grouped_load || slp_perm)
9608 dr_chain.quick_push (new_temp);
9610 /* Store vector loads in the corresponding SLP_NODE. */
9611 if (slp && !slp_perm)
9612 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9614 /* With SLP permutation we load the gaps as well; without
9615 it we need to skip the gaps after we manage to fully load
9616 all elements. group_gap_adj is DR_GROUP_SIZE here. */
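/* Illustrative example (hypothetical numbers): with group_size == 4
   and group_gap_adj == 1, once group_elt reaches 3 the pointer is
   bumped below by one element's size so that the next copy starts at
   the beginning of the following group.  */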
9617 group_elt += nunits;
9618 if (maybe_ne (group_gap_adj, 0U)
9619 && !slp_perm
9620 && known_eq (group_elt, group_size - group_gap_adj))
9622 poly_wide_int bump_val
9623 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9624 * group_gap_adj);
9625 tree bump = wide_int_to_tree (sizetype, bump_val);
9626 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9627 stmt_info, bump);
9628 group_elt = 0;
9631 /* Bump the vector pointer to account for a gap or for excess
9632 elements loaded for a permuted SLP load. */
9633 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9635 poly_wide_int bump_val
9636 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9637 * group_gap_adj);
9638 tree bump = wide_int_to_tree (sizetype, bump_val);
9639 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9640 stmt_info, bump);
9644 if (slp && !slp_perm)
9645 continue;
9647 if (slp_perm)
9649 unsigned n_perms;
9650 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
9651 slp_node_instance, false,
9652 &n_perms))
9654 dr_chain.release ();
9655 return false;
9658 else
9660 if (grouped_load)
9662 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9663 vect_transform_grouped_load (stmt_info, dr_chain,
9664 group_size, gsi);
9665 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9667 else
9669 if (j == 0)
9670 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9671 else
9672 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9673 prev_stmt_info = new_stmt_info;
9676 dr_chain.release ();
9679 return true;
9682 /* Function vect_is_simple_cond.
9684 Input:
9685 VINFO - the vectorization info for the loop or basic block being vectorized.
9686 COND - Condition that is checked for simple use.
9688 Output:
9689 *COMP_VECTYPE - the vector type for the comparison.
9690 *DTS - The def types for the arguments of the comparison
9692 Returns whether a COND can be vectorized. Checks whether the
9693 condition operands are supportable using vect_is_simple_use. */
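/* For illustration (hypothetical gimple, not taken from this file):
   for a condition such as  a_1 < b_2  with integer operands, both
   operands are checked with vect_is_simple_use and *COMP_VECTYPE
   becomes the vector type of their definitions; for a boolean SSA
   name used directly as the condition, *COMP_VECTYPE is its mask
   vector type.  */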
9695 static bool
9696 vect_is_simple_cond (tree cond, vec_info *vinfo,
9697 tree *comp_vectype, enum vect_def_type *dts,
9698 tree vectype)
9700 tree lhs, rhs;
9701 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9703 /* Mask case. */
9704 if (TREE_CODE (cond) == SSA_NAME
9705 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9707 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
9708 || !*comp_vectype
9709 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9710 return false;
9711 return true;
9714 if (!COMPARISON_CLASS_P (cond))
9715 return false;
9717 lhs = TREE_OPERAND (cond, 0);
9718 rhs = TREE_OPERAND (cond, 1);
9720 if (TREE_CODE (lhs) == SSA_NAME)
9722 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
9723 return false;
9725 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9726 || TREE_CODE (lhs) == FIXED_CST)
9727 dts[0] = vect_constant_def;
9728 else
9729 return false;
9731 if (TREE_CODE (rhs) == SSA_NAME)
9733 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
9734 return false;
9736 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9737 || TREE_CODE (rhs) == FIXED_CST)
9738 dts[1] = vect_constant_def;
9739 else
9740 return false;
9742 if (vectype1 && vectype2
9743 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9744 TYPE_VECTOR_SUBPARTS (vectype2)))
9745 return false;
9747 *comp_vectype = vectype1 ? vectype1 : vectype2;
9748 /* Invariant comparison. */
9749 if (! *comp_vectype)
9751 tree scalar_type = TREE_TYPE (lhs);
9752 /* If we can widen the comparison to match vectype do so. */
9753 if (INTEGRAL_TYPE_P (scalar_type)
9754 && vectype
9755 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9756 TYPE_SIZE (TREE_TYPE (vectype))))
9757 scalar_type = build_nonstandard_integer_type
9758 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
9759 TYPE_UNSIGNED (scalar_type));
9760 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
9763 return true;
9766 /* vectorizable_condition.
9768 Check if STMT_INFO is a conditional modify expression that can be vectorized.
9769 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9770 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9771 at GSI.
9773 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9775 Return true if STMT_INFO is vectorizable in this way. */
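/* Sketch of the transformation (illustrative only): a scalar
   statement such as
     x_3 = a_1 < b_2 ? c_4 : d_5;
   is replaced by a vector statement of the form
     vx_3 = VEC_COND_EXPR <va_1 < vb_2, vc_4, vd_5>;
   where the vector operands are the vectorized definitions of the
   corresponding scalar operands.  */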
9777 bool
9778 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9779 stmt_vec_info *vec_stmt, bool for_reduction,
9780 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9782 vec_info *vinfo = stmt_info->vinfo;
9783 tree scalar_dest = NULL_TREE;
9784 tree vec_dest = NULL_TREE;
9785 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9786 tree then_clause, else_clause;
9787 tree comp_vectype = NULL_TREE;
9788 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9789 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9790 tree vec_compare;
9791 tree new_temp;
9792 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9793 enum vect_def_type dts[4]
9794 = {vect_unknown_def_type, vect_unknown_def_type,
9795 vect_unknown_def_type, vect_unknown_def_type};
9796 int ndts = 4;
9797 int ncopies;
9798 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9799 stmt_vec_info prev_stmt_info = NULL;
9800 int i, j;
9801 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9802 vec<tree> vec_oprnds0 = vNULL;
9803 vec<tree> vec_oprnds1 = vNULL;
9804 vec<tree> vec_oprnds2 = vNULL;
9805 vec<tree> vec_oprnds3 = vNULL;
9806 tree vec_cmp_type;
9807 bool masked = false;
9809 if (for_reduction && STMT_SLP_TYPE (stmt_info))
9810 return false;
9812 vect_reduction_type reduction_type
9813 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
9814 if (reduction_type == TREE_CODE_REDUCTION)
9816 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9817 return false;
9819 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9820 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
9821 && for_reduction))
9822 return false;
9824 /* FORNOW: not yet supported. */
9825 if (STMT_VINFO_LIVE_P (stmt_info))
9827 if (dump_enabled_p ())
9828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9829 "value used after loop.\n");
9830 return false;
9834 /* Is this a vectorizable conditional operation? */
9835 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9836 if (!stmt)
9837 return false;
9839 code = gimple_assign_rhs_code (stmt);
9841 if (code != COND_EXPR)
9842 return false;
9844 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9845 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9847 if (slp_node)
9848 ncopies = 1;
9849 else
9850 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9852 gcc_assert (ncopies >= 1);
9853 if (for_reduction && ncopies > 1)
9854 return false; /* FORNOW */
9856 cond_expr = gimple_assign_rhs1 (stmt);
9857 then_clause = gimple_assign_rhs2 (stmt);
9858 else_clause = gimple_assign_rhs3 (stmt);
9860 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
9861 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
9862 || !comp_vectype)
9863 return false;
9865 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
9866 return false;
9867 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
9868 return false;
9870 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
9871 return false;
9873 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
9874 return false;
9876 masked = !COMPARISON_CLASS_P (cond_expr);
9877 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
9879 if (vec_cmp_type == NULL_TREE)
9880 return false;
9882 cond_code = TREE_CODE (cond_expr);
9883 if (!masked)
9885 cond_expr0 = TREE_OPERAND (cond_expr, 0);
9886 cond_expr1 = TREE_OPERAND (cond_expr, 1);
9889 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
9891 /* Boolean values may have another representation in vectors
9892 and therefore we prefer bit operations over comparison for
9893 them (which also works for scalar masks). We store opcodes
9894 to use in bitop1 and bitop2. The statement is vectorized as
9895 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
9896 depending on bitop1 and bitop2 arity. */
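/* For example (illustrative, for boolean 0/1 operands): a > b is
   rewritten as a & ~b (bitop1 == BIT_NOT_EXPR applied to the second
   operand, bitop2 == BIT_AND_EXPR), a == b becomes ~(a ^ b), and
   a != b needs only the single XOR, so bitop2 stays NOP_EXPR.  */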
9897 switch (cond_code)
9899 case GT_EXPR:
9900 bitop1 = BIT_NOT_EXPR;
9901 bitop2 = BIT_AND_EXPR;
9902 break;
9903 case GE_EXPR:
9904 bitop1 = BIT_NOT_EXPR;
9905 bitop2 = BIT_IOR_EXPR;
9906 break;
9907 case LT_EXPR:
9908 bitop1 = BIT_NOT_EXPR;
9909 bitop2 = BIT_AND_EXPR;
9910 std::swap (cond_expr0, cond_expr1);
9911 break;
9912 case LE_EXPR:
9913 bitop1 = BIT_NOT_EXPR;
9914 bitop2 = BIT_IOR_EXPR;
9915 std::swap (cond_expr0, cond_expr1);
9916 break;
9917 case NE_EXPR:
9918 bitop1 = BIT_XOR_EXPR;
9919 break;
9920 case EQ_EXPR:
9921 bitop1 = BIT_XOR_EXPR;
9922 bitop2 = BIT_NOT_EXPR;
9923 break;
9924 default:
9925 return false;
9927 cond_code = SSA_NAME;
9930 if (!vec_stmt)
9932 if (bitop1 != NOP_EXPR)
9934 machine_mode mode = TYPE_MODE (comp_vectype);
9935 optab optab;
9937 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
9938 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9939 return false;
9941 if (bitop2 != NOP_EXPR)
9943 optab = optab_for_tree_code (bitop2, comp_vectype,
9944 optab_default);
9945 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9946 return false;
9949 if (expand_vec_cond_expr_p (vectype, comp_vectype,
9950 cond_code))
9952 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
9953 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
9954 cost_vec);
9955 return true;
9957 return false;
9960 /* Transform. */
9962 if (!slp_node)
9964 vec_oprnds0.create (1);
9965 vec_oprnds1.create (1);
9966 vec_oprnds2.create (1);
9967 vec_oprnds3.create (1);
9970 /* Handle def. */
9971 scalar_dest = gimple_assign_lhs (stmt);
9972 if (reduction_type != EXTRACT_LAST_REDUCTION)
9973 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9975 /* Handle cond expr. */
9976 for (j = 0; j < ncopies; j++)
9978 stmt_vec_info new_stmt_info = NULL;
9979 if (j == 0)
9981 if (slp_node)
9983 auto_vec<tree, 4> ops;
9984 auto_vec<vec<tree>, 4> vec_defs;
9986 if (masked)
9987 ops.safe_push (cond_expr);
9988 else
9990 ops.safe_push (cond_expr0);
9991 ops.safe_push (cond_expr1);
9993 ops.safe_push (then_clause);
9994 ops.safe_push (else_clause);
9995 vect_get_slp_defs (ops, slp_node, &vec_defs);
9996 vec_oprnds3 = vec_defs.pop ();
9997 vec_oprnds2 = vec_defs.pop ();
9998 if (!masked)
9999 vec_oprnds1 = vec_defs.pop ();
10000 vec_oprnds0 = vec_defs.pop ();
10002 else
10004 if (masked)
10006 vec_cond_lhs
10007 = vect_get_vec_def_for_operand (cond_expr, stmt_info,
10008 comp_vectype);
10010 else
10012 vec_cond_lhs
10013 = vect_get_vec_def_for_operand (cond_expr0,
10014 stmt_info, comp_vectype);
10015 vec_cond_rhs
10016 = vect_get_vec_def_for_operand (cond_expr1,
10017 stmt_info, comp_vectype);
10019 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
10020 stmt_info);
10021 if (reduction_type != EXTRACT_LAST_REDUCTION)
10022 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
10023 stmt_info);
10026 else
10028 vec_cond_lhs
10029 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
10030 if (!masked)
10031 vec_cond_rhs
10032 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
10034 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10035 vec_oprnds2.pop ());
10036 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10037 vec_oprnds3.pop ());
10040 if (!slp_node)
10042 vec_oprnds0.quick_push (vec_cond_lhs);
10043 if (!masked)
10044 vec_oprnds1.quick_push (vec_cond_rhs);
10045 vec_oprnds2.quick_push (vec_then_clause);
10046 vec_oprnds3.quick_push (vec_else_clause);
10049 /* Arguments are ready. Create the new vector stmt. */
10050 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10052 vec_then_clause = vec_oprnds2[i];
10053 vec_else_clause = vec_oprnds3[i];
10055 if (masked)
10056 vec_compare = vec_cond_lhs;
10057 else
10059 vec_cond_rhs = vec_oprnds1[i];
10060 if (bitop1 == NOP_EXPR)
10061 vec_compare = build2 (cond_code, vec_cmp_type,
10062 vec_cond_lhs, vec_cond_rhs);
10063 else
10065 new_temp = make_ssa_name (vec_cmp_type);
10066 gassign *new_stmt;
10067 if (bitop1 == BIT_NOT_EXPR)
10068 new_stmt = gimple_build_assign (new_temp, bitop1,
10069 vec_cond_rhs);
10070 else
10071 new_stmt
10072 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10073 vec_cond_rhs);
10074 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10075 if (bitop2 == NOP_EXPR)
10076 vec_compare = new_temp;
10077 else if (bitop2 == BIT_NOT_EXPR)
10079 /* Instead of doing ~x ? y : z do x ? z : y. */
10080 vec_compare = new_temp;
10081 std::swap (vec_then_clause, vec_else_clause);
10083 else
10085 vec_compare = make_ssa_name (vec_cmp_type);
10086 new_stmt
10087 = gimple_build_assign (vec_compare, bitop2,
10088 vec_cond_lhs, new_temp);
10089 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10093 if (reduction_type == EXTRACT_LAST_REDUCTION)
10095 if (!is_gimple_val (vec_compare))
10097 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10098 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10099 vec_compare);
10100 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10101 vec_compare = vec_compare_name;
10103 gcall *new_stmt = gimple_build_call_internal
10104 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10105 vec_then_clause);
10106 gimple_call_set_lhs (new_stmt, scalar_dest);
10107 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
10108 if (stmt_info->stmt == gsi_stmt (*gsi))
10109 new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
10110 else
10112 /* In this case we're moving the definition to later in the
10113 block. That doesn't matter because the only uses of the
10114 lhs are in phi statements. */
10115 gimple_stmt_iterator old_gsi
10116 = gsi_for_stmt (stmt_info->stmt);
10117 gsi_remove (&old_gsi, true);
10118 new_stmt_info
10119 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10122 else
10124 new_temp = make_ssa_name (vec_dest);
10125 gassign *new_stmt
10126 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10127 vec_then_clause, vec_else_clause);
10128 new_stmt_info
10129 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10131 if (slp_node)
10132 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10135 if (slp_node)
10136 continue;
10138 if (j == 0)
10139 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10140 else
10141 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10143 prev_stmt_info = new_stmt_info;
10146 vec_oprnds0.release ();
10147 vec_oprnds1.release ();
10148 vec_oprnds2.release ();
10149 vec_oprnds3.release ();
10151 return true;
10154 /* vectorizable_comparison.
10156 Check if STMT_INFO is a comparison expression that can be vectorized.
10157 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10158 comparison, put it in VEC_STMT, and insert it at GSI.
10160 Return true if STMT_INFO is vectorizable in this way. */
10162 static bool
10163 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10164 stmt_vec_info *vec_stmt,
10165 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10167 vec_info *vinfo = stmt_info->vinfo;
10168 tree lhs, rhs1, rhs2;
10169 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10170 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10171 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10172 tree new_temp;
10173 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
10174 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10175 int ndts = 2;
10176 poly_uint64 nunits;
10177 int ncopies;
10178 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10179 stmt_vec_info prev_stmt_info = NULL;
10180 int i, j;
10181 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10182 vec<tree> vec_oprnds0 = vNULL;
10183 vec<tree> vec_oprnds1 = vNULL;
10184 tree mask_type;
10185 tree mask;
10187 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10188 return false;
10190 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10191 return false;
10193 mask_type = vectype;
10194 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10196 if (slp_node)
10197 ncopies = 1;
10198 else
10199 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10201 gcc_assert (ncopies >= 1);
10202 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10203 return false;
10205 if (STMT_VINFO_LIVE_P (stmt_info))
10207 if (dump_enabled_p ())
10208 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10209 "value used after loop.\n");
10210 return false;
10213 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10214 if (!stmt)
10215 return false;
10217 code = gimple_assign_rhs_code (stmt);
10219 if (TREE_CODE_CLASS (code) != tcc_comparison)
10220 return false;
10222 rhs1 = gimple_assign_rhs1 (stmt);
10223 rhs2 = gimple_assign_rhs2 (stmt);
10225 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
10226 return false;
10228 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
10229 return false;
10231 if (vectype1 && vectype2
10232 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10233 TYPE_VECTOR_SUBPARTS (vectype2)))
10234 return false;
10236 vectype = vectype1 ? vectype1 : vectype2;
10238 /* Invariant comparison. */
10239 if (!vectype)
10241 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
10242 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10243 return false;
10245 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10246 return false;
10248 /* Can't compare mask and non-mask types. */
10249 if (vectype1 && vectype2
10250 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10251 return false;
10253 /* Boolean values may have another representation in vectors
10254 and therefore we prefer bit operations over comparison for
10255 them (which also works for scalar masks). We store opcodes
10256 to use in bitop1 and bitop2. The statement is vectorized as
10257 BITOP2 (rhs1 BITOP1 rhs2) or
10258 rhs1 BITOP2 (BITOP1 rhs2)
10259 depending on bitop1 and bitop2 arity. */
10260 bool swap_p = false;
10261 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10263 if (code == GT_EXPR)
10265 bitop1 = BIT_NOT_EXPR;
10266 bitop2 = BIT_AND_EXPR;
10268 else if (code == GE_EXPR)
10270 bitop1 = BIT_NOT_EXPR;
10271 bitop2 = BIT_IOR_EXPR;
10273 else if (code == LT_EXPR)
10275 bitop1 = BIT_NOT_EXPR;
10276 bitop2 = BIT_AND_EXPR;
10277 swap_p = true;
10279 else if (code == LE_EXPR)
10281 bitop1 = BIT_NOT_EXPR;
10282 bitop2 = BIT_IOR_EXPR;
10283 swap_p = true;
10285 else
10287 bitop1 = BIT_XOR_EXPR;
10288 if (code == EQ_EXPR)
10289 bitop2 = BIT_NOT_EXPR;
10293 if (!vec_stmt)
10295 if (bitop1 == NOP_EXPR)
10297 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10298 return false;
10300 else
10302 machine_mode mode = TYPE_MODE (vectype);
10303 optab optab;
10305 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10306 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10307 return false;
10309 if (bitop2 != NOP_EXPR)
10311 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10312 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10313 return false;
10317 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10318 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
10319 dts, ndts, slp_node, cost_vec);
10320 return true;
10323 /* Transform. */
10324 if (!slp_node)
10326 vec_oprnds0.create (1);
10327 vec_oprnds1.create (1);
10330 /* Handle def. */
10331 lhs = gimple_assign_lhs (stmt);
10332 mask = vect_create_destination_var (lhs, mask_type);
10334 /* Handle cmp expr. */
10335 for (j = 0; j < ncopies; j++)
10337 stmt_vec_info new_stmt_info = NULL;
10338 if (j == 0)
10340 if (slp_node)
10342 auto_vec<tree, 2> ops;
10343 auto_vec<vec<tree>, 2> vec_defs;
10345 ops.safe_push (rhs1);
10346 ops.safe_push (rhs2);
10347 vect_get_slp_defs (ops, slp_node, &vec_defs);
10348 vec_oprnds1 = vec_defs.pop ();
10349 vec_oprnds0 = vec_defs.pop ();
10350 if (swap_p)
10351 std::swap (vec_oprnds0, vec_oprnds1);
10353 else
10355 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
10356 vectype);
10357 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
10358 vectype);
10361 else
10363 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
10364 vec_oprnds0.pop ());
10365 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
10366 vec_oprnds1.pop ());
10369 if (!slp_node)
10371 if (swap_p && j == 0)
10372 std::swap (vec_rhs1, vec_rhs2);
10373 vec_oprnds0.quick_push (vec_rhs1);
10374 vec_oprnds1.quick_push (vec_rhs2);
10377 /* Arguments are ready. Create the new vector stmt. */
10378 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10380 vec_rhs2 = vec_oprnds1[i];
10382 new_temp = make_ssa_name (mask);
10383 if (bitop1 == NOP_EXPR)
10385 gassign *new_stmt = gimple_build_assign (new_temp, code,
10386 vec_rhs1, vec_rhs2);
10387 new_stmt_info
10388 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10390 else
10392 gassign *new_stmt;
10393 if (bitop1 == BIT_NOT_EXPR)
10394 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10395 else
10396 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10397 vec_rhs2);
10398 new_stmt_info
10399 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10400 if (bitop2 != NOP_EXPR)
10402 tree res = make_ssa_name (mask);
10403 if (bitop2 == BIT_NOT_EXPR)
10404 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10405 else
10406 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10407 new_temp);
10408 new_stmt_info
10409 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10412 if (slp_node)
10413 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10416 if (slp_node)
10417 continue;
10419 if (j == 0)
10420 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10421 else
10422 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10424 prev_stmt_info = new_stmt_info;
10427 vec_oprnds0.release ();
10428 vec_oprnds1.release ();
10430 return true;
10433 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10434 can handle all live statements in the node. Otherwise return true
10435 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10436 GSI and VEC_STMT are as for vectorizable_live_operation. */
10438 static bool
10439 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10440 slp_tree slp_node, stmt_vec_info *vec_stmt,
10441 stmt_vector_for_cost *cost_vec)
10443 if (slp_node)
10445 stmt_vec_info slp_stmt_info;
10446 unsigned int i;
10447 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10449 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10450 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
10451 vec_stmt, cost_vec))
10452 return false;
10455 else if (STMT_VINFO_LIVE_P (stmt_info)
10456 && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
10457 vec_stmt, cost_vec))
10458 return false;
10460 return true;
10463 /* Make sure the statement is vectorizable. */
10465 opt_result
10466 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
10467 slp_tree node, slp_instance node_instance,
10468 stmt_vector_for_cost *cost_vec)
10470 vec_info *vinfo = stmt_info->vinfo;
10471 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10472 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10473 bool ok;
10474 gimple_seq pattern_def_seq;
10476 if (dump_enabled_p ())
10477 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10478 stmt_info->stmt);
10480 if (gimple_has_volatile_ops (stmt_info->stmt))
10481 return opt_result::failure_at (stmt_info->stmt,
10482 "not vectorized:"
10483 " stmt has volatile operands: %G\n",
10484 stmt_info->stmt);
10486 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10487 && node == NULL
10488 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10490 gimple_stmt_iterator si;
10492 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10494 stmt_vec_info pattern_def_stmt_info
10495 = vinfo->lookup_stmt (gsi_stmt (si));
10496 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10497 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10499 /* Analyze def stmt of STMT if it's a pattern stmt. */
10500 if (dump_enabled_p ())
10501 dump_printf_loc (MSG_NOTE, vect_location,
10502 "==> examining pattern def statement: %G",
10503 pattern_def_stmt_info->stmt);
10505 opt_result res
10506 = vect_analyze_stmt (pattern_def_stmt_info,
10507 need_to_vectorize, node, node_instance,
10508 cost_vec);
10509 if (!res)
10510 return res;
10515 /* Skip stmts that do not need to be vectorized. In loops this is expected
10516 to include:
10517 - the COND_EXPR which is the loop exit condition
10518 - any LABEL_EXPRs in the loop
10519 - computations that are used only for array indexing or loop control.
10520 In basic blocks we only analyze statements that are a part of some SLP
10521 instance, therefore, all the statements are relevant.
10523 A pattern statement needs to be analyzed instead of the original statement
10524 if the original statement is not relevant. Otherwise, we analyze both
10525 statements. In basic blocks we are called from some SLP instance
10526 traversal; there we don't analyze pattern stmts instead of the
10527 original stmts, since the pattern stmts are already part of the SLP instance. */
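/* Illustrative example (hypothetical): if the pattern recognizer
   replaced  S: prod = (int) s1 * (int) s2  by a widening-multiply
   pattern statement, then when S itself is not relevant we analyze
   the pattern statement in its place; if S is relevant we analyze
   both statements.  */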
10529 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10530 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10531 && !STMT_VINFO_LIVE_P (stmt_info))
10533 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10534 && pattern_stmt_info
10535 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10536 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10538 /* Analyze PATTERN_STMT instead of the original stmt. */
10539 stmt_info = pattern_stmt_info;
10540 if (dump_enabled_p ())
10541 dump_printf_loc (MSG_NOTE, vect_location,
10542 "==> examining pattern statement: %G",
10543 stmt_info->stmt);
10545 else
10547 if (dump_enabled_p ())
10548 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10550 return opt_result::success ();
10553 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10554 && node == NULL
10555 && pattern_stmt_info
10556 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10557 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10559 /* Analyze PATTERN_STMT too. */
10560 if (dump_enabled_p ())
10561 dump_printf_loc (MSG_NOTE, vect_location,
10562 "==> examining pattern statement: %G",
10563 pattern_stmt_info->stmt);
10565 opt_result res
10566 = vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
10567 node_instance, cost_vec);
10568 if (!res)
10569 return res;
10572 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10574 case vect_internal_def:
10575 break;
10577 case vect_reduction_def:
10578 case vect_nested_cycle:
10579 gcc_assert (!bb_vinfo
10580 && (relevance == vect_used_in_outer
10581 || relevance == vect_used_in_outer_by_reduction
10582 || relevance == vect_used_by_reduction
10583 || relevance == vect_unused_in_scope
10584 || relevance == vect_used_only_live));
10585 break;
10587 case vect_induction_def:
10588 gcc_assert (!bb_vinfo);
10589 break;
10591 case vect_constant_def:
10592 case vect_external_def:
10593 case vect_unknown_def_type:
10594 default:
10595 gcc_unreachable ();
10598 if (STMT_VINFO_RELEVANT_P (stmt_info))
10600 tree type = gimple_expr_type (stmt_info->stmt);
10601 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10602 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10603 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10604 || (call && gimple_call_lhs (call) == NULL_TREE));
10605 *need_to_vectorize = true;
10608 if (PURE_SLP_STMT (stmt_info) && !node)
10610 if (dump_enabled_p ())
10611 dump_printf_loc (MSG_NOTE, vect_location,
10612 "handled only by SLP analysis\n");
10613 return opt_result::success ();
10616 ok = true;
10617 if (!bb_vinfo
10618 && (STMT_VINFO_RELEVANT_P (stmt_info)
10619 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10620 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10621 -mveclibabi= takes preference over library functions with
10622 the simd attribute. */
10623 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
10624 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
10625 cost_vec)
10626 || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
10627 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
10628 || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
10629 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
10630 cost_vec)
10631 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
10632 || vectorizable_reduction (stmt_info, NULL, NULL, node,
10633 node_instance, cost_vec)
10634 || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
10635 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
10636 || vectorizable_condition (stmt_info, NULL, NULL, false, node,
10637 cost_vec)
10638 || vectorizable_comparison (stmt_info, NULL, NULL, node,
10639 cost_vec));
10640 else
10642 if (bb_vinfo)
10643 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
10644 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
10645 cost_vec)
10646 || vectorizable_conversion (stmt_info, NULL, NULL, node,
10647 cost_vec)
10648 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
10649 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
10650 || vectorizable_assignment (stmt_info, NULL, NULL, node,
10651 cost_vec)
10652 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
10653 cost_vec)
10654 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
10655 || vectorizable_condition (stmt_info, NULL, NULL, false, node,
10656 cost_vec)
10657 || vectorizable_comparison (stmt_info, NULL, NULL, node,
10658 cost_vec));
10661 if (!ok)
10662 return opt_result::failure_at (stmt_info->stmt,
10663 "not vectorized:"
10664 " relevant stmt not supported: %G",
10665 stmt_info->stmt);
10667 /* Stmts that are (also) "live" (i.e., used outside of the loop)
10668 need extra handling, except for vectorizable reductions. */
10669 if (!bb_vinfo
10670 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10671 && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
10672 return opt_result::failure_at (stmt_info->stmt,
10673 "not vectorized:"
10674 " live stmt not supported: %G",
10675 stmt_info->stmt);
10677 return opt_result::success ();
10681 /* Function vect_transform_stmt.
10683 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
10685 bool
10686 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10687 slp_tree slp_node, slp_instance slp_node_instance)
10689 vec_info *vinfo = stmt_info->vinfo;
10690 bool is_store = false;
10691 stmt_vec_info vec_stmt = NULL;
10692 bool done;
10694 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
10695 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
10697 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
10698 && nested_in_vect_loop_p
10699 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
10700 stmt_info));
10702 gimple *stmt = stmt_info->stmt;
10703 switch (STMT_VINFO_TYPE (stmt_info))
10705 case type_demotion_vec_info_type:
10706 case type_promotion_vec_info_type:
10707 case type_conversion_vec_info_type:
10708 done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
10709 NULL);
10710 gcc_assert (done);
10711 break;
10713 case induc_vec_info_type:
10714 done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
10715 NULL);
10716 gcc_assert (done);
10717 break;
10719 case shift_vec_info_type:
10720 done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
10721 gcc_assert (done);
10722 break;
10724 case op_vec_info_type:
10725 done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
10726 NULL);
10727 gcc_assert (done);
10728 break;
10730 case assignment_vec_info_type:
10731 done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
10732 NULL);
10733 gcc_assert (done);
10734 break;
10736 case load_vec_info_type:
10737 done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
10738 slp_node_instance, NULL);
10739 gcc_assert (done);
10740 break;
10742 case store_vec_info_type:
10743 done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
10744 gcc_assert (done);
10745 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
10747 /* In case of interleaving, the whole chain is vectorized when the
10748 last store in the chain is reached. Store stmts before the last
10749 one are skipped, and their vec_stmt_info shouldn't be freed
10750 meanwhile. */
10751 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10752 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
10753 is_store = true;
10755 else
10756 is_store = true;
10757 break;
10759 case condition_vec_info_type:
10760 done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false,
10761 slp_node, NULL);
10762 gcc_assert (done);
10763 break;
10765 case comparison_vec_info_type:
10766 done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
10767 slp_node, NULL);
10768 gcc_assert (done);
10769 break;
10771 case call_vec_info_type:
10772 done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
10773 stmt = gsi_stmt (*gsi);
10774 break;
10776 case call_simd_clone_vec_info_type:
10777 done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
10778 slp_node, NULL);
10779 stmt = gsi_stmt (*gsi);
10780 break;
10782 case reduc_vec_info_type:
10783 done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
10784 slp_node_instance, NULL);
10785 gcc_assert (done);
10786 break;
10788 default:
10789 if (!STMT_VINFO_LIVE_P (stmt_info))
10791 if (dump_enabled_p ())
10792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10793 "stmt not supported.\n");
10794 gcc_unreachable ();
10798 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
10799 This would break hybrid SLP vectorization. */
10800 if (slp_node)
10801 gcc_assert (!vec_stmt
10802 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
10804 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
10805 is being vectorized, but outside the immediately enclosing loop. */
10806 if (vec_stmt
10807 && nested_p
10808 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10809 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
10810 || STMT_VINFO_RELEVANT (stmt_info) ==
10811 vect_used_in_outer_by_reduction))
10813 class loop *innerloop = LOOP_VINFO_LOOP (
10814 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
10815 imm_use_iterator imm_iter;
10816 use_operand_p use_p;
10817 tree scalar_dest;
10819 if (dump_enabled_p ())
10820 dump_printf_loc (MSG_NOTE, vect_location,
10821 "Record the vdef for outer-loop vectorization.\n");
10823 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
10824 (to be used when vectorizing outer-loop stmts that use the DEF of
10825 STMT). */
10826 if (gimple_code (stmt) == GIMPLE_PHI)
10827 scalar_dest = PHI_RESULT (stmt);
10828 else
10829 scalar_dest = gimple_get_lhs (stmt);
10831 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
10832 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
10834 stmt_vec_info exit_phi_info
10835 = vinfo->lookup_stmt (USE_STMT (use_p));
10836 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
10840 /* Handle stmts whose DEF is used outside the loop-nest that is
10841 being vectorized. */
10842 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
10844 done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,
10845 NULL);
10846 gcc_assert (done);
10849 if (vec_stmt)
10850 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
10852 return is_store;
10856 /* Remove a group of stores (for SLP or interleaving), free their
10857 stmt_vec_info. */
10859 void
10860 vect_remove_stores (stmt_vec_info first_stmt_info)
10862 vec_info *vinfo = first_stmt_info->vinfo;
10863 stmt_vec_info next_stmt_info = first_stmt_info;
10865 while (next_stmt_info)
10867 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10868 next_stmt_info = vect_orig_stmt (next_stmt_info);
10869 /* Free the attached stmt_vec_info and remove the stmt. */
10870 vinfo->remove_stmt (next_stmt_info);
10871 next_stmt_info = tmp;
10875 /* Function get_vectype_for_scalar_type_and_size.
10877 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
10878 by the target. */
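/* Illustrative example (an assumption about a typical x86_64-style target,
   not part of the original source):

     tree v = get_vectype_for_scalar_type_and_size (intSI_type_node, 16);

   would return a four-element SImode vector type when the target supports a
   16-byte integer vector mode, while passing a SIZE of 0 defers the choice
   to targetm.vectorize.preferred_simd_mode.  */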
10880 tree
10881 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
10883 tree orig_scalar_type = scalar_type;
10884 scalar_mode inner_mode;
10885 machine_mode simd_mode;
10886 poly_uint64 nunits;
10887 tree vectype;
10889 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
10890 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
10891 return NULL_TREE;
10893 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
10895 /* For vector types of elements whose mode precision doesn't
10896 match their type's precision we use an element type of mode
10897 precision. The vectorization routines will have to make sure
10898 they support the proper result truncation/extension.
10899 We also make sure to build vector types with INTEGER_TYPE
10900 component type only. */
10901 if (INTEGRAL_TYPE_P (scalar_type)
10902 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
10903 || TREE_CODE (scalar_type) != INTEGER_TYPE))
10904 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
10905 TYPE_UNSIGNED (scalar_type));
10907 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
10908 When the component mode passes the above test, simply use a type
10909 corresponding to that mode. The theory is that any use that
10910 would cause problems with this will disable vectorization anyway. */
10911 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
10912 && !INTEGRAL_TYPE_P (scalar_type))
10913 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
10915 /* We can't build a vector type of elements with alignment bigger than
10916 their size. */
10917 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
10918 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
10919 TYPE_UNSIGNED (scalar_type));
10921 /* If we fell back to using the mode, fail if there was
10922 no scalar type for it. */
10923 if (scalar_type == NULL_TREE)
10924 return NULL_TREE;
10926 /* If no size was supplied, use the mode the target prefers. Otherwise
10927 look up a vector mode of the specified size. */
10928 if (known_eq (size, 0U))
10929 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
10930 else if (!multiple_p (size, nbytes, &nunits)
10931 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
10932 return NULL_TREE;
10933 /* NOTE: nunits == 1 is allowed to support single element vector types. */
10934 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
10935 return NULL_TREE;
10937 vectype = build_vector_type (scalar_type, nunits);
10939 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
10940 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
10941 return NULL_TREE;
10943 /* Re-attach the address-space qualifier if we canonicalized the scalar
10944 type. */
10945 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
10946 return build_qualified_type
10947 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
10949 return vectype;
10952 poly_uint64 current_vector_size;
10954 /* Function get_vectype_for_scalar_type.
10956 Returns the vector type corresponding to SCALAR_TYPE as supported
10957 by the target. */
10959 tree
10960 get_vectype_for_scalar_type (tree scalar_type)
10962 tree vectype;
10963 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
10964 current_vector_size);
10965 if (vectype
10966 && known_eq (current_vector_size, 0U))
10967 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
10968 return vectype;
10971 /* Function get_mask_type_for_scalar_type.
10973 Returns the mask type corresponding to the result of a comparison
10974 of vectors of the specified SCALAR_TYPE, as supported by the target. */
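/* Illustrative example (not part of the original source): for a 4-element
   SImode vector of the current vector size, the returned mask type is the
   4-element boolean vector type produced by build_truth_vector_type,
   suitable as the result type of a vector comparison.  */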
10976 tree
10977 get_mask_type_for_scalar_type (tree scalar_type)
10979 tree vectype = get_vectype_for_scalar_type (scalar_type);
10981 if (!vectype)
10982 return NULL;
10984 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
10985 current_vector_size);
10988 /* Function get_same_sized_vectype
10990 Returns a vector type corresponding to SCALAR_TYPE with the same size
10991 as VECTOR_TYPE, if supported by the target. */
10993 tree
10994 get_same_sized_vectype (tree scalar_type, tree vector_type)
10996 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10997 return build_same_sized_truth_vector_type (vector_type);
10999 return get_vectype_for_scalar_type_and_size
11000 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
11003 /* Function vect_is_simple_use.
11005 Input:
11006 VINFO - the vect info of the loop or basic block that is being vectorized.
11007 OPERAND - operand in the loop or bb.
11008 Output:
11009 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11010 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11011 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11012 the definition could be anywhere in the function
11013 DT - the type of definition
11015 Returns whether a stmt with OPERAND can be vectorized.
11016 For loops, supportable operands are constants, loop invariants, and operands
11017 that are defined by the current iteration of the loop. Unsupportable
11018 operands are those that are defined by a previous iteration of the loop (as
11019 is the case in reduction/induction computations).
11020 For basic blocks, supportable operands are constants and bb invariants.
11021 For now, operands defined outside the basic block are not supported. */
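/* Illustrative sketch of a typical call (an assumption, not part of the
   original source):

     enum vect_def_type dt;
     stmt_vec_info def_info;
     gimple *def_stmt;
     if (!vect_is_simple_use (op, vinfo, &dt, &def_info, &def_stmt))
       return false;

   after which the caller dispatches on DT (vect_constant_def,
   vect_external_def, vect_internal_def, ...).  */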
11023 bool
11024 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11025 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11027 if (def_stmt_info_out)
11028 *def_stmt_info_out = NULL;
11029 if (def_stmt_out)
11030 *def_stmt_out = NULL;
11031 *dt = vect_unknown_def_type;
11033 if (dump_enabled_p ())
11035 dump_printf_loc (MSG_NOTE, vect_location,
11036 "vect_is_simple_use: operand ");
11037 if (TREE_CODE (operand) == SSA_NAME
11038 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11039 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11040 else
11041 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11044 if (CONSTANT_CLASS_P (operand))
11045 *dt = vect_constant_def;
11046 else if (is_gimple_min_invariant (operand))
11047 *dt = vect_external_def;
11048 else if (TREE_CODE (operand) != SSA_NAME)
11049 *dt = vect_unknown_def_type;
11050 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11051 *dt = vect_external_def;
11052 else
11054 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11055 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11056 if (!stmt_vinfo)
11057 *dt = vect_external_def;
11058 else
11060 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11061 def_stmt = stmt_vinfo->stmt;
11062 switch (gimple_code (def_stmt))
11064 case GIMPLE_PHI:
11065 case GIMPLE_ASSIGN:
11066 case GIMPLE_CALL:
11067 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11068 break;
11069 default:
11070 *dt = vect_unknown_def_type;
11071 break;
11073 if (def_stmt_info_out)
11074 *def_stmt_info_out = stmt_vinfo;
11076 if (def_stmt_out)
11077 *def_stmt_out = def_stmt;
11080 if (dump_enabled_p ())
11082 dump_printf (MSG_NOTE, ", type of def: ");
11083 switch (*dt)
11085 case vect_uninitialized_def:
11086 dump_printf (MSG_NOTE, "uninitialized\n");
11087 break;
11088 case vect_constant_def:
11089 dump_printf (MSG_NOTE, "constant\n");
11090 break;
11091 case vect_external_def:
11092 dump_printf (MSG_NOTE, "external\n");
11093 break;
11094 case vect_internal_def:
11095 dump_printf (MSG_NOTE, "internal\n");
11096 break;
11097 case vect_induction_def:
11098 dump_printf (MSG_NOTE, "induction\n");
11099 break;
11100 case vect_reduction_def:
11101 dump_printf (MSG_NOTE, "reduction\n");
11102 break;
11103 case vect_double_reduction_def:
11104 dump_printf (MSG_NOTE, "double reduction\n");
11105 break;
11106 case vect_nested_cycle:
11107 dump_printf (MSG_NOTE, "nested cycle\n");
11108 break;
11109 case vect_unknown_def_type:
11110 dump_printf (MSG_NOTE, "unknown\n");
11111 break;
11115 if (*dt == vect_unknown_def_type)
11117 if (dump_enabled_p ())
11118 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11119 "Unsupported pattern.\n");
11120 return false;
11123 return true;
11126 /* Function vect_is_simple_use.
11128 Same as vect_is_simple_use but also determines the vector operand
11129 type of OPERAND and stores it to *VECTYPE. If the definition of
11130 OPERAND is vect_uninitialized_def, vect_constant_def or
11131 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
11132 is responsible for computing the best-suited vector type for the
11133 scalar operand. */
11135 bool
11136 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11137 tree *vectype, stmt_vec_info *def_stmt_info_out,
11138 gimple **def_stmt_out)
11140 stmt_vec_info def_stmt_info;
11141 gimple *def_stmt;
11142 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11143 return false;
11145 if (def_stmt_out)
11146 *def_stmt_out = def_stmt;
11147 if (def_stmt_info_out)
11148 *def_stmt_info_out = def_stmt_info;
11150 /* Now get a vector type if the def is internal, otherwise supply
11151 NULL_TREE and leave it up to the caller to figure out a proper
11152 type for the use stmt. */
11153 if (*dt == vect_internal_def
11154 || *dt == vect_induction_def
11155 || *dt == vect_reduction_def
11156 || *dt == vect_double_reduction_def
11157 || *dt == vect_nested_cycle)
11159 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11160 gcc_assert (*vectype != NULL_TREE);
11161 if (dump_enabled_p ())
11162 dump_printf_loc (MSG_NOTE, vect_location,
11163 "vect_is_simple_use: vectype %T\n", *vectype);
11165 else if (*dt == vect_uninitialized_def
11166 || *dt == vect_constant_def
11167 || *dt == vect_external_def)
11168 *vectype = NULL_TREE;
11169 else
11170 gcc_unreachable ();
11172 return true;
11176 /* Function supportable_widening_operation
11178 Check whether an operation represented by the code CODE is a
11179 widening operation that is supported by the target platform in
11180 vector form (i.e., when operating on arguments of type VECTYPE_IN
11181 producing a result of type VECTYPE_OUT).
11183 Widening operations we currently support are NOP (CONVERT), FLOAT,
11184 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11185 are supported by the target platform either directly (via vector
11186 tree-codes), or via target builtins.
11188 Output:
11189 - CODE1 and CODE2 are codes of vector operations to be used when
11190 vectorizing the operation, if available.
11191 - MULTI_STEP_CVT determines the number of required intermediate steps in
11192 case of multi-step conversion (like char->short->int - in that case
11193 MULTI_STEP_CVT will be 1).
11194 - INTERM_TYPES contains the intermediate type required to perform the
11195 widening operation (short in the above example). */
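/* Illustrative example (an assumption about a typical target, not part of
   the original source): widening a vector of chars to a vector of ints
   usually cannot be done in one step, so for a char->int conversion one
   would expect *CODE1/*CODE2 such as VEC_UNPACK_LO/HI_EXPR,
   *MULTI_STEP_CVT == 1 and INTERM_TYPES holding the short vector type used
   for the intermediate char->short step.  */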
11197 bool
11198 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
11199 tree vectype_out, tree vectype_in,
11200 enum tree_code *code1, enum tree_code *code2,
11201 int *multi_step_cvt,
11202 vec<tree> *interm_types)
11204 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
11205 class loop *vect_loop = NULL;
11206 machine_mode vec_mode;
11207 enum insn_code icode1, icode2;
11208 optab optab1, optab2;
11209 tree vectype = vectype_in;
11210 tree wide_vectype = vectype_out;
11211 enum tree_code c1, c2;
11212 int i;
11213 tree prev_type, intermediate_type;
11214 machine_mode intermediate_mode, prev_mode;
11215 optab optab3, optab4;
11217 *multi_step_cvt = 0;
11218 if (loop_info)
11219 vect_loop = LOOP_VINFO_LOOP (loop_info);
11221 switch (code)
11223 case WIDEN_MULT_EXPR:
11224 /* The result of a vectorized widening operation usually requires
11225 two vectors (because the widened results do not fit into one vector).
11226 The generated vector results would normally be expected to be
11227 generated in the same order as in the original scalar computation,
11228 i.e. if 8 results are generated in each vector iteration, they are
11229 to be organized as follows:
11230 vect1: [res1,res2,res3,res4],
11231 vect2: [res5,res6,res7,res8].
11233 However, in the special case that the result of the widening
11234 operation is used in a reduction computation only, the order doesn't
11235 matter (because when vectorizing a reduction we change the order of
11236 the computation). Some targets can take advantage of this and
11237 generate more efficient code. For example, targets like Altivec,
11238 that support widen_mult using a sequence of {mult_even,mult_odd}
11239 generate the following vectors:
11240 vect1: [res1,res3,res5,res7],
11241 vect2: [res2,res4,res6,res8].
11243 When vectorizing outer-loops, we execute the inner-loop sequentially
11244 (each vectorized inner-loop iteration contributes to VF outer-loop
11245 iterations in parallel). We therefore don't allow the order of the
11246 computation in the inner-loop to be changed during outer-loop
11247 vectorization. */
11248 /* TODO: Another case in which order doesn't *really* matter is when we
11249 widen and then contract again, e.g. (short)((int)x * y >> 8).
11250 Normally, pack_trunc performs an even/odd permute, whereas the
11251 repack from an even/odd expansion would be an interleave, which
11252 would be significantly simpler for e.g. AVX2. */
11253 /* In any case, in order to avoid duplicating the code below, recurse
11254 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11255 are properly set up for the caller. If we fail, we'll continue with
11256 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11257 if (vect_loop
11258 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11259 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11260 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
11261 stmt_info, vectype_out,
11262 vectype_in, code1, code2,
11263 multi_step_cvt, interm_types))
11265 /* Elements in a vector with the vect_used_by_reduction property cannot
11266 be reordered if the use chain with this property does not have the
11267 same operation. One such example is s += a * b, where elements
11268 in a and b cannot be reordered. Here we check if the vector defined
11269 by STMT is only directly used in the reduction statement. */
11270 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11271 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11272 if (use_stmt_info
11273 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11274 return true;
11276 c1 = VEC_WIDEN_MULT_LO_EXPR;
11277 c2 = VEC_WIDEN_MULT_HI_EXPR;
11278 break;
11280 case DOT_PROD_EXPR:
11281 c1 = DOT_PROD_EXPR;
11282 c2 = DOT_PROD_EXPR;
11283 break;
11285 case SAD_EXPR:
11286 c1 = SAD_EXPR;
11287 c2 = SAD_EXPR;
11288 break;
11290 case VEC_WIDEN_MULT_EVEN_EXPR:
11291 /* Support the recursion induced just above. */
11292 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11293 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11294 break;
11296 case WIDEN_LSHIFT_EXPR:
11297 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11298 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11299 break;
11301 CASE_CONVERT:
11302 c1 = VEC_UNPACK_LO_EXPR;
11303 c2 = VEC_UNPACK_HI_EXPR;
11304 break;
11306 case FLOAT_EXPR:
11307 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11308 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11309 break;
11311 case FIX_TRUNC_EXPR:
11312 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11313 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11314 break;
11316 default:
11317 gcc_unreachable ();
11320 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11321 std::swap (c1, c2);
11323 if (code == FIX_TRUNC_EXPR)
11325 /* The signedness is determined from the output operand. */
11326 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11327 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11329 else if (CONVERT_EXPR_CODE_P (code)
11330 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11331 && VECTOR_BOOLEAN_TYPE_P (vectype)
11332 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11333 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11335 /* If the input and result modes are the same, a different optab
11336 is needed where we pass in the number of units in vectype. */
11337 optab1 = vec_unpacks_sbool_lo_optab;
11338 optab2 = vec_unpacks_sbool_hi_optab;
11340 else
11342 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11343 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11346 if (!optab1 || !optab2)
11347 return false;
11349 vec_mode = TYPE_MODE (vectype);
11350 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11351 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11352 return false;
11354 *code1 = c1;
11355 *code2 = c2;
11357 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11358 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11360 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11361 return true;
11362 /* For scalar masks we may have different boolean
11363 vector types having the same QImode. Thus we
11364 add an additional check on the number of elements. */
11365 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11366 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11367 return true;
11370 /* Check if it's a multi-step conversion that can be done using intermediate
11371 types. */
11373 prev_type = vectype;
11374 prev_mode = vec_mode;
11376 if (!CONVERT_EXPR_CODE_P (code))
11377 return false;
11379 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11380 intermediate steps in the promotion sequence. We try
11381 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11382 not. */
11383 interm_types->create (MAX_INTERM_CVT_STEPS);
11384 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11386 intermediate_mode = insn_data[icode1].operand[0].mode;
11387 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11389 intermediate_type = vect_halve_mask_nunits (prev_type);
11390 if (intermediate_mode != TYPE_MODE (intermediate_type))
11391 return false;
11393 else
11394 intermediate_type
11395 = lang_hooks.types.type_for_mode (intermediate_mode,
11396 TYPE_UNSIGNED (prev_type));
11398 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11399 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11400 && intermediate_mode == prev_mode
11401 && SCALAR_INT_MODE_P (prev_mode))
11403 /* If the input and result modes are the same, a different optab
11404 is needed where we pass in the number of units in vectype. */
11405 optab3 = vec_unpacks_sbool_lo_optab;
11406 optab4 = vec_unpacks_sbool_hi_optab;
11408 else
11410 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11411 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11414 if (!optab3 || !optab4
11415 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11416 || insn_data[icode1].operand[0].mode != intermediate_mode
11417 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11418 || insn_data[icode2].operand[0].mode != intermediate_mode
11419 || ((icode1 = optab_handler (optab3, intermediate_mode))
11420 == CODE_FOR_nothing)
11421 || ((icode2 = optab_handler (optab4, intermediate_mode))
11422 == CODE_FOR_nothing))
11423 break;
11425 interm_types->quick_push (intermediate_type);
11426 (*multi_step_cvt)++;
11428 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11429 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11431 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11432 return true;
11433 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11434 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11435 return true;
11438 prev_type = intermediate_type;
11439 prev_mode = intermediate_mode;
11442 interm_types->release ();
11443 return false;
11447 /* Function supportable_narrowing_operation
11449 Check whether an operation represented by the code CODE is a
11450 narrowing operation that is supported by the target platform in
11451 vector form (i.e., when operating on arguments of type VECTYPE_IN
11452 and producing a result of type VECTYPE_OUT).
11454 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11455 and FLOAT. This function checks if these operations are supported by
11456 the target platform directly via vector tree-codes.
11458 Output:
11459 - CODE1 is the code of a vector operation to be used when
11460 vectorizing the operation, if available.
11461 - MULTI_STEP_CVT determines the number of required intermediate steps in
11462 case of multi-step conversion (like int->short->char - in that case
11463 MULTI_STEP_CVT will be 1).
11464 - INTERM_TYPES contains the intermediate type required to perform the
11465 narrowing operation (short in the above example). */
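/* Illustrative example (an assumption about a typical target, not part of
   the original source): for an int->char conversion the expected outcome
   would be *CODE1 == VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT == 1 and
   INTERM_TYPES holding the short vector type used for the intermediate
   int->short step.  */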
11467 bool
11468 supportable_narrowing_operation (enum tree_code code,
11469 tree vectype_out, tree vectype_in,
11470 enum tree_code *code1, int *multi_step_cvt,
11471 vec<tree> *interm_types)
11473 machine_mode vec_mode;
11474 enum insn_code icode1;
11475 optab optab1, interm_optab;
11476 tree vectype = vectype_in;
11477 tree narrow_vectype = vectype_out;
11478 enum tree_code c1;
11479 tree intermediate_type, prev_type;
11480 machine_mode intermediate_mode, prev_mode;
11481 int i;
11482 bool uns;
11484 *multi_step_cvt = 0;
11485 switch (code)
11487 CASE_CONVERT:
11488 c1 = VEC_PACK_TRUNC_EXPR;
11489 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11490 && VECTOR_BOOLEAN_TYPE_P (vectype)
11491 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11492 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11493 optab1 = vec_pack_sbool_trunc_optab;
11494 else
11495 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11496 break;
11498 case FIX_TRUNC_EXPR:
11499 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11500 /* The signedness is determined from the output operand. */
11501 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11502 break;
11504 case FLOAT_EXPR:
11505 c1 = VEC_PACK_FLOAT_EXPR;
11506 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11507 break;
11509 default:
11510 gcc_unreachable ();
11513 if (!optab1)
11514 return false;
11516 vec_mode = TYPE_MODE (vectype);
11517 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11518 return false;
11520 *code1 = c1;
11522 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11524 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11525 return true;
11526 /* For scalar masks we may have different boolean
11527 vector types having the same QImode. Thus we
11528 add an additional check on the number of elements. */
11529 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11530 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11531 return true;
11534 if (code == FLOAT_EXPR)
11535 return false;
11537 /* Check if it's a multi-step conversion that can be done using intermediate
11538 types. */
11539 prev_mode = vec_mode;
11540 prev_type = vectype;
11541 if (code == FIX_TRUNC_EXPR)
11542 uns = TYPE_UNSIGNED (vectype_out);
11543 else
11544 uns = TYPE_UNSIGNED (vectype);
11546 /* For multi-step FIX_TRUNC_EXPR prefer signed floating-point to integer
11547 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
11548 costly than signed. */
11549 if (code == FIX_TRUNC_EXPR && uns)
11551 enum insn_code icode2;
11553 intermediate_type
11554 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
11555 interm_optab
11556 = optab_for_tree_code (c1, intermediate_type, optab_default);
11557 if (interm_optab != unknown_optab
11558 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
11559 && insn_data[icode1].operand[0].mode
11560 == insn_data[icode2].operand[0].mode)
11562 uns = false;
11563 optab1 = interm_optab;
11564 icode1 = icode2;
11568 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11569 intermediate steps in the narrowing sequence. We try
11570 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
11571 interm_types->create (MAX_INTERM_CVT_STEPS);
11572 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11574 intermediate_mode = insn_data[icode1].operand[0].mode;
11575 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11577 intermediate_type = vect_double_mask_nunits (prev_type);
11578 if (intermediate_mode != TYPE_MODE (intermediate_type))
11579 return false;
11581 else
11582 intermediate_type
11583 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
11584 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11585 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11586 && intermediate_mode == prev_mode
11587 && SCALAR_INT_MODE_P (prev_mode))
11588 interm_optab = vec_pack_sbool_trunc_optab;
11589 else
11590 interm_optab
11591 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
11592 optab_default);
11593 if (!interm_optab
11594 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
11595 || insn_data[icode1].operand[0].mode != intermediate_mode
11596 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
11597 == CODE_FOR_nothing))
11598 break;
11600 interm_types->quick_push (intermediate_type);
11601 (*multi_step_cvt)++;
11603 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11605 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11606 return true;
11607 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
11608 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11609 return true;
11612 prev_mode = intermediate_mode;
11613 prev_type = intermediate_type;
11614 optab1 = interm_optab;
11617 interm_types->release ();
11618 return false;
11621 /* Generate and return a statement that sets vector mask MASK such that
11622 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
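/* Illustrative example (not part of the original source): for a 4-element
   MASK with START_INDEX = i and END_INDEX = n, the generated IFN_WHILE_ULT
   call sets MASK = { i+0 < n, i+1 < n, i+2 < n, i+3 < n }, i.e. an all-true
   prefix followed by an all-false suffix.  */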
11624 gcall *
11625 vect_gen_while (tree mask, tree start_index, tree end_index)
11627 tree cmp_type = TREE_TYPE (start_index);
11628 tree mask_type = TREE_TYPE (mask);
11629 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
11630 cmp_type, mask_type,
11631 OPTIMIZE_FOR_SPEED));
11632 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
11633 start_index, end_index,
11634 build_zero_cst (mask_type));
11635 gimple_call_set_lhs (call, mask);
11636 return call;
11639 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
11640 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
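/* Illustrative sketch (an assumption, not part of the original source):

     gimple_seq seq = NULL;
     tree inv_mask = vect_gen_while_not (&seq, mask_type, start, end);

   appends a vect_gen_while call plus a BIT_NOT_EXPR to SEQ, so INV_MASK is
   the complement of the mask described above.  */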
11642 tree
11643 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
11644 tree end_index)
11646 tree tmp = make_ssa_name (mask_type);
11647 gcall *call = vect_gen_while (tmp, start_index, end_index);
11648 gimple_seq_add_stmt (seq, call);
11649 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
11652 /* Try to compute the vector types required to vectorize STMT_INFO,
11653 returning true on success and false if vectorization isn't possible.
11655 On success:
11657 - Set *STMT_VECTYPE_OUT to:
11658 - NULL_TREE if the statement doesn't need to be vectorized;
11659 - boolean_type_node if the statement is a boolean operation whose
11660 vector type can only be determined once all the other vector types
11661 are known; and
11662 - the equivalent of STMT_VINFO_VECTYPE otherwise.
11664 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
11665 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
11666 statement does not help to determine the overall number of units. */
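/* Illustrative example (an assumption, not part of the original source):
   for a statement like a[i] = b[i] + 1 with int elements, both
   *STMT_VECTYPE_OUT and *NUNITS_VECTYPE_OUT would normally be the int
   vector type; for a pure boolean operation such as x = p & q,
   *STMT_VECTYPE_OUT is boolean_type_node and *NUNITS_VECTYPE_OUT is left
   NULL_TREE, so the statement does not constrain the number of units.  */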
11668 opt_result
11669 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
11670 tree *stmt_vectype_out,
11671 tree *nunits_vectype_out)
11673 gimple *stmt = stmt_info->stmt;
11675 *stmt_vectype_out = NULL_TREE;
11676 *nunits_vectype_out = NULL_TREE;
11678 if (gimple_get_lhs (stmt) == NULL_TREE
11679 /* MASK_STORE has no lhs, but is ok. */
11680 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
11682 if (is_a <gcall *> (stmt))
11684 /* Ignore calls with no lhs. These must be calls to
11685 #pragma omp simd functions, and the vectorization factor
11686 they really need can't be determined until
11687 vectorizable_simd_clone_call. */
11688 if (dump_enabled_p ())
11689 dump_printf_loc (MSG_NOTE, vect_location,
11690 "defer to SIMD clone analysis.\n");
11691 return opt_result::success ();
11694 return opt_result::failure_at (stmt,
11695 "not vectorized: irregular stmt.%G", stmt);
11698 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
11699 return opt_result::failure_at (stmt,
11700 "not vectorized: vector stmt in loop:%G",
11701 stmt);
11703 tree vectype;
11704 tree scalar_type = NULL_TREE;
11705 if (STMT_VINFO_VECTYPE (stmt_info))
11706 *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
11707 else
11709 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
11710 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
11711 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
11712 else
11713 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
11715 /* Pure bool ops don't participate in number-of-units computation.
11716 For comparisons use the types being compared. */
11717 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
11718 && is_gimple_assign (stmt)
11719 && gimple_assign_rhs_code (stmt) != COND_EXPR)
11721 *stmt_vectype_out = boolean_type_node;
11723 tree rhs1 = gimple_assign_rhs1 (stmt);
11724 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
11725 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
11726 scalar_type = TREE_TYPE (rhs1);
11727 else
11729 if (dump_enabled_p ())
11730 dump_printf_loc (MSG_NOTE, vect_location,
11731 "pure bool operation.\n");
11732 return opt_result::success ();
11736 if (dump_enabled_p ())
11737 dump_printf_loc (MSG_NOTE, vect_location,
11738 "get vectype for scalar type: %T\n", scalar_type);
11739 vectype = get_vectype_for_scalar_type (scalar_type);
11740 if (!vectype)
11741 return opt_result::failure_at (stmt,
11742 "not vectorized:"
11743 " unsupported data-type %T\n",
11744 scalar_type);
11746 if (!*stmt_vectype_out)
11747 *stmt_vectype_out = vectype;
11749 if (dump_enabled_p ())
11750 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
11753 /* Don't try to compute scalar types if the stmt produces a boolean
11754 vector; use the existing vector type instead. */
11755 tree nunits_vectype;
11756 if (VECTOR_BOOLEAN_TYPE_P (vectype))
11757 nunits_vectype = vectype;
11758 else
11760 /* The number of units is set according to the smallest scalar
11761 type (or the largest vector size, but we only support one
11762 vector size per vectorization). */
11763 if (*stmt_vectype_out != boolean_type_node)
11765 HOST_WIDE_INT dummy;
11766 scalar_type = vect_get_smallest_scalar_type (stmt_info,
11767 &dummy, &dummy);
11769 if (dump_enabled_p ())
11770 dump_printf_loc (MSG_NOTE, vect_location,
11771 "get vectype for scalar type: %T\n", scalar_type);
11772 nunits_vectype = get_vectype_for_scalar_type (scalar_type);
11774 if (!nunits_vectype)
11775 return opt_result::failure_at (stmt,
11776 "not vectorized: unsupported data-type %T\n",
11777 scalar_type);
11779 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
11780 GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
11781 return opt_result::failure_at (stmt,
11782 "not vectorized: different sized vector "
11783 "types in statement, %T and %T\n",
11784 vectype, nunits_vectype);
11786 if (dump_enabled_p ())
11788 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
11789 nunits_vectype);
11791 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
11792 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
11793 dump_printf (MSG_NOTE, "\n");
11796 *nunits_vectype_out = nunits_vectype;
11797 return opt_result::success ();
11800 /* Try to determine the correct vector type for STMT_INFO, which is a
11801 statement that produces a scalar boolean result. Return the vector
11802 type on success, otherwise return NULL_TREE. */
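/* Illustrative example (an assumption, not part of the original source):
   for a comparison such as flag = a[i] < b[i] with int operands, the
   returned mask type is the boolean vector type corresponding to the int
   vector type; for a boolean operation such as flag = p & q, the mask type
   is instead derived from the vector types of the operands P and Q.  */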
11804 opt_tree
11805 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
11807 gimple *stmt = stmt_info->stmt;
11808 tree mask_type = NULL;
11809 tree vectype, scalar_type;
11811 if (is_gimple_assign (stmt)
11812 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
11813 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
11815 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
11816 mask_type = get_mask_type_for_scalar_type (scalar_type);
11818 if (!mask_type)
11819 return opt_tree::failure_at (stmt,
11820 "not vectorized: unsupported mask\n");
11822 else
11824 tree rhs;
11825 ssa_op_iter iter;
11826 enum vect_def_type dt;
11828 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
11830 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype))
11831 return opt_tree::failure_at (stmt,
11832 "not vectorized:can't compute mask"
11833 " type for statement, %G", stmt);
11835 /* No vectype probably means external definition.
11836 Allow it in case there is another operand that
11837 allows the mask type to be determined. */
11838 if (!vectype)
11839 continue;
11841 if (!mask_type)
11842 mask_type = vectype;
11843 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
11844 TYPE_VECTOR_SUBPARTS (vectype)))
11845 return opt_tree::failure_at (stmt,
11846 "not vectorized: different sized mask"
11847 " types in statement, %T and %T\n",
11848 mask_type, vectype);
11849 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
11850 != VECTOR_BOOLEAN_TYPE_P (vectype))
11851 return opt_tree::failure_at (stmt,
11852 "not vectorized: mixed mask and "
11853 "nonmask vector types in statement, "
11854 "%T and %T\n",
11855 mask_type, vectype);
11858 /* We may compare boolean values loaded as a vector of integers.
11859 Fix mask_type in such a case. */
11860 if (mask_type
11861 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
11862 && gimple_code (stmt) == GIMPLE_ASSIGN
11863 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
11864 mask_type = build_same_sized_truth_vector_type (mask_type);
11867 /* No mask_type should mean a loop-invariant predicate.
11868 This is probably a subject for optimization in if-conversion. */
11869 if (!mask_type)
11870 return opt_tree::failure_at (stmt,
11871 "not vectorized: can't compute mask type "
11872 "for statement: %G", stmt);
11874 return opt_tree::success (mask_type);