gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
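/* Editorial note, not part of the upstream sources: an illustrative sketch
   of how the cost-model helpers later in this file use record_stmt_cost.
   The returned value is accumulated as a preliminary estimate while the
   pushed stmt_info_for_cost entries are what the target cost model later
   consumes, e.g.

     unsigned inside_cost = 0;
     inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
                                      stmt_info, 0, vect_body);

   Only the recorded vector of costs is authoritative; the return value is
   derived from builtin_vectorization_cost.  */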
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
158 tree array_ref;
159 gimple *new_stmt;
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
176 tree mem_ref;
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
249 worklist->safe_push (stmt_info);
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
261 tree op;
262 ssa_op_iter iter;
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
270 enum vect_def_type dt = vect_uninitialized_def;
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
283 return true;
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
 294 - it is a control stmt in the loop (other than the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
342 /* We expect all such uses to be in the loop exit phis
 343 (because of loop-closed SSA form).  */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
347 *live_p = true;
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
361 return (*live_p || *relevant);
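/* Editorial example of the criteria above (illustrative only, not from the
   upstream sources): in

     for (i = 0; i < n; i++)
       {
         sum += a[i];
         b[i] = x;
       }

   the store to b[i] is relevant because it has a vdef, and the statement
   producing the final value of sum is marked live because that value is
   used after the loop, through the loop-closed exit PHI.  */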
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
373 tree operand;
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
 381 /* STMT has a data_ref.  FORNOW this means that it is of one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
413 return false;
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
422 if (operand == use)
423 return true;
425 return false;
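/* Editorial example (illustrative only, not from the upstream sources):
   for the store "a[i_1] = x_2" the use of x_2 is the stored value, so the
   function returns true for it, while the use of i_1 only feeds the array
   index and the function returns false.  For the load "x_2 = a[i_1]" the
   destination is an SSA name, so every use is part of the address
   computation and the function returns false.  */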
 430 /* Function process_use.
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
 450 we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
474 if (!dstmt_vinfo)
475 return opt_result::success ();
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
509 switch (relevant)
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
526 case vect_used_in_scope:
527 break;
529 default:
530 gcc_unreachable ();
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
547 switch (relevant)
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
564 default:
565 gcc_unreachable ();
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
600 3. j = j + 1
 602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec<stmt_vec_info, 64> worklist;
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
639 if (is_gimple_debug (gsi_stmt (si)))
640 continue;
641 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "init: stmt relevant? %G", stmt_info->stmt);
646 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
651 /* 2. Process_worklist */
652 while (worklist.length () > 0)
654 use_operand_p use_p;
655 ssa_op_iter iter;
657 stmt_vec_info stmt_vinfo = worklist.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE, vect_location,
660 "worklist: examine stmt: %G", stmt_vinfo->stmt);
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 of STMT. */
665 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
680 case vect_reduction_def:
681 gcc_assert (relevant != vect_unused_in_scope);
682 if (relevant != vect_unused_in_scope
683 && relevant != vect_used_in_scope
684 && relevant != vect_used_by_reduction
685 && relevant != vect_used_only_live)
686 return opt_result::failure_at
687 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 break;
690 case vect_nested_cycle:
691 if (relevant != vect_unused_in_scope
692 && relevant != vect_used_in_outer_by_reduction
693 && relevant != vect_used_in_outer)
694 return opt_result::failure_at
695 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696 break;
698 case vect_double_reduction_def:
699 if (relevant != vect_unused_in_scope
700 && relevant != vect_used_by_reduction
701 && relevant != vect_used_only_live)
702 return opt_result::failure_at
703 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704 break;
706 default:
707 break;
710 if (is_pattern_stmt_p (stmt_vinfo))
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
717 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 tree op = gimple_assign_rhs1 (assign);
720 i = 1;
721 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
723 opt_result res
724 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 loop_vinfo, relevant, &worklist, false);
730 if (!res)
731 return res;
732 i = 2;
734 for (; i < gimple_num_ops (assign); i++)
736 op = gimple_op (assign, i);
737 if (TREE_CODE (op) == SSA_NAME)
739 opt_result res
740 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 &worklist, false);
742 if (!res)
743 return res;
747 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
749 for (i = 0; i < gimple_call_num_args (call); i++)
751 tree arg = gimple_call_arg (call, i);
752 opt_result res
753 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 &worklist, false);
755 if (!res)
756 return res;
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
763 tree op = USE_FROM_PTR (use_p);
764 opt_result res
765 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 &worklist, false);
767 if (!res)
768 return res;
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
773 gather_scatter_info gs_info;
774 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 gcc_unreachable ();
776 opt_result res
777 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 &worklist, true);
779 if (!res)
781 if (fatal)
782 *fatal = false;
783 return res;
786 } /* while worklist */
788 return opt_result::success ();
791 /* Function vect_model_simple_cost.
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
797 static void
798 vect_model_simple_cost (vec_info *,
799 stmt_vec_info stmt_info, int ncopies,
800 enum vect_def_type *dt,
801 int ndts,
802 slp_tree node,
803 stmt_vector_for_cost *cost_vec,
804 vect_cost_for_stmt kind = vector_stmt)
806 int inside_cost = 0, prologue_cost = 0;
808 gcc_assert (cost_vec != NULL);
810 /* ??? Somehow we need to fix this at the callers. */
811 if (node)
812 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
814 if (!node)
815 /* Cost the "broadcast" of a scalar operand in to a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817 cost model. */
818 for (int i = 0; i < ndts; i++)
819 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
821 stmt_info, 0, vect_prologue);
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
825 stmt_info, 0, vect_body);
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE, vect_location,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost, prologue_cost);
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. If WIDEN_ARITH
840 is true the stmt is doing widening arithmetic. */
842 static void
843 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
844 enum vect_def_type *dt,
845 unsigned int ncopies, int pwr,
846 stmt_vector_for_cost *cost_vec,
847 bool widen_arith)
849 int i;
850 int inside_cost = 0, prologue_cost = 0;
852 for (i = 0; i < pwr + 1; i++)
854 inside_cost += record_stmt_cost (cost_vec, ncopies,
855 widen_arith
856 ? vector_stmt : vec_promote_demote,
857 stmt_info, 0, vect_body);
858 ncopies *= 2;
 861 /* FORNOW: Assume a maximum of 2 args per stmt.  */
862 for (i = 0; i < 2; i++)
863 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
864 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
865 stmt_info, 0, vect_prologue);
867 if (dump_enabled_p ())
868 dump_printf_loc (MSG_NOTE, vect_location,
869 "vect_model_promotion_demotion_cost: inside_cost = %d, "
870 "prologue_cost = %d .\n", inside_cost, prologue_cost);
873 /* Returns true if the current function returns DECL. */
875 static bool
876 cfun_returns (tree decl)
878 edge_iterator ei;
879 edge e;
880 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
882 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
883 if (!ret)
884 continue;
885 if (gimple_return_retval (ret) == decl)
886 return true;
887 /* We often end up with an aggregate copy to the result decl,
888 handle that case as well. First skip intermediate clobbers
889 though. */
890 gimple *def = ret;
893 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
895 while (gimple_clobber_p (def));
896 if (is_a <gassign *> (def)
897 && gimple_assign_lhs (def) == gimple_return_retval (ret)
898 && gimple_assign_rhs1 (def) == decl)
899 return true;
901 return false;
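/* Editorial example (illustrative only, not from the upstream sources):
   for

     struct S f (void) { struct S s; ...; return s; }

   cfun_returns (s) is true, either because the GIMPLE return references s
   directly or because an aggregate copy "<retval> = s" feeds the return
   value, after skipping any intermediate clobber statements on the
   virtual use chain.  */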
904 /* Function vect_model_store_cost
906 Models cost for stores. In the case of grouped accesses, one access
907 has the overhead of the grouped access attributed to it. */
909 static void
910 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
911 vect_memory_access_type memory_access_type,
912 vec_load_store_type vls_type, slp_tree slp_node,
913 stmt_vector_for_cost *cost_vec)
915 unsigned int inside_cost = 0, prologue_cost = 0;
916 stmt_vec_info first_stmt_info = stmt_info;
917 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
919 /* ??? Somehow we need to fix this at the callers. */
920 if (slp_node)
921 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
923 if (vls_type == VLS_STORE_INVARIANT)
925 if (!slp_node)
926 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
927 stmt_info, 0, vect_prologue);
930 /* Grouped stores update all elements in the group at once,
931 so we want the DR for the first statement. */
932 if (!slp_node && grouped_access_p)
933 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
935 /* True if we should include any once-per-group costs as well as
936 the cost of the statement itself. For SLP we only get called
937 once per group anyhow. */
938 bool first_stmt_p = (first_stmt_info == stmt_info);
940 /* We assume that the cost of a single store-lanes instruction is
941 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
942 access is instead being provided by a permute-and-store operation,
943 include the cost of the permutes. */
944 if (first_stmt_p
945 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
 947 /* Uses high and low interleave or shuffle operations for each
948 needed permute. */
949 int group_size = DR_GROUP_SIZE (first_stmt_info);
950 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
951 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
952 stmt_info, 0, vect_body);
954 if (dump_enabled_p ())
955 dump_printf_loc (MSG_NOTE, vect_location,
956 "vect_model_store_cost: strided group_size = %d .\n",
957 group_size);
960 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
961 /* Costs of the stores. */
962 if (memory_access_type == VMAT_ELEMENTWISE
963 || memory_access_type == VMAT_GATHER_SCATTER)
965 /* N scalar stores plus extracting the elements. */
966 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
967 inside_cost += record_stmt_cost (cost_vec,
968 ncopies * assumed_nunits,
969 scalar_store, stmt_info, 0, vect_body);
971 else
972 vect_get_store_cost (vinfo, stmt_info, ncopies, &inside_cost, cost_vec);
974 if (memory_access_type == VMAT_ELEMENTWISE
975 || memory_access_type == VMAT_STRIDED_SLP)
977 /* N scalar stores plus extracting the elements. */
978 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
979 inside_cost += record_stmt_cost (cost_vec,
980 ncopies * assumed_nunits,
981 vec_to_scalar, stmt_info, 0, vect_body);
984 /* When vectorizing a store into the function result assign
985 a penalty if the function returns in a multi-register location.
986 In this case we assume we'll end up with having to spill the
987 vector result and do piecewise loads as a conservative estimate. */
988 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
989 if (base
990 && (TREE_CODE (base) == RESULT_DECL
991 || (DECL_P (base) && cfun_returns (base)))
992 && !aggregate_value_p (base, cfun->decl))
994 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
995 /* ??? Handle PARALLEL in some way. */
996 if (REG_P (reg))
998 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
999 /* Assume that a single reg-reg move is possible and cheap,
1000 do not account for vector to gp register move cost. */
1001 if (nregs > 1)
1003 /* Spill. */
1004 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1005 vector_store,
1006 stmt_info, 0, vect_epilogue);
1007 /* Loads. */
1008 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1009 scalar_load,
1010 stmt_info, 0, vect_epilogue);
1015 if (dump_enabled_p ())
1016 dump_printf_loc (MSG_NOTE, vect_location,
1017 "vect_model_store_cost: inside_cost = %d, "
1018 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1022 /* Calculate cost of DR's memory access. */
1023 void
1024 vect_get_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1025 unsigned int *inside_cost,
1026 stmt_vector_for_cost *body_cost_vec)
1028 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1029 int alignment_support_scheme
1030 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1032 switch (alignment_support_scheme)
1034 case dr_aligned:
1036 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1037 vector_store, stmt_info, 0,
1038 vect_body);
1040 if (dump_enabled_p ())
1041 dump_printf_loc (MSG_NOTE, vect_location,
1042 "vect_model_store_cost: aligned.\n");
1043 break;
1046 case dr_unaligned_supported:
1048 /* Here, we assign an additional cost for the unaligned store. */
1049 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1050 unaligned_store, stmt_info,
1051 DR_MISALIGNMENT (dr_info),
1052 vect_body);
1053 if (dump_enabled_p ())
1054 dump_printf_loc (MSG_NOTE, vect_location,
1055 "vect_model_store_cost: unaligned supported by "
1056 "hardware.\n");
1057 break;
1060 case dr_unaligned_unsupported:
1062 *inside_cost = VECT_MAX_COST;
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1066 "vect_model_store_cost: unsupported access.\n");
1067 break;
1070 default:
1071 gcc_unreachable ();
1076 /* Function vect_model_load_cost
1078 Models cost for loads. In the case of grouped accesses, one access has
1079 the overhead of the grouped access attributed to it. Since unaligned
1080 accesses are supported for loads, we also account for the costs of the
1081 access scheme chosen. */
1083 static void
1084 vect_model_load_cost (vec_info *vinfo,
1085 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1086 vect_memory_access_type memory_access_type,
1087 gather_scatter_info *gs_info,
1088 slp_tree slp_node,
1089 stmt_vector_for_cost *cost_vec)
1091 unsigned int inside_cost = 0, prologue_cost = 0;
1092 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1094 gcc_assert (cost_vec);
1096 /* ??? Somehow we need to fix this at the callers. */
1097 if (slp_node)
1098 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1100 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1102 /* If the load is permuted then the alignment is determined by
1103 the first group element not by the first scalar stmt DR. */
1104 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1105 /* Record the cost for the permutation. */
1106 unsigned n_perms, n_loads;
1107 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1108 vf, true, &n_perms, &n_loads);
1109 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1110 first_stmt_info, 0, vect_body);
1112 /* And adjust the number of loads performed. This handles
1113 redundancies as well as loads that are later dead. */
1114 ncopies = n_loads;
1117 /* Grouped loads read all elements in the group at once,
1118 so we want the DR for the first statement. */
1119 stmt_vec_info first_stmt_info = stmt_info;
1120 if (!slp_node && grouped_access_p)
1121 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1123 /* True if we should include any once-per-group costs as well as
1124 the cost of the statement itself. For SLP we only get called
1125 once per group anyhow. */
1126 bool first_stmt_p = (first_stmt_info == stmt_info);
1128 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1129 ones we actually need. Account for the cost of unused results. */
1130 if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1132 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1133 stmt_vec_info next_stmt_info = first_stmt_info;
1136 gaps -= 1;
1137 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1139 while (next_stmt_info);
1140 if (gaps)
1142 if (dump_enabled_p ())
1143 dump_printf_loc (MSG_NOTE, vect_location,
1144 "vect_model_load_cost: %d unused vectors.\n",
1145 gaps);
1146 vect_get_load_cost (vinfo, stmt_info, ncopies * gaps, false,
1147 &inside_cost, &prologue_cost,
1148 cost_vec, cost_vec, true);
1152 /* We assume that the cost of a single load-lanes instruction is
1153 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1154 access is instead being provided by a load-and-permute operation,
1155 include the cost of the permutes. */
1156 if (first_stmt_p
1157 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
 1159 /* Uses even and odd extract operations or shuffle operations
1160 for each needed permute. */
1161 int group_size = DR_GROUP_SIZE (first_stmt_info);
1162 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1163 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1164 stmt_info, 0, vect_body);
1166 if (dump_enabled_p ())
1167 dump_printf_loc (MSG_NOTE, vect_location,
1168 "vect_model_load_cost: strided group_size = %d .\n",
1169 group_size);
1172 /* The loads themselves. */
1173 if (memory_access_type == VMAT_ELEMENTWISE
1174 || memory_access_type == VMAT_GATHER_SCATTER)
1176 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1177 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1178 if (memory_access_type == VMAT_GATHER_SCATTER
1179 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1180 /* For emulated gathers N offset vector element extracts
1181 (we assume the scalar scaling and ptr + offset add is consumed by
1182 the load). */
1183 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1184 vec_to_scalar, stmt_info, 0,
1185 vect_body);
1186 /* N scalar loads plus gathering them into a vector. */
1187 inside_cost += record_stmt_cost (cost_vec,
1188 ncopies * assumed_nunits,
1189 scalar_load, stmt_info, 0, vect_body);
1191 else
1192 vect_get_load_cost (vinfo, stmt_info, ncopies, first_stmt_p,
1193 &inside_cost, &prologue_cost,
1194 cost_vec, cost_vec, true);
1195 if (memory_access_type == VMAT_ELEMENTWISE
1196 || memory_access_type == VMAT_STRIDED_SLP
1197 || (memory_access_type == VMAT_GATHER_SCATTER
1198 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1199 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1200 stmt_info, 0, vect_body);
1202 if (dump_enabled_p ())
1203 dump_printf_loc (MSG_NOTE, vect_location,
1204 "vect_model_load_cost: inside_cost = %d, "
1205 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1209 /* Calculate cost of DR's memory access. */
1210 void
1211 vect_get_load_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
1212 bool add_realign_cost, unsigned int *inside_cost,
1213 unsigned int *prologue_cost,
1214 stmt_vector_for_cost *prologue_cost_vec,
1215 stmt_vector_for_cost *body_cost_vec,
1216 bool record_prologue_costs)
1218 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1219 int alignment_support_scheme
1220 = vect_supportable_dr_alignment (vinfo, dr_info, false);
1222 switch (alignment_support_scheme)
1224 case dr_aligned:
1226 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1227 stmt_info, 0, vect_body);
1229 if (dump_enabled_p ())
1230 dump_printf_loc (MSG_NOTE, vect_location,
1231 "vect_model_load_cost: aligned.\n");
1233 break;
1235 case dr_unaligned_supported:
1237 /* Here, we assign an additional cost for the unaligned load. */
1238 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1239 unaligned_load, stmt_info,
1240 DR_MISALIGNMENT (dr_info),
1241 vect_body);
1243 if (dump_enabled_p ())
1244 dump_printf_loc (MSG_NOTE, vect_location,
1245 "vect_model_load_cost: unaligned supported by "
1246 "hardware.\n");
1248 break;
1250 case dr_explicit_realign:
1252 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1253 vector_load, stmt_info, 0, vect_body);
1254 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1255 vec_perm, stmt_info, 0, vect_body);
1257 /* FIXME: If the misalignment remains fixed across the iterations of
1258 the containing loop, the following cost should be added to the
1259 prologue costs. */
1260 if (targetm.vectorize.builtin_mask_for_load)
1261 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1262 stmt_info, 0, vect_body);
1264 if (dump_enabled_p ())
1265 dump_printf_loc (MSG_NOTE, vect_location,
1266 "vect_model_load_cost: explicit realign\n");
1268 break;
1270 case dr_explicit_realign_optimized:
1272 if (dump_enabled_p ())
1273 dump_printf_loc (MSG_NOTE, vect_location,
1274 "vect_model_load_cost: unaligned software "
1275 "pipelined.\n");
1277 /* Unaligned software pipeline has a load of an address, an initial
1278 load, and possibly a mask operation to "prime" the loop. However,
1279 if this is an access in a group of loads, which provide grouped
1280 access, then the above cost should only be considered for one
1281 access in the group. Inside the loop, there is a load op
1282 and a realignment op. */
1284 if (add_realign_cost && record_prologue_costs)
1286 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1287 vector_stmt, stmt_info,
1288 0, vect_prologue);
1289 if (targetm.vectorize.builtin_mask_for_load)
1290 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1291 vector_stmt, stmt_info,
1292 0, vect_prologue);
1295 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1296 stmt_info, 0, vect_body);
1297 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1298 stmt_info, 0, vect_body);
1300 if (dump_enabled_p ())
1301 dump_printf_loc (MSG_NOTE, vect_location,
1302 "vect_model_load_cost: explicit realign optimized"
1303 "\n");
1305 break;
1308 case dr_unaligned_unsupported:
1310 *inside_cost = VECT_MAX_COST;
1312 if (dump_enabled_p ())
1313 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1314 "vect_model_load_cost: unsupported access.\n");
1315 break;
1318 default:
1319 gcc_unreachable ();
1323 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1324 the loop preheader for the vectorized stmt STMT_VINFO. */
1326 static void
1327 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1328 gimple_stmt_iterator *gsi)
1330 if (gsi)
1331 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1332 else
1333 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1335 if (dump_enabled_p ())
1336 dump_printf_loc (MSG_NOTE, vect_location,
1337 "created new init_stmt: %G", new_stmt);
1340 /* Function vect_init_vector.
1342 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1343 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1344 vector type a vector with all elements equal to VAL is created first.
1345 Place the initialization at GSI if it is not NULL. Otherwise, place the
1346 initialization at the loop preheader.
1347 Return the DEF of INIT_STMT.
1348 It will be used in the vectorization of STMT_INFO. */
1350 tree
1351 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1352 gimple_stmt_iterator *gsi)
1354 gimple *init_stmt;
1355 tree new_temp;
 1357 /* We abuse this function to push something to an SSA name with initial value VAL.  */
1358 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1360 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1361 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1363 /* Scalar boolean value should be transformed into
1364 all zeros or all ones value before building a vector. */
1365 if (VECTOR_BOOLEAN_TYPE_P (type))
1367 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1368 tree false_val = build_zero_cst (TREE_TYPE (type));
1370 if (CONSTANT_CLASS_P (val))
1371 val = integer_zerop (val) ? false_val : true_val;
1372 else
1374 new_temp = make_ssa_name (TREE_TYPE (type));
1375 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1376 val, true_val, false_val);
1377 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1378 val = new_temp;
1381 else
1383 gimple_seq stmts = NULL;
1384 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1385 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1386 TREE_TYPE (type), val);
1387 else
1388 /* ??? Condition vectorization expects us to do
1389 promotion of invariant/external defs. */
1390 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1391 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1392 !gsi_end_p (gsi2); )
1394 init_stmt = gsi_stmt (gsi2);
1395 gsi_remove (&gsi2, false);
1396 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1400 val = build_vector_from_val (type, val);
1403 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1404 init_stmt = gimple_build_assign (new_temp, val);
1405 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1406 return new_temp;
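/* Illustrative usage (editorial sketch, not from the upstream sources):
   vect_get_vec_defs_for_operand below calls

     tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);

   for constant/external operands; with a NULL GSI the initialization
   "cst_N = { op, ..., op }" is placed on loop entry and the new SSA name
   is returned.  */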
1410 /* Function vect_get_vec_defs_for_operand.
1412 OP is an operand in STMT_VINFO. This function returns a vector of
1413 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1415 In the case that OP is an SSA_NAME which is defined in the loop, then
1416 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1418 In case OP is an invariant or constant, a new stmt that creates a vector def
1419 needs to be introduced. VECTYPE may be used to specify a required type for
1420 vector invariant. */
1422 void
1423 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1424 unsigned ncopies,
1425 tree op, vec<tree> *vec_oprnds, tree vectype)
1427 gimple *def_stmt;
1428 enum vect_def_type dt;
1429 bool is_simple_use;
1430 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1432 if (dump_enabled_p ())
1433 dump_printf_loc (MSG_NOTE, vect_location,
1434 "vect_get_vec_defs_for_operand: %T\n", op);
1436 stmt_vec_info def_stmt_info;
1437 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1438 &def_stmt_info, &def_stmt);
1439 gcc_assert (is_simple_use);
1440 if (def_stmt && dump_enabled_p ())
1441 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1443 vec_oprnds->create (ncopies);
1444 if (dt == vect_constant_def || dt == vect_external_def)
1446 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1447 tree vector_type;
1449 if (vectype)
1450 vector_type = vectype;
1451 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1452 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1453 vector_type = truth_type_for (stmt_vectype);
1454 else
1455 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1457 gcc_assert (vector_type);
1458 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1459 while (ncopies--)
1460 vec_oprnds->quick_push (vop);
1462 else
1464 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1465 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1466 for (unsigned i = 0; i < ncopies; ++i)
1467 vec_oprnds->quick_push (gimple_get_lhs
1468 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1473 /* Get vectorized definitions for OP0 and OP1. */
1475 void
1476 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1477 unsigned ncopies,
1478 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1479 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1480 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1481 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1483 if (slp_node)
1485 if (op0)
1486 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1487 if (op1)
1488 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1489 if (op2)
1490 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1491 if (op3)
1492 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1494 else
1496 if (op0)
1497 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1498 op0, vec_oprnds0, vectype0);
1499 if (op1)
1500 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1501 op1, vec_oprnds1, vectype1);
1502 if (op2)
1503 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1504 op2, vec_oprnds2, vectype2);
1505 if (op3)
1506 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1507 op3, vec_oprnds3, vectype3);
1511 void
1512 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1513 unsigned ncopies,
1514 tree op0, vec<tree> *vec_oprnds0,
1515 tree op1, vec<tree> *vec_oprnds1,
1516 tree op2, vec<tree> *vec_oprnds2,
1517 tree op3, vec<tree> *vec_oprnds3)
1519 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1520 op0, vec_oprnds0, NULL_TREE,
1521 op1, vec_oprnds1, NULL_TREE,
1522 op2, vec_oprnds2, NULL_TREE,
1523 op3, vec_oprnds3, NULL_TREE);
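/* Illustrative call (editorial sketch, not from the upstream sources,
   assuming a binary operation with operands OP0 and OP1): a typical user
   of the shorthand overload above looks like

     vec<tree> vec_oprnds0 = vNULL, vec_oprnds1 = vNULL;
     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
                        op0, &vec_oprnds0, op1, &vec_oprnds1,
                        NULL_TREE, NULL, NULL_TREE, NULL);

   which fills the two vectors either from the SLP children or via
   vect_get_vec_defs_for_operand.  */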
1526 /* Helper function called by vect_finish_replace_stmt and
1527 vect_finish_stmt_generation. Set the location of the new
1528 statement and create and return a stmt_vec_info for it. */
1530 static void
1531 vect_finish_stmt_generation_1 (vec_info *,
1532 stmt_vec_info stmt_info, gimple *vec_stmt)
1534 if (dump_enabled_p ())
1535 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1537 if (stmt_info)
1539 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1541 /* While EH edges will generally prevent vectorization, stmt might
1542 e.g. be in a must-not-throw region. Ensure newly created stmts
1543 that could throw are part of the same region. */
1544 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1545 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1546 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1548 else
1549 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1552 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1553 which sets the same scalar result as STMT_INFO did. Create and return a
1554 stmt_vec_info for VEC_STMT. */
1556 void
1557 vect_finish_replace_stmt (vec_info *vinfo,
1558 stmt_vec_info stmt_info, gimple *vec_stmt)
1560 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1561 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1563 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1564 gsi_replace (&gsi, vec_stmt, true);
1566 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1569 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1570 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1572 void
1573 vect_finish_stmt_generation (vec_info *vinfo,
1574 stmt_vec_info stmt_info, gimple *vec_stmt,
1575 gimple_stmt_iterator *gsi)
1577 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1579 if (!gsi_end_p (*gsi)
1580 && gimple_has_mem_ops (vec_stmt))
1582 gimple *at_stmt = gsi_stmt (*gsi);
1583 tree vuse = gimple_vuse (at_stmt);
1584 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1586 tree vdef = gimple_vdef (at_stmt);
1587 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1588 gimple_set_modified (vec_stmt, true);
1589 /* If we have an SSA vuse and insert a store, update virtual
1590 SSA form to avoid triggering the renamer. Do so only
1591 if we can easily see all uses - which is what almost always
1592 happens with the way vectorized stmts are inserted. */
1593 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1594 && ((is_gimple_assign (vec_stmt)
1595 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1596 || (is_gimple_call (vec_stmt)
1597 && !(gimple_call_flags (vec_stmt)
1598 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1600 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1601 gimple_set_vdef (vec_stmt, new_vdef);
1602 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1606 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1607 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1610 /* We want to vectorize a call to combined function CFN with function
1611 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1612 as the types of all inputs. Check whether this is possible using
1613 an internal function, returning its code if so or IFN_LAST if not. */
1615 static internal_fn
1616 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1617 tree vectype_out, tree vectype_in)
1619 internal_fn ifn;
1620 if (internal_fn_p (cfn))
1621 ifn = as_internal_fn (cfn);
1622 else
1623 ifn = associated_internal_fn (fndecl);
1624 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1626 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1627 if (info.vectorizable)
1629 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1630 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1631 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1632 OPTIMIZE_FOR_SPEED))
1633 return ifn;
1636 return IFN_LAST;
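/* Editorial example (illustrative only, not from the upstream sources):
   for a sqrt call, the combined function CFN_SQRT maps to IFN_SQRT, and
   that code is returned only when direct_internal_fn_supported_p confirms
   the target implements it for the given vector types; otherwise IFN_LAST
   is returned.  */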
1640 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1641 gimple_stmt_iterator *);
1643 /* Check whether a load or store statement in the loop described by
1644 LOOP_VINFO is possible in a loop using partial vectors. This is
1645 testing whether the vectorizer pass has the appropriate support,
1646 as well as whether the target does.
1648 VLS_TYPE says whether the statement is a load or store and VECTYPE
1649 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1650 says how the load or store is going to be implemented and GROUP_SIZE
1651 is the number of load or store statements in the containing group.
1652 If the access is a gather load or scatter store, GS_INFO describes
1653 its arguments. If the load or store is conditional, SCALAR_MASK is the
1654 condition under which it occurs.
1656 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1657 vectors is not supported, otherwise record the required rgroup control
1658 types. */
1660 static void
1661 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1662 vec_load_store_type vls_type,
1663 int group_size,
1664 vect_memory_access_type
1665 memory_access_type,
1666 gather_scatter_info *gs_info,
1667 tree scalar_mask)
1669 /* Invariant loads need no special support. */
1670 if (memory_access_type == VMAT_INVARIANT)
1671 return;
1673 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1674 machine_mode vecmode = TYPE_MODE (vectype);
1675 bool is_load = (vls_type == VLS_LOAD);
1676 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1678 if (is_load
1679 ? !vect_load_lanes_supported (vectype, group_size, true)
1680 : !vect_store_lanes_supported (vectype, group_size, true))
1682 if (dump_enabled_p ())
1683 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1684 "can't operate on partial vectors because"
1685 " the target doesn't have an appropriate"
1686 " load/store-lanes instruction.\n");
1687 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1688 return;
1690 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1691 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1692 return;
1695 if (memory_access_type == VMAT_GATHER_SCATTER)
1697 internal_fn ifn = (is_load
1698 ? IFN_MASK_GATHER_LOAD
1699 : IFN_MASK_SCATTER_STORE);
1700 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1701 gs_info->memory_type,
1702 gs_info->offset_vectype,
1703 gs_info->scale))
1705 if (dump_enabled_p ())
1706 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1707 "can't operate on partial vectors because"
1708 " the target doesn't have an appropriate"
1709 " gather load or scatter store instruction.\n");
1710 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1711 return;
1713 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1714 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1715 return;
1718 if (memory_access_type != VMAT_CONTIGUOUS
1719 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1721 /* Element X of the data must come from iteration i * VF + X of the
1722 scalar loop. We need more work to support other mappings. */
1723 if (dump_enabled_p ())
1724 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1725 "can't operate on partial vectors because an"
1726 " access isn't contiguous.\n");
1727 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1728 return;
1731 if (!VECTOR_MODE_P (vecmode))
1733 if (dump_enabled_p ())
1734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1735 "can't operate on partial vectors when emulating"
1736 " vector operations.\n");
1737 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1738 return;
1741 /* We might load more scalars than we need for permuting SLP loads.
1742 We checked in get_group_load_store_type that the extra elements
1743 don't leak into a new vector. */
1744 auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
1746 unsigned int nvectors;
1747 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1748 return nvectors;
1749 gcc_unreachable ();
1752 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1753 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1754 machine_mode mask_mode;
1755 bool using_partial_vectors_p = false;
1756 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1757 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1759 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1760 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1761 using_partial_vectors_p = true;
1764 machine_mode vmode;
1765 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1767 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1768 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1769 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1770 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1771 using_partial_vectors_p = true;
1774 if (!using_partial_vectors_p)
1776 if (dump_enabled_p ())
1777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1778 "can't operate on partial vectors because the"
1779 " target doesn't have the appropriate partial"
1780 " vectorization load or store.\n");
1781 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1785 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1786 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1787 that needs to be applied to all loads and stores in a vectorized loop.
1788 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1790 MASK_TYPE is the type of both masks. If new statements are needed,
1791 insert them before GSI. */
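/* For instance (example values only), with LOOP_MASK = { 1, 1, 1, 0 }
   and VEC_MASK = { 1, 0, 1, 1 } the combined mask is { 1, 0, 1, 0 }:
   a lane is accessed only if the scalar condition holds and the loop
   mask enables that lane.  */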
1793 static tree
1794 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1795 gimple_stmt_iterator *gsi)
1797 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1798 if (!loop_mask)
1799 return vec_mask;
1801 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1802 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1803 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1804 vec_mask, loop_mask);
1805 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1806 return and_res;
1809 /* Determine whether we can use a gather load or scatter store to vectorize
1810 strided load or store STMT_INFO by truncating the current offset to a
1811 smaller width. We need to be able to construct an offset vector:
1813 { 0, X, X*2, X*3, ... }
1815 without loss of precision, where X is STMT_INFO's DR_STEP.
1817 Return true if this is possible, describing the gather load or scatter
1818 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
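/* As a worked example (assumed values): if DR_STEP is 4 bytes, the
   element size is 4 and the loop runs at most 255 further iterations,
   then with SCALE = 4 the per-iteration offset X is DR_STEP / SCALE = 1
   and all offsets { 0, 1, 2, ... } fit in an 8-bit unsigned type, so an
   8-bit offset vector can be used without losing precision.  */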
1820 static bool
1821 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1822 loop_vec_info loop_vinfo, bool masked_p,
1823 gather_scatter_info *gs_info)
1825 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1826 data_reference *dr = dr_info->dr;
1827 tree step = DR_STEP (dr);
1828 if (TREE_CODE (step) != INTEGER_CST)
1830 /* ??? Perhaps we could use range information here? */
1831 if (dump_enabled_p ())
1832 dump_printf_loc (MSG_NOTE, vect_location,
1833 "cannot truncate variable step.\n");
1834 return false;
1837 /* Get the number of bits in an element. */
1838 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1839 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1840 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1842 /* Set COUNT to the upper limit on the number of elements - 1.
1843 Start with the maximum vectorization factor. */
1844 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1846 /* Try lowering COUNT to the number of scalar latch iterations. */
1847 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1848 widest_int max_iters;
1849 if (max_loop_iterations (loop, &max_iters)
1850 && max_iters < count)
1851 count = max_iters.to_shwi ();
1853 /* Try scales of 1 and the element size. */
1854 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1855 wi::overflow_type overflow = wi::OVF_NONE;
1856 for (int i = 0; i < 2; ++i)
1858 int scale = scales[i];
1859 widest_int factor;
1860 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1861 continue;
1863 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1864 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1865 if (overflow)
1866 continue;
1867 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1868 unsigned int min_offset_bits = wi::min_precision (range, sign);
1870 /* Find the narrowest viable offset type. */
1871 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1872 tree offset_type = build_nonstandard_integer_type (offset_bits,
1873 sign == UNSIGNED);
1875 /* See whether the target supports the operation with an offset
1876 no narrower than OFFSET_TYPE. */
1877 tree memory_type = TREE_TYPE (DR_REF (dr));
1878 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1879 vectype, memory_type, offset_type, scale,
1880 &gs_info->ifn, &gs_info->offset_vectype)
1881 || gs_info->ifn == IFN_LAST)
1882 continue;
1884 gs_info->decl = NULL_TREE;
1885 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1886 but we don't need to store that here. */
1887 gs_info->base = NULL_TREE;
1888 gs_info->element_type = TREE_TYPE (vectype);
1889 gs_info->offset = fold_convert (offset_type, step);
1890 gs_info->offset_dt = vect_constant_def;
1891 gs_info->scale = scale;
1892 gs_info->memory_type = memory_type;
1893 return true;
1896 if (overflow && dump_enabled_p ())
1897 dump_printf_loc (MSG_NOTE, vect_location,
1898 "truncating gather/scatter offset to %d bits"
1899 " might change its value.\n", element_bits);
1901 return false;
1904 /* Return true if we can use gather/scatter internal functions to
1905 vectorize STMT_INFO, which is a grouped or strided load or store.
1906 MASKED_P is true if the load or store is conditional. When returning
1907 true, fill in GS_INFO with the information required to perform the
1908 operation. */
1910 static bool
1911 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1912 loop_vec_info loop_vinfo, bool masked_p,
1913 gather_scatter_info *gs_info)
1915 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1916 || gs_info->ifn == IFN_LAST)
1917 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1918 masked_p, gs_info);
1920 tree old_offset_type = TREE_TYPE (gs_info->offset);
1921 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1923 gcc_assert (TYPE_PRECISION (new_offset_type)
1924 >= TYPE_PRECISION (old_offset_type));
1925 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1927 if (dump_enabled_p ())
1928 dump_printf_loc (MSG_NOTE, vect_location,
1929 "using gather/scatter for strided/grouped access,"
1930 " scale = %d\n", gs_info->scale);
1932 return true;
1935 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1936 elements with a known constant step. Return -1 if that step
1937 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1939 static int
1940 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1942 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1943 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1944 size_zero_node);
1947 /* If the target supports a permute mask that reverses the elements in
1948 a vector of type VECTYPE, return that mask, otherwise return null. */
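/* For example, for a four-element vector the required selector is
   { 3, 2, 1, 0 }; the encoding below uses a single stepped pattern so
   that the same description also covers variable-length vectors.  */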
1950 static tree
1951 perm_mask_for_reverse (tree vectype)
1953 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1955 /* The encoding has a single stepped pattern. */
1956 vec_perm_builder sel (nunits, 1, 3);
1957 for (int i = 0; i < 3; ++i)
1958 sel.quick_push (nunits - 1 - i);
1960 vec_perm_indices indices (sel, 1, nunits);
1961 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1962 return NULL_TREE;
1963 return vect_gen_perm_mask_checked (vectype, indices);
1966 /* A subroutine of get_load_store_type, with a subset of the same
1967 arguments. Handle the case where STMT_INFO is a load or store that
1968 accesses consecutive elements with a negative step. */
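/* A hypothetical source loop for this case would be

     for (i = n - 1; i >= 0; i--)
       a[i] = b[i];

   where each data reference has a constant negative step (e.g. -4 for
   4-byte elements).  If a reversing permutation is available the
   accesses can stay contiguous (VMAT_CONTIGUOUS_REVERSE); otherwise we
   fall back to elementwise accesses.  */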
1970 static vect_memory_access_type
1971 get_negative_load_store_type (vec_info *vinfo,
1972 stmt_vec_info stmt_info, tree vectype,
1973 vec_load_store_type vls_type,
1974 unsigned int ncopies)
1976 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1977 dr_alignment_support alignment_support_scheme;
1979 if (ncopies > 1)
1981 if (dump_enabled_p ())
1982 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1983 "multiple types with negative step.\n");
1984 return VMAT_ELEMENTWISE;
1987 alignment_support_scheme = vect_supportable_dr_alignment (vinfo,
1988 dr_info, false);
1989 if (alignment_support_scheme != dr_aligned
1990 && alignment_support_scheme != dr_unaligned_supported)
1992 if (dump_enabled_p ())
1993 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1994 "negative step but alignment required.\n");
1995 return VMAT_ELEMENTWISE;
1998 if (vls_type == VLS_STORE_INVARIANT)
2000 if (dump_enabled_p ())
2001 dump_printf_loc (MSG_NOTE, vect_location,
2002 "negative step with invariant source;"
2003 " no permute needed.\n");
2004 return VMAT_CONTIGUOUS_DOWN;
2007 if (!perm_mask_for_reverse (vectype))
2009 if (dump_enabled_p ())
2010 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2011 "negative step and reversing not supported.\n");
2012 return VMAT_ELEMENTWISE;
2015 return VMAT_CONTIGUOUS_REVERSE;
2018 /* STMT_INFO is either a masked or unconditional store. Return the value
2019 being stored. */
2021 tree
2022 vect_get_store_rhs (stmt_vec_info stmt_info)
2024 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2026 gcc_assert (gimple_assign_single_p (assign));
2027 return gimple_assign_rhs1 (assign);
2029 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2031 internal_fn ifn = gimple_call_internal_fn (call);
2032 int index = internal_fn_stored_value_index (ifn);
2033 gcc_assert (index >= 0);
2034 return gimple_call_arg (call, index);
2036 gcc_unreachable ();
2039 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2041 This function returns a vector type which can be composed with NELTS pieces,
2042 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
2043 same vector size as the return vector. It first checks whether the target
2044 supports construction from pieces-size vector modes; if not, it then checks
2045 construction from a pieces-size scalar mode. It returns NULL_TREE if no
2046 suitable composition can be found.
2048 For example, for (vtype=V16QI, nelts=4), we can probably get:
2049 - V16QI with PTYPE V4QI.
2050 - V4SI with PTYPE SI.
2051 - NULL_TREE. */
2053 static tree
2054 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2056 gcc_assert (VECTOR_TYPE_P (vtype));
2057 gcc_assert (known_gt (nelts, 0U));
2059 machine_mode vmode = TYPE_MODE (vtype);
2060 if (!VECTOR_MODE_P (vmode))
2061 return NULL_TREE;
2063 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2064 unsigned int pbsize;
2065 if (constant_multiple_p (vbsize, nelts, &pbsize))
2067 /* First check if vec_init optab supports construction from
2068 vector pieces directly. */
2069 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2070 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2071 machine_mode rmode;
2072 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2073 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2074 != CODE_FOR_nothing))
2076 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2077 return vtype;
2080 /* Otherwise check if exists an integer type of the same piece size and
2081 if vec_init optab supports construction from it directly. */
2082 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2083 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2084 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2085 != CODE_FOR_nothing))
2087 *ptype = build_nonstandard_integer_type (pbsize, 1);
2088 return build_vector_type (*ptype, nelts);
2092 return NULL_TREE;
2095 /* A subroutine of get_load_store_type, with a subset of the same
2096 arguments. Handle the case where STMT_INFO is part of a grouped load
2097 or store.
2099 For stores, the statements in the group are all consecutive
2100 and there is no gap at the end. For loads, the statements in the
2101 group might not be consecutive; there can be gaps between statements
2102 as well as at the end. */
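/* As an illustration, a hypothetical grouped load with a gap:

     for (i = 0; i < n; i++)
       {
	 x[i] = a[3 * i];
	 y[i] = a[3 * i + 1];
       }

   Here the group size is 3 and there is a gap of 1 at the end of the
   group, because a[3 * i + 2] is never read.  */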
2104 static bool
2105 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2106 tree vectype, slp_tree slp_node,
2107 bool masked_p, vec_load_store_type vls_type,
2108 vect_memory_access_type *memory_access_type,
2109 dr_alignment_support *alignment_support_scheme,
2110 gather_scatter_info *gs_info)
2112 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2113 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2114 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2115 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2116 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2117 bool single_element_p = (stmt_info == first_stmt_info
2118 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2119 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2120 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2122 /* True if the vectorized statements would access beyond the last
2123 statement in the group. */
2124 bool overrun_p = false;
2126 /* True if we can cope with such overrun by peeling for gaps, so that
2127 there is at least one final scalar iteration after the vector loop. */
2128 bool can_overrun_p = (!masked_p
2129 && vls_type == VLS_LOAD
2130 && loop_vinfo
2131 && !loop->inner);
2133 /* There can only be a gap at the end of the group if the stride is
2134 known at compile time. */
2135 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2137 /* Stores can't yet have gaps. */
2138 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2140 if (slp_node)
2142 /* For SLP vectorization we directly vectorize a subchain
2143 without permutation. */
2144 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2145 first_dr_info
2146 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2147 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2149 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2150 separated by the stride, until we have a complete vector.
2151 Fall back to scalar accesses if that isn't possible. */
2152 if (multiple_p (nunits, group_size))
2153 *memory_access_type = VMAT_STRIDED_SLP;
2154 else
2155 *memory_access_type = VMAT_ELEMENTWISE;
2157 else
2159 overrun_p = loop_vinfo && gap != 0;
2160 if (overrun_p && vls_type != VLS_LOAD)
2162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2163 "Grouped store with gaps requires"
2164 " non-consecutive accesses\n");
2165 return false;
2167 /* An overrun is fine if the trailing elements are smaller
2168 than the alignment boundary B. Every vector access will
2169 be a multiple of B and so we are guaranteed to access a
2170 non-gap element in the same B-sized block. */
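/* For instance (assumed numbers): with 4-byte elements, a known
   alignment of 16 bytes and a gap of 1, the gap is smaller than
   16 / 4 = 4 elements, so every vector access still covers a real
   (non-gap) element of its 16-byte block and cannot fault.  */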
2171 if (overrun_p
2172 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2173 / vect_get_scalar_dr_size (first_dr_info)))
2174 overrun_p = false;
2176 /* If the gap splits the vector in half and the target
2177 can do half-vector operations, avoid the epilogue peeling
2178 by simply loading half of the vector only. Usually
2179 the construction with an upper zero half will be elided. */
2180 dr_alignment_support alignment_support_scheme;
2181 tree half_vtype;
2182 if (overrun_p
2183 && !masked_p
2184 && (((alignment_support_scheme
2185 = vect_supportable_dr_alignment (vinfo,
2186 first_dr_info, false)))
2187 == dr_aligned
2188 || alignment_support_scheme == dr_unaligned_supported)
2189 && known_eq (nunits, (group_size - gap) * 2)
2190 && known_eq (nunits, group_size)
2191 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2192 != NULL_TREE))
2193 overrun_p = false;
2195 if (overrun_p && !can_overrun_p)
2197 if (dump_enabled_p ())
2198 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2199 "Peeling for outer loop is not supported\n");
2200 return false;
2202 int cmp = compare_step_with_zero (vinfo, stmt_info);
2203 if (cmp < 0)
2205 if (single_element_p)
2206 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2207 only correct for single element "interleaving" SLP. */
2208 *memory_access_type = get_negative_load_store_type
2209 (vinfo, stmt_info, vectype, vls_type, 1);
2210 else
2212 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2213 separated by the stride, until we have a complete vector.
2214 Fall back to scalar accesses if that isn't possible. */
2215 if (multiple_p (nunits, group_size))
2216 *memory_access_type = VMAT_STRIDED_SLP;
2217 else
2218 *memory_access_type = VMAT_ELEMENTWISE;
2221 else
2223 gcc_assert (!loop_vinfo || cmp > 0);
2224 *memory_access_type = VMAT_CONTIGUOUS;
2228 else
2230 /* We can always handle this case using elementwise accesses,
2231 but see if something more efficient is available. */
2232 *memory_access_type = VMAT_ELEMENTWISE;
2234 /* If there is a gap at the end of the group then these optimizations
2235 would access excess elements in the last iteration. */
2236 bool would_overrun_p = (gap != 0);
2237 /* An overrun is fine if the trailing elements are smaller than the
2238 alignment boundary B. Every vector access will be a multiple of B
2239 and so we are guaranteed to access a non-gap element in the
2240 same B-sized block. */
2241 if (would_overrun_p
2242 && !masked_p
2243 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2244 / vect_get_scalar_dr_size (first_dr_info)))
2245 would_overrun_p = false;
2247 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2248 && (can_overrun_p || !would_overrun_p)
2249 && compare_step_with_zero (vinfo, stmt_info) > 0)
2251 /* First cope with the degenerate case of a single-element
2252 vector. */
2253 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2256 /* Otherwise try using LOAD/STORE_LANES. */
2257 else if (vls_type == VLS_LOAD
2258 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2259 : vect_store_lanes_supported (vectype, group_size,
2260 masked_p))
2262 *memory_access_type = VMAT_LOAD_STORE_LANES;
2263 overrun_p = would_overrun_p;
2266 /* If that fails, try using permuting loads. */
2267 else if (vls_type == VLS_LOAD
2268 ? vect_grouped_load_supported (vectype, single_element_p,
2269 group_size)
2270 : vect_grouped_store_supported (vectype, group_size))
2272 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2273 overrun_p = would_overrun_p;
2277 /* As a last resort, try using a gather load or scatter store.
2279 ??? Although the code can handle all group sizes correctly,
2280 it probably isn't a win to use separate strided accesses based
2281 on nearby locations. Or, even if it's a win over scalar code,
2282 it might not be a win over vectorizing at a lower VF, if that
2283 allows us to use contiguous accesses. */
2284 if (*memory_access_type == VMAT_ELEMENTWISE
2285 && single_element_p
2286 && loop_vinfo
2287 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2288 masked_p, gs_info))
2289 *memory_access_type = VMAT_GATHER_SCATTER;
2292 if (*memory_access_type == VMAT_GATHER_SCATTER
2293 || *memory_access_type == VMAT_ELEMENTWISE)
2294 *alignment_support_scheme = dr_unaligned_supported;
2295 else
2296 *alignment_support_scheme
2297 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
2299 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2301 /* STMT is the leader of the group. Check the operands of all the
2302 stmts of the group. */
2303 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2304 while (next_stmt_info)
2306 tree op = vect_get_store_rhs (next_stmt_info);
2307 enum vect_def_type dt;
2308 if (!vect_is_simple_use (op, vinfo, &dt))
2310 if (dump_enabled_p ())
2311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2312 "use not simple.\n");
2313 return false;
2315 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2319 if (overrun_p)
2321 gcc_assert (can_overrun_p);
2322 if (dump_enabled_p ())
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2324 "Data access with gaps requires scalar "
2325 "epilogue loop\n");
2326 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2329 return true;
2332 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2333 if there is a memory access type that the vectorized form can use,
2334 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2335 or scatters, fill in GS_INFO accordingly. In addition
2336 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2337 the target does not support the alignment scheme.
2339 SLP says whether we're performing SLP rather than loop vectorization.
2340 MASKED_P is true if the statement is conditional on a vectorized mask.
2341 VECTYPE is the vector type that the vectorized statements will use.
2342 NCOPIES is the number of vector statements that will be needed. */
2344 static bool
2345 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2346 tree vectype, slp_tree slp_node,
2347 bool masked_p, vec_load_store_type vls_type,
2348 unsigned int ncopies,
2349 vect_memory_access_type *memory_access_type,
2350 dr_alignment_support *alignment_support_scheme,
2351 gather_scatter_info *gs_info)
2353 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2354 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2355 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2357 *memory_access_type = VMAT_GATHER_SCATTER;
2358 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2359 gcc_unreachable ();
2360 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2361 &gs_info->offset_dt,
2362 &gs_info->offset_vectype))
2364 if (dump_enabled_p ())
2365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2366 "%s index use not simple.\n",
2367 vls_type == VLS_LOAD ? "gather" : "scatter");
2368 return false;
2370 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2372 if (vls_type != VLS_LOAD)
2374 if (dump_enabled_p ())
2375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2376 "unsupported emulated scatter.\n");
2377 return false;
2379 else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2380 || !TYPE_VECTOR_SUBPARTS
2381 (gs_info->offset_vectype).is_constant ()
2382 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2383 (gs_info->offset_vectype),
2384 TYPE_VECTOR_SUBPARTS (vectype)))
2386 if (dump_enabled_p ())
2387 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2388 "unsupported vector types for emulated "
2389 "gather.\n");
2390 return false;
2393 /* Gather-scatter accesses perform only component accesses; alignment
2394 is irrelevant for them. */
2395 *alignment_support_scheme = dr_unaligned_supported;
2397 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2399 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2400 masked_p,
2401 vls_type, memory_access_type,
2402 alignment_support_scheme, gs_info))
2403 return false;
2405 else if (STMT_VINFO_STRIDED_P (stmt_info))
2407 gcc_assert (!slp_node);
2408 if (loop_vinfo
2409 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2410 masked_p, gs_info))
2411 *memory_access_type = VMAT_GATHER_SCATTER;
2412 else
2413 *memory_access_type = VMAT_ELEMENTWISE;
2414 /* Alignment is irrelevant here. */
2415 *alignment_support_scheme = dr_unaligned_supported;
2417 else
2419 int cmp = compare_step_with_zero (vinfo, stmt_info);
2420 if (cmp == 0)
2422 gcc_assert (vls_type == VLS_LOAD);
2423 *memory_access_type = VMAT_INVARIANT;
2424 /* Invariant accesses perform only component accesses; alignment
2425 is irrelevant for them. */
2426 *alignment_support_scheme = dr_unaligned_supported;
2428 else
2430 if (cmp < 0)
2431 *memory_access_type = get_negative_load_store_type
2432 (vinfo, stmt_info, vectype, vls_type, ncopies);
2433 else
2434 *memory_access_type = VMAT_CONTIGUOUS;
2435 *alignment_support_scheme
2436 = vect_supportable_dr_alignment (vinfo,
2437 STMT_VINFO_DR_INFO (stmt_info),
2438 false);
2442 if ((*memory_access_type == VMAT_ELEMENTWISE
2443 || *memory_access_type == VMAT_STRIDED_SLP)
2444 && !nunits.is_constant ())
2446 if (dump_enabled_p ())
2447 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2448 "Not using elementwise accesses due to variable "
2449 "vectorization factor.\n");
2450 return false;
2453 if (*alignment_support_scheme == dr_unaligned_unsupported)
2455 if (dump_enabled_p ())
2456 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2457 "unsupported unaligned access\n");
2458 return false;
2461 /* FIXME: At the moment the cost model seems to underestimate the
2462 cost of using elementwise accesses. This check preserves the
2463 traditional behavior until that can be fixed. */
2464 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2465 if (!first_stmt_info)
2466 first_stmt_info = stmt_info;
2467 if (*memory_access_type == VMAT_ELEMENTWISE
2468 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2469 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2470 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2471 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2473 if (dump_enabled_p ())
2474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2475 "not falling back to elementwise accesses\n");
2476 return false;
2478 return true;
2481 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2482 conditional operation STMT_INFO. When returning true, store the mask
2483 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2484 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2485 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2487 static bool
2488 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2489 slp_tree slp_node, unsigned mask_index,
2490 tree *mask, slp_tree *mask_node,
2491 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2493 enum vect_def_type mask_dt;
2494 tree mask_vectype;
2495 slp_tree mask_node_1;
2496 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2497 mask, &mask_node_1, &mask_dt, &mask_vectype))
2499 if (dump_enabled_p ())
2500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2501 "mask use not simple.\n");
2502 return false;
2505 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2507 if (dump_enabled_p ())
2508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2509 "mask argument is not a boolean.\n");
2510 return false;
2513 /* If the caller is not prepared to adjust an external/constant
2514 SLP mask vector type, fail. */
2515 if (slp_node
2516 && !mask_node
2517 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2519 if (dump_enabled_p ())
2520 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2521 "SLP mask argument is not vectorized.\n");
2522 return false;
2525 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2526 if (!mask_vectype)
2527 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2529 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2531 if (dump_enabled_p ())
2532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2533 "could not find an appropriate vector mask type.\n");
2534 return false;
2537 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2538 TYPE_VECTOR_SUBPARTS (vectype)))
2540 if (dump_enabled_p ())
2541 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2542 "vector mask type %T"
2543 " does not match vector data type %T.\n",
2544 mask_vectype, vectype);
2546 return false;
2549 *mask_dt_out = mask_dt;
2550 *mask_vectype_out = mask_vectype;
2551 if (mask_node)
2552 *mask_node = mask_node_1;
2553 return true;
2556 /* Return true if stored value RHS is suitable for vectorizing store
2557 statement STMT_INFO. When returning true, store the type of the
2558 definition in *RHS_DT_OUT, the type of the vectorized store value in
2559 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2561 static bool
2562 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2563 slp_tree slp_node, tree rhs,
2564 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2565 vec_load_store_type *vls_type_out)
2567 /* If this is a store from a constant, make sure
2568 native_encode_expr can handle it. */
2569 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2571 if (dump_enabled_p ())
2572 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2573 "cannot encode constant as a byte sequence.\n");
2574 return false;
2577 unsigned op_no = 0;
2578 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2580 if (gimple_call_internal_p (call)
2581 && internal_store_fn_p (gimple_call_internal_fn (call)))
2582 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2585 enum vect_def_type rhs_dt;
2586 tree rhs_vectype;
2587 slp_tree slp_op;
2588 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2589 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2591 if (dump_enabled_p ())
2592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2593 "use not simple.\n");
2594 return false;
2597 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2598 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2600 if (dump_enabled_p ())
2601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2602 "incompatible vector types.\n");
2603 return false;
2606 *rhs_dt_out = rhs_dt;
2607 *rhs_vectype_out = rhs_vectype;
2608 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2609 *vls_type_out = VLS_STORE_INVARIANT;
2610 else
2611 *vls_type_out = VLS_STORE;
2612 return true;
2615 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2616 Note that we support masks with floating-point type, in which case the
2617 floats are interpreted as a bitmask. */
2619 static tree
2620 vect_build_all_ones_mask (vec_info *vinfo,
2621 stmt_vec_info stmt_info, tree masktype)
2623 if (TREE_CODE (masktype) == INTEGER_TYPE)
2624 return build_int_cst (masktype, -1);
2625 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2627 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2628 mask = build_vector_from_val (masktype, mask);
2629 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2631 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2633 REAL_VALUE_TYPE r;
2634 long tmp[6];
2635 for (int j = 0; j < 6; ++j)
2636 tmp[j] = -1;
2637 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2638 tree mask = build_real (TREE_TYPE (masktype), r);
2639 mask = build_vector_from_val (masktype, mask);
2640 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2642 gcc_unreachable ();
2645 /* Build an all-zero merge value of type VECTYPE while vectorizing
2646 STMT_INFO as a gather load. */
2648 static tree
2649 vect_build_zero_merge_argument (vec_info *vinfo,
2650 stmt_vec_info stmt_info, tree vectype)
2652 tree merge;
2653 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2654 merge = build_int_cst (TREE_TYPE (vectype), 0);
2655 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2657 REAL_VALUE_TYPE r;
2658 long tmp[6];
2659 for (int j = 0; j < 6; ++j)
2660 tmp[j] = 0;
2661 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2662 merge = build_real (TREE_TYPE (vectype), r);
2664 else
2665 gcc_unreachable ();
2666 merge = build_vector_from_val (vectype, merge);
2667 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2670 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2671 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2672 the gather load operation. If the load is conditional, MASK is the
2673 unvectorized condition and MASK_DT is its definition type, otherwise
2674 MASK is null. */
2676 static void
2677 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2678 gimple_stmt_iterator *gsi,
2679 gimple **vec_stmt,
2680 gather_scatter_info *gs_info,
2681 tree mask)
2683 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2684 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2685 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2686 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2687 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2688 edge pe = loop_preheader_edge (loop);
2689 enum { NARROW, NONE, WIDEN } modifier;
2690 poly_uint64 gather_off_nunits
2691 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2693 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2694 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2695 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2696 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2697 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2698 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2699 tree scaletype = TREE_VALUE (arglist);
2700 tree real_masktype = masktype;
2701 gcc_checking_assert (types_compatible_p (srctype, rettype)
2702 && (!mask
2703 || TREE_CODE (masktype) == INTEGER_TYPE
2704 || types_compatible_p (srctype, masktype)));
2705 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2706 masktype = truth_type_for (srctype);
2708 tree mask_halftype = masktype;
2709 tree perm_mask = NULL_TREE;
2710 tree mask_perm_mask = NULL_TREE;
2711 if (known_eq (nunits, gather_off_nunits))
2712 modifier = NONE;
2713 else if (known_eq (nunits * 2, gather_off_nunits))
2715 modifier = WIDEN;
2717 /* Currently widening gathers and scatters are only supported for
2718 fixed-length vectors. */
2719 int count = gather_off_nunits.to_constant ();
2720 vec_perm_builder sel (count, count, 1);
2721 for (int i = 0; i < count; ++i)
2722 sel.quick_push (i | (count / 2));
2724 vec_perm_indices indices (sel, 1, count);
2725 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2726 indices);
2728 else if (known_eq (nunits, gather_off_nunits * 2))
2730 modifier = NARROW;
2732 /* Currently narrowing gathers and scatters are only supported for
2733 fixed-length vectors. */
2734 int count = nunits.to_constant ();
2735 vec_perm_builder sel (count, count, 1);
2736 sel.quick_grow (count);
2737 for (int i = 0; i < count; ++i)
2738 sel[i] = i < count / 2 ? i : i + count / 2;
2739 vec_perm_indices indices (sel, 2, count);
2740 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2742 ncopies *= 2;
2744 if (mask && masktype == real_masktype)
2746 for (int i = 0; i < count; ++i)
2747 sel[i] = i | (count / 2);
2748 indices.new_vector (sel, 2, count);
2749 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2751 else if (mask)
2752 mask_halftype = truth_type_for (gs_info->offset_vectype);
2754 else
2755 gcc_unreachable ();
2757 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2758 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2760 tree ptr = fold_convert (ptrtype, gs_info->base);
2761 if (!is_gimple_min_invariant (ptr))
2763 gimple_seq seq;
2764 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2765 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2766 gcc_assert (!new_bb);
2769 tree scale = build_int_cst (scaletype, gs_info->scale);
2771 tree vec_oprnd0 = NULL_TREE;
2772 tree vec_mask = NULL_TREE;
2773 tree src_op = NULL_TREE;
2774 tree mask_op = NULL_TREE;
2775 tree prev_res = NULL_TREE;
2777 if (!mask)
2779 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2780 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2783 auto_vec<tree> vec_oprnds0;
2784 auto_vec<tree> vec_masks;
2785 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2786 modifier == WIDEN ? ncopies / 2 : ncopies,
2787 gs_info->offset, &vec_oprnds0);
2788 if (mask)
2789 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2790 modifier == NARROW ? ncopies / 2 : ncopies,
2791 mask, &vec_masks);
2792 for (int j = 0; j < ncopies; ++j)
2794 tree op, var;
2795 if (modifier == WIDEN && (j & 1))
2796 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2797 perm_mask, stmt_info, gsi);
2798 else
2799 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2801 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2803 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2804 TYPE_VECTOR_SUBPARTS (idxtype)));
2805 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2806 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2807 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2808 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2809 op = var;
2812 if (mask)
2814 if (mask_perm_mask && (j & 1))
2815 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2816 mask_perm_mask, stmt_info, gsi);
2817 else
2819 if (modifier == NARROW)
2821 if ((j & 1) == 0)
2822 vec_mask = vec_masks[j / 2];
2824 else
2825 vec_mask = vec_masks[j];
2827 mask_op = vec_mask;
2828 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2830 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2831 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2832 gcc_assert (known_eq (sub1, sub2));
2833 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2834 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2835 gassign *new_stmt
2836 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2837 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2838 mask_op = var;
2841 if (modifier == NARROW && masktype != real_masktype)
2843 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2844 gassign *new_stmt
2845 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2846 : VEC_UNPACK_LO_EXPR,
2847 mask_op);
2848 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2849 mask_op = var;
2851 src_op = mask_op;
2854 tree mask_arg = mask_op;
2855 if (masktype != real_masktype)
2857 tree utype, optype = TREE_TYPE (mask_op);
2858 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2859 utype = real_masktype;
2860 else
2861 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2862 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2863 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2864 gassign *new_stmt
2865 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2866 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2867 mask_arg = var;
2868 if (!useless_type_conversion_p (real_masktype, utype))
2870 gcc_assert (TYPE_PRECISION (utype)
2871 <= TYPE_PRECISION (real_masktype));
2872 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2873 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2874 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2875 mask_arg = var;
2877 src_op = build_zero_cst (srctype);
2879 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2880 mask_arg, scale);
2882 if (!useless_type_conversion_p (vectype, rettype))
2884 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2885 TYPE_VECTOR_SUBPARTS (rettype)));
2886 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2887 gimple_call_set_lhs (new_stmt, op);
2888 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2889 var = make_ssa_name (vec_dest);
2890 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2891 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2892 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2894 else
2896 var = make_ssa_name (vec_dest, new_stmt);
2897 gimple_call_set_lhs (new_stmt, var);
2898 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2901 if (modifier == NARROW)
2903 if ((j & 1) == 0)
2905 prev_res = var;
2906 continue;
2908 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2909 stmt_info, gsi);
2910 new_stmt = SSA_NAME_DEF_STMT (var);
2913 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2915 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2918 /* Prepare the base and offset in GS_INFO for vectorization.
2919 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2920 to the vectorized offset argument for the first copy of STMT_INFO.
2921 STMT_INFO is the statement described by GS_INFO and LOOP is the
2922 containing loop. */
2924 static void
2925 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2926 class loop *loop, stmt_vec_info stmt_info,
2927 gather_scatter_info *gs_info,
2928 tree *dataref_ptr, vec<tree> *vec_offset)
2930 gimple_seq stmts = NULL;
2931 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2932 if (stmts != NULL)
2934 basic_block new_bb;
2935 edge pe = loop_preheader_edge (loop);
2936 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2937 gcc_assert (!new_bb);
2939 unsigned ncopies = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2940 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2941 gs_info->offset, vec_offset,
2942 gs_info->offset_vectype);
2945 /* Prepare to implement a grouped or strided load or store using
2946 the gather load or scatter store operation described by GS_INFO.
2947 STMT_INFO is the load or store statement.
2949 Set *DATAREF_BUMP to the amount that should be added to the base
2950 address after each copy of the vectorized statement. Set *VEC_OFFSET
2951 to an invariant offset vector in which element I has the value
2952 I * DR_STEP / SCALE. */
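/* As a worked example (assumed values): with DR_STEP = 12, SCALE = 4
   and a four-element vector, X = 12 / 4 = 3, so *VEC_OFFSET is
   { 0, 3, 6, 9 } and *DATAREF_BUMP is 12 * 4 = 48 bytes per copy.  */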
2954 static void
2955 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2956 loop_vec_info loop_vinfo,
2957 gather_scatter_info *gs_info,
2958 tree *dataref_bump, tree *vec_offset)
2960 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2961 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2963 tree bump = size_binop (MULT_EXPR,
2964 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2965 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2966 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2968 /* The offset given in GS_INFO can have pointer type, so use the element
2969 type of the vector instead. */
2970 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2972 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2973 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2974 ssize_int (gs_info->scale));
2975 step = fold_convert (offset_type, step);
2977 /* Create {0, X, X*2, X*3, ...}. */
2978 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2979 build_zero_cst (offset_type), step);
2980 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
2983 /* Return the amount that should be added to a vector pointer to move
2984 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2985 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2986 vectorization. */
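/* For example, for a 16-byte AGGR_TYPE the increment is 16, or -16 when
   the data reference has a negative step (contiguous-reverse accesses);
   invariant accesses get an increment of zero.  */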
2988 static tree
2989 vect_get_data_ptr_increment (vec_info *vinfo,
2990 dr_vec_info *dr_info, tree aggr_type,
2991 vect_memory_access_type memory_access_type)
2993 if (memory_access_type == VMAT_INVARIANT)
2994 return size_zero_node;
2996 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2997 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2998 if (tree_int_cst_sgn (step) == -1)
2999 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3000 return iv_step;
3003 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3005 static bool
3006 vectorizable_bswap (vec_info *vinfo,
3007 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3008 gimple **vec_stmt, slp_tree slp_node,
3009 slp_tree *slp_op,
3010 tree vectype_in, stmt_vector_for_cost *cost_vec)
3012 tree op, vectype;
3013 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3014 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3015 unsigned ncopies;
3017 op = gimple_call_arg (stmt, 0);
3018 vectype = STMT_VINFO_VECTYPE (stmt_info);
3019 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3021 /* Multiple types in SLP are handled by creating the appropriate number of
3022 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3023 case of SLP. */
3024 if (slp_node)
3025 ncopies = 1;
3026 else
3027 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3029 gcc_assert (ncopies >= 1);
3031 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3032 if (! char_vectype)
3033 return false;
3035 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3036 unsigned word_bytes;
3037 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3038 return false;
3040 /* The encoding uses one stepped pattern for each byte in the word. */
3041 vec_perm_builder elts (num_bytes, word_bytes, 3);
3042 for (unsigned i = 0; i < 3; ++i)
3043 for (unsigned j = 0; j < word_bytes; ++j)
3044 elts.quick_push ((i + 1) * word_bytes - j - 1);
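/* E.g. for __builtin_bswap32 on a 16-byte vector (word_bytes == 4) the
   resulting indices are { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8,
   15, 14, 13, 12 }, i.e. each 4-byte word is byte-reversed in place.  */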
3046 vec_perm_indices indices (elts, 1, num_bytes);
3047 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3048 return false;
3050 if (! vec_stmt)
3052 if (slp_node
3053 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3055 if (dump_enabled_p ())
3056 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3057 "incompatible vector types for invariants\n");
3058 return false;
3061 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3062 DUMP_VECT_SCOPE ("vectorizable_bswap");
3063 record_stmt_cost (cost_vec,
3064 1, vector_stmt, stmt_info, 0, vect_prologue);
3065 record_stmt_cost (cost_vec,
3066 slp_node
3067 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3068 vec_perm, stmt_info, 0, vect_body);
3069 return true;
3072 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3074 /* Transform. */
3075 vec<tree> vec_oprnds = vNULL;
3076 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3077 op, &vec_oprnds);
3078 /* Arguments are ready. Create the new vector stmt. */
3079 unsigned i;
3080 tree vop;
3081 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3083 gimple *new_stmt;
3084 tree tem = make_ssa_name (char_vectype);
3085 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3086 char_vectype, vop));
3087 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3088 tree tem2 = make_ssa_name (char_vectype);
3089 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3090 tem, tem, bswap_vconst);
3091 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3092 tem = make_ssa_name (vectype);
3093 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3094 vectype, tem2));
3095 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3096 if (slp_node)
3097 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3098 else
3099 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3102 if (!slp_node)
3103 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3105 vec_oprnds.release ();
3106 return true;
3109 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3110 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3111 in a single step. On success, store the binary pack code in
3112 *CONVERT_CODE. */
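/* For instance (illustrative), narrowing V2DI inputs to a V4SI output
   typically packs two V2DI vectors with VEC_PACK_TRUNC_EXPR, which is
   then the code stored in *CONVERT_CODE.  */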
3114 static bool
3115 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3116 tree_code *convert_code)
3118 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3119 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3120 return false;
3122 tree_code code;
3123 int multi_step_cvt = 0;
3124 auto_vec <tree, 8> interm_types;
3125 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3126 &code, &multi_step_cvt, &interm_types)
3127 || multi_step_cvt)
3128 return false;
3130 *convert_code = code;
3131 return true;
3134 /* Function vectorizable_call.
3136 Check if STMT_INFO performs a function call that can be vectorized.
3137 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3138 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3139 Return true if STMT_INFO is vectorizable in this way. */
3141 static bool
3142 vectorizable_call (vec_info *vinfo,
3143 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3144 gimple **vec_stmt, slp_tree slp_node,
3145 stmt_vector_for_cost *cost_vec)
3147 gcall *stmt;
3148 tree vec_dest;
3149 tree scalar_dest;
3150 tree op;
3151 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3152 tree vectype_out, vectype_in;
3153 poly_uint64 nunits_in;
3154 poly_uint64 nunits_out;
3155 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3156 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3157 tree fndecl, new_temp, rhs_type;
3158 enum vect_def_type dt[4]
3159 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3160 vect_unknown_def_type };
3161 tree vectypes[ARRAY_SIZE (dt)] = {};
3162 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3163 int ndts = ARRAY_SIZE (dt);
3164 int ncopies, j;
3165 auto_vec<tree, 8> vargs;
3166 auto_vec<tree, 8> orig_vargs;
3167 enum { NARROW, NONE, WIDEN } modifier;
3168 size_t i, nargs;
3169 tree lhs;
3171 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3172 return false;
3174 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3175 && ! vec_stmt)
3176 return false;
3178 /* Is STMT_INFO a vectorizable call? */
3179 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3180 if (!stmt)
3181 return false;
3183 if (gimple_call_internal_p (stmt)
3184 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3185 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3186 /* Handled by vectorizable_load and vectorizable_store. */
3187 return false;
3189 if (gimple_call_lhs (stmt) == NULL_TREE
3190 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3191 return false;
3193 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3195 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3197 /* Process function arguments. */
3198 rhs_type = NULL_TREE;
3199 vectype_in = NULL_TREE;
3200 nargs = gimple_call_num_args (stmt);
3202 /* Bail out if the function has more than four arguments; we do not have
3203 interesting builtin functions to vectorize with more than two arguments
3204 except for fma. Having no arguments is not good either. */
3205 if (nargs == 0 || nargs > 4)
3206 return false;
3208 /* Ignore the arguments of IFN_GOMP_SIMD_LANE; they are magic. */
3209 combined_fn cfn = gimple_call_combined_fn (stmt);
3210 if (cfn == CFN_GOMP_SIMD_LANE)
3212 nargs = 0;
3213 rhs_type = unsigned_type_node;
3216 int mask_opno = -1;
3217 if (internal_fn_p (cfn))
3218 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3220 for (i = 0; i < nargs; i++)
3222 if ((int) i == mask_opno)
3224 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3225 &op, &slp_op[i], &dt[i], &vectypes[i]))
3226 return false;
3227 continue;
3230 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3231 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3233 if (dump_enabled_p ())
3234 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3235 "use not simple.\n");
3236 return false;
3239 /* We can only handle calls with arguments of the same type. */
3240 if (rhs_type
3241 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3243 if (dump_enabled_p ())
3244 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3245 "argument types differ.\n");
3246 return false;
3248 if (!rhs_type)
3249 rhs_type = TREE_TYPE (op);
3251 if (!vectype_in)
3252 vectype_in = vectypes[i];
3253 else if (vectypes[i]
3254 && !types_compatible_p (vectypes[i], vectype_in))
3256 if (dump_enabled_p ())
3257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3258 "argument vector types differ.\n");
3259 return false;
3262 /* If all arguments are external or constant defs, infer the vector type
3263 from the scalar type. */
3264 if (!vectype_in)
3265 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3266 if (vec_stmt)
3267 gcc_assert (vectype_in);
3268 if (!vectype_in)
3270 if (dump_enabled_p ())
3271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3272 "no vectype for scalar type %T\n", rhs_type);
3274 return false;
3276 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3277 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3278 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3279 by a pack of the two vectors into an SI vector. We would need
3280 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3281 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3283 if (dump_enabled_p ())
3284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3285 "mismatched vector sizes %T and %T\n",
3286 vectype_in, vectype_out);
3287 return false;
3290 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3291 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3293 if (dump_enabled_p ())
3294 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3295 "mixed mask and nonmask vector types\n");
3296 return false;
3299 /* FORNOW */
3300 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3301 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3302 if (known_eq (nunits_in * 2, nunits_out))
3303 modifier = NARROW;
3304 else if (known_eq (nunits_out, nunits_in))
3305 modifier = NONE;
3306 else if (known_eq (nunits_out * 2, nunits_in))
3307 modifier = WIDEN;
3308 else
3309 return false;
3311 /* We only handle functions that do not read or clobber memory. */
3312 if (gimple_vuse (stmt))
3314 if (dump_enabled_p ())
3315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3316 "function reads from or writes to memory.\n");
3317 return false;
3320 /* For now, we only vectorize functions if a target-specific builtin
3321 is available. TODO -- in some cases, it might be profitable to
3322 insert the calls for pieces of the vector, in order to be able
3323 to vectorize other operations in the loop. */
3324 fndecl = NULL_TREE;
3325 internal_fn ifn = IFN_LAST;
3326 tree callee = gimple_call_fndecl (stmt);
3328 /* First try using an internal function. */
3329 tree_code convert_code = ERROR_MARK;
3330 if (cfn != CFN_LAST
3331 && (modifier == NONE
3332 || (modifier == NARROW
3333 && simple_integer_narrowing (vectype_out, vectype_in,
3334 &convert_code))))
3335 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3336 vectype_in);
3338 /* If that fails, try asking for a target-specific built-in function. */
3339 if (ifn == IFN_LAST)
3341 if (cfn != CFN_LAST)
3342 fndecl = targetm.vectorize.builtin_vectorized_function
3343 (cfn, vectype_out, vectype_in);
3344 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3345 fndecl = targetm.vectorize.builtin_md_vectorized_function
3346 (callee, vectype_out, vectype_in);
3349 if (ifn == IFN_LAST && !fndecl)
3351 if (cfn == CFN_GOMP_SIMD_LANE
3352 && !slp_node
3353 && loop_vinfo
3354 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3355 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3356 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3357 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3359 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3360 { 0, 1, 2, ... vf - 1 } vector. */
3361 gcc_assert (nargs == 0);
3363 else if (modifier == NONE
3364 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3365 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3366 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3367 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3368 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3369 slp_op, vectype_in, cost_vec);
3370 else
3372 if (dump_enabled_p ())
3373 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3374 "function is not vectorizable.\n");
3375 return false;
3379 if (slp_node)
3380 ncopies = 1;
3381 else if (modifier == NARROW && ifn == IFN_LAST)
3382 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3383 else
3384 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3386 /* Sanity check: make sure that at least one copy of the vectorized stmt
3387 needs to be generated. */
3388 gcc_assert (ncopies >= 1);
3390 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3391 if (!vec_stmt) /* transformation not required. */
3393 if (slp_node)
3394 for (i = 0; i < nargs; ++i)
3395 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3397 if (dump_enabled_p ())
3398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3399 "incompatible vector types for invariants\n");
3400 return false;
3402 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3403 DUMP_VECT_SCOPE ("vectorizable_call");
3404 vect_model_simple_cost (vinfo, stmt_info,
3405 ncopies, dt, ndts, slp_node, cost_vec);
3406 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3407 record_stmt_cost (cost_vec, ncopies / 2,
3408 vec_promote_demote, stmt_info, 0, vect_body);
3410 if (loop_vinfo && mask_opno >= 0)
3412 unsigned int nvectors = (slp_node
3413 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3414 : ncopies);
3415 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3416 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3417 vectype_out, scalar_mask);
3419 return true;
3422 /* Transform. */
3424 if (dump_enabled_p ())
3425 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3427 /* Handle def. */
3428 scalar_dest = gimple_call_lhs (stmt);
3429 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3431 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3433 if (modifier == NONE || ifn != IFN_LAST)
3435 tree prev_res = NULL_TREE;
3436 vargs.safe_grow (nargs, true);
3437 orig_vargs.safe_grow (nargs, true);
3438 auto_vec<vec<tree> > vec_defs (nargs);
3439 for (j = 0; j < ncopies; ++j)
3441 /* Build argument list for the vectorized call. */
3442 if (slp_node)
3444 vec<tree> vec_oprnds0;
3446 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3447 vec_oprnds0 = vec_defs[0];
3449 /* Arguments are ready. Create the new vector stmt. */
3450 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3452 size_t k;
3453 for (k = 0; k < nargs; k++)
3455 vec<tree> vec_oprndsk = vec_defs[k];
3456 vargs[k] = vec_oprndsk[i];
3458 gimple *new_stmt;
3459 if (modifier == NARROW)
3461 /* We don't define any narrowing conditional functions
3462 at present. */
3463 gcc_assert (mask_opno < 0);
3464 tree half_res = make_ssa_name (vectype_in);
3465 gcall *call
3466 = gimple_build_call_internal_vec (ifn, vargs);
3467 gimple_call_set_lhs (call, half_res);
3468 gimple_call_set_nothrow (call, true);
3469 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3470 if ((i & 1) == 0)
3472 prev_res = half_res;
3473 continue;
3475 new_temp = make_ssa_name (vec_dest);
3476 new_stmt = gimple_build_assign (new_temp, convert_code,
3477 prev_res, half_res);
3478 vect_finish_stmt_generation (vinfo, stmt_info,
3479 new_stmt, gsi);
3481 else
3483 if (mask_opno >= 0 && masked_loop_p)
3485 unsigned int vec_num = vec_oprnds0.length ();
3486 /* Always true for SLP. */
3487 gcc_assert (ncopies == 1);
3488 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3489 vectype_out, i);
3490 vargs[mask_opno] = prepare_load_store_mask
3491 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
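/* prepare_load_store_mask combines the loop mask for this vector with
   the call's own mask operand, so lanes disabled by full-loop masking
   stay inactive in the vectorized call. */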
3494 gcall *call;
3495 if (ifn != IFN_LAST)
3496 call = gimple_build_call_internal_vec (ifn, vargs);
3497 else
3498 call = gimple_build_call_vec (fndecl, vargs);
3499 new_temp = make_ssa_name (vec_dest, call);
3500 gimple_call_set_lhs (call, new_temp);
3501 gimple_call_set_nothrow (call, true);
3502 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3503 new_stmt = call;
3505 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3507 continue;
3510 for (i = 0; i < nargs; i++)
3512 op = gimple_call_arg (stmt, i);
3513 if (j == 0)
3515 vec_defs.quick_push (vNULL);
3516 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3517 op, &vec_defs[i],
3518 vectypes[i]);
3520 orig_vargs[i] = vargs[i] = vec_defs[i][j];
3523 if (mask_opno >= 0 && masked_loop_p)
3525 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3526 vectype_out, j);
3527 vargs[mask_opno]
3528 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3529 vargs[mask_opno], gsi);
3532 gimple *new_stmt;
3533 if (cfn == CFN_GOMP_SIMD_LANE)
3535 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
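/* CST holds the lane indices for copy J, i.e. { j*nunits, j*nunits + 1,
   ... }, stepping by 1. */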
3536 tree new_var
3537 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3538 gimple *init_stmt = gimple_build_assign (new_var, cst);
3539 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3540 new_temp = make_ssa_name (vec_dest);
3541 new_stmt = gimple_build_assign (new_temp, new_var);
3542 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3544 else if (modifier == NARROW)
3546 /* We don't define any narrowing conditional functions at
3547 present. */
3548 gcc_assert (mask_opno < 0);
3549 tree half_res = make_ssa_name (vectype_in);
3550 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3551 gimple_call_set_lhs (call, half_res);
3552 gimple_call_set_nothrow (call, true);
3553 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3554 if ((j & 1) == 0)
3556 prev_res = half_res;
3557 continue;
3559 new_temp = make_ssa_name (vec_dest);
3560 new_stmt = gimple_build_assign (new_temp, convert_code,
3561 prev_res, half_res);
3562 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3564 else
3566 gcall *call;
3567 if (ifn != IFN_LAST)
3568 call = gimple_build_call_internal_vec (ifn, vargs);
3569 else
3570 call = gimple_build_call_vec (fndecl, vargs);
3571 new_temp = make_ssa_name (vec_dest, call);
3572 gimple_call_set_lhs (call, new_temp);
3573 gimple_call_set_nothrow (call, true);
3574 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3575 new_stmt = call;
3578 if (j == (modifier == NARROW ? 1 : 0))
3579 *vec_stmt = new_stmt;
3580 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3582 for (i = 0; i < nargs; i++)
3584 vec<tree> vec_oprndsi = vec_defs[i];
3585 vec_oprndsi.release ();
3588 else if (modifier == NARROW)
3590 auto_vec<vec<tree> > vec_defs (nargs);
3591 /* We don't define any narrowing conditional functions at present. */
3592 gcc_assert (mask_opno < 0);
3593 for (j = 0; j < ncopies; ++j)
3595 /* Build argument list for the vectorized call. */
3596 if (j == 0)
3597 vargs.create (nargs * 2);
3598 else
3599 vargs.truncate (0);
3601 if (slp_node)
3603 vec<tree> vec_oprnds0;
3605 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3606 vec_oprnds0 = vec_defs[0];
3608 /* Arguments are ready. Create the new vector stmt. */
3609 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3611 size_t k;
3612 vargs.truncate (0);
3613 for (k = 0; k < nargs; k++)
3615 vec<tree> vec_oprndsk = vec_defs[k];
3616 vargs.quick_push (vec_oprndsk[i]);
3617 vargs.quick_push (vec_oprndsk[i + 1]);
3619 gcall *call;
3620 if (ifn != IFN_LAST)
3621 call = gimple_build_call_internal_vec (ifn, vargs);
3622 else
3623 call = gimple_build_call_vec (fndecl, vargs);
3624 new_temp = make_ssa_name (vec_dest, call);
3625 gimple_call_set_lhs (call, new_temp);
3626 gimple_call_set_nothrow (call, true);
3627 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3628 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3630 continue;
3633 for (i = 0; i < nargs; i++)
3635 op = gimple_call_arg (stmt, i);
3636 if (j == 0)
3638 vec_defs.quick_push (vNULL);
3639 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3640 op, &vec_defs[i], vectypes[i]);
3642 vec_oprnd0 = vec_defs[i][2*j];
3643 vec_oprnd1 = vec_defs[i][2*j+1];
3645 vargs.quick_push (vec_oprnd0);
3646 vargs.quick_push (vec_oprnd1);
3649 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3650 new_temp = make_ssa_name (vec_dest, new_stmt);
3651 gimple_call_set_lhs (new_stmt, new_temp);
3652 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3654 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3657 if (!slp_node)
3658 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3660 for (i = 0; i < nargs; i++)
3662 vec<tree> vec_oprndsi = vec_defs[i];
3663 vec_oprndsi.release ();
3666 else
3667 /* No current target implements this case. */
3668 return false;
3670 vargs.release ();
3672 /* The call in STMT might prevent it from being removed in dce.
3673 We however cannot remove it here, due to the way the ssa name
3674 it defines is mapped to the new definition. So just replace
3675 rhs of the statement with something harmless. */
3677 if (slp_node)
3678 return true;
3680 stmt_info = vect_orig_stmt (stmt_info);
3681 lhs = gimple_get_lhs (stmt_info->stmt);
3683 gassign *new_stmt
3684 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3685 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3687 return true;
3691 struct simd_call_arg_info
3693 tree vectype;
3694 tree op;
3695 HOST_WIDE_INT linear_step;
3696 enum vect_def_type dt;
3697 unsigned int align;
3698 bool simd_lane_linear;
3701 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3702 is linear within a simd lane (but not within the whole loop), note it in
3703 *ARGINFO. */
3705 static void
3706 vect_simd_lane_linear (tree op, class loop *loop,
3707 struct simd_call_arg_info *arginfo)
3709 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3711 if (!is_gimple_assign (def_stmt)
3712 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3713 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3714 return;
3716 tree base = gimple_assign_rhs1 (def_stmt);
3717 HOST_WIDE_INT linear_step = 0;
3718 tree v = gimple_assign_rhs2 (def_stmt);
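/* Walk the definition of V looking for the shape
   BASE + (simd-lane-index * LINEAR_STEP): PLUS_EXPRs with constant
   operands are folded into BASE, a single MULT_EXPR supplies the step,
   and the chain must bottom out in an IFN_GOMP_SIMD_LANE call for this
   loop's simduid. */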
3719 while (TREE_CODE (v) == SSA_NAME)
3721 tree t;
3722 def_stmt = SSA_NAME_DEF_STMT (v);
3723 if (is_gimple_assign (def_stmt))
3724 switch (gimple_assign_rhs_code (def_stmt))
3726 case PLUS_EXPR:
3727 t = gimple_assign_rhs2 (def_stmt);
3728 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3729 return;
3730 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3731 v = gimple_assign_rhs1 (def_stmt);
3732 continue;
3733 case MULT_EXPR:
3734 t = gimple_assign_rhs2 (def_stmt);
3735 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3736 return;
3737 linear_step = tree_to_shwi (t);
3738 v = gimple_assign_rhs1 (def_stmt);
3739 continue;
3740 CASE_CONVERT:
3741 t = gimple_assign_rhs1 (def_stmt);
3742 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3743 || (TYPE_PRECISION (TREE_TYPE (v))
3744 < TYPE_PRECISION (TREE_TYPE (t))))
3745 return;
3746 if (!linear_step)
3747 linear_step = 1;
3748 v = t;
3749 continue;
3750 default:
3751 return;
3753 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3754 && loop->simduid
3755 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3756 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3757 == loop->simduid))
3759 if (!linear_step)
3760 linear_step = 1;
3761 arginfo->linear_step = linear_step;
3762 arginfo->op = base;
3763 arginfo->simd_lane_linear = true;
3764 return;
3769 /* Return the number of elements in vector type VECTYPE, which is associated
3770 with a SIMD clone. At present these vectors always have a constant
3771 length. */
3773 static unsigned HOST_WIDE_INT
3774 simd_clone_subparts (tree vectype)
3776 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3779 /* Function vectorizable_simd_clone_call.
3781 Check if STMT_INFO performs a function call that can be vectorized
3782 by calling a simd clone of the function.
3783 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3784 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3785 Return true if STMT_INFO is vectorizable in this way. */
3787 static bool
3788 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3789 gimple_stmt_iterator *gsi,
3790 gimple **vec_stmt, slp_tree slp_node,
3791 stmt_vector_for_cost *)
3793 tree vec_dest;
3794 tree scalar_dest;
3795 tree op, type;
3796 tree vec_oprnd0 = NULL_TREE;
3797 tree vectype;
3798 poly_uint64 nunits;
3799 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3800 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3801 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3802 tree fndecl, new_temp;
3803 int ncopies, j;
3804 auto_vec<simd_call_arg_info> arginfo;
3805 vec<tree> vargs = vNULL;
3806 size_t i, nargs;
3807 tree lhs, rtype, ratype;
3808 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3810 /* Is STMT a vectorizable call? */
3811 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3812 if (!stmt)
3813 return false;
3815 fndecl = gimple_call_fndecl (stmt);
3816 if (fndecl == NULL_TREE)
3817 return false;
3819 struct cgraph_node *node = cgraph_node::get (fndecl);
3820 if (node == NULL || node->simd_clones == NULL)
3821 return false;
3823 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3824 return false;
3826 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3827 && ! vec_stmt)
3828 return false;
3830 if (gimple_call_lhs (stmt)
3831 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3832 return false;
3834 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3836 vectype = STMT_VINFO_VECTYPE (stmt_info);
3838 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3839 return false;
3841 /* FORNOW */
3842 if (slp_node)
3843 return false;
3845 /* Process function arguments. */
3846 nargs = gimple_call_num_args (stmt);
3848 /* Bail out if the function has zero arguments. */
3849 if (nargs == 0)
3850 return false;
3852 arginfo.reserve (nargs, true);
3854 for (i = 0; i < nargs; i++)
3856 simd_call_arg_info thisarginfo;
3857 affine_iv iv;
3859 thisarginfo.linear_step = 0;
3860 thisarginfo.align = 0;
3861 thisarginfo.op = NULL_TREE;
3862 thisarginfo.simd_lane_linear = false;
3864 op = gimple_call_arg (stmt, i);
3865 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3866 &thisarginfo.vectype)
3867 || thisarginfo.dt == vect_uninitialized_def)
3869 if (dump_enabled_p ())
3870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3871 "use not simple.\n");
3872 return false;
3875 if (thisarginfo.dt == vect_constant_def
3876 || thisarginfo.dt == vect_external_def)
3877 gcc_assert (thisarginfo.vectype == NULL_TREE);
3878 else
3880 gcc_assert (thisarginfo.vectype != NULL_TREE);
3881 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3883 if (dump_enabled_p ())
3884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3885 "vector mask arguments are not supported\n");
3886 return false;
3890 /* For linear arguments, the analysis phase should have saved
3891 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3892 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3893 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3895 gcc_assert (vec_stmt);
3896 thisarginfo.linear_step
3897 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3898 thisarginfo.op
3899 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3900 thisarginfo.simd_lane_linear
3901 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3902 == boolean_true_node);
3903 /* If the loop has been peeled for alignment, we need to adjust it. */
3904 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3905 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3906 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3908 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3909 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3910 tree opt = TREE_TYPE (thisarginfo.op);
3911 bias = fold_convert (TREE_TYPE (step), bias);
3912 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3913 thisarginfo.op
3914 = fold_build2 (POINTER_TYPE_P (opt)
3915 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3916 thisarginfo.op, bias);
3919 else if (!vec_stmt
3920 && thisarginfo.dt != vect_constant_def
3921 && thisarginfo.dt != vect_external_def
3922 && loop_vinfo
3923 && TREE_CODE (op) == SSA_NAME
3924 && simple_iv (loop, loop_containing_stmt (stmt), op,
3925 &iv, false)
3926 && tree_fits_shwi_p (iv.step))
3928 thisarginfo.linear_step = tree_to_shwi (iv.step);
3929 thisarginfo.op = iv.base;
3931 else if ((thisarginfo.dt == vect_constant_def
3932 || thisarginfo.dt == vect_external_def)
3933 && POINTER_TYPE_P (TREE_TYPE (op)))
3934 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3935 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3936 linear too. */
3937 if (POINTER_TYPE_P (TREE_TYPE (op))
3938 && !thisarginfo.linear_step
3939 && !vec_stmt
3940 && thisarginfo.dt != vect_constant_def
3941 && thisarginfo.dt != vect_external_def
3942 && loop_vinfo
3943 && !slp_node
3944 && TREE_CODE (op) == SSA_NAME)
3945 vect_simd_lane_linear (op, loop, &thisarginfo);
3947 arginfo.quick_push (thisarginfo);
3950 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3951 if (!vf.is_constant ())
3953 if (dump_enabled_p ())
3954 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3955 "not considering SIMD clones; not yet supported"
3956 " for variable-width vectors.\n");
3957 return false;
3960 unsigned int badness = 0;
3961 struct cgraph_node *bestn = NULL;
3962 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3963 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3964 else
3965 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3966 n = n->simdclone->next_clone)
3968 unsigned int this_badness = 0;
3969 unsigned int num_calls;
3970 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
3971 || n->simdclone->nargs != nargs)
3972 continue;
3973 if (num_calls != 1)
3974 this_badness += exact_log2 (num_calls) * 4096;
3975 if (n->simdclone->inbranch)
3976 this_badness += 8192;
3977 int target_badness = targetm.simd_clone.usable (n);
3978 if (target_badness < 0)
3979 continue;
3980 this_badness += target_badness * 512;
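/* Smaller badness wins below: needing several clone calls to cover the
   vectorization factor costs 4096 per doubling, an inbranch clone costs
   8192, target-reported badness is scaled by 512 and per-argument
   mismatches add 64 each. */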
3981 /* FORNOW: Have to add code to add the mask argument. */
3982 if (n->simdclone->inbranch)
3983 continue;
3984 for (i = 0; i < nargs; i++)
3986 switch (n->simdclone->args[i].arg_type)
3988 case SIMD_CLONE_ARG_TYPE_VECTOR:
3989 if (!useless_type_conversion_p
3990 (n->simdclone->args[i].orig_type,
3991 TREE_TYPE (gimple_call_arg (stmt, i))))
3992 i = -1;
3993 else if (arginfo[i].dt == vect_constant_def
3994 || arginfo[i].dt == vect_external_def
3995 || arginfo[i].linear_step)
3996 this_badness += 64;
3997 break;
3998 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3999 if (arginfo[i].dt != vect_constant_def
4000 && arginfo[i].dt != vect_external_def)
4001 i = -1;
4002 break;
4003 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4004 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4005 if (arginfo[i].dt == vect_constant_def
4006 || arginfo[i].dt == vect_external_def
4007 || (arginfo[i].linear_step
4008 != n->simdclone->args[i].linear_step))
4009 i = -1;
4010 break;
4011 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4012 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4013 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4014 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4015 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4016 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4017 /* FORNOW */
4018 i = -1;
4019 break;
4020 case SIMD_CLONE_ARG_TYPE_MASK:
4021 gcc_unreachable ();
4023 if (i == (size_t) -1)
4024 break;
4025 if (n->simdclone->args[i].alignment > arginfo[i].align)
4027 i = -1;
4028 break;
4030 if (arginfo[i].align)
4031 this_badness += (exact_log2 (arginfo[i].align)
4032 - exact_log2 (n->simdclone->args[i].alignment));
4034 if (i == (size_t) -1)
4035 continue;
4036 if (bestn == NULL || this_badness < badness)
4038 bestn = n;
4039 badness = this_badness;
4043 if (bestn == NULL)
4044 return false;
4046 for (i = 0; i < nargs; i++)
4047 if ((arginfo[i].dt == vect_constant_def
4048 || arginfo[i].dt == vect_external_def)
4049 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4051 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4052 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4053 slp_node);
4054 if (arginfo[i].vectype == NULL
4055 || !constant_multiple_p (bestn->simdclone->simdlen,
4056 simd_clone_subparts (arginfo[i].vectype)))
4057 return false;
4060 fndecl = bestn->decl;
4061 nunits = bestn->simdclone->simdlen;
4062 ncopies = vector_unroll_factor (vf, nunits);
4064 /* If the function isn't const, only allow it in simd loops where the
4065 user has asserted that at least nunits consecutive iterations can be
4066 performed using SIMD instructions. */
4067 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4068 && gimple_vuse (stmt))
4069 return false;
4071 /* Sanity check: make sure that at least one copy of the vectorized stmt
4072 needs to be generated. */
4073 gcc_assert (ncopies >= 1);
4075 if (!vec_stmt) /* transformation not required. */
4077 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
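/* Layout of STMT_VINFO_SIMD_CLONE_INFO: element 0 is the selected
   clone's decl; for each linear argument I its base, step and
   simd-lane-linear flag are stored at indices I*3 + 1, I*3 + 2 and
   I*3 + 3, matching the reads done when the arguments are re-processed
   during the transform phase. */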
4078 for (i = 0; i < nargs; i++)
4079 if ((bestn->simdclone->args[i].arg_type
4080 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4081 || (bestn->simdclone->args[i].arg_type
4082 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4084 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4085 + 1,
4086 true);
4087 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4088 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4089 ? size_type_node : TREE_TYPE (arginfo[i].op);
4090 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4091 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4092 tree sll = arginfo[i].simd_lane_linear
4093 ? boolean_true_node : boolean_false_node;
4094 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4096 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4097 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4098 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4099 dt, slp_node, cost_vec); */
4100 return true;
4103 /* Transform. */
4105 if (dump_enabled_p ())
4106 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4108 /* Handle def. */
4109 scalar_dest = gimple_call_lhs (stmt);
4110 vec_dest = NULL_TREE;
4111 rtype = NULL_TREE;
4112 ratype = NULL_TREE;
4113 if (scalar_dest)
4115 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4116 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4117 if (TREE_CODE (rtype) == ARRAY_TYPE)
4119 ratype = rtype;
4120 rtype = TREE_TYPE (ratype);
4124 auto_vec<vec<tree> > vec_oprnds;
4125 auto_vec<unsigned> vec_oprnds_i;
4126 vec_oprnds.safe_grow_cleared (nargs, true);
4127 vec_oprnds_i.safe_grow_cleared (nargs, true);
4128 for (j = 0; j < ncopies; ++j)
4130 /* Build argument list for the vectorized call. */
4131 if (j == 0)
4132 vargs.create (nargs);
4133 else
4134 vargs.truncate (0);
4136 for (i = 0; i < nargs; i++)
4138 unsigned int k, l, m, o;
4139 tree atype;
4140 op = gimple_call_arg (stmt, i);
4141 switch (bestn->simdclone->args[i].arg_type)
4143 case SIMD_CLONE_ARG_TYPE_VECTOR:
4144 atype = bestn->simdclone->args[i].vector_type;
4145 o = vector_unroll_factor (nunits,
4146 simd_clone_subparts (atype));
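/* O is the number of ATYPE vectors needed to cover the clone's simdlen
   lanes for one call.  If ATYPE has fewer lanes than the loop vector
   type, each loop vector def is split up with BIT_FIELD_REFs; if it has
   more, several defs are glued together with a CONSTRUCTOR (or just
   VIEW_CONVERTed when the lane counts match). */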
4147 for (m = j * o; m < (j + 1) * o; m++)
4149 if (simd_clone_subparts (atype)
4150 < simd_clone_subparts (arginfo[i].vectype))
4152 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4153 k = (simd_clone_subparts (arginfo[i].vectype)
4154 / simd_clone_subparts (atype));
4155 gcc_assert ((k & (k - 1)) == 0);
4156 if (m == 0)
4158 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4159 ncopies * o / k, op,
4160 &vec_oprnds[i]);
4161 vec_oprnds_i[i] = 0;
4162 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4164 else
4166 vec_oprnd0 = arginfo[i].op;
4167 if ((m & (k - 1)) == 0)
4168 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4170 arginfo[i].op = vec_oprnd0;
4171 vec_oprnd0
4172 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4173 bitsize_int (prec),
4174 bitsize_int ((m & (k - 1)) * prec));
4175 gassign *new_stmt
4176 = gimple_build_assign (make_ssa_name (atype),
4177 vec_oprnd0);
4178 vect_finish_stmt_generation (vinfo, stmt_info,
4179 new_stmt, gsi);
4180 vargs.safe_push (gimple_assign_lhs (new_stmt));
4182 else
4184 k = (simd_clone_subparts (atype)
4185 / simd_clone_subparts (arginfo[i].vectype));
4186 gcc_assert ((k & (k - 1)) == 0);
4187 vec<constructor_elt, va_gc> *ctor_elts;
4188 if (k != 1)
4189 vec_alloc (ctor_elts, k);
4190 else
4191 ctor_elts = NULL;
4192 for (l = 0; l < k; l++)
4194 if (m == 0 && l == 0)
4196 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4197 k * o * ncopies,
4199 &vec_oprnds[i]);
4200 vec_oprnds_i[i] = 0;
4201 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4203 else
4204 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4205 arginfo[i].op = vec_oprnd0;
4206 if (k == 1)
4207 break;
4208 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4209 vec_oprnd0);
4211 if (k == 1)
4212 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4213 atype))
4215 vec_oprnd0
4216 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4217 gassign *new_stmt
4218 = gimple_build_assign (make_ssa_name (atype),
4219 vec_oprnd0);
4220 vect_finish_stmt_generation (vinfo, stmt_info,
4221 new_stmt, gsi);
4222 vargs.safe_push (gimple_assign_lhs (new_stmt));
4224 else
4225 vargs.safe_push (vec_oprnd0);
4226 else
4228 vec_oprnd0 = build_constructor (atype, ctor_elts);
4229 gassign *new_stmt
4230 = gimple_build_assign (make_ssa_name (atype),
4231 vec_oprnd0);
4232 vect_finish_stmt_generation (vinfo, stmt_info,
4233 new_stmt, gsi);
4234 vargs.safe_push (gimple_assign_lhs (new_stmt));
4238 break;
4239 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4240 vargs.safe_push (op);
4241 break;
4242 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4243 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4244 if (j == 0)
4246 gimple_seq stmts;
4247 arginfo[i].op
4248 = force_gimple_operand (unshare_expr (arginfo[i].op),
4249 &stmts, true, NULL_TREE);
4250 if (stmts != NULL)
4252 basic_block new_bb;
4253 edge pe = loop_preheader_edge (loop);
4254 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4255 gcc_assert (!new_bb);
4257 if (arginfo[i].simd_lane_linear)
4259 vargs.safe_push (arginfo[i].op);
4260 break;
4262 tree phi_res = copy_ssa_name (op);
4263 gphi *new_phi = create_phi_node (phi_res, loop->header);
4264 add_phi_arg (new_phi, arginfo[i].op,
4265 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4266 enum tree_code code
4267 = POINTER_TYPE_P (TREE_TYPE (op))
4268 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4269 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4270 ? sizetype : TREE_TYPE (op);
4271 poly_widest_int cst
4272 = wi::mul (bestn->simdclone->args[i].linear_step,
4273 ncopies * nunits);
4274 tree tcst = wide_int_to_tree (type, cst);
4275 tree phi_arg = copy_ssa_name (op);
4276 gassign *new_stmt
4277 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4278 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4279 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4280 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4281 UNKNOWN_LOCATION);
4282 arginfo[i].op = phi_res;
4283 vargs.safe_push (phi_res);
4285 else
4287 enum tree_code code
4288 = POINTER_TYPE_P (TREE_TYPE (op))
4289 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4290 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4291 ? sizetype : TREE_TYPE (op);
4292 poly_widest_int cst
4293 = wi::mul (bestn->simdclone->args[i].linear_step,
4294 j * nunits);
4295 tree tcst = wide_int_to_tree (type, cst);
4296 new_temp = make_ssa_name (TREE_TYPE (op));
4297 gassign *new_stmt
4298 = gimple_build_assign (new_temp, code,
4299 arginfo[i].op, tcst);
4300 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4301 vargs.safe_push (new_temp);
4303 break;
4304 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4305 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4306 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4307 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4308 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4309 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4310 default:
4311 gcc_unreachable ();
4315 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4316 if (vec_dest)
4318 gcc_assert (ratype
4319 || known_eq (simd_clone_subparts (rtype), nunits));
4320 if (ratype)
4321 new_temp = create_tmp_var (ratype);
4322 else if (useless_type_conversion_p (vectype, rtype))
4323 new_temp = make_ssa_name (vec_dest, new_call);
4324 else
4325 new_temp = make_ssa_name (rtype, new_call);
4326 gimple_call_set_lhs (new_call, new_temp);
4328 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4329 gimple *new_stmt = new_call;
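/* Massage the clone's return value into loop vectors: when one call
   produces more lanes than VECTYPE holds, the (array or wide vector)
   result is split into pieces; when it produces fewer, the results of
   several consecutive calls are collected into a CONSTRUCTOR; otherwise
   at most an array load or a VIEW_CONVERT is needed. */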
4331 if (vec_dest)
4333 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4335 unsigned int k, l;
4336 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4337 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4338 k = vector_unroll_factor (nunits,
4339 simd_clone_subparts (vectype));
4340 gcc_assert ((k & (k - 1)) == 0);
4341 for (l = 0; l < k; l++)
4343 tree t;
4344 if (ratype)
4346 t = build_fold_addr_expr (new_temp);
4347 t = build2 (MEM_REF, vectype, t,
4348 build_int_cst (TREE_TYPE (t), l * bytes));
4350 else
4351 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4352 bitsize_int (prec), bitsize_int (l * prec));
4353 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4354 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4356 if (j == 0 && l == 0)
4357 *vec_stmt = new_stmt;
4358 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4361 if (ratype)
4362 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4363 continue;
4365 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4367 unsigned int k = (simd_clone_subparts (vectype)
4368 / simd_clone_subparts (rtype));
4369 gcc_assert ((k & (k - 1)) == 0);
4370 if ((j & (k - 1)) == 0)
4371 vec_alloc (ret_ctor_elts, k);
4372 if (ratype)
4374 unsigned int m, o;
4375 o = vector_unroll_factor (nunits,
4376 simd_clone_subparts (rtype));
4377 for (m = 0; m < o; m++)
4379 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4380 size_int (m), NULL_TREE, NULL_TREE);
4381 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4382 tem);
4383 vect_finish_stmt_generation (vinfo, stmt_info,
4384 new_stmt, gsi);
4385 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4386 gimple_assign_lhs (new_stmt));
4388 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4390 else
4391 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4392 if ((j & (k - 1)) != k - 1)
4393 continue;
4394 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4395 new_stmt
4396 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4397 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4399 if ((unsigned) j == k - 1)
4400 *vec_stmt = new_stmt;
4401 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4402 continue;
4404 else if (ratype)
4406 tree t = build_fold_addr_expr (new_temp);
4407 t = build2 (MEM_REF, vectype, t,
4408 build_int_cst (TREE_TYPE (t), 0));
4409 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4410 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4411 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4413 else if (!useless_type_conversion_p (vectype, rtype))
4415 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4416 new_stmt
4417 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4418 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4422 if (j == 0)
4423 *vec_stmt = new_stmt;
4424 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4427 for (i = 0; i < nargs; ++i)
4429 vec<tree> oprndsi = vec_oprnds[i];
4430 oprndsi.release ();
4432 vargs.release ();
4434 /* The call in STMT might prevent it from being removed in dce.
4435 We however cannot remove it here, due to the way the ssa name
4436 it defines is mapped to the new definition. So just replace
4437 rhs of the statement with something harmless. */
4439 if (slp_node)
4440 return true;
4442 gimple *new_stmt;
4443 if (scalar_dest)
4445 type = TREE_TYPE (scalar_dest);
4446 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4447 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4449 else
4450 new_stmt = gimple_build_nop ();
4451 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4452 unlink_stmt_vdef (stmt);
4454 return true;
4458 /* Function vect_gen_widened_results_half
4460 Create a vector stmt whose code, number of arguments, and result
4461 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
4462 VEC_OPRND0 and VEC_OPRND1 (the latter is ignored when CODE is a
4463 unary operation). The new vector stmt is to be inserted at GSI.
4465 STMT_INFO is the original scalar stmt that we are vectorizing. */
4467 static gimple *
4468 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4469 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4470 tree vec_dest, gimple_stmt_iterator *gsi,
4471 stmt_vec_info stmt_info)
4473 gimple *new_stmt;
4474 tree new_temp;
4476 /* Generate half of the widened result: */
4477 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4478 if (op_type != binary_op)
4479 vec_oprnd1 = NULL;
4480 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4481 new_temp = make_ssa_name (vec_dest, new_stmt);
4482 gimple_assign_set_lhs (new_stmt, new_temp);
4483 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4485 return new_stmt;
4489 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4490 For multi-step conversions store the resulting vectors and call the function
4491 recursively. */
4493 static void
4494 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4495 int multi_step_cvt,
4496 stmt_vec_info stmt_info,
4497 vec<tree> &vec_dsts,
4498 gimple_stmt_iterator *gsi,
4499 slp_tree slp_node, enum tree_code code)
4501 unsigned int i;
4502 tree vop0, vop1, new_tmp, vec_dest;
4504 vec_dest = vec_dsts.pop ();
4506 for (i = 0; i < vec_oprnds->length (); i += 2)
4508 /* Create demotion operation. */
4509 vop0 = (*vec_oprnds)[i];
4510 vop1 = (*vec_oprnds)[i + 1];
4511 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4512 new_tmp = make_ssa_name (vec_dest, new_stmt);
4513 gimple_assign_set_lhs (new_stmt, new_tmp);
4514 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4516 if (multi_step_cvt)
4517 /* Store the resulting vector for next recursive call. */
4518 (*vec_oprnds)[i/2] = new_tmp;
4519 else
4521 /* This is the last step of the conversion sequence. Store the
4522 vectors in SLP_NODE or in vector info of the scalar statement
4523 (or in STMT_VINFO_RELATED_STMT chain). */
4524 if (slp_node)
4525 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4526 else
4527 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4531 /* For multi-step demotion operations we first generate demotion operations
4532 from the source type to the intermediate types, and then combine the
4533 results (stored in VEC_OPRNDS) with a further demotion operation to the
4534 destination type. */
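/* For illustration, on a 128-bit target a two-step int-to-char demotion
   first packs pairs of V4SI operands into V8HI vectors (halving the
   operand count) and a recursive call then packs those into V16QI. */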
4535 if (multi_step_cvt)
4537 /* At each level of recursion we have half of the operands we had at the
4538 previous level. */
4539 vec_oprnds->truncate ((i+1)/2);
4540 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4541 multi_step_cvt - 1,
4542 stmt_info, vec_dsts, gsi,
4543 slp_node, VEC_PACK_TRUNC_EXPR);
4546 vec_dsts.quick_push (vec_dest);
4550 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4551 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4552 STMT_INFO. For multi-step conversions store the resulting vectors and
4553 call the function recursively. */
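/* For illustration, widening V8HI inputs towards V4SI results: each
   input vector produces two result vectors, one via the lo-part code
   CODE1 and one via the hi-part code CODE2, so the replacement operand
   vector is twice as long as VEC_OPRNDS0. */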
4555 static void
4556 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4557 vec<tree> *vec_oprnds0,
4558 vec<tree> *vec_oprnds1,
4559 stmt_vec_info stmt_info, tree vec_dest,
4560 gimple_stmt_iterator *gsi,
4561 enum tree_code code1,
4562 enum tree_code code2, int op_type)
4564 int i;
4565 tree vop0, vop1, new_tmp1, new_tmp2;
4566 gimple *new_stmt1, *new_stmt2;
4567 vec<tree> vec_tmp = vNULL;
4569 vec_tmp.create (vec_oprnds0->length () * 2);
4570 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4572 if (op_type == binary_op)
4573 vop1 = (*vec_oprnds1)[i];
4574 else
4575 vop1 = NULL_TREE;
4577 /* Generate the two halves of the promotion operation. */
4578 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4579 op_type, vec_dest, gsi,
4580 stmt_info);
4581 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4582 op_type, vec_dest, gsi,
4583 stmt_info);
4584 if (is_gimple_call (new_stmt1))
4586 new_tmp1 = gimple_call_lhs (new_stmt1);
4587 new_tmp2 = gimple_call_lhs (new_stmt2);
4589 else
4591 new_tmp1 = gimple_assign_lhs (new_stmt1);
4592 new_tmp2 = gimple_assign_lhs (new_stmt2);
4595 /* Store the results for the next step. */
4596 vec_tmp.quick_push (new_tmp1);
4597 vec_tmp.quick_push (new_tmp2);
4600 vec_oprnds0->release ();
4601 *vec_oprnds0 = vec_tmp;
4604 /* Create vectorized promotion stmts for widening stmts using only half the
4605 potential vector size for input. */
4606 static void
4607 vect_create_half_widening_stmts (vec_info *vinfo,
4608 vec<tree> *vec_oprnds0,
4609 vec<tree> *vec_oprnds1,
4610 stmt_vec_info stmt_info, tree vec_dest,
4611 gimple_stmt_iterator *gsi,
4612 enum tree_code code1,
4613 int op_type)
4615 int i;
4616 tree vop0, vop1;
4617 gimple *new_stmt1;
4618 gimple *new_stmt2;
4619 gimple *new_stmt3;
4620 vec<tree> vec_tmp = vNULL;
4622 vec_tmp.create (vec_oprnds0->length ());
4623 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4625 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4627 gcc_assert (op_type == binary_op);
4628 vop1 = (*vec_oprnds1)[i];
4630 /* Widen the first vector input. */
4631 out_type = TREE_TYPE (vec_dest);
4632 new_tmp1 = make_ssa_name (out_type);
4633 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4634 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4635 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4637 /* Widen the second vector input. */
4638 new_tmp2 = make_ssa_name (out_type);
4639 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4640 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4641 /* Perform the operation with both vector inputs widened. */
4642 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4644 else
4646 /* Perform the operation with the single vector input widened. */
4647 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4650 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4651 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4652 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4654 /* Store the results for the next step. */
4655 vec_tmp.quick_push (new_tmp3);
4658 vec_oprnds0->release ();
4659 *vec_oprnds0 = vec_tmp;
4663 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4664 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4665 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4666 Return true if STMT_INFO is vectorizable in this way. */
4668 static bool
4669 vectorizable_conversion (vec_info *vinfo,
4670 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4671 gimple **vec_stmt, slp_tree slp_node,
4672 stmt_vector_for_cost *cost_vec)
4674 tree vec_dest;
4675 tree scalar_dest;
4676 tree op0, op1 = NULL_TREE;
4677 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4678 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4679 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4680 tree new_temp;
4681 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4682 int ndts = 2;
4683 poly_uint64 nunits_in;
4684 poly_uint64 nunits_out;
4685 tree vectype_out, vectype_in;
4686 int ncopies, i;
4687 tree lhs_type, rhs_type;
4688 enum { NARROW, NONE, WIDEN } modifier;
4689 vec<tree> vec_oprnds0 = vNULL;
4690 vec<tree> vec_oprnds1 = vNULL;
4691 tree vop0;
4692 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4693 int multi_step_cvt = 0;
4694 vec<tree> interm_types = vNULL;
4695 tree intermediate_type, cvt_type = NULL_TREE;
4696 int op_type;
4697 unsigned short fltsz;
4699 /* Is STMT a vectorizable conversion? */
4701 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4702 return false;
4704 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4705 && ! vec_stmt)
4706 return false;
4708 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4709 if (!stmt)
4710 return false;
4712 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4713 return false;
4715 code = gimple_assign_rhs_code (stmt);
4716 if (!CONVERT_EXPR_CODE_P (code)
4717 && code != FIX_TRUNC_EXPR
4718 && code != FLOAT_EXPR
4719 && code != WIDEN_PLUS_EXPR
4720 && code != WIDEN_MINUS_EXPR
4721 && code != WIDEN_MULT_EXPR
4722 && code != WIDEN_LSHIFT_EXPR)
4723 return false;
4725 bool widen_arith = (code == WIDEN_PLUS_EXPR
4726 || code == WIDEN_MINUS_EXPR
4727 || code == WIDEN_MULT_EXPR
4728 || code == WIDEN_LSHIFT_EXPR);
4729 op_type = TREE_CODE_LENGTH (code);
4731 /* Check types of lhs and rhs. */
4732 scalar_dest = gimple_assign_lhs (stmt);
4733 lhs_type = TREE_TYPE (scalar_dest);
4734 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4736 /* Check the operands of the operation. */
4737 slp_tree slp_op0, slp_op1 = NULL;
4738 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4739 0, &op0, &slp_op0, &dt[0], &vectype_in))
4741 if (dump_enabled_p ())
4742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4743 "use not simple.\n");
4744 return false;
4747 rhs_type = TREE_TYPE (op0);
4748 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4749 && !((INTEGRAL_TYPE_P (lhs_type)
4750 && INTEGRAL_TYPE_P (rhs_type))
4751 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4752 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4753 return false;
4755 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4756 && ((INTEGRAL_TYPE_P (lhs_type)
4757 && !type_has_mode_precision_p (lhs_type))
4758 || (INTEGRAL_TYPE_P (rhs_type)
4759 && !type_has_mode_precision_p (rhs_type))))
4761 if (dump_enabled_p ())
4762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4763 "type conversion to/from bit-precision unsupported."
4764 "\n");
4765 return false;
4768 if (op_type == binary_op)
4770 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4771 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4773 op1 = gimple_assign_rhs2 (stmt);
4774 tree vectype1_in;
4775 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4776 &op1, &slp_op1, &dt[1], &vectype1_in))
4778 if (dump_enabled_p ())
4779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4780 "use not simple.\n");
4781 return false;
4783 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4784 OP1. */
4785 if (!vectype_in)
4786 vectype_in = vectype1_in;
4789 /* If op0 is an external or constant def, infer the vector type
4790 from the scalar type. */
4791 if (!vectype_in)
4792 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4793 if (vec_stmt)
4794 gcc_assert (vectype_in);
4795 if (!vectype_in)
4797 if (dump_enabled_p ())
4798 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4799 "no vectype for scalar type %T\n", rhs_type);
4801 return false;
4804 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4805 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4807 if (dump_enabled_p ())
4808 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4809 "can't convert between boolean and non "
4810 "boolean vectors %T\n", rhs_type);
4812 return false;
4815 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4816 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
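/* Classify the conversion by lane counts: equal counts mean either a
   plain conversion or a half-widening arithmetic op; more output lanes
   than input lanes per vector means the results must be packed (NARROW);
   fewer means they must be unpacked (WIDEN). */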
4817 if (known_eq (nunits_out, nunits_in))
4818 if (widen_arith)
4819 modifier = WIDEN;
4820 else
4821 modifier = NONE;
4822 else if (multiple_p (nunits_out, nunits_in))
4823 modifier = NARROW;
4824 else
4826 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4827 modifier = WIDEN;
4830 /* Multiple types in SLP are handled by creating the appropriate number of
4831 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4832 case of SLP. */
4833 if (slp_node)
4834 ncopies = 1;
4835 else if (modifier == NARROW)
4836 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4837 else
4838 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4840 /* Sanity check: make sure that at least one copy of the vectorized stmt
4841 needs to be generated. */
4842 gcc_assert (ncopies >= 1);
4844 bool found_mode = false;
4845 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4846 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4847 opt_scalar_mode rhs_mode_iter;
4849 /* Supportable by target? */
4850 switch (modifier)
4852 case NONE:
4853 if (code != FIX_TRUNC_EXPR
4854 && code != FLOAT_EXPR
4855 && !CONVERT_EXPR_CODE_P (code))
4856 return false;
4857 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4858 break;
4859 /* FALLTHRU */
4860 unsupported:
4861 if (dump_enabled_p ())
4862 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4863 "conversion not supported by target.\n");
4864 return false;
4866 case WIDEN:
4867 if (known_eq (nunits_in, nunits_out))
4869 if (!supportable_half_widening_operation (code, vectype_out,
4870 vectype_in, &code1))
4871 goto unsupported;
4872 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4873 break;
4875 if (supportable_widening_operation (vinfo, code, stmt_info,
4876 vectype_out, vectype_in, &code1,
4877 &code2, &multi_step_cvt,
4878 &interm_types))
4880 /* Binary widening operation can only be supported directly by the
4881 architecture. */
4882 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4883 break;
4886 if (code != FLOAT_EXPR
4887 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4888 goto unsupported;
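/* Direct widening is not supported.  For an integer-to-float conversion
   whose float type is wider than the integer type, try a two-step
   scheme: widen the integer input to a wider integer type CVT_TYPE
   first and then convert that to the float type. */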
4890 fltsz = GET_MODE_SIZE (lhs_mode);
4891 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4893 rhs_mode = rhs_mode_iter.require ();
4894 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4895 break;
4897 cvt_type
4898 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4899 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4900 if (cvt_type == NULL_TREE)
4901 goto unsupported;
4903 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4905 if (!supportable_convert_operation (code, vectype_out,
4906 cvt_type, &codecvt1))
4907 goto unsupported;
4909 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4910 vectype_out, cvt_type,
4911 &codecvt1, &codecvt2,
4912 &multi_step_cvt,
4913 &interm_types))
4914 continue;
4915 else
4916 gcc_assert (multi_step_cvt == 0);
4918 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4919 cvt_type,
4920 vectype_in, &code1, &code2,
4921 &multi_step_cvt, &interm_types))
4923 found_mode = true;
4924 break;
4928 if (!found_mode)
4929 goto unsupported;
4931 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4932 codecvt2 = ERROR_MARK;
4933 else
4935 multi_step_cvt++;
4936 interm_types.safe_push (cvt_type);
4937 cvt_type = NULL_TREE;
4939 break;
4941 case NARROW:
4942 gcc_assert (op_type == unary_op);
4943 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4944 &code1, &multi_step_cvt,
4945 &interm_types))
4946 break;
4948 if (code != FIX_TRUNC_EXPR
4949 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4950 goto unsupported;
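/* Direct narrowing is not supported.  For a float-to-integer conversion
   whose integer type is narrower than the float type, try converting to
   a same-width integer vector CVT_TYPE first and then packing that down
   to the destination type. */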
4952 cvt_type
4953 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4954 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4955 if (cvt_type == NULL_TREE)
4956 goto unsupported;
4957 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4958 &codecvt1))
4959 goto unsupported;
4960 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4961 &code1, &multi_step_cvt,
4962 &interm_types))
4963 break;
4964 goto unsupported;
4966 default:
4967 gcc_unreachable ();
4970 if (!vec_stmt) /* transformation not required. */
4972 if (slp_node
4973 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
4974 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
4976 if (dump_enabled_p ())
4977 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4978 "incompatible vector types for invariants\n");
4979 return false;
4981 DUMP_VECT_SCOPE ("vectorizable_conversion");
4982 if (modifier == NONE)
4984 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4985 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
4986 cost_vec);
4988 else if (modifier == NARROW)
4990 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4991 /* The final packing step produces one vector result per copy. */
4992 unsigned int nvectors
4993 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4994 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4995 multi_step_cvt, cost_vec,
4996 widen_arith);
4998 else
5000 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5001 /* The initial unpacking step produces two vector results
5002 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5003 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5004 unsigned int nvectors
5005 = (slp_node
5006 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5007 : ncopies * 2);
5008 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5009 multi_step_cvt, cost_vec,
5010 widen_arith);
5012 interm_types.release ();
5013 return true;
5016 /* Transform. */
5017 if (dump_enabled_p ())
5018 dump_printf_loc (MSG_NOTE, vect_location,
5019 "transform conversion. ncopies = %d.\n", ncopies);
5021 if (op_type == binary_op)
5023 if (CONSTANT_CLASS_P (op0))
5024 op0 = fold_convert (TREE_TYPE (op1), op0);
5025 else if (CONSTANT_CLASS_P (op1))
5026 op1 = fold_convert (TREE_TYPE (op0), op1);
5029 /* In case of multi-step conversion, we first generate conversion operations
5030 to the intermediate types, and then from those types to the final one.
5031 We create vector destinations for the intermediate type (TYPES) received
5032 from supportable_*_operation, and store them in the correct order
5033 for future use in vect_create_vectorized_*_stmts (). */
5034 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5035 vec_dest = vect_create_destination_var (scalar_dest,
5036 (cvt_type && modifier == WIDEN)
5037 ? cvt_type : vectype_out);
5038 vec_dsts.quick_push (vec_dest);
5040 if (multi_step_cvt)
5042 for (i = interm_types.length () - 1;
5043 interm_types.iterate (i, &intermediate_type); i--)
5045 vec_dest = vect_create_destination_var (scalar_dest,
5046 intermediate_type);
5047 vec_dsts.quick_push (vec_dest);
5051 if (cvt_type)
5052 vec_dest = vect_create_destination_var (scalar_dest,
5053 modifier == WIDEN
5054 ? vectype_out : cvt_type);
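/* NINPUTS is the number of input vector defs each copy consumes: a
   (possibly multi-step) narrowing needs 2^(multi_step_cvt + 1) inputs
   per final output vector, while widening and simple conversions
   consume one. */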
5056 int ninputs = 1;
5057 if (!slp_node)
5059 if (modifier == WIDEN)
5061 else if (modifier == NARROW)
5063 if (multi_step_cvt)
5064 ninputs = vect_pow2 (multi_step_cvt);
5065 ninputs *= 2;
5069 switch (modifier)
5071 case NONE:
5072 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5073 op0, &vec_oprnds0);
5074 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5076 /* Arguments are ready, create the new vector stmt. */
5077 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5078 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5079 new_temp = make_ssa_name (vec_dest, new_stmt);
5080 gimple_assign_set_lhs (new_stmt, new_temp);
5081 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5083 if (slp_node)
5084 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5085 else
5086 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5088 break;
5090 case WIDEN:
5091 /* In case the vectorization factor (VF) is bigger than the number
5092 of elements that we can fit in a vectype (nunits), we have to
5093 generate more than one vector stmt, i.e., we need to "unroll"
5094 the vector stmt by a factor of VF/nunits. */
5095 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5096 op0, &vec_oprnds0,
5097 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5098 &vec_oprnds1);
5099 if (code == WIDEN_LSHIFT_EXPR)
5101 int oprnds_size = vec_oprnds0.length ();
5102 vec_oprnds1.create (oprnds_size);
5103 for (i = 0; i < oprnds_size; ++i)
5104 vec_oprnds1.quick_push (op1);
5106 /* Arguments are ready. Create the new vector stmts. */
5107 for (i = multi_step_cvt; i >= 0; i--)
5109 tree this_dest = vec_dsts[i];
5110 enum tree_code c1 = code1, c2 = code2;
5111 if (i == 0 && codecvt2 != ERROR_MARK)
5113 c1 = codecvt1;
5114 c2 = codecvt2;
5116 if (known_eq (nunits_out, nunits_in))
5117 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5118 &vec_oprnds1, stmt_info,
5119 this_dest, gsi,
5120 c1, op_type);
5121 else
5122 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5123 &vec_oprnds1, stmt_info,
5124 this_dest, gsi,
5125 c1, c2, op_type);
5128 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5130 gimple *new_stmt;
5131 if (cvt_type)
5133 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5134 new_temp = make_ssa_name (vec_dest);
5135 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5136 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5138 else
5139 new_stmt = SSA_NAME_DEF_STMT (vop0);
5141 if (slp_node)
5142 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5143 else
5144 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5146 break;
5148 case NARROW:
5149 /* In case the vectorization factor (VF) is bigger than the number
5150 of elements that we can fit in a vectype (nunits), we have to
5151 generate more than one vector stmt, i.e., we need to "unroll"
5152 the vector stmt by a factor of VF/nunits. */
5153 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5154 op0, &vec_oprnds0);
5155 /* Arguments are ready. Create the new vector stmts. */
5156 if (cvt_type)
5157 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5159 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5160 new_temp = make_ssa_name (vec_dest);
5161 gassign *new_stmt
5162 = gimple_build_assign (new_temp, codecvt1, vop0);
5163 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5164 vec_oprnds0[i] = new_temp;
5167 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5168 multi_step_cvt,
5169 stmt_info, vec_dsts, gsi,
5170 slp_node, code1);
5171 break;
5173 if (!slp_node)
5174 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5176 vec_oprnds0.release ();
5177 vec_oprnds1.release ();
5178 interm_types.release ();
5180 return true;
5183 /* Return true if we can assume from the scalar form of STMT_INFO that
5184 neither the scalar nor the vector forms will generate code. STMT_INFO
5185 is known not to involve a data reference. */
5187 bool
5188 vect_nop_conversion_p (stmt_vec_info stmt_info)
5190 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5191 if (!stmt)
5192 return false;
5194 tree lhs = gimple_assign_lhs (stmt);
5195 tree_code code = gimple_assign_rhs_code (stmt);
5196 tree rhs = gimple_assign_rhs1 (stmt);
5198 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5199 return true;
5201 if (CONVERT_EXPR_CODE_P (code))
5202 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5204 return false;
5207 /* Function vectorizable_assignment.
5209 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5210 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5211 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5212 Return true if STMT_INFO is vectorizable in this way. */
5214 static bool
5215 vectorizable_assignment (vec_info *vinfo,
5216 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5217 gimple **vec_stmt, slp_tree slp_node,
5218 stmt_vector_for_cost *cost_vec)
5220 tree vec_dest;
5221 tree scalar_dest;
5222 tree op;
5223 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5224 tree new_temp;
5225 enum vect_def_type dt[1] = {vect_unknown_def_type};
5226 int ndts = 1;
5227 int ncopies;
5228 int i;
5229 vec<tree> vec_oprnds = vNULL;
5230 tree vop;
5231 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5232 enum tree_code code;
5233 tree vectype_in;
5235 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5236 return false;
5238 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5239 && ! vec_stmt)
5240 return false;
5242 /* Is vectorizable assignment? */
5243 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5244 if (!stmt)
5245 return false;
5247 scalar_dest = gimple_assign_lhs (stmt);
5248 if (TREE_CODE (scalar_dest) != SSA_NAME)
5249 return false;
5251 if (STMT_VINFO_DATA_REF (stmt_info))
5252 return false;
5254 code = gimple_assign_rhs_code (stmt);
5255 if (!(gimple_assign_single_p (stmt)
5256 || code == PAREN_EXPR
5257 || CONVERT_EXPR_CODE_P (code)))
5258 return false;
5260 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5261 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5263 /* Multiple types in SLP are handled by creating the appropriate number of
5264 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5265 case of SLP. */
5266 if (slp_node)
5267 ncopies = 1;
5268 else
5269 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5271 gcc_assert (ncopies >= 1);
5273 slp_tree slp_op;
5274 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5275 &dt[0], &vectype_in))
5277 if (dump_enabled_p ())
5278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5279 "use not simple.\n");
5280 return false;
5282 if (!vectype_in)
5283 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5285 /* We can handle NOP_EXPR conversions that do not change the number
5286 of elements or the vector size. */
5287 if ((CONVERT_EXPR_CODE_P (code)
5288 || code == VIEW_CONVERT_EXPR)
5289 && (!vectype_in
5290 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5291 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5292 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5293 return false;
5295 if (VECTOR_BOOLEAN_TYPE_P (vectype)
5296 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5298 if (dump_enabled_p ())
5299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5300 "can't convert between boolean and non "
5301 "boolean vectors %T\n", TREE_TYPE (op));
5303 return false;
5306 /* We do not handle bit-precision changes. */
5307 if ((CONVERT_EXPR_CODE_P (code)
5308 || code == VIEW_CONVERT_EXPR)
5309 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5310 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5311 || !type_has_mode_precision_p (TREE_TYPE (op)))
5312 /* But a conversion that does not change the bit-pattern is ok. */
5313 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5314 > TYPE_PRECISION (TREE_TYPE (op)))
5315 && TYPE_UNSIGNED (TREE_TYPE (op))))
5317 if (dump_enabled_p ())
5318 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5319 "type conversion to/from bit-precision "
5320 "unsupported.\n");
5321 return false;
5324 if (!vec_stmt) /* transformation not required. */
5326 if (slp_node
5327 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5329 if (dump_enabled_p ())
5330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5331 "incompatible vector types for invariants\n");
5332 return false;
5334 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5335 DUMP_VECT_SCOPE ("vectorizable_assignment");
5336 if (!vect_nop_conversion_p (stmt_info))
5337 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5338 cost_vec);
5339 return true;
5342 /* Transform. */
5343 if (dump_enabled_p ())
5344 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5346 /* Handle def. */
5347 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5349 /* Handle use. */
5350 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5352 /* Arguments are ready. Create the new vector stmt. */
5353 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5355 if (CONVERT_EXPR_CODE_P (code)
5356 || code == VIEW_CONVERT_EXPR)
5357 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5358 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5359 new_temp = make_ssa_name (vec_dest, new_stmt);
5360 gimple_assign_set_lhs (new_stmt, new_temp);
5361 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5362 if (slp_node)
5363 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5364 else
5365 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5367 if (!slp_node)
5368 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5370 vec_oprnds.release ();
5371 return true;
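/* For example (names invented for exposition), with a 128-bit vector of
   four ints the nop conversion

     u_6 = (unsigned int) i_5;

   is vectorized by the loop above as a single statement

     vect_u_6.8 = VIEW_CONVERT_EXPR<vector(4) unsigned int>(vect_i_5.7);

   while a plain copy is vectorized as a plain vector copy.  */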
5375 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5376 either as shift by a scalar or by a vector. */
5378 bool
5379 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5382 machine_mode vec_mode;
5383 optab optab;
5384 int icode;
5385 tree vectype;
5387 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5388 if (!vectype)
5389 return false;
5391 optab = optab_for_tree_code (code, vectype, optab_scalar);
5392 if (!optab
5393 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5395 optab = optab_for_tree_code (code, vectype, optab_vector);
5396 if (!optab
5397 || (optab_handler (optab, TYPE_MODE (vectype))
5398 == CODE_FOR_nothing))
5399 return false;
5402 vec_mode = TYPE_MODE (vectype);
5403 icode = (int) optab_handler (optab, vec_mode);
5404 if (icode == CODE_FOR_nothing)
5405 return false;
5407 return true;
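/* Roughly, optab_scalar corresponds to shifting every element by the same
   scalar amount while optab_vector corresponds to per-element amounts.
   For example (loop bodies sketched for exposition only):

     a[i] = b[i] << 3;      /* uniform amount: a vector/scalar optab suffices */
     a[i] = b[i] << c[i];   /* per-element amounts: needs the vector/vector optab */

   Either form makes the shift supportable here.  */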
5411 /* Function vectorizable_shift.
5413 Check if STMT_INFO performs a shift operation that can be vectorized.
5414 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5415 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5416 Return true if STMT_INFO is vectorizable in this way. */
5418 static bool
5419 vectorizable_shift (vec_info *vinfo,
5420 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5421 gimple **vec_stmt, slp_tree slp_node,
5422 stmt_vector_for_cost *cost_vec)
5424 tree vec_dest;
5425 tree scalar_dest;
5426 tree op0, op1 = NULL;
5427 tree vec_oprnd1 = NULL_TREE;
5428 tree vectype;
5429 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5430 enum tree_code code;
5431 machine_mode vec_mode;
5432 tree new_temp;
5433 optab optab;
5434 int icode;
5435 machine_mode optab_op2_mode;
5436 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5437 int ndts = 2;
5438 poly_uint64 nunits_in;
5439 poly_uint64 nunits_out;
5440 tree vectype_out;
5441 tree op1_vectype;
5442 int ncopies;
5443 int i;
5444 vec<tree> vec_oprnds0 = vNULL;
5445 vec<tree> vec_oprnds1 = vNULL;
5446 tree vop0, vop1;
5447 unsigned int k;
5448 bool scalar_shift_arg = true;
5449 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5450 bool incompatible_op1_vectype_p = false;
5452 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5453 return false;
5455 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5456 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5457 && ! vec_stmt)
5458 return false;
5460 /* Is STMT a vectorizable shift/rotate operation? */
5461 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5462 if (!stmt)
5463 return false;
5465 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5466 return false;
5468 code = gimple_assign_rhs_code (stmt);
5470 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5471 || code == RROTATE_EXPR))
5472 return false;
5474 scalar_dest = gimple_assign_lhs (stmt);
5475 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5476 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5478 if (dump_enabled_p ())
5479 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5480 "bit-precision shifts not supported.\n");
5481 return false;
5484 slp_tree slp_op0;
5485 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5486 0, &op0, &slp_op0, &dt[0], &vectype))
5488 if (dump_enabled_p ())
5489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5490 "use not simple.\n");
5491 return false;
5493 /* If op0 is an external or constant def, infer the vector type
5494 from the scalar type. */
5495 if (!vectype)
5496 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5497 if (vec_stmt)
5498 gcc_assert (vectype);
5499 if (!vectype)
5501 if (dump_enabled_p ())
5502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5503 "no vectype for scalar type\n");
5504 return false;
5507 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5508 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5509 if (maybe_ne (nunits_out, nunits_in))
5510 return false;
5512 stmt_vec_info op1_def_stmt_info;
5513 slp_tree slp_op1;
5514 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5515 &dt[1], &op1_vectype, &op1_def_stmt_info))
5517 if (dump_enabled_p ())
5518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5519 "use not simple.\n");
5520 return false;
5523 /* Multiple types in SLP are handled by creating the appropriate number of
5524 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5525 case of SLP. */
5526 if (slp_node)
5527 ncopies = 1;
5528 else
5529 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5531 gcc_assert (ncopies >= 1);
5533 /* Determine whether the shift amount is a vector or a scalar. If the
5534 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5536 if ((dt[1] == vect_internal_def
5537 || dt[1] == vect_induction_def
5538 || dt[1] == vect_nested_cycle)
5539 && !slp_node)
5540 scalar_shift_arg = false;
5541 else if (dt[1] == vect_constant_def
5542 || dt[1] == vect_external_def
5543 || dt[1] == vect_internal_def)
5545 /* In SLP, we need to check whether the shift count is the same in
5546 all stmts; in loops, if it is a constant or invariant, it is always
5547 a scalar shift. */
5548 if (slp_node)
5550 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5551 stmt_vec_info slpstmt_info;
5553 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5555 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5556 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5557 scalar_shift_arg = false;
5560 /* For internal SLP defs we have to make sure we see scalar stmts
5561 for all vector elements.
5562 ??? For different vectors we could resort to a different
5563 scalar shift operand but code-generation below simply always
5564 takes the first. */
5565 if (dt[1] == vect_internal_def
5566 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5567 stmts.length ()))
5568 scalar_shift_arg = false;
5571 /* If the shift amount is computed by a pattern stmt we cannot
5572 use the scalar amount directly, thus give up and use a vector
5573 shift. */
5574 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5575 scalar_shift_arg = false;
5577 else
5579 if (dump_enabled_p ())
5580 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5581 "operand mode requires invariant argument.\n");
5582 return false;
5585 /* Vector shifted by vector. */
5586 bool was_scalar_shift_arg = scalar_shift_arg;
5587 if (!scalar_shift_arg)
5589 optab = optab_for_tree_code (code, vectype, optab_vector);
5590 if (dump_enabled_p ())
5591 dump_printf_loc (MSG_NOTE, vect_location,
5592 "vector/vector shift/rotate found.\n");
5594 if (!op1_vectype)
5595 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5596 slp_op1);
5597 incompatible_op1_vectype_p
5598 = (op1_vectype == NULL_TREE
5599 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5600 TYPE_VECTOR_SUBPARTS (vectype))
5601 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5602 if (incompatible_op1_vectype_p
5603 && (!slp_node
5604 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5605 || slp_op1->refcnt != 1))
5607 if (dump_enabled_p ())
5608 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5609 "unusable type for last operand in"
5610 " vector/vector shift/rotate.\n");
5611 return false;
5614 /* See if the machine has a vector-shifted-by-scalar insn, and if not,
5615 see if it has a vector-shifted-by-vector insn. */
5616 else
5618 optab = optab_for_tree_code (code, vectype, optab_scalar);
5619 if (optab
5620 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5622 if (dump_enabled_p ())
5623 dump_printf_loc (MSG_NOTE, vect_location,
5624 "vector/scalar shift/rotate found.\n");
5626 else
5628 optab = optab_for_tree_code (code, vectype, optab_vector);
5629 if (optab
5630 && (optab_handler (optab, TYPE_MODE (vectype))
5631 != CODE_FOR_nothing))
5633 scalar_shift_arg = false;
5635 if (dump_enabled_p ())
5636 dump_printf_loc (MSG_NOTE, vect_location,
5637 "vector/vector shift/rotate found.\n");
5639 if (!op1_vectype)
5640 op1_vectype = get_vectype_for_scalar_type (vinfo,
5641 TREE_TYPE (op1),
5642 slp_op1);
5644 /* Unlike the other binary operators, shifts/rotates have
5645 an int rhs rather than one of the same type as the lhs,
5646 so make sure the scalar amount has the right type when we
5647 are dealing with vectors of long long/long/short/char. */
5648 incompatible_op1_vectype_p
5649 = (!op1_vectype
5650 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5651 TREE_TYPE (op1)));
5652 if (incompatible_op1_vectype_p
5653 && dt[1] == vect_internal_def)
5655 if (dump_enabled_p ())
5656 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5657 "unusable type for last operand in"
5658 " vector/vector shift/rotate.\n");
5659 return false;
5665 /* Supportable by target? */
5666 if (!optab)
5668 if (dump_enabled_p ())
5669 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5670 "no optab.\n");
5671 return false;
5673 vec_mode = TYPE_MODE (vectype);
5674 icode = (int) optab_handler (optab, vec_mode);
5675 if (icode == CODE_FOR_nothing)
5677 if (dump_enabled_p ())
5678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5679 "op not supported by target.\n");
5680 return false;
5682 /* Vector lowering cannot optimize vector shifts using word arithmetic. */
5683 if (vect_emulated_vector_p (vectype))
5684 return false;
5686 if (!vec_stmt) /* transformation not required. */
5688 if (slp_node
5689 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5690 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5691 && (!incompatible_op1_vectype_p
5692 || dt[1] == vect_constant_def)
5693 && !vect_maybe_update_slp_op_vectype
5694 (slp_op1,
5695 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5697 if (dump_enabled_p ())
5698 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5699 "incompatible vector types for invariants\n");
5700 return false;
5702 /* Now adjust the constant shift amount in place. */
5703 if (slp_node
5704 && incompatible_op1_vectype_p
5705 && dt[1] == vect_constant_def)
5707 for (unsigned i = 0;
5708 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5710 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5711 = fold_convert (TREE_TYPE (vectype),
5712 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5713 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5714 == INTEGER_CST));
5717 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5718 DUMP_VECT_SCOPE ("vectorizable_shift");
5719 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5720 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5721 return true;
5724 /* Transform. */
5726 if (dump_enabled_p ())
5727 dump_printf_loc (MSG_NOTE, vect_location,
5728 "transform binary/unary operation.\n");
5730 if (incompatible_op1_vectype_p && !slp_node)
5732 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5733 op1 = fold_convert (TREE_TYPE (vectype), op1);
5734 if (dt[1] != vect_constant_def)
5735 op1 = vect_init_vector (vinfo, stmt_info, op1,
5736 TREE_TYPE (vectype), NULL);
5739 /* Handle def. */
5740 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5742 if (scalar_shift_arg && dt[1] != vect_internal_def)
5744 /* Vector shl and shr insn patterns can be defined with scalar
5745 operand 2 (shift operand). In this case, use constant or loop
5746 invariant op1 directly, without extending it to vector mode
5747 first. */
5748 optab_op2_mode = insn_data[icode].operand[2].mode;
5749 if (!VECTOR_MODE_P (optab_op2_mode))
5751 if (dump_enabled_p ())
5752 dump_printf_loc (MSG_NOTE, vect_location,
5753 "operand 1 using scalar mode.\n");
5754 vec_oprnd1 = op1;
5755 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5756 vec_oprnds1.quick_push (vec_oprnd1);
5757 /* Store vec_oprnd1 for every vector stmt to be created.
5758 We check during the analysis that all the shift arguments
5759 are the same.
5760 TODO: Allow different constants for different vector
5761 stmts generated for an SLP instance. */
5762 for (k = 0;
5763 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5764 vec_oprnds1.quick_push (vec_oprnd1);
5767 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5769 if (was_scalar_shift_arg)
5771 /* If the argument was the same in all lanes, create
5772 the correctly typed vector shift amount directly. */
5773 op1 = fold_convert (TREE_TYPE (vectype), op1);
5774 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5775 !loop_vinfo ? gsi : NULL);
5776 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5777 !loop_vinfo ? gsi : NULL);
5778 vec_oprnds1.create (slp_node->vec_stmts_size);
5779 for (k = 0; k < slp_node->vec_stmts_size; k++)
5780 vec_oprnds1.quick_push (vec_oprnd1);
5782 else if (dt[1] == vect_constant_def)
5783 /* The constant shift amount has been adjusted in place. */
5785 else
5786 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5789 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5790 (a special case for certain kinds of vector shifts); otherwise,
5791 operand 1 should be of a vector type (the usual case). */
5792 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5793 op0, &vec_oprnds0,
5794 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5796 /* Arguments are ready. Create the new vector stmt. */
5797 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5799 /* For internal defs where we need to use a scalar shift arg,
5800 extract the first lane. */
5801 if (scalar_shift_arg && dt[1] == vect_internal_def)
5803 vop1 = vec_oprnds1[0];
5804 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5805 gassign *new_stmt
5806 = gimple_build_assign (new_temp,
5807 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5808 vop1,
5809 TYPE_SIZE (TREE_TYPE (new_temp)),
5810 bitsize_zero_node));
5811 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5812 vop1 = new_temp;
5814 else
5815 vop1 = vec_oprnds1[i];
5816 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5817 new_temp = make_ssa_name (vec_dest, new_stmt);
5818 gimple_assign_set_lhs (new_stmt, new_temp);
5819 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5820 if (slp_node)
5821 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5822 else
5823 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5826 if (!slp_node)
5827 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5829 vec_oprnds0.release ();
5830 vec_oprnds1.release ();
5832 return true;
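/* For example (SSA names invented for exposition), when a scalar shift
   argument is wanted but the amount is defined inside the loop, the lane-0
   extraction above produces GIMPLE along the lines of

     _amt = BIT_FIELD_REF <vect_amt.9, 32, 0>;
     vect_res.11 = vect_b.10 >> _amt;

   for 32-bit elements, relying on the analysis phase having checked that
   all lanes use the same amount.  */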
5836 /* Function vectorizable_operation.
5838 Check if STMT_INFO performs a binary, unary or ternary operation that can
5839 be vectorized.
5840 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5841 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5842 Return true if STMT_INFO is vectorizable in this way. */
5844 static bool
5845 vectorizable_operation (vec_info *vinfo,
5846 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5847 gimple **vec_stmt, slp_tree slp_node,
5848 stmt_vector_for_cost *cost_vec)
5850 tree vec_dest;
5851 tree scalar_dest;
5852 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5853 tree vectype;
5854 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5855 enum tree_code code, orig_code;
5856 machine_mode vec_mode;
5857 tree new_temp;
5858 int op_type;
5859 optab optab;
5860 bool target_support_p;
5861 enum vect_def_type dt[3]
5862 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5863 int ndts = 3;
5864 poly_uint64 nunits_in;
5865 poly_uint64 nunits_out;
5866 tree vectype_out;
5867 int ncopies, vec_num;
5868 int i;
5869 vec<tree> vec_oprnds0 = vNULL;
5870 vec<tree> vec_oprnds1 = vNULL;
5871 vec<tree> vec_oprnds2 = vNULL;
5872 tree vop0, vop1, vop2;
5873 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5875 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5876 return false;
5878 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5879 && ! vec_stmt)
5880 return false;
5882 /* Is STMT a vectorizable binary/unary operation? */
5883 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5884 if (!stmt)
5885 return false;
5887 /* Loads and stores are handled in vectorizable_{load,store}. */
5888 if (STMT_VINFO_DATA_REF (stmt_info))
5889 return false;
5891 orig_code = code = gimple_assign_rhs_code (stmt);
5893 /* Shifts are handled in vectorizable_shift. */
5894 if (code == LSHIFT_EXPR
5895 || code == RSHIFT_EXPR
5896 || code == LROTATE_EXPR
5897 || code == RROTATE_EXPR)
5898 return false;
5900 /* Comparisons are handled in vectorizable_comparison. */
5901 if (TREE_CODE_CLASS (code) == tcc_comparison)
5902 return false;
5904 /* Conditions are handled in vectorizable_condition. */
5905 if (code == COND_EXPR)
5906 return false;
5908 /* For pointer addition and subtraction, we should use the normal
5909 plus and minus for the vector operation. */
5910 if (code == POINTER_PLUS_EXPR)
5911 code = PLUS_EXPR;
5912 if (code == POINTER_DIFF_EXPR)
5913 code = MINUS_EXPR;
5915 /* Support only unary or binary operations. */
5916 op_type = TREE_CODE_LENGTH (code);
5917 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5919 if (dump_enabled_p ())
5920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5921 "num. args = %d (not unary/binary/ternary op).\n",
5922 op_type);
5923 return false;
5926 scalar_dest = gimple_assign_lhs (stmt);
5927 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5929 /* Most operations cannot handle bit-precision types without extra
5930 truncations. */
5931 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
5932 if (!mask_op_p
5933 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5934 /* Exceptions are bitwise binary operations. */
5935 && code != BIT_IOR_EXPR
5936 && code != BIT_XOR_EXPR
5937 && code != BIT_AND_EXPR)
5939 if (dump_enabled_p ())
5940 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5941 "bit-precision arithmetic not supported.\n");
5942 return false;
5945 slp_tree slp_op0;
5946 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5947 0, &op0, &slp_op0, &dt[0], &vectype))
5949 if (dump_enabled_p ())
5950 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5951 "use not simple.\n");
5952 return false;
5954 /* If op0 is an external or constant def, infer the vector type
5955 from the scalar type. */
5956 if (!vectype)
5958 /* For a boolean type we cannot determine the vectype from an
5959 invariant value (we don't know whether it is a vector
5960 of booleans or a vector of integers). We use the output
5961 vectype because operations on booleans don't change the
5962 type. */
5963 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5965 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5967 if (dump_enabled_p ())
5968 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5969 "not supported operation on bool value.\n");
5970 return false;
5972 vectype = vectype_out;
5974 else
5975 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
5976 slp_node);
5978 if (vec_stmt)
5979 gcc_assert (vectype);
5980 if (!vectype)
5982 if (dump_enabled_p ())
5983 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5984 "no vectype for scalar type %T\n",
5985 TREE_TYPE (op0));
5987 return false;
5990 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5991 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5992 if (maybe_ne (nunits_out, nunits_in))
5993 return false;
5995 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
5996 slp_tree slp_op1 = NULL, slp_op2 = NULL;
5997 if (op_type == binary_op || op_type == ternary_op)
5999 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6000 1, &op1, &slp_op1, &dt[1], &vectype2))
6002 if (dump_enabled_p ())
6003 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6004 "use not simple.\n");
6005 return false;
6008 if (op_type == ternary_op)
6010 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6011 2, &op2, &slp_op2, &dt[2], &vectype3))
6013 if (dump_enabled_p ())
6014 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6015 "use not simple.\n");
6016 return false;
6020 /* Multiple types in SLP are handled by creating the appropriate number of
6021 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6022 case of SLP. */
6023 if (slp_node)
6025 ncopies = 1;
6026 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6028 else
6030 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6031 vec_num = 1;
6034 gcc_assert (ncopies >= 1);
6036 /* Reject attempts to combine mask types with nonmask types, e.g. if
6037 we have an AND between a (nonmask) boolean loaded from memory and
6038 a (mask) boolean result of a comparison.
6040 TODO: We could easily fix these cases up using pattern statements. */
6041 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6042 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6043 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6045 if (dump_enabled_p ())
6046 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6047 "mixed mask and nonmask vector types\n");
6048 return false;
6051 /* Supportable by target? */
6053 vec_mode = TYPE_MODE (vectype);
6054 if (code == MULT_HIGHPART_EXPR)
6055 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6056 else
6058 optab = optab_for_tree_code (code, vectype, optab_default);
6059 if (!optab)
6061 if (dump_enabled_p ())
6062 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6063 "no optab.\n");
6064 return false;
6066 target_support_p = (optab_handler (optab, vec_mode)
6067 != CODE_FOR_nothing);
6070 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6071 if (!target_support_p)
6073 if (dump_enabled_p ())
6074 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6075 "op not supported by target.\n");
6076 /* Check only during analysis. */
6077 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6078 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6079 return false;
6080 if (dump_enabled_p ())
6081 dump_printf_loc (MSG_NOTE, vect_location,
6082 "proceeding using word mode.\n");
6083 using_emulated_vectors_p = true;
6086 if (using_emulated_vectors_p
6087 && !vect_can_vectorize_without_simd_p (code))
6089 if (dump_enabled_p ())
6090 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6091 return false;
6094 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6095 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6096 internal_fn cond_fn = get_conditional_internal_fn (code);
6098 if (!vec_stmt) /* transformation not required. */
6100 /* If this operation is part of a reduction, a fully-masked loop
6101 should only change the active lanes of the reduction chain,
6102 keeping the inactive lanes as-is. */
6103 if (loop_vinfo
6104 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6105 && reduc_idx >= 0)
6107 if (cond_fn == IFN_LAST
6108 || !direct_internal_fn_supported_p (cond_fn, vectype,
6109 OPTIMIZE_FOR_SPEED))
6111 if (dump_enabled_p ())
6112 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6113 "can't use a fully-masked loop because no"
6114 " conditional operation is available.\n");
6115 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6117 else
6118 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6119 vectype, NULL);
6122 /* Put types on constant and invariant SLP children. */
6123 if (slp_node
6124 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6125 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6126 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6128 if (dump_enabled_p ())
6129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6130 "incompatible vector types for invariants\n");
6131 return false;
6134 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6135 DUMP_VECT_SCOPE ("vectorizable_operation");
6136 vect_model_simple_cost (vinfo, stmt_info,
6137 ncopies, dt, ndts, slp_node, cost_vec);
6138 if (using_emulated_vectors_p)
6140 /* The above vect_model_simple_cost call handles constants
6141 in the prologue and (mis-)costs one of the stmts as a
6142 vector stmt. See tree-vect-generic.c:do_plus_minus/do_negate
6143 for the actual lowering that will be applied. */
6144 unsigned n
6145 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6146 switch (code)
6148 case PLUS_EXPR:
6149 n *= 5;
6150 break;
6151 case MINUS_EXPR:
6152 n *= 6;
6153 break;
6154 case NEGATE_EXPR:
6155 n *= 4;
6156 break;
6157 default:;
6159 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
6161 return true;
6164 /* Transform. */
6166 if (dump_enabled_p ())
6167 dump_printf_loc (MSG_NOTE, vect_location,
6168 "transform binary/unary operation.\n");
6170 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6172 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6173 vectors with unsigned elements, but the result is signed. So, we
6174 need to compute the MINUS_EXPR into a vectype temporary and
6175 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6176 tree vec_cvt_dest = NULL_TREE;
6177 if (orig_code == POINTER_DIFF_EXPR)
6179 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6180 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6182 /* Handle def. */
6183 else
6184 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6186 /* In case the vectorization factor (VF) is bigger than the number
6187 of elements that we can fit in a vectype (nunits), we have to generate
6188 more than one vector stmt - i.e., we need to "unroll" the
6189 vector stmt by a factor of VF/nunits. In doing so, we record a pointer
6190 from one copy of the vector stmt to the next, in the field
6191 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6192 stages to find the correct vector defs to be used when vectorizing
6193 stmts that use the defs of the current stmt. The example below
6194 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6195 we need to create 4 vectorized stmts):
6197 before vectorization:
6198 RELATED_STMT VEC_STMT
6199 S1: x = memref - -
6200 S2: z = x + 1 - -
6202 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6203 there):
6204 RELATED_STMT VEC_STMT
6205 VS1_0: vx0 = memref0 VS1_1 -
6206 VS1_1: vx1 = memref1 VS1_2 -
6207 VS1_2: vx2 = memref2 VS1_3 -
6208 VS1_3: vx3 = memref3 - -
6209 S1: x = load - VS1_0
6210 S2: z = x + 1 - -
6212 step2: vectorize stmt S2 (done here):
6213 To vectorize stmt S2 we first need to find the relevant vector
6214 def for the first operand 'x'. This is, as usual, obtained from
6215 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6216 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6217 relevant vector def 'vx0'. Having found 'vx0' we can generate
6218 the vector stmt VS2_0, and as usual, record it in the
6219 STMT_VINFO_VEC_STMT of stmt S2.
6220 When creating the second copy (VS2_1), we obtain the relevant vector
6221 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6222 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6223 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6224 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6225 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6226 chain of stmts and pointers:
6227 RELATED_STMT VEC_STMT
6228 VS1_0: vx0 = memref0 VS1_1 -
6229 VS1_1: vx1 = memref1 VS1_2 -
6230 VS1_2: vx2 = memref2 VS1_3 -
6231 VS1_3: vx3 = memref3 - -
6232 S1: x = load - VS1_0
6233 VS2_0: vz0 = vx0 + v1 VS2_1 -
6234 VS2_1: vz1 = vx1 + v1 VS2_2 -
6235 VS2_2: vz2 = vx2 + v1 VS2_3 -
6236 VS2_3: vz3 = vx3 + v1 - -
6237 S2: z = x + 1 - VS2_0 */
6239 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6240 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6241 /* Arguments are ready. Create the new vector stmt. */
6242 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6244 gimple *new_stmt = NULL;
6245 vop1 = ((op_type == binary_op || op_type == ternary_op)
6246 ? vec_oprnds1[i] : NULL_TREE);
6247 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6248 if (masked_loop_p && reduc_idx >= 0)
6250 /* Perform the operation on active elements only and take
6251 inactive elements from the reduction chain input. */
6252 gcc_assert (!vop2);
6253 vop2 = reduc_idx == 1 ? vop1 : vop0;
6254 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6255 vectype, i);
6256 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6257 vop0, vop1, vop2);
6258 new_temp = make_ssa_name (vec_dest, call);
6259 gimple_call_set_lhs (call, new_temp);
6260 gimple_call_set_nothrow (call, true);
6261 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6262 new_stmt = call;
6264 else
6266 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6267 new_temp = make_ssa_name (vec_dest, new_stmt);
6268 gimple_assign_set_lhs (new_stmt, new_temp);
6269 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6270 if (vec_cvt_dest)
6272 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6273 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6274 new_temp);
6275 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6276 gimple_assign_set_lhs (new_stmt, new_temp);
6277 vect_finish_stmt_generation (vinfo, stmt_info,
6278 new_stmt, gsi);
6281 if (slp_node)
6282 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6283 else
6284 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6287 if (!slp_node)
6288 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6290 vec_oprnds0.release ();
6291 vec_oprnds1.release ();
6292 vec_oprnds2.release ();
6294 return true;
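/* As a sketch of the masked-reduction path above (names invented for
   exposition), a reduction statement like

     sum_11 = _6 + sum_10;

   in a fully-masked loop is emitted as a conditional internal function
   call such as

     vect_sum_11.9 = .COND_ADD (loop_mask_20, vect__6.8, vect_sum_10.7, vect_sum_10.7);

   so that inactive lanes simply pass the reduction input through.  */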
6297 /* A helper function to ensure data reference DR_INFO's base alignment. */
6299 static void
6300 ensure_base_align (dr_vec_info *dr_info)
6302 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6303 return;
6305 if (dr_info->base_misaligned)
6307 tree base_decl = dr_info->base_decl;
6309 /* We should only be able to increase the alignment of a base object
6310 if we know what its new alignment should be at compile time. */
6311 unsigned HOST_WIDE_INT align_base_to =
6312 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6314 if (decl_in_symtab_p (base_decl))
6315 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6316 else if (DECL_ALIGN (base_decl) < align_base_to)
6318 SET_DECL_ALIGN (base_decl, align_base_to);
6319 DECL_USER_ALIGN (base_decl) = 1;
6321 dr_info->base_misaligned = false;
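/* For example (decl invented for exposition), if the chosen access scheme
   for

     static double accum[256];

   requires a DR_TARGET_ALIGNMENT of 32 bytes, the code above raises
   DECL_ALIGN of 'accum' to 256 bits (or asks the symbol table to do so for
   decls in the symtab) and sets DECL_USER_ALIGN so the increased alignment
   is not reduced again later.  */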
6326 /* Function get_group_alias_ptr_type.
6328 Return the alias type for the group starting at FIRST_STMT_INFO. */
6330 static tree
6331 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6333 struct data_reference *first_dr, *next_dr;
6335 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6336 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6337 while (next_stmt_info)
6339 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6340 if (get_alias_set (DR_REF (first_dr))
6341 != get_alias_set (DR_REF (next_dr)))
6343 if (dump_enabled_p ())
6344 dump_printf_loc (MSG_NOTE, vect_location,
6345 "conflicting alias set types.\n");
6346 return ptr_type_node;
6348 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6350 return reference_alias_ptr_type (DR_REF (first_dr));
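/* For instance, if one store in an interleaved group is done through an
   'int *' and another through a 'float *' (distinct alias sets), the whole
   group has to be accessed conservatively via ptr_type_node; otherwise the
   alias type of the first reference is precise enough for the group.  */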
6354 /* Function scan_operand_equal_p.
6356 Helper function for check_scan_store. Compare two references
6357 with .GOMP_SIMD_LANE bases. */
6359 static bool
6360 scan_operand_equal_p (tree ref1, tree ref2)
6362 tree ref[2] = { ref1, ref2 };
6363 poly_int64 bitsize[2], bitpos[2];
6364 tree offset[2], base[2];
6365 for (int i = 0; i < 2; ++i)
6367 machine_mode mode;
6368 int unsignedp, reversep, volatilep = 0;
6369 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6370 &offset[i], &mode, &unsignedp,
6371 &reversep, &volatilep);
6372 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6373 return false;
6374 if (TREE_CODE (base[i]) == MEM_REF
6375 && offset[i] == NULL_TREE
6376 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6378 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6379 if (is_gimple_assign (def_stmt)
6380 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6381 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6382 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6384 if (maybe_ne (mem_ref_offset (base[i]), 0))
6385 return false;
6386 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6387 offset[i] = gimple_assign_rhs2 (def_stmt);
6392 if (!operand_equal_p (base[0], base[1], 0))
6393 return false;
6394 if (maybe_ne (bitsize[0], bitsize[1]))
6395 return false;
6396 if (offset[0] != offset[1])
6398 if (!offset[0] || !offset[1])
6399 return false;
6400 if (!operand_equal_p (offset[0], offset[1], 0))
6402 tree step[2];
6403 for (int i = 0; i < 2; ++i)
6405 step[i] = integer_one_node;
6406 if (TREE_CODE (offset[i]) == SSA_NAME)
6408 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6409 if (is_gimple_assign (def_stmt)
6410 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6411 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6412 == INTEGER_CST))
6414 step[i] = gimple_assign_rhs2 (def_stmt);
6415 offset[i] = gimple_assign_rhs1 (def_stmt);
6418 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6420 step[i] = TREE_OPERAND (offset[i], 1);
6421 offset[i] = TREE_OPERAND (offset[i], 0);
6423 tree rhs1 = NULL_TREE;
6424 if (TREE_CODE (offset[i]) == SSA_NAME)
6426 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6427 if (gimple_assign_cast_p (def_stmt))
6428 rhs1 = gimple_assign_rhs1 (def_stmt);
6430 else if (CONVERT_EXPR_P (offset[i]))
6431 rhs1 = TREE_OPERAND (offset[i], 0);
6432 if (rhs1
6433 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6434 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6435 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6436 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6437 offset[i] = rhs1;
6439 if (!operand_equal_p (offset[0], offset[1], 0)
6440 || !operand_equal_p (step[0], step[1], 0))
6441 return false;
6444 return true;
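/* As a rough illustration (names as in the scan examples further below),
   a store to D.2042[_20] and a reference whose address is &D.2042 p+ _20 * 4
   both decompose to base D.2042; after peeling off the common constant
   step (4) and any widening conversion of the index, the offsets compare
   equal, so the two references are treated as the same location.  */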
6448 enum scan_store_kind {
6449 /* Normal permutation. */
6450 scan_store_kind_perm,
6452 /* Whole vector left shift permutation with zero init. */
6453 scan_store_kind_lshift_zero,
6455 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6456 scan_store_kind_lshift_cond
6459 /* Function scan_store_can_perm_p.
6461 Verify if we can perform the needed permutations or whole vector shifts.
6462 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6463 USE_WHOLE_VECTOR is a vector of enum scan_store_kind indicating which
6464 operation to do at each step. */
6466 static int
6467 scan_store_can_perm_p (tree vectype, tree init,
6468 vec<enum scan_store_kind> *use_whole_vector = NULL)
6470 enum machine_mode vec_mode = TYPE_MODE (vectype);
6471 unsigned HOST_WIDE_INT nunits;
6472 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6473 return -1;
6474 int units_log2 = exact_log2 (nunits);
6475 if (units_log2 <= 0)
6476 return -1;
6478 int i;
6479 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6480 for (i = 0; i <= units_log2; ++i)
6482 unsigned HOST_WIDE_INT j, k;
6483 enum scan_store_kind kind = scan_store_kind_perm;
6484 vec_perm_builder sel (nunits, nunits, 1);
6485 sel.quick_grow (nunits);
6486 if (i == units_log2)
6488 for (j = 0; j < nunits; ++j)
6489 sel[j] = nunits - 1;
6491 else
6493 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6494 sel[j] = j;
6495 for (k = 0; j < nunits; ++j, ++k)
6496 sel[j] = nunits + k;
6498 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6499 if (!can_vec_perm_const_p (vec_mode, indices))
6501 if (i == units_log2)
6502 return -1;
6504 if (whole_vector_shift_kind == scan_store_kind_perm)
6506 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6507 return -1;
6508 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6509 /* Whole vector shifts shift in zeros, so if init is an all-zero
6510 constant, there is no need to do anything further. */
6511 if ((TREE_CODE (init) != INTEGER_CST
6512 && TREE_CODE (init) != REAL_CST)
6513 || !initializer_zerop (init))
6515 tree masktype = truth_type_for (vectype);
6516 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6517 return -1;
6518 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6521 kind = whole_vector_shift_kind;
6523 if (use_whole_vector)
6525 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6526 use_whole_vector->safe_grow_cleared (i, true);
6527 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6528 use_whole_vector->safe_push (kind);
6532 return units_log2;
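/* For example, for a vector of 8 elements this returns 3 and checks the
   permutations used by the inclusive-scan expansion further below:

     step 0: { 0, 8, 9, 10, 11, 12, 13, 14 }
     step 1: { 0, 1, 8, 9, 10, 11, 12, 13 }
     step 2: { 0, 1, 2, 3, 8, 9, 10, 11 }
     final:  { 7, 7, 7, 7, 7, 7, 7, 7 }

   falling back, where a permutation is not directly supported, to a whole
   vector shift, combined with a VEC_COND_EXPR when the initializer is not
   known to be all zeros.  */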
6536 /* Function check_scan_store.
6538 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6540 static bool
6541 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6542 enum vect_def_type rhs_dt, bool slp, tree mask,
6543 vect_memory_access_type memory_access_type)
6545 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6546 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6547 tree ref_type;
6549 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6550 if (slp
6551 || mask
6552 || memory_access_type != VMAT_CONTIGUOUS
6553 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6554 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6555 || loop_vinfo == NULL
6556 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6557 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6558 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6559 || !integer_zerop (DR_INIT (dr_info->dr))
6560 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6561 || !alias_sets_conflict_p (get_alias_set (vectype),
6562 get_alias_set (TREE_TYPE (ref_type))))
6564 if (dump_enabled_p ())
6565 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6566 "unsupported OpenMP scan store.\n");
6567 return false;
6570 /* We need to pattern match code built by OpenMP lowering and simplified
6571 by subsequent optimizations into something we can handle.
6572 #pragma omp simd reduction(inscan,+:r)
6573 for (...)
6575 r += something ();
6576 #pragma omp scan inclusive (r)
6577 use (r);
6579 shall have body with:
6580 // Initialization for input phase, store the reduction initializer:
6581 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6582 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6583 D.2042[_21] = 0;
6584 // Actual input phase:
6586 r.0_5 = D.2042[_20];
6587 _6 = _4 + r.0_5;
6588 D.2042[_20] = _6;
6589 // Initialization for scan phase:
6590 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6591 _26 = D.2043[_25];
6592 _27 = D.2042[_25];
6593 _28 = _26 + _27;
6594 D.2043[_25] = _28;
6595 D.2042[_25] = _28;
6596 // Actual scan phase:
6598 r.1_8 = D.2042[_20];
6600 The "omp simd array" variable D.2042 holds the privatized copy used
6601 inside of the loop and D.2043 is another one that holds copies of
6602 the current original list item. The separate GOMP_SIMD_LANE ifn
6603 kinds are there in order to allow optimizing the initializer store
6604 and combiner sequence, e.g. if it is originally some C++ish user
6605 defined reduction, but still allow the vectorizer to pattern recognize
6606 it and turn it into the appropriate vectorized scan.
6608 For exclusive scan, this is slightly different:
6609 #pragma omp simd reduction(inscan,+:r)
6610 for (...)
6612 use (r);
6613 #pragma omp scan exclusive (r)
6614 r += something ();
6616 shall have body with:
6617 // Initialization for input phase, store the reduction initializer:
6618 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6619 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6620 D.2042[_21] = 0;
6621 // Actual input phase:
6623 r.0_5 = D.2042[_20];
6624 _6 = _4 + r.0_5;
6625 D.2042[_20] = _6;
6626 // Initialization for scan phase:
6627 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6628 _26 = D.2043[_25];
6629 D.2044[_25] = _26;
6630 _27 = D.2042[_25];
6631 _28 = _26 + _27;
6632 D.2043[_25] = _28;
6633 // Actual scan phase:
6635 r.1_8 = D.2044[_20];
6636 ... */
6638 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6640 /* Match the D.2042[_21] = 0; store above. Just require that
6641 it is a constant or external definition store. */
6642 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6644 fail_init:
6645 if (dump_enabled_p ())
6646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6647 "unsupported OpenMP scan initializer store.\n");
6648 return false;
6651 if (! loop_vinfo->scan_map)
6652 loop_vinfo->scan_map = new hash_map<tree, tree>;
6653 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6654 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6655 if (cached)
6656 goto fail_init;
6657 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6659 /* These stores can be vectorized normally. */
6660 return true;
6663 if (rhs_dt != vect_internal_def)
6665 fail:
6666 if (dump_enabled_p ())
6667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6668 "unsupported OpenMP scan combiner pattern.\n");
6669 return false;
6672 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6673 tree rhs = gimple_assign_rhs1 (stmt);
6674 if (TREE_CODE (rhs) != SSA_NAME)
6675 goto fail;
6677 gimple *other_store_stmt = NULL;
6678 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6679 bool inscan_var_store
6680 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6682 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6684 if (!inscan_var_store)
6686 use_operand_p use_p;
6687 imm_use_iterator iter;
6688 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6690 gimple *use_stmt = USE_STMT (use_p);
6691 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6692 continue;
6693 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6694 || !is_gimple_assign (use_stmt)
6695 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6696 || other_store_stmt
6697 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6698 goto fail;
6699 other_store_stmt = use_stmt;
6701 if (other_store_stmt == NULL)
6702 goto fail;
6703 rhs = gimple_assign_lhs (other_store_stmt);
6704 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6705 goto fail;
6708 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6710 use_operand_p use_p;
6711 imm_use_iterator iter;
6712 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6714 gimple *use_stmt = USE_STMT (use_p);
6715 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6716 continue;
6717 if (other_store_stmt)
6718 goto fail;
6719 other_store_stmt = use_stmt;
6722 else
6723 goto fail;
6725 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6726 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6727 || !is_gimple_assign (def_stmt)
6728 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6729 goto fail;
6731 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6732 /* For pointer addition, we should use the normal plus for the vector
6733 operation. */
6734 switch (code)
6736 case POINTER_PLUS_EXPR:
6737 code = PLUS_EXPR;
6738 break;
6739 case MULT_HIGHPART_EXPR:
6740 goto fail;
6741 default:
6742 break;
6744 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6745 goto fail;
6747 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6748 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6749 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6750 goto fail;
6752 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6753 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6754 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6755 || !gimple_assign_load_p (load1_stmt)
6756 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6757 || !gimple_assign_load_p (load2_stmt))
6758 goto fail;
6760 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6761 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6762 if (load1_stmt_info == NULL
6763 || load2_stmt_info == NULL
6764 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6765 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6766 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6767 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6768 goto fail;
6770 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6772 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6773 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6774 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6775 goto fail;
6776 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6777 tree lrhs;
6778 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6779 lrhs = rhs1;
6780 else
6781 lrhs = rhs2;
6782 use_operand_p use_p;
6783 imm_use_iterator iter;
6784 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6786 gimple *use_stmt = USE_STMT (use_p);
6787 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6788 continue;
6789 if (other_store_stmt)
6790 goto fail;
6791 other_store_stmt = use_stmt;
6795 if (other_store_stmt == NULL)
6796 goto fail;
6797 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6798 || !gimple_store_p (other_store_stmt))
6799 goto fail;
6801 stmt_vec_info other_store_stmt_info
6802 = loop_vinfo->lookup_stmt (other_store_stmt);
6803 if (other_store_stmt_info == NULL
6804 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6805 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6806 goto fail;
6808 gimple *stmt1 = stmt;
6809 gimple *stmt2 = other_store_stmt;
6810 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6811 std::swap (stmt1, stmt2);
6812 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6813 gimple_assign_rhs1 (load2_stmt)))
6815 std::swap (rhs1, rhs2);
6816 std::swap (load1_stmt, load2_stmt);
6817 std::swap (load1_stmt_info, load2_stmt_info);
6819 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6820 gimple_assign_rhs1 (load1_stmt)))
6821 goto fail;
6823 tree var3 = NULL_TREE;
6824 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6825 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6826 gimple_assign_rhs1 (load2_stmt)))
6827 goto fail;
6828 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6830 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6831 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6832 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6833 goto fail;
6834 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6835 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6836 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6837 || lookup_attribute ("omp simd inscan exclusive",
6838 DECL_ATTRIBUTES (var3)))
6839 goto fail;
6842 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6843 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6844 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6845 goto fail;
6847 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6848 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6849 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6850 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6851 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6852 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6853 goto fail;
6855 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6856 std::swap (var1, var2);
6858 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6860 if (!lookup_attribute ("omp simd inscan exclusive",
6861 DECL_ATTRIBUTES (var1)))
6862 goto fail;
6863 var1 = var3;
6866 if (loop_vinfo->scan_map == NULL)
6867 goto fail;
6868 tree *init = loop_vinfo->scan_map->get (var1);
6869 if (init == NULL)
6870 goto fail;
6872 /* The IL is as expected, now check if we can actually vectorize it.
6873 Inclusive scan:
6874 _26 = D.2043[_25];
6875 _27 = D.2042[_25];
6876 _28 = _26 + _27;
6877 D.2043[_25] = _28;
6878 D.2042[_25] = _28;
6879 should be vectorized as (where _40 is the vectorized rhs
6880 from the D.2042[_21] = 0; store):
6881 _30 = MEM <vector(8) int> [(int *)&D.2043];
6882 _31 = MEM <vector(8) int> [(int *)&D.2042];
6883 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6884 _33 = _31 + _32;
6885 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6886 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6887 _35 = _33 + _34;
6888 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6889 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6890 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6891 _37 = _35 + _36;
6892 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6893 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6894 _38 = _30 + _37;
6895 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6896 MEM <vector(8) int> [(int *)&D.2043] = _39;
6897 MEM <vector(8) int> [(int *)&D.2042] = _38;
6898 Exclusive scan:
6899 _26 = D.2043[_25];
6900 D.2044[_25] = _26;
6901 _27 = D.2042[_25];
6902 _28 = _26 + _27;
6903 D.2043[_25] = _28;
6904 should be vectorized as (where _40 is the vectorized rhs
6905 from the D.2042[_21] = 0; store):
6906 _30 = MEM <vector(8) int> [(int *)&D.2043];
6907 _31 = MEM <vector(8) int> [(int *)&D.2042];
6908 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6909 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6910 _34 = _32 + _33;
6911 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6912 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6913 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6914 _36 = _34 + _35;
6915 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6916 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6917 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6918 _38 = _36 + _37;
6919 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6920 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6921 _39 = _30 + _38;
6922 _50 = _31 + _39;
6923 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6924 MEM <vector(8) int> [(int *)&D.2044] = _39;
6925 MEM <vector(8) int> [(int *)&D.2042] = _51; */
6926 enum machine_mode vec_mode = TYPE_MODE (vectype);
6927 optab optab = optab_for_tree_code (code, vectype, optab_default);
6928 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6929 goto fail;
6931 int units_log2 = scan_store_can_perm_p (vectype, *init);
6932 if (units_log2 == -1)
6933 goto fail;
6935 return true;
6939 /* Function vectorizable_scan_store.
6941 Helper of vectorizable_store; arguments are as for vectorizable_store.
6942 Handle only the transformation; the checking is done in check_scan_store. */
6944 static bool
6945 vectorizable_scan_store (vec_info *vinfo,
6946 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6947 gimple **vec_stmt, int ncopies)
6949 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6950 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6951 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
6952 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6954 if (dump_enabled_p ())
6955 dump_printf_loc (MSG_NOTE, vect_location,
6956 "transform scan store. ncopies = %d\n", ncopies);
6958 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6959 tree rhs = gimple_assign_rhs1 (stmt);
6960 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
6962 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6963 bool inscan_var_store
6964 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6966 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6968 use_operand_p use_p;
6969 imm_use_iterator iter;
6970 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6972 gimple *use_stmt = USE_STMT (use_p);
6973 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6974 continue;
6975 rhs = gimple_assign_lhs (use_stmt);
6976 break;
6980 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6981 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6982 if (code == POINTER_PLUS_EXPR)
6983 code = PLUS_EXPR;
6984 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
6985 && commutative_tree_code (code));
6986 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6987 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6988 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
6989 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6990 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6991 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6992 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6993 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6994 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6995 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6996 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6998 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7000 std::swap (rhs1, rhs2);
7001 std::swap (var1, var2);
7002 std::swap (load1_dr_info, load2_dr_info);
7005 tree *init = loop_vinfo->scan_map->get (var1);
7006 gcc_assert (init);
7008 unsigned HOST_WIDE_INT nunits;
7009 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7010 gcc_unreachable ();
7011 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7012 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7013 gcc_assert (units_log2 > 0);
7014 auto_vec<tree, 16> perms;
7015 perms.quick_grow (units_log2 + 1);
7016 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7017 for (int i = 0; i <= units_log2; ++i)
7019 unsigned HOST_WIDE_INT j, k;
7020 vec_perm_builder sel (nunits, nunits, 1);
7021 sel.quick_grow (nunits);
7022 if (i == units_log2)
7023 for (j = 0; j < nunits; ++j)
7024 sel[j] = nunits - 1;
7025 else
7027 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7028 sel[j] = j;
7029 for (k = 0; j < nunits; ++j, ++k)
7030 sel[j] = nunits + k;
7032 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7033 if (!use_whole_vector.is_empty ()
7034 && use_whole_vector[i] != scan_store_kind_perm)
7036 if (zero_vec == NULL_TREE)
7037 zero_vec = build_zero_cst (vectype);
7038 if (masktype == NULL_TREE
7039 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7040 masktype = truth_type_for (vectype);
7041 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7043 else
7044 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7047 tree vec_oprnd1 = NULL_TREE;
7048 tree vec_oprnd2 = NULL_TREE;
7049 tree vec_oprnd3 = NULL_TREE;
7050 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7051 tree dataref_offset = build_int_cst (ref_type, 0);
7052 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7053 vectype, VMAT_CONTIGUOUS);
7054 tree ldataref_ptr = NULL_TREE;
7055 tree orig = NULL_TREE;
7056 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7057 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7058 auto_vec<tree> vec_oprnds1;
7059 auto_vec<tree> vec_oprnds2;
7060 auto_vec<tree> vec_oprnds3;
7061 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7062 *init, &vec_oprnds1,
7063 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7064 rhs2, &vec_oprnds3);
7065 for (int j = 0; j < ncopies; j++)
7067 vec_oprnd1 = vec_oprnds1[j];
7068 if (ldataref_ptr == NULL)
7069 vec_oprnd2 = vec_oprnds2[j];
7070 vec_oprnd3 = vec_oprnds3[j];
7071 if (j == 0)
7072 orig = vec_oprnd3;
7073 else if (!inscan_var_store)
7074 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7076 if (ldataref_ptr)
7078 vec_oprnd2 = make_ssa_name (vectype);
7079 tree data_ref = fold_build2 (MEM_REF, vectype,
7080 unshare_expr (ldataref_ptr),
7081 dataref_offset);
7082 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7083 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7084 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7085 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7086 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7089 tree v = vec_oprnd2;
7090 for (int i = 0; i < units_log2; ++i)
7092 tree new_temp = make_ssa_name (vectype);
7093 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7094 (zero_vec
7095 && (use_whole_vector[i]
7096 != scan_store_kind_perm))
7097 ? zero_vec : vec_oprnd1, v,
7098 perms[i]);
7099 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7100 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7101 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7103 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7105		  /* The whole-vector shift shifted in zero bits, but if *init
7106		     is not initializer_zerop, we need to replace those elements
7107		     with elements from vec_oprnd1.  */
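		  /* E.g. (an illustration only) with nunits == 8 and i == 1 the
		     mask built below is { 0, 0, 1, 1, 1, 1, 1, 1 }: the two
		     low lanes that the shift filled with zeros are taken from
		     vec_oprnd1 (the vectorized *init), the remaining lanes
		     keep the shifted value new_temp.  */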
7108 tree_vector_builder vb (masktype, nunits, 1);
7109 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7110 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7111 ? boolean_false_node : boolean_true_node);
7113 tree new_temp2 = make_ssa_name (vectype);
7114 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7115 new_temp, vec_oprnd1);
7116 vect_finish_stmt_generation (vinfo, stmt_info,
7117 g, gsi);
7118 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7119 new_temp = new_temp2;
7122 /* For exclusive scan, perform the perms[i] permutation once
7123 more. */
7124 if (i == 0
7125 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7126 && v == vec_oprnd2)
7128 v = new_temp;
7129 --i;
7130 continue;
7133 tree new_temp2 = make_ssa_name (vectype);
7134 g = gimple_build_assign (new_temp2, code, v, new_temp);
7135 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7136 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7138 v = new_temp2;
7141 tree new_temp = make_ssa_name (vectype);
7142 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7143 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7144 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7146 tree last_perm_arg = new_temp;
7147  /* For exclusive scan, new_temp computed above is the exclusive scan
7148     prefix sum.  Turn it into an inclusive prefix sum for the broadcast
7149     of the last element into orig.  */
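  /* Illustration (assuming PLUS_EXPR, nunits == 4, inputs a0..a3 and a
     zero init): the exclusive prefix sums are { 0, a0, a0+a1, a0+a1+a2 };
     adding the input elements back gives the inclusive sums
     { a0, a0+a1, a0+a1+a2, a0+a1+a2+a3 }, and broadcasting their last
     lane into orig provides the carry for the next vector iteration.  */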
7150 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7152 last_perm_arg = make_ssa_name (vectype);
7153 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7154 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7155 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7158 orig = make_ssa_name (vectype);
7159 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7160 last_perm_arg, perms[units_log2]);
7161 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7162 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7164 if (!inscan_var_store)
7166 tree data_ref = fold_build2 (MEM_REF, vectype,
7167 unshare_expr (dataref_ptr),
7168 dataref_offset);
7169 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7170 g = gimple_build_assign (data_ref, new_temp);
7171 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7172 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7176 if (inscan_var_store)
7177 for (int j = 0; j < ncopies; j++)
7179 if (j != 0)
7180 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7182 tree data_ref = fold_build2 (MEM_REF, vectype,
7183 unshare_expr (dataref_ptr),
7184 dataref_offset);
7185 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7186 gimple *g = gimple_build_assign (data_ref, orig);
7187 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7188 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7190 return true;
7194 /* Function vectorizable_store.
7196    Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7197 that can be vectorized.
7198 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7199 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7200 Return true if STMT_INFO is vectorizable in this way. */
7202 static bool
7203 vectorizable_store (vec_info *vinfo,
7204 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7205 gimple **vec_stmt, slp_tree slp_node,
7206 stmt_vector_for_cost *cost_vec)
7208 tree data_ref;
7209 tree op;
7210 tree vec_oprnd = NULL_TREE;
7211 tree elem_type;
7212 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7213 class loop *loop = NULL;
7214 machine_mode vec_mode;
7215 tree dummy;
7216 enum vect_def_type rhs_dt = vect_unknown_def_type;
7217 enum vect_def_type mask_dt = vect_unknown_def_type;
7218 tree dataref_ptr = NULL_TREE;
7219 tree dataref_offset = NULL_TREE;
7220 gimple *ptr_incr = NULL;
7221 int ncopies;
7222 int j;
7223 stmt_vec_info first_stmt_info;
7224 bool grouped_store;
7225 unsigned int group_size, i;
7226 vec<tree> oprnds = vNULL;
7227 vec<tree> result_chain = vNULL;
7228 tree offset = NULL_TREE;
7229 vec<tree> vec_oprnds = vNULL;
7230 bool slp = (slp_node != NULL);
7231 unsigned int vec_num;
7232 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7233 tree aggr_type;
7234 gather_scatter_info gs_info;
7235 poly_uint64 vf;
7236 vec_load_store_type vls_type;
7237 tree ref_type;
7239 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7240 return false;
7242 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7243 && ! vec_stmt)
7244 return false;
7246 /* Is vectorizable store? */
7248 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7249 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7251 tree scalar_dest = gimple_assign_lhs (assign);
7252 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7253 && is_pattern_stmt_p (stmt_info))
7254 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7255 if (TREE_CODE (scalar_dest) != ARRAY_REF
7256 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7257 && TREE_CODE (scalar_dest) != INDIRECT_REF
7258 && TREE_CODE (scalar_dest) != COMPONENT_REF
7259 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7260 && TREE_CODE (scalar_dest) != REALPART_EXPR
7261 && TREE_CODE (scalar_dest) != MEM_REF)
7262 return false;
7264 else
7266 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7267 if (!call || !gimple_call_internal_p (call))
7268 return false;
7270 internal_fn ifn = gimple_call_internal_fn (call);
7271 if (!internal_store_fn_p (ifn))
7272 return false;
7274 if (slp_node != NULL)
7276 if (dump_enabled_p ())
7277 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7278 "SLP of masked stores not supported.\n");
7279 return false;
7282 int mask_index = internal_fn_mask_index (ifn);
7283 if (mask_index >= 0
7284 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7285 &mask, NULL, &mask_dt, &mask_vectype))
7286 return false;
7289 op = vect_get_store_rhs (stmt_info);
7291 /* Cannot have hybrid store SLP -- that would mean storing to the
7292 same location twice. */
7293 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7295 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7296 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7298 if (loop_vinfo)
7300 loop = LOOP_VINFO_LOOP (loop_vinfo);
7301 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7303 else
7304 vf = 1;
7306 /* Multiple types in SLP are handled by creating the appropriate number of
7307 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7308 case of SLP. */
7309 if (slp)
7310 ncopies = 1;
7311 else
7312 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7314 gcc_assert (ncopies >= 1);
7316 /* FORNOW. This restriction should be relaxed. */
7317 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7319 if (dump_enabled_p ())
7320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7321 "multiple types in nested loop.\n");
7322 return false;
7325 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7326 op, &rhs_dt, &rhs_vectype, &vls_type))
7327 return false;
7329 elem_type = TREE_TYPE (vectype);
7330 vec_mode = TYPE_MODE (vectype);
7332 if (!STMT_VINFO_DATA_REF (stmt_info))
7333 return false;
7335 vect_memory_access_type memory_access_type;
7336 enum dr_alignment_support alignment_support_scheme;
7337 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7338 ncopies, &memory_access_type,
7339 &alignment_support_scheme, &gs_info))
7340 return false;
7342 if (mask)
7344 if (memory_access_type == VMAT_CONTIGUOUS)
7346 if (!VECTOR_MODE_P (vec_mode)
7347 || !can_vec_mask_load_store_p (vec_mode,
7348 TYPE_MODE (mask_vectype), false))
7349 return false;
7351 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7352 && (memory_access_type != VMAT_GATHER_SCATTER
7353 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7355 if (dump_enabled_p ())
7356 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7357 "unsupported access type for masked store.\n");
7358 return false;
7361 else
7363 /* FORNOW. In some cases can vectorize even if data-type not supported
7364 (e.g. - array initialization with 0). */
7365 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7366 return false;
7369 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7370 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7371 && memory_access_type != VMAT_GATHER_SCATTER
7372 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7373 if (grouped_store)
7375 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7376 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7377 group_size = DR_GROUP_SIZE (first_stmt_info);
7379 else
7381 first_stmt_info = stmt_info;
7382 first_dr_info = dr_info;
7383 group_size = vec_num = 1;
7386 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7388 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7389 memory_access_type))
7390 return false;
7393 if (!vec_stmt) /* transformation not required. */
7395 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7397 if (loop_vinfo
7398 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7399 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7400 group_size, memory_access_type,
7401 &gs_info, mask);
7403 if (slp_node
7404 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7405 vectype))
7407 if (dump_enabled_p ())
7408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7409 "incompatible vector types for invariants\n");
7410 return false;
7413 if (dump_enabled_p ()
7414 && memory_access_type != VMAT_ELEMENTWISE
7415 && memory_access_type != VMAT_GATHER_SCATTER
7416 && alignment_support_scheme != dr_aligned)
7417 dump_printf_loc (MSG_NOTE, vect_location,
7418 "Vectorizing an unaligned access.\n");
7420 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7421 vect_model_store_cost (vinfo, stmt_info, ncopies,
7422 memory_access_type, vls_type, slp_node, cost_vec);
7423 return true;
7425 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7427 /* Transform. */
7429 ensure_base_align (dr_info);
7431 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7433 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7434 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7435 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7436 tree ptr, var, scale, vec_mask;
7437 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7438 tree mask_halfvectype = mask_vectype;
7439 edge pe = loop_preheader_edge (loop);
7440 gimple_seq seq;
7441 basic_block new_bb;
7442 enum { NARROW, NONE, WIDEN } modifier;
7443 poly_uint64 scatter_off_nunits
7444 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7446 if (known_eq (nunits, scatter_off_nunits))
7447 modifier = NONE;
7448 else if (known_eq (nunits * 2, scatter_off_nunits))
7450 modifier = WIDEN;
7452 /* Currently gathers and scatters are only supported for
7453 fixed-length vectors. */
7454 unsigned int count = scatter_off_nunits.to_constant ();
7455 vec_perm_builder sel (count, count, 1);
7456 for (i = 0; i < (unsigned int) count; ++i)
7457 sel.quick_push (i | (count / 2));
7459 vec_perm_indices indices (sel, 1, count);
7460 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7461 indices);
7462 gcc_assert (perm_mask != NULL_TREE);
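	  /* Illustration: with scatter_off_nunits == 8 the selector built
	     above is { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. the permute applied
	     for the odd copies below extracts (and duplicates) the high
	     half of the offset vector.  */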
7464 else if (known_eq (nunits, scatter_off_nunits * 2))
7466 modifier = NARROW;
7468 /* Currently gathers and scatters are only supported for
7469 fixed-length vectors. */
7470 unsigned int count = nunits.to_constant ();
7471 vec_perm_builder sel (count, count, 1);
7472 for (i = 0; i < (unsigned int) count; ++i)
7473 sel.quick_push (i | (count / 2));
7475 vec_perm_indices indices (sel, 2, count);
7476 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7477 gcc_assert (perm_mask != NULL_TREE);
7478 ncopies *= 2;
7480 if (mask)
7481 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7483 else
7484 gcc_unreachable ();
7486 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7487 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7488 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7489 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7490 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7491 scaletype = TREE_VALUE (arglist);
7493 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7494 && TREE_CODE (rettype) == VOID_TYPE);
7496 ptr = fold_convert (ptrtype, gs_info.base);
7497 if (!is_gimple_min_invariant (ptr))
7499 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7500 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7501 gcc_assert (!new_bb);
7504 if (mask == NULL_TREE)
7506 mask_arg = build_int_cst (masktype, -1);
7507 mask_arg = vect_init_vector (vinfo, stmt_info,
7508 mask_arg, masktype, NULL);
7511 scale = build_int_cst (scaletype, gs_info.scale);
7513 auto_vec<tree> vec_oprnds0;
7514 auto_vec<tree> vec_oprnds1;
7515 auto_vec<tree> vec_masks;
7516 if (mask)
7518 tree mask_vectype = truth_type_for (vectype);
7519 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7520 modifier == NARROW
7521 ? ncopies / 2 : ncopies,
7522 mask, &vec_masks, mask_vectype);
7524 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7525 modifier == WIDEN
7526 ? ncopies / 2 : ncopies,
7527 gs_info.offset, &vec_oprnds0);
7528 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7529 modifier == NARROW
7530 ? ncopies / 2 : ncopies,
7531 op, &vec_oprnds1);
7532 for (j = 0; j < ncopies; ++j)
7534 if (modifier == WIDEN)
7536 if (j & 1)
7537 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7538 perm_mask, stmt_info, gsi);
7539 else
7540 op = vec_oprnd0 = vec_oprnds0[j / 2];
7541 src = vec_oprnd1 = vec_oprnds1[j];
7542 if (mask)
7543 mask_op = vec_mask = vec_masks[j];
7545 else if (modifier == NARROW)
7547 if (j & 1)
7548 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7549 perm_mask, stmt_info, gsi);
7550 else
7551 src = vec_oprnd1 = vec_oprnds1[j / 2];
7552 op = vec_oprnd0 = vec_oprnds0[j];
7553 if (mask)
7554 mask_op = vec_mask = vec_masks[j / 2];
7556 else
7558 op = vec_oprnd0 = vec_oprnds0[j];
7559 src = vec_oprnd1 = vec_oprnds1[j];
7560 if (mask)
7561 mask_op = vec_mask = vec_masks[j];
7564 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7566 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7567 TYPE_VECTOR_SUBPARTS (srctype)));
7568 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7569 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7570 gassign *new_stmt
7571 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7572 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7573 src = var;
7576 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7578 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7579 TYPE_VECTOR_SUBPARTS (idxtype)));
7580 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7581 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7582 gassign *new_stmt
7583 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7584 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7585 op = var;
7588 if (mask)
7590 tree utype;
7591 mask_arg = mask_op;
7592 if (modifier == NARROW)
7594 var = vect_get_new_ssa_name (mask_halfvectype,
7595 vect_simple_var);
7596 gassign *new_stmt
7597 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7598 : VEC_UNPACK_LO_EXPR,
7599 mask_op);
7600 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7601 mask_arg = var;
7603 tree optype = TREE_TYPE (mask_arg);
7604 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7605 utype = masktype;
7606 else
7607 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7608 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7609 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7610 gassign *new_stmt
7611 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7612 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7613 mask_arg = var;
7614 if (!useless_type_conversion_p (masktype, utype))
7616 gcc_assert (TYPE_PRECISION (utype)
7617 <= TYPE_PRECISION (masktype));
7618 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7619 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7620 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7621 mask_arg = var;
7625 gcall *new_stmt
7626 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7627 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7629 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7631 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7632 return true;
7634 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7635 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7637 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7638 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7640 if (grouped_store)
7642 /* FORNOW */
7643 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7645 /* We vectorize all the stmts of the interleaving group when we
7646 reach the last stmt in the group. */
7647 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7648 < DR_GROUP_SIZE (first_stmt_info)
7649 && !slp)
7651 *vec_stmt = NULL;
7652 return true;
7655 if (slp)
7657 grouped_store = false;
7658 /* VEC_NUM is the number of vect stmts to be created for this
7659 group. */
7660 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7661 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7662 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7663 == first_stmt_info);
7664 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7665 op = vect_get_store_rhs (first_stmt_info);
7667 else
7668 /* VEC_NUM is the number of vect stmts to be created for this
7669 group. */
7670 vec_num = group_size;
7672 ref_type = get_group_alias_ptr_type (first_stmt_info);
7674 else
7675 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7677 if (dump_enabled_p ())
7678 dump_printf_loc (MSG_NOTE, vect_location,
7679 "transform store. ncopies = %d\n", ncopies);
7681 if (memory_access_type == VMAT_ELEMENTWISE
7682 || memory_access_type == VMAT_STRIDED_SLP)
7684 gimple_stmt_iterator incr_gsi;
7685 bool insert_after;
7686 gimple *incr;
7687 tree offvar;
7688 tree ivstep;
7689 tree running_off;
7690 tree stride_base, stride_step, alias_off;
7691 tree vec_oprnd;
7692 tree dr_offset;
7693 unsigned int g;
7694 /* Checked by get_load_store_type. */
7695 unsigned int const_nunits = nunits.to_constant ();
7697 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7698 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7700 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7701 stride_base
7702 = fold_build_pointer_plus
7703 (DR_BASE_ADDRESS (first_dr_info->dr),
7704 size_binop (PLUS_EXPR,
7705 convert_to_ptrofftype (dr_offset),
7706 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7707 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7709 /* For a store with loop-invariant (but other than power-of-2)
7710 stride (i.e. not a grouped access) like so:
7712 for (i = 0; i < n; i += stride)
7713 array[i] = ...;
7715 we generate a new induction variable and new stores from
7716 the components of the (vectorized) rhs:
7718 for (j = 0; ; j += VF*stride)
7719 vectemp = ...;
7720 tmp1 = vectemp[0];
7721 array[j] = tmp1;
7722 tmp2 = vectemp[1];
7723 array[j + stride] = tmp2;
7727 unsigned nstores = const_nunits;
7728 unsigned lnel = 1;
7729 tree ltype = elem_type;
7730 tree lvectype = vectype;
7731 if (slp)
7733 if (group_size < const_nunits
7734 && const_nunits % group_size == 0)
7736 nstores = const_nunits / group_size;
7737 lnel = group_size;
7738 ltype = build_vector_type (elem_type, group_size);
7739 lvectype = vectype;
7741 /* First check if vec_extract optab doesn't support extraction
7742 of vector elts directly. */
7743 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7744 machine_mode vmode;
7745 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7746 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7747 group_size).exists (&vmode)
7748 || (convert_optab_handler (vec_extract_optab,
7749 TYPE_MODE (vectype), vmode)
7750 == CODE_FOR_nothing))
7752 /* Try to avoid emitting an extract of vector elements
7753 by performing the extracts using an integer type of the
7754 same size, extracting from a vector of those and then
7755 re-interpreting it as the original vector type if
7756 supported. */
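		/* A possible instance (illustration only): vectype V8HI with
		   group_size 2.  If extracting V2HI pieces directly is not
		   supported, the code below instead views the vector as V4SI
		   and extracts SImode scalars, each of which stores one
		   two-element group.  */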
7757 unsigned lsize
7758 = group_size * GET_MODE_BITSIZE (elmode);
7759 unsigned int lnunits = const_nunits / group_size;
7760 /* If we can't construct such a vector fall back to
7761 element extracts from the original vector type and
7762 element size stores. */
7763 if (int_mode_for_size (lsize, 0).exists (&elmode)
7764 && VECTOR_MODE_P (TYPE_MODE (vectype))
7765 && related_vector_mode (TYPE_MODE (vectype), elmode,
7766 lnunits).exists (&vmode)
7767 && (convert_optab_handler (vec_extract_optab,
7768 vmode, elmode)
7769 != CODE_FOR_nothing))
7771 nstores = lnunits;
7772 lnel = group_size;
7773 ltype = build_nonstandard_integer_type (lsize, 1);
7774 lvectype = build_vector_type (ltype, nstores);
7776 /* Else fall back to vector extraction anyway.
7777 Fewer stores are more important than avoiding spilling
7778 of the vector we extract from. Compared to the
7779 construction case in vectorizable_load no store-forwarding
7780 issue exists here for reasonable archs. */
7783 else if (group_size >= const_nunits
7784 && group_size % const_nunits == 0)
7786 nstores = 1;
7787 lnel = const_nunits;
7788 ltype = vectype;
7789 lvectype = vectype;
7791 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7792 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7795 ivstep = stride_step;
7796 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7797 build_int_cst (TREE_TYPE (ivstep), vf));
7799 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7801 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7802 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7803 create_iv (stride_base, ivstep, NULL,
7804 loop, &incr_gsi, insert_after,
7805 &offvar, NULL);
7806 incr = gsi_stmt (incr_gsi);
7808 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7810 alias_off = build_int_cst (ref_type, 0);
7811 stmt_vec_info next_stmt_info = first_stmt_info;
7812 for (g = 0; g < group_size; g++)
7814 running_off = offvar;
7815 if (g)
7817 tree size = TYPE_SIZE_UNIT (ltype);
7818 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7819 size);
7820 tree newoff = copy_ssa_name (running_off, NULL);
7821 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7822 running_off, pos);
7823 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7824 running_off = newoff;
7826 if (!slp)
7827 op = vect_get_store_rhs (next_stmt_info);
7828 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7829 op, &vec_oprnds);
7830 unsigned int group_el = 0;
7831 unsigned HOST_WIDE_INT
7832 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7833 for (j = 0; j < ncopies; j++)
7835 vec_oprnd = vec_oprnds[j];
7836 /* Pun the vector to extract from if necessary. */
7837 if (lvectype != vectype)
7839 tree tem = make_ssa_name (lvectype);
7840 gimple *pun
7841 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7842 lvectype, vec_oprnd));
7843 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7844 vec_oprnd = tem;
7846 for (i = 0; i < nstores; i++)
7848 tree newref, newoff;
7849 gimple *incr, *assign;
7850 tree size = TYPE_SIZE (ltype);
7851 /* Extract the i'th component. */
7852 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7853 bitsize_int (i), size);
7854 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7855 size, pos);
7857 elem = force_gimple_operand_gsi (gsi, elem, true,
7858 NULL_TREE, true,
7859 GSI_SAME_STMT);
7861 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7862 group_el * elsz);
7863 newref = build2 (MEM_REF, ltype,
7864 running_off, this_off);
7865 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7867 /* And store it to *running_off. */
7868 assign = gimple_build_assign (newref, elem);
7869 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
7871 group_el += lnel;
7872 if (! slp
7873 || group_el == group_size)
7875 newoff = copy_ssa_name (running_off, NULL);
7876 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7877 running_off, stride_step);
7878 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7880 running_off = newoff;
7881 group_el = 0;
7883 if (g == group_size - 1
7884 && !slp)
7886 if (j == 0 && i == 0)
7887 *vec_stmt = assign;
7888 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
7892 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7893 vec_oprnds.release ();
7894 if (slp)
7895 break;
7898 return true;
7901 auto_vec<tree> dr_chain (group_size);
7902 oprnds.create (group_size);
7904 /* Gather-scatter accesses perform only component accesses, alignment
7905 is irrelevant for them. */
7906 if (memory_access_type == VMAT_GATHER_SCATTER)
7907 alignment_support_scheme = dr_unaligned_supported;
7908 else
7909 alignment_support_scheme
7910 = vect_supportable_dr_alignment (vinfo, first_dr_info, false);
7912 gcc_assert (alignment_support_scheme);
7913 vec_loop_masks *loop_masks
7914 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7915 ? &LOOP_VINFO_MASKS (loop_vinfo)
7916 : NULL);
7917 vec_loop_lens *loop_lens
7918 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
7919 ? &LOOP_VINFO_LENS (loop_vinfo)
7920 : NULL);
7922 /* Shouldn't go with length-based approach if fully masked. */
7923 gcc_assert (!loop_lens || !loop_masks);
7925 /* Targets with store-lane instructions must not require explicit
7926 realignment. vect_supportable_dr_alignment always returns either
7927 dr_aligned or dr_unaligned_supported for masked operations. */
7928 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7929 && !mask
7930 && !loop_masks)
7931 || alignment_support_scheme == dr_aligned
7932 || alignment_support_scheme == dr_unaligned_supported);
7934 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
7935 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7936 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7938 tree bump;
7939 tree vec_offset = NULL_TREE;
7940 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7942 aggr_type = NULL_TREE;
7943 bump = NULL_TREE;
7945 else if (memory_access_type == VMAT_GATHER_SCATTER)
7947 aggr_type = elem_type;
7948 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7949 &bump, &vec_offset);
7951 else
7953 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7954 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7955 else
7956 aggr_type = vectype;
7957 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
7958 memory_access_type);
7961 if (mask)
7962 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
7964  /* In case the vectorization factor (VF) is bigger than the number
7965     of elements that we can fit in a vectype (nunits), we have to generate
7966     more than one vector stmt, i.e. we need to "unroll" the
7967     vector stmt by a factor of VF/nunits.  */
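  /* E.g. (illustration) for

       for (i = 0; i < n; i++)
	 a[i] = b[i] + 1;

     vectorized with V4SI vectors (nunits == 4) but a vectorization
     factor of 8, ncopies == 2 and two vector stores are emitted for
     each scalar store.  */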
7969 /* In case of interleaving (non-unit grouped access):
7971 S1: &base + 2 = x2
7972 S2: &base = x0
7973 S3: &base + 1 = x1
7974 S4: &base + 3 = x3
7976     We create vectorized stores starting from the base address (the access of
7977     the first stmt in the chain, S2 in the above example) when the last store
7978     stmt of the chain (S4) is reached:
7980 VS1: &base = vx2
7981 VS2: &base + vec_size*1 = vx0
7982 VS3: &base + vec_size*2 = vx1
7983 VS4: &base + vec_size*3 = vx3
7985 Then permutation statements are generated:
7987 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7988 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7991 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7992 (the order of the data-refs in the output of vect_permute_store_chain
7993 corresponds to the order of scalar stmts in the interleaving chain - see
7994 the documentation of vect_permute_store_chain()).
7996 In case of both multiple types and interleaving, above vector stores and
7997 permutation stmts are created for every copy. The result vector stmts are
7998 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7999 STMT_VINFO_RELATED_STMT for the next copies.
8002 auto_vec<tree> vec_masks;
8003 tree vec_mask = NULL;
8004 auto_vec<tree> vec_offsets;
8005 auto_vec<vec<tree> > gvec_oprnds;
8006 gvec_oprnds.safe_grow_cleared (group_size, true);
8007 for (j = 0; j < ncopies; j++)
8009 gimple *new_stmt;
8010 if (j == 0)
8012 if (slp)
8014 /* Get vectorized arguments for SLP_NODE. */
8015 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8016 op, &vec_oprnds);
8017 vec_oprnd = vec_oprnds[0];
8019 else
8021 /* For interleaved stores we collect vectorized defs for all the
8022 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8023 used as an input to vect_permute_store_chain().
8025 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8026 and OPRNDS are of size 1. */
8027 stmt_vec_info next_stmt_info = first_stmt_info;
8028 for (i = 0; i < group_size; i++)
8030 /* Since gaps are not supported for interleaved stores,
8031 DR_GROUP_SIZE is the exact number of stmts in the chain.
8032		 Therefore, NEXT_STMT_INFO can't be NULL_TREE.  If there
8033		 is no interleaving, DR_GROUP_SIZE is 1, and only one
8034		 iteration of the loop will be executed.  */
8035 op = vect_get_store_rhs (next_stmt_info);
8036 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8037 ncopies, op, &gvec_oprnds[i]);
8038 vec_oprnd = gvec_oprnds[i][0];
8039 dr_chain.quick_push (gvec_oprnds[i][0]);
8040 oprnds.quick_push (gvec_oprnds[i][0]);
8041 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8043 if (mask)
8045 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8046 mask, &vec_masks, mask_vectype);
8047 vec_mask = vec_masks[0];
8051      /* We should have caught mismatched types earlier.  */
8052 gcc_assert (useless_type_conversion_p (vectype,
8053 TREE_TYPE (vec_oprnd)));
8054 bool simd_lane_access_p
8055 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8056 if (simd_lane_access_p
8057 && !loop_masks
8058 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8059 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8060 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8061 && integer_zerop (DR_INIT (first_dr_info->dr))
8062 && alias_sets_conflict_p (get_alias_set (aggr_type),
8063 get_alias_set (TREE_TYPE (ref_type))))
8065 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8066 dataref_offset = build_int_cst (ref_type, 0);
8068 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8070 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8071 &gs_info, &dataref_ptr,
8072 &vec_offsets);
8073 vec_offset = vec_offsets[0];
8075 else
8076 dataref_ptr
8077 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8078 simd_lane_access_p ? loop : NULL,
8079 offset, &dummy, gsi, &ptr_incr,
8080 simd_lane_access_p, NULL_TREE, bump);
8082 else
8084 /* For interleaved stores we created vectorized defs for all the
8085 defs stored in OPRNDS in the previous iteration (previous copy).
8086 DR_CHAIN is then used as an input to vect_permute_store_chain().
8087 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8088 OPRNDS are of size 1. */
8089 for (i = 0; i < group_size; i++)
8091 vec_oprnd = gvec_oprnds[i][j];
8092 dr_chain[i] = gvec_oprnds[i][j];
8093 oprnds[i] = gvec_oprnds[i][j];
8095 if (mask)
8096 vec_mask = vec_masks[j];
8097 if (dataref_offset)
8098 dataref_offset
8099 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8100 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8101 vec_offset = vec_offsets[j];
8102 else
8103 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8104 stmt_info, bump);
8107 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8109 tree vec_array;
8111 /* Get an array into which we can store the individual vectors. */
8112 vec_array = create_vector_array (vectype, vec_num);
8114 /* Invalidate the current contents of VEC_ARRAY. This should
8115 become an RTL clobber too, which prevents the vector registers
8116 from being upward-exposed. */
8117 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8119 /* Store the individual vectors into the array. */
8120 for (i = 0; i < vec_num; i++)
8122 vec_oprnd = dr_chain[i];
8123 write_vector_array (vinfo, stmt_info,
8124 gsi, vec_oprnd, vec_array, i);
8127 tree final_mask = NULL;
8128 if (loop_masks)
8129 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8130 vectype, j);
8131 if (vec_mask)
8132 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8133 vec_mask, gsi);
8135 gcall *call;
8136 if (final_mask)
8138 /* Emit:
8139 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8140 VEC_ARRAY). */
8141 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8142 tree alias_ptr = build_int_cst (ref_type, align);
8143 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8144 dataref_ptr, alias_ptr,
8145 final_mask, vec_array);
8147 else
8149 /* Emit:
8150 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8151 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8152 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8153 vec_array);
8154 gimple_call_set_lhs (call, data_ref);
8156 gimple_call_set_nothrow (call, true);
8157 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8158 new_stmt = call;
8160 /* Record that VEC_ARRAY is now dead. */
8161 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8163 else
8165 new_stmt = NULL;
8166 if (grouped_store)
8168 if (j == 0)
8169 result_chain.create (group_size);
8170 /* Permute. */
8171 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8172 gsi, &result_chain);
8175 stmt_vec_info next_stmt_info = first_stmt_info;
8176 for (i = 0; i < vec_num; i++)
8178 unsigned misalign;
8179 unsigned HOST_WIDE_INT align;
8181 tree final_mask = NULL_TREE;
8182 if (loop_masks)
8183 final_mask = vect_get_loop_mask (gsi, loop_masks,
8184 vec_num * ncopies,
8185 vectype, vec_num * j + i);
8186 if (vec_mask)
8187 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8188 vec_mask, gsi);
8190 if (memory_access_type == VMAT_GATHER_SCATTER)
8192 tree scale = size_int (gs_info.scale);
8193 gcall *call;
8194 if (final_mask)
8195 call = gimple_build_call_internal
8196 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8197 scale, vec_oprnd, final_mask);
8198 else
8199 call = gimple_build_call_internal
8200 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8201 scale, vec_oprnd);
8202 gimple_call_set_nothrow (call, true);
8203 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8204 new_stmt = call;
8205 break;
8208 if (i > 0)
8209 /* Bump the vector pointer. */
8210 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8211 gsi, stmt_info, bump);
8213 if (slp)
8214 vec_oprnd = vec_oprnds[i];
8215 else if (grouped_store)
8216 /* For grouped stores vectorized defs are interleaved in
8217 vect_permute_store_chain(). */
8218 vec_oprnd = result_chain[i];
8220 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8221 if (aligned_access_p (first_dr_info))
8222 misalign = 0;
8223 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8225 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8226 misalign = 0;
8228 else
8229 misalign = DR_MISALIGNMENT (first_dr_info);
8230 if (dataref_offset == NULL_TREE
8231 && TREE_CODE (dataref_ptr) == SSA_NAME)
8232 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8233 misalign);
8234 align = least_bit_hwi (misalign | align);
8236 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8238 tree perm_mask = perm_mask_for_reverse (vectype);
8239 tree perm_dest = vect_create_destination_var
8240 (vect_get_store_rhs (stmt_info), vectype);
8241 tree new_temp = make_ssa_name (perm_dest);
8243 /* Generate the permute statement. */
8244 gimple *perm_stmt
8245 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8246 vec_oprnd, perm_mask);
8247 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8249 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8250 vec_oprnd = new_temp;
8253 /* Arguments are ready. Create the new vector stmt. */
8254 if (final_mask)
8256 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8257 gcall *call
8258 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8259 dataref_ptr, ptr,
8260 final_mask, vec_oprnd);
8261 gimple_call_set_nothrow (call, true);
8262 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8263 new_stmt = call;
8265 else if (loop_lens)
8267 tree final_len
8268 = vect_get_loop_len (loop_vinfo, loop_lens,
8269 vec_num * ncopies, vec_num * j + i);
8270 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8271 machine_mode vmode = TYPE_MODE (vectype);
8272 opt_machine_mode new_ovmode
8273 = get_len_load_store_mode (vmode, false);
8274 machine_mode new_vmode = new_ovmode.require ();
8275 /* Need conversion if it's wrapped with VnQI. */
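		  /* E.g. a V4SI value would be viewed as V16QI here when the
		     target's len_store pattern only handles QImode element
		     vectors (an illustration; the actual mode pair comes
		     from get_len_load_store_mode above).  */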
8276 if (vmode != new_vmode)
8278 tree new_vtype
8279 = build_vector_type_for_mode (unsigned_intQI_type_node,
8280 new_vmode);
8281 tree var
8282 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8283 vec_oprnd
8284 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8285 gassign *new_stmt
8286 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8287 vec_oprnd);
8288 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8289 gsi);
8290 vec_oprnd = var;
8292 gcall *call
8293 = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
8294 ptr, final_len, vec_oprnd);
8295 gimple_call_set_nothrow (call, true);
8296 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8297 new_stmt = call;
8299 else
8301 data_ref = fold_build2 (MEM_REF, vectype,
8302 dataref_ptr,
8303 dataref_offset
8304 ? dataref_offset
8305 : build_int_cst (ref_type, 0));
8306 if (aligned_access_p (first_dr_info))
8308 else
8309 TREE_TYPE (data_ref)
8310 = build_aligned_type (TREE_TYPE (data_ref),
8311 align * BITS_PER_UNIT);
8312 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8313 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8314 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8317 if (slp)
8318 continue;
8320 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8321 if (!next_stmt_info)
8322 break;
8325 if (!slp)
8327 if (j == 0)
8328 *vec_stmt = new_stmt;
8329 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8333 for (i = 0; i < group_size; ++i)
8335 vec<tree> oprndsi = gvec_oprnds[i];
8336 oprndsi.release ();
8338 oprnds.release ();
8339 result_chain.release ();
8340 vec_oprnds.release ();
8342 return true;
8345 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8346 VECTOR_CST mask. No checks are made that the target platform supports the
8347 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8348 vect_gen_perm_mask_checked. */
8350 tree
8351 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8353 tree mask_type;
8355 poly_uint64 nunits = sel.length ();
8356 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8358 mask_type = build_vector_type (ssizetype, nunits);
8359 return vec_perm_indices_to_tree (mask_type, sel);
8362 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8363 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8365 tree
8366 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8368 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8369 return vect_gen_perm_mask_any (vectype, sel);
8372 /* Given vector variables X and Y that were generated for the scalar
8373    STMT_INFO, generate instructions to permute the vector elements of X and Y
8374    using permutation mask MASK_VEC, insert them at *GSI and return the
8375    permuted vector variable.  */
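/* For example (an illustration), with four-element vectors a MASK_VEC of
   { 3, 2, 1, 0 } yields X with its elements reversed; selector values of
   4 and above would pick lanes from Y instead.  */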
8377 static tree
8378 permute_vec_elements (vec_info *vinfo,
8379 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8380 gimple_stmt_iterator *gsi)
8382 tree vectype = TREE_TYPE (x);
8383 tree perm_dest, data_ref;
8384 gimple *perm_stmt;
8386 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8387 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8388 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8389 else
8390 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8391 data_ref = make_ssa_name (perm_dest);
8393 /* Generate the permute statement. */
8394 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8395 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8397 return data_ref;
8400 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8401    inserting them on the loop's preheader edge.  Returns true if we
8402    were successful in doing so (and thus STMT_INFO can then be moved),
8403    otherwise returns false.  */
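/* A sketch of the intended use (hypothetical names): for an invariant
   load _2 = *p_1 where p_1 = &a + off_3 is defined inside LOOP but only
   from loop-invariant operands, the definition of p_1 is moved to the
   preheader edge so that the load itself can then be hoisted; PHI
   definitions or deeper use webs make us give up instead.  */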
8405 static bool
8406 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8408 ssa_op_iter i;
8409 tree op;
8410 bool any = false;
8412 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8414 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8415 if (!gimple_nop_p (def_stmt)
8416 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8418	  /* Make sure we don't need to recurse.  While we could do
8419	     so in simple cases, for more complex use webs we don't
8420	     have an easy way to preserve stmt order to fulfil
8421	     dependencies within them.  */
8422 tree op2;
8423 ssa_op_iter i2;
8424 if (gimple_code (def_stmt) == GIMPLE_PHI)
8425 return false;
8426 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8428 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8429 if (!gimple_nop_p (def_stmt2)
8430 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8431 return false;
8433 any = true;
8437 if (!any)
8438 return true;
8440 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8442 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8443 if (!gimple_nop_p (def_stmt)
8444 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8446 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8447 gsi_remove (&gsi, false);
8448 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8452 return true;
8455 /* vectorizable_load.
8457    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8458 that can be vectorized.
8459 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8460 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8461 Return true if STMT_INFO is vectorizable in this way. */
8463 static bool
8464 vectorizable_load (vec_info *vinfo,
8465 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8466 gimple **vec_stmt, slp_tree slp_node,
8467 stmt_vector_for_cost *cost_vec)
8469 tree scalar_dest;
8470 tree vec_dest = NULL;
8471 tree data_ref = NULL;
8472 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8473 class loop *loop = NULL;
8474 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8475 bool nested_in_vect_loop = false;
8476 tree elem_type;
8477 tree new_temp;
8478 machine_mode mode;
8479 tree dummy;
8480 tree dataref_ptr = NULL_TREE;
8481 tree dataref_offset = NULL_TREE;
8482 gimple *ptr_incr = NULL;
8483 int ncopies;
8484 int i, j;
8485 unsigned int group_size;
8486 poly_uint64 group_gap_adj;
8487 tree msq = NULL_TREE, lsq;
8488 tree offset = NULL_TREE;
8489 tree byte_offset = NULL_TREE;
8490 tree realignment_token = NULL_TREE;
8491 gphi *phi = NULL;
8492 vec<tree> dr_chain = vNULL;
8493 bool grouped_load = false;
8494 stmt_vec_info first_stmt_info;
8495 stmt_vec_info first_stmt_info_for_drptr = NULL;
8496 bool compute_in_loop = false;
8497 class loop *at_loop;
8498 int vec_num;
8499 bool slp = (slp_node != NULL);
8500 bool slp_perm = false;
8501 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8502 poly_uint64 vf;
8503 tree aggr_type;
8504 gather_scatter_info gs_info;
8505 tree ref_type;
8506 enum vect_def_type mask_dt = vect_unknown_def_type;
8508 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8509 return false;
8511 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8512 && ! vec_stmt)
8513 return false;
8515 if (!STMT_VINFO_DATA_REF (stmt_info))
8516 return false;
8518  /* ??? Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
8519     for unpermuted loads, but we get passed SLP_TREE_REPRESENTATIVE,
8520     which can be different when reduction chains were re-ordered.
8521     Now that we know we're a dataref, reset stmt_info back to
8522     SLP_TREE_SCALAR_STMTS[0].  Once we're SLP-only, things should be
8523     refactored to maintain the dr_vec_info pointer for the
8524     relevant access explicitly.  */
8525 stmt_vec_info orig_stmt_info = stmt_info;
8526 if (slp_node)
8527 stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8529 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8530 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8532 scalar_dest = gimple_assign_lhs (assign);
8533 if (TREE_CODE (scalar_dest) != SSA_NAME)
8534 return false;
8536 tree_code code = gimple_assign_rhs_code (assign);
8537 if (code != ARRAY_REF
8538 && code != BIT_FIELD_REF
8539 && code != INDIRECT_REF
8540 && code != COMPONENT_REF
8541 && code != IMAGPART_EXPR
8542 && code != REALPART_EXPR
8543 && code != MEM_REF
8544 && TREE_CODE_CLASS (code) != tcc_declaration)
8545 return false;
8547 else
8549 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8550 if (!call || !gimple_call_internal_p (call))
8551 return false;
8553 internal_fn ifn = gimple_call_internal_fn (call);
8554 if (!internal_load_fn_p (ifn))
8555 return false;
8557 scalar_dest = gimple_call_lhs (call);
8558 if (!scalar_dest)
8559 return false;
8561 int mask_index = internal_fn_mask_index (ifn);
8562 if (mask_index >= 0
8563 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node,
8564 /* ??? For SLP we only have operands for
8565 the mask operand. */
8566 slp_node ? 0 : mask_index,
8567 &mask, NULL, &mask_dt, &mask_vectype))
8568 return false;
8571 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8572 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8574 if (loop_vinfo)
8576 loop = LOOP_VINFO_LOOP (loop_vinfo);
8577 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8578 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8580 else
8581 vf = 1;
8583 /* Multiple types in SLP are handled by creating the appropriate number of
8584 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8585 case of SLP. */
8586 if (slp)
8587 ncopies = 1;
8588 else
8589 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8591 gcc_assert (ncopies >= 1);
8593 /* FORNOW. This restriction should be relaxed. */
8594 if (nested_in_vect_loop && ncopies > 1)
8596 if (dump_enabled_p ())
8597 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8598 "multiple types in nested loop.\n");
8599 return false;
8602 /* Invalidate assumptions made by dependence analysis when vectorization
8603 on the unrolled body effectively re-orders stmts. */
8604 if (ncopies > 1
8605 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8606 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8607 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8609 if (dump_enabled_p ())
8610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8611 "cannot perform implicit CSE when unrolling "
8612 "with negative dependence distance\n");
8613 return false;
8616 elem_type = TREE_TYPE (vectype);
8617 mode = TYPE_MODE (vectype);
8619 /* FORNOW. In some cases can vectorize even if data-type not supported
8620 (e.g. - data copies). */
8621 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8623 if (dump_enabled_p ())
8624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8625 "Aligned load, but unsupported type.\n");
8626 return false;
8629 /* Check if the load is a part of an interleaving chain. */
8630 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8632 grouped_load = true;
8633 /* FORNOW */
8634 gcc_assert (!nested_in_vect_loop);
8635 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8637 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8638 group_size = DR_GROUP_SIZE (first_stmt_info);
8640 /* Refuse non-SLP vectorization of SLP-only groups. */
8641 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8643 if (dump_enabled_p ())
8644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8645 "cannot vectorize load in non-SLP mode.\n");
8646 return false;
8649 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8651 slp_perm = true;
8653 if (!loop_vinfo)
8655 /* In BB vectorization we may not actually use a loaded vector
8656 accessing elements in excess of DR_GROUP_SIZE. */
8657 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8658 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8659 unsigned HOST_WIDE_INT nunits;
8660 unsigned j, k, maxk = 0;
8661 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8662 if (k > maxk)
8663 maxk = k;
8664 tree vectype = STMT_VINFO_VECTYPE (group_info);
8665 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8666 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8668 if (dump_enabled_p ())
8669 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8670 "BB vectorization with gaps at the end of "
8671 "a load is not supported\n");
8672 return false;
8676 auto_vec<tree> tem;
8677 unsigned n_perms;
8678 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8679 true, &n_perms))
8681 if (dump_enabled_p ())
8682 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8683 vect_location,
8684 "unsupported load permutation\n");
8685 return false;
8689 /* Invalidate assumptions made by dependence analysis when vectorization
8690 on the unrolled body effectively re-orders stmts. */
8691 if (!PURE_SLP_STMT (stmt_info)
8692 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8693 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8694 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8696 if (dump_enabled_p ())
8697 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8698 "cannot perform implicit CSE when performing "
8699 "group loads with negative dependence distance\n");
8700 return false;
8703 else
8704 group_size = 1;
8706 vect_memory_access_type memory_access_type;
8707 enum dr_alignment_support alignment_support_scheme;
8708 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8709 ncopies, &memory_access_type,
8710 &alignment_support_scheme, &gs_info))
8711 return false;
8713 if (mask)
8715 if (memory_access_type == VMAT_CONTIGUOUS)
8717 machine_mode vec_mode = TYPE_MODE (vectype);
8718 if (!VECTOR_MODE_P (vec_mode)
8719 || !can_vec_mask_load_store_p (vec_mode,
8720 TYPE_MODE (mask_vectype), true))
8721 return false;
8723 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8724 && memory_access_type != VMAT_GATHER_SCATTER)
8726 if (dump_enabled_p ())
8727 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8728 "unsupported access type for masked load.\n");
8729 return false;
8731 else if (memory_access_type == VMAT_GATHER_SCATTER
8732 && gs_info.ifn == IFN_LAST
8733 && !gs_info.decl)
8735 if (dump_enabled_p ())
8736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8737 "unsupported masked emulated gather.\n");
8738 return false;
8742 if (!vec_stmt) /* transformation not required. */
8744 if (slp_node
8745 && mask
8746 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8747 mask_vectype))
8749 if (dump_enabled_p ())
8750 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8751 "incompatible vector types for invariants\n");
8752 return false;
8755 if (!slp)
8756 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8758 if (loop_vinfo
8759 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8760 check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8761 group_size, memory_access_type,
8762 &gs_info, mask);
8764 if (dump_enabled_p ()
8765 && memory_access_type != VMAT_ELEMENTWISE
8766 && memory_access_type != VMAT_GATHER_SCATTER
8767 && alignment_support_scheme != dr_aligned)
8768 dump_printf_loc (MSG_NOTE, vect_location,
8769 "Vectorizing an unaligned access.\n");
8771 STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
8772 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8773 &gs_info, slp_node, cost_vec);
8774 return true;
8777 if (!slp)
8778 gcc_assert (memory_access_type
8779 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8781 if (dump_enabled_p ())
8782 dump_printf_loc (MSG_NOTE, vect_location,
8783 "transform load. ncopies = %d\n", ncopies);
8785 /* Transform. */
8787 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8788 ensure_base_align (dr_info);
8790 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8792 vect_build_gather_load_calls (vinfo,
8793 stmt_info, gsi, vec_stmt, &gs_info, mask);
8794 return true;
8797 if (memory_access_type == VMAT_INVARIANT)
8799 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8800 /* If we have versioned for aliasing or the loop doesn't
8801 have any data dependencies that would preclude this,
8802 then we are sure this is a loop invariant load and
8803 thus we can insert it on the preheader edge. */
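      /* In the hoisted case (an illustration): the scalar load is emitted
	 once on the preheader edge and each of the ncopies vector copies
	 below is simply a splat of that scalar built by vect_init_vector.  */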
8804 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8805 && !nested_in_vect_loop
8806 && hoist_defs_of_uses (stmt_info, loop));
8807 if (hoist_p)
8809 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8810 if (dump_enabled_p ())
8811 dump_printf_loc (MSG_NOTE, vect_location,
8812 "hoisting out of the vectorized loop: %G", stmt);
8813 scalar_dest = copy_ssa_name (scalar_dest);
8814 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8815 gsi_insert_on_edge_immediate
8816 (loop_preheader_edge (loop),
8817 gimple_build_assign (scalar_dest, rhs));
8819 /* These copies are all equivalent, but currently the representation
8820 requires a separate STMT_VINFO_VEC_STMT for each one. */
8821 gimple_stmt_iterator gsi2 = *gsi;
8822 gsi_next (&gsi2);
8823 for (j = 0; j < ncopies; j++)
8825 if (hoist_p)
8826 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8827 vectype, NULL);
8828 else
8829 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8830 vectype, &gsi2);
8831 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8832 if (slp)
8833 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8834 else
8836 if (j == 0)
8837 *vec_stmt = new_stmt;
8838 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8841 return true;
8844 if (memory_access_type == VMAT_ELEMENTWISE
8845 || memory_access_type == VMAT_STRIDED_SLP)
8847 gimple_stmt_iterator incr_gsi;
8848 bool insert_after;
8849 tree offvar;
8850 tree ivstep;
8851 tree running_off;
8852 vec<constructor_elt, va_gc> *v = NULL;
8853 tree stride_base, stride_step, alias_off;
8854 /* Checked by get_load_store_type. */
8855 unsigned int const_nunits = nunits.to_constant ();
8856 unsigned HOST_WIDE_INT cst_offset = 0;
8857 tree dr_offset;
8859 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8860 gcc_assert (!nested_in_vect_loop);
8862 if (grouped_load)
8864 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8865 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8867 else
8869 first_stmt_info = stmt_info;
8870 first_dr_info = dr_info;
8872 if (slp && grouped_load)
8874 group_size = DR_GROUP_SIZE (first_stmt_info);
8875 ref_type = get_group_alias_ptr_type (first_stmt_info);
8877 else
8879 if (grouped_load)
8880 cst_offset
8881 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8882 * vect_get_place_in_interleaving_chain (stmt_info,
8883 first_stmt_info));
8884 group_size = 1;
8885 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8888 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8889 stride_base
8890 = fold_build_pointer_plus
8891 (DR_BASE_ADDRESS (first_dr_info->dr),
8892 size_binop (PLUS_EXPR,
8893 convert_to_ptrofftype (dr_offset),
8894 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8895 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8897 /* For a load with a loop-invariant stride that is not a power of 2
8898 (i.e. not a grouped access) like so:
8900 for (i = 0; i < n; i += stride)
8901 ... = array[i];
8903 we generate a new induction variable and new accesses to
8904 form a new vector (or vectors, depending on ncopies):
8906 for (j = 0; ; j += VF*stride)
8907 tmp1 = array[j];
8908 tmp2 = array[j + stride];
8910 vectemp = {tmp1, tmp2, ...}
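     For instance (hypothetical values, purely for illustration), with
     VF == 4 and stride == 3 the generated accesses look like

       for (j = 0; ; j += 4*3)
         tmp1 = array[j];
         tmp2 = array[j + 3];
         tmp3 = array[j + 6];
         tmp4 = array[j + 9];
         vectemp = {tmp1, tmp2, tmp3, tmp4}

     so the induction variable built just below is stepped by
     VF * stride = 12 elements' worth of bytes per vector iteration.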
8913 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8914 build_int_cst (TREE_TYPE (stride_step), vf));
8916 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8918 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8919 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8920 create_iv (stride_base, ivstep, NULL,
8921 loop, &incr_gsi, insert_after,
8922 &offvar, NULL);
8924 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8926 running_off = offvar;
8927 alias_off = build_int_cst (ref_type, 0);
8928 int nloads = const_nunits;
8929 int lnel = 1;
8930 tree ltype = TREE_TYPE (vectype);
8931 tree lvectype = vectype;
8932 auto_vec<tree> dr_chain;
8933 if (memory_access_type == VMAT_STRIDED_SLP)
8935 if (group_size < const_nunits)
8937 /* First check if vec_init optab supports construction from vector
8938 elts directly. Otherwise avoid emitting a constructor of
8939 vector elements by performing the loads using an integer type
8940 of the same size, constructing a vector of those and then
8941 re-interpreting it as the original vector type. This avoids a
8942 huge runtime penalty due to the general inability to perform
8943 store forwarding from smaller stores to a larger load. */
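/* A hypothetical illustration (the concrete types depend on what the
   target's vec_init optab and vector_vector_composition_type provide):
   for a V4SI vectype with group_size == 2 the composition type could be
   a two-element vector of 64-bit integers, so nloads == 2, each load
   fetches one 64-bit piece covering a whole group, and the result is
   VIEW_CONVERTed back to V4SI further below.  */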
8944 tree ptype;
8945 tree vtype
8946 = vector_vector_composition_type (vectype,
8947 const_nunits / group_size,
8948 &ptype);
8949 if (vtype != NULL_TREE)
8951 nloads = const_nunits / group_size;
8952 lnel = group_size;
8953 lvectype = vtype;
8954 ltype = ptype;
8957 else
8959 nloads = 1;
8960 lnel = const_nunits;
8961 ltype = vectype;
8963 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8965 /* Load vector(1) scalar_type directly if the vectype has just one element. */
8966 else if (nloads == 1)
8967 ltype = vectype;
8969 if (slp)
8971 /* For SLP permutation support we need to load the whole group,
8972 not only the number of vector stmts the permutation result
8973 fits in. */
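/* For example (hypothetical sizes): group_size == 3, const_vf == 4 and
   const_nunits == 4 give ncopies = CEIL (3 * 4, 4) == 3 vector stmts,
   enough to cover every scalar of the group rather than only the lanes
   the permutation result needs.  */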
8974 if (slp_perm)
8976 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8977 variable VF. */
8978 unsigned int const_vf = vf.to_constant ();
8979 ncopies = CEIL (group_size * const_vf, const_nunits);
8980 dr_chain.create (ncopies);
8982 else
8983 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8985 unsigned int group_el = 0;
8986 unsigned HOST_WIDE_INT
8987 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8988 for (j = 0; j < ncopies; j++)
8990 if (nloads > 1)
8991 vec_alloc (v, nloads);
8992 gimple *new_stmt = NULL;
8993 for (i = 0; i < nloads; i++)
8995 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8996 group_el * elsz + cst_offset);
8997 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
8998 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8999 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9000 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9001 if (nloads > 1)
9002 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9003 gimple_assign_lhs (new_stmt));
9005 group_el += lnel;
9006 if (! slp
9007 || group_el == group_size)
9009 tree newoff = copy_ssa_name (running_off);
9010 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9011 running_off, stride_step);
9012 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9014 running_off = newoff;
9015 group_el = 0;
9018 if (nloads > 1)
9020 tree vec_inv = build_constructor (lvectype, v);
9021 new_temp = vect_init_vector (vinfo, stmt_info,
9022 vec_inv, lvectype, gsi);
9023 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9024 if (lvectype != vectype)
9026 new_stmt = gimple_build_assign (make_ssa_name (vectype),
9027 VIEW_CONVERT_EXPR,
9028 build1 (VIEW_CONVERT_EXPR,
9029 vectype, new_temp));
9030 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9034 if (slp)
9036 if (slp_perm)
9037 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9038 else
9039 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9041 else
9043 if (j == 0)
9044 *vec_stmt = new_stmt;
9045 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9048 if (slp_perm)
9050 unsigned n_perms;
9051 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9052 false, &n_perms);
9054 return true;
9057 if (memory_access_type == VMAT_GATHER_SCATTER
9058 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9059 grouped_load = false;
9061 if (grouped_load)
9063 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9064 group_size = DR_GROUP_SIZE (first_stmt_info);
9065 /* For SLP vectorization we directly vectorize a subchain
9066 without permutation. */
9067 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9068 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9069 /* For BB vectorization always use the first stmt to base
9070 the data ref pointer on. */
9071 if (bb_vinfo)
9072 first_stmt_info_for_drptr
9073 = vect_find_first_scalar_stmt_in_slp (slp_node);
9075 /* Check if the chain of loads is already vectorized. */
9076 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9077 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9078 ??? But we can only do so if there is exactly one
9079 as we have no way to get at the rest. Leave the CSE
9080 opportunity alone.
9081 ??? With the group load eventually participating
9082 in multiple different permutations (having multiple
9083 slp nodes which refer to the same group) the CSE
9084 is even wrong code. See PR56270. */
9085 && !slp)
9087 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9088 return true;
9090 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9091 group_gap_adj = 0;
9093 /* VEC_NUM is the number of vect stmts to be created for this group. */
9094 if (slp)
9096 grouped_load = false;
9097 /* If an SLP permutation is from N elements to N elements,
9098 and if one vector holds a whole number of N, we can load
9099 the inputs to the permutation in the same way as an
9100 unpermuted sequence. In other cases we need to load the
9101 whole group, not only the number of vector stmts the
9102 permutation result fits in. */
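/* As a hypothetical example: a group of size 4 whose SLP node covers
   only 3 lanes without a load permutation ends up in the "else" arm
   below with group_gap_adj == 4 - 3 == 1, i.e. one group element has
   to be skipped once each group has been loaded.  */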
9103 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9104 if (slp_perm
9105 && (group_size != scalar_lanes
9106 || !multiple_p (nunits, group_size)))
9108 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9109 variable VF; see vect_transform_slp_perm_load. */
9110 unsigned int const_vf = vf.to_constant ();
9111 unsigned int const_nunits = nunits.to_constant ();
9112 vec_num = CEIL (group_size * const_vf, const_nunits);
9113 group_gap_adj = vf * group_size - nunits * vec_num;
9115 else
9117 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9118 group_gap_adj
9119 = group_size - scalar_lanes;
9122 else
9123 vec_num = group_size;
9125 ref_type = get_group_alias_ptr_type (first_stmt_info);
9127 else
9129 first_stmt_info = stmt_info;
9130 first_dr_info = dr_info;
9131 group_size = vec_num = 1;
9132 group_gap_adj = 0;
9133 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9136 gcc_assert (alignment_support_scheme);
9137 vec_loop_masks *loop_masks
9138 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9139 ? &LOOP_VINFO_MASKS (loop_vinfo)
9140 : NULL);
9141 vec_loop_lens *loop_lens
9142 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9143 ? &LOOP_VINFO_LENS (loop_vinfo)
9144 : NULL);
9146 /* We shouldn't use the length-based approach if the loop is fully masked. */
9147 gcc_assert (!loop_lens || !loop_masks);
9149 /* Targets with store-lane instructions must not require explicit
9150 realignment. vect_supportable_dr_alignment always returns either
9151 dr_aligned or dr_unaligned_supported for masked operations. */
9152 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9153 && !mask
9154 && !loop_masks)
9155 || alignment_support_scheme == dr_aligned
9156 || alignment_support_scheme == dr_unaligned_supported);
9158 /* In case the vectorization factor (VF) is bigger than the number
9159 of elements that we can fit in a vectype (nunits), we have to generate
9160 more than one vector stmt - i.e. - we need to "unroll" the
9161 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9162 from one copy of the vector stmt to the next, in the field
9163 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9164 stages to find the correct vector defs to be used when vectorizing
9165 stmts that use the defs of the current stmt. The example below
9166 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9167 need to create 4 vectorized stmts):
9169 before vectorization:
9170 RELATED_STMT VEC_STMT
9171 S1: x = memref - -
9172 S2: z = x + 1 - -
9174 step 1: vectorize stmt S1:
9175 We first create the vector stmt VS1_0, and, as usual, record a
9176 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9177 Next, we create the vector stmt VS1_1, and record a pointer to
9178 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9179 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9180 stmts and pointers:
9181 RELATED_STMT VEC_STMT
9182 VS1_0: vx0 = memref0 VS1_1 -
9183 VS1_1: vx1 = memref1 VS1_2 -
9184 VS1_2: vx2 = memref2 VS1_3 -
9185 VS1_3: vx3 = memref3 - -
9186 S1: x = load - VS1_0
9187 S2: z = x + 1 - -
9190 /* In case of interleaving (non-unit grouped access):
9192 S1: x2 = &base + 2
9193 S2: x0 = &base
9194 S3: x1 = &base + 1
9195 S4: x3 = &base + 3
9197 Vectorized loads are created in the order of memory accesses
9198 starting from the access of the first stmt of the chain:
9200 VS1: vx0 = &base
9201 VS2: vx1 = &base + vec_size*1
9202 VS3: vx3 = &base + vec_size*2
9203 VS4: vx4 = &base + vec_size*3
9205 Then permutation statements are generated:
9207 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9208 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9211 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9212 (the order of the data-refs in the output of vect_permute_load_chain
9213 corresponds to the order of scalar stmts in the interleaving chain - see
9214 the documentation of vect_permute_load_chain()).
9215 The generation of permutation stmts and recording them in
9216 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9218 In case of both multiple types and interleaving, the vector loads and
9219 permutation stmts above are created for every copy. The result vector
9220 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9221 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9223 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9224 on a target that supports unaligned accesses (dr_unaligned_supported)
9225 we generate the following code:
9226 p = initial_addr;
9227 indx = 0;
9228 loop {
9229 p = p + indx * vectype_size;
9230 vec_dest = *(p);
9231 indx = indx + 1;
9234 Otherwise, the data reference is potentially unaligned on a target that
9235 does not support unaligned accesses (dr_explicit_realign_optimized) -
9236 then generate the following code, in which the data in each iteration is
9237 obtained by two vector loads, one from the previous iteration, and one
9238 from the current iteration:
9239 p1 = initial_addr;
9240 msq_init = *(floor(p1))
9241 p2 = initial_addr + VS - 1;
9242 realignment_token = call target_builtin;
9243 indx = 0;
9244 loop {
9245 p2 = p2 + indx * vectype_size
9246 lsq = *(floor(p2))
9247 vec_dest = realign_load (msq, lsq, realignment_token)
9248 indx = indx + 1;
9249 msq = lsq;
9250 } */
9252 /* If the misalignment remains the same throughout the execution of the
9253 loop, we can create the init_addr and permutation mask at the loop
9254 preheader. Otherwise, it needs to be created inside the loop.
9255 This can only occur when vectorizing memory accesses in the inner-loop
9256 nested within an outer-loop that is being vectorized. */
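/* For instance, if the step of the data reference is not known to be a
   multiple of the vector size, the misalignment can differ from one
   iteration to the next, which is why the realignment data is computed
   inside the loop here instead of once in the preheader.  */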
9258 if (nested_in_vect_loop
9259 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9260 GET_MODE_SIZE (TYPE_MODE (vectype))))
9262 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9263 compute_in_loop = true;
9266 bool diff_first_stmt_info
9267 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9269 if ((alignment_support_scheme == dr_explicit_realign_optimized
9270 || alignment_support_scheme == dr_explicit_realign)
9271 && !compute_in_loop)
9273 /* If we have different first_stmt_info, we can't set up realignment
9274 here, since we can't guarantee first_stmt_info DR has been
9275 initialized yet, use first_stmt_info_for_drptr DR by bumping the
9276 distance from first_stmt_info DR instead as below. */
9277 if (!diff_first_stmt_info)
9278 msq = vect_setup_realignment (vinfo,
9279 first_stmt_info, gsi, &realignment_token,
9280 alignment_support_scheme, NULL_TREE,
9281 &at_loop);
9282 if (alignment_support_scheme == dr_explicit_realign_optimized)
9284 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9285 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9286 size_one_node);
9287 gcc_assert (!first_stmt_info_for_drptr);
9290 else
9291 at_loop = loop;
9293 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9294 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9296 tree bump;
9297 tree vec_offset = NULL_TREE;
9298 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9300 aggr_type = NULL_TREE;
9301 bump = NULL_TREE;
9303 else if (memory_access_type == VMAT_GATHER_SCATTER)
9305 aggr_type = elem_type;
9306 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9307 &bump, &vec_offset);
9309 else
9311 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9312 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9313 else
9314 aggr_type = vectype;
9315 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9316 memory_access_type);
9319 vec<tree> vec_offsets = vNULL;
9320 auto_vec<tree> vec_masks;
9321 if (mask)
9322 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9323 mask, &vec_masks, mask_vectype, NULL_TREE);
9324 tree vec_mask = NULL_TREE;
9325 poly_uint64 group_elt = 0;
9326 for (j = 0; j < ncopies; j++)
9328 /* 1. Create the vector or array pointer update chain. */
9329 if (j == 0)
9331 bool simd_lane_access_p
9332 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9333 if (simd_lane_access_p
9334 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9335 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9336 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9337 && integer_zerop (DR_INIT (first_dr_info->dr))
9338 && alias_sets_conflict_p (get_alias_set (aggr_type),
9339 get_alias_set (TREE_TYPE (ref_type)))
9340 && (alignment_support_scheme == dr_aligned
9341 || alignment_support_scheme == dr_unaligned_supported))
9343 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9344 dataref_offset = build_int_cst (ref_type, 0);
9346 else if (diff_first_stmt_info)
9348 dataref_ptr
9349 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9350 aggr_type, at_loop, offset, &dummy,
9351 gsi, &ptr_incr, simd_lane_access_p,
9352 byte_offset, bump);
9353 /* Adjust the pointer by the difference to first_stmt. */
9354 data_reference_p ptrdr
9355 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9356 tree diff
9357 = fold_convert (sizetype,
9358 size_binop (MINUS_EXPR,
9359 DR_INIT (first_dr_info->dr),
9360 DR_INIT (ptrdr)));
9361 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9362 stmt_info, diff);
9363 if (alignment_support_scheme == dr_explicit_realign)
9365 msq = vect_setup_realignment (vinfo,
9366 first_stmt_info_for_drptr, gsi,
9367 &realignment_token,
9368 alignment_support_scheme,
9369 dataref_ptr, &at_loop);
9370 gcc_assert (!compute_in_loop);
9373 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9375 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9376 &gs_info, &dataref_ptr,
9377 &vec_offsets);
9379 else
9380 dataref_ptr
9381 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9382 at_loop,
9383 offset, &dummy, gsi, &ptr_incr,
9384 simd_lane_access_p,
9385 byte_offset, bump);
9386 if (mask)
9387 vec_mask = vec_masks[0];
9389 else
9391 if (dataref_offset)
9392 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9393 bump);
9394 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9395 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9396 stmt_info, bump);
9397 if (mask)
9398 vec_mask = vec_masks[j];
9401 if (grouped_load || slp_perm)
9402 dr_chain.create (vec_num);
9404 gimple *new_stmt = NULL;
9405 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9407 tree vec_array;
9409 vec_array = create_vector_array (vectype, vec_num);
9411 tree final_mask = NULL_TREE;
9412 if (loop_masks)
9413 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9414 vectype, j);
9415 if (vec_mask)
9416 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9417 vec_mask, gsi);
9419 gcall *call;
9420 if (final_mask)
9422 /* Emit:
9423 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9424 VEC_MASK). */
9425 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9426 tree alias_ptr = build_int_cst (ref_type, align);
9427 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9428 dataref_ptr, alias_ptr,
9429 final_mask);
9431 else
9433 /* Emit:
9434 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9435 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9436 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9438 gimple_call_set_lhs (call, vec_array);
9439 gimple_call_set_nothrow (call, true);
9440 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9441 new_stmt = call;
9443 /* Extract each vector into an SSA_NAME. */
9444 for (i = 0; i < vec_num; i++)
9446 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9447 vec_array, i);
9448 dr_chain.quick_push (new_temp);
9451 /* Record the mapping between SSA_NAMEs and statements. */
9452 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9454 /* Record that VEC_ARRAY is now dead. */
9455 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9457 else
9459 for (i = 0; i < vec_num; i++)
9461 tree final_mask = NULL_TREE;
9462 if (loop_masks
9463 && memory_access_type != VMAT_INVARIANT)
9464 final_mask = vect_get_loop_mask (gsi, loop_masks,
9465 vec_num * ncopies,
9466 vectype, vec_num * j + i);
9467 if (vec_mask)
9468 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9469 vec_mask, gsi);
9471 if (i > 0)
9472 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9473 gsi, stmt_info, bump);
9475 /* 2. Create the vector-load in the loop. */
9476 switch (alignment_support_scheme)
9478 case dr_aligned:
9479 case dr_unaligned_supported:
9481 unsigned int misalign;
9482 unsigned HOST_WIDE_INT align;
9484 if (memory_access_type == VMAT_GATHER_SCATTER
9485 && gs_info.ifn != IFN_LAST)
9487 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9488 vec_offset = vec_offsets[j];
9489 tree zero = build_zero_cst (vectype);
9490 tree scale = size_int (gs_info.scale);
9491 gcall *call;
9492 if (final_mask)
9493 call = gimple_build_call_internal
9494 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9495 vec_offset, scale, zero, final_mask);
9496 else
9497 call = gimple_build_call_internal
9498 (IFN_GATHER_LOAD, 4, dataref_ptr,
9499 vec_offset, scale, zero);
9500 gimple_call_set_nothrow (call, true);
9501 new_stmt = call;
9502 data_ref = NULL_TREE;
9503 break;
9505 else if (memory_access_type == VMAT_GATHER_SCATTER)
9507 /* Emulated gather-scatter. */
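/* No real gather instruction is used on this path, so the access is
   open coded (a rough picture, lane count hypothetical): for each lane
   k the offset is extracted with BIT_FIELD_REF, the scalar at
   base + offset[k] * scale is loaded, and the scalars are assembled
   into a CONSTRUCTOR that forms the vector result.  */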
9508 gcc_assert (!final_mask);
9509 unsigned HOST_WIDE_INT const_nunits
9510 = nunits.to_constant ();
9511 unsigned HOST_WIDE_INT const_offset_nunits
9512 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9513 .to_constant ();
9514 vec<constructor_elt, va_gc> *ctor_elts;
9515 vec_alloc (ctor_elts, const_nunits);
9516 gimple_seq stmts = NULL;
9517 /* We support offset vectors with more elements
9518 than the data vector for now. */
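/* E.g. (hypothetical modes): a V8SI offset vector used with V4DF data
   gives factor == 8 / 4 == 2, so copy j reads vec_offsets[j / 2]
   starting at element (j % 2) * 4.  */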
9519 unsigned HOST_WIDE_INT factor
9520 = const_offset_nunits / const_nunits;
9521 vec_offset = vec_offsets[j / factor];
9522 unsigned elt_offset = (j % factor) * const_nunits;
9523 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9524 tree scale = size_int (gs_info.scale);
9525 align
9526 = get_object_alignment (DR_REF (first_dr_info->dr));
9527 tree ltype = build_aligned_type (TREE_TYPE (vectype),
9528 align);
9529 for (unsigned k = 0; k < const_nunits; ++k)
9531 tree boff = size_binop (MULT_EXPR,
9532 TYPE_SIZE (idx_type),
9533 bitsize_int
9534 (k + elt_offset));
9535 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9536 idx_type, vec_offset,
9537 TYPE_SIZE (idx_type),
9538 boff);
9539 idx = gimple_convert (&stmts, sizetype, idx);
9540 idx = gimple_build (&stmts, MULT_EXPR,
9541 sizetype, idx, scale);
9542 tree ptr = gimple_build (&stmts, PLUS_EXPR,
9543 TREE_TYPE (dataref_ptr),
9544 dataref_ptr, idx);
9545 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9546 tree elt = make_ssa_name (TREE_TYPE (vectype));
9547 tree ref = build2 (MEM_REF, ltype, ptr,
9548 build_int_cst (ref_type, 0));
9549 new_stmt = gimple_build_assign (elt, ref);
9550 gimple_seq_add_stmt (&stmts, new_stmt);
9551 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
9553 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9554 new_stmt = gimple_build_assign (NULL_TREE,
9555 build_constructor
9556 (vectype, ctor_elts));
9557 data_ref = NULL_TREE;
9558 break;
9561 align =
9562 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9563 if (alignment_support_scheme == dr_aligned)
9565 gcc_assert (aligned_access_p (first_dr_info));
9566 misalign = 0;
9568 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9570 align = dr_alignment
9571 (vect_dr_behavior (vinfo, first_dr_info));
9572 misalign = 0;
9574 else
9575 misalign = DR_MISALIGNMENT (first_dr_info);
9576 if (dataref_offset == NULL_TREE
9577 && TREE_CODE (dataref_ptr) == SSA_NAME)
9578 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9579 align, misalign);
9580 align = least_bit_hwi (misalign | align);
9582 if (final_mask)
9584 tree ptr = build_int_cst (ref_type,
9585 align * BITS_PER_UNIT);
9586 gcall *call
9587 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9588 dataref_ptr, ptr,
9589 final_mask);
9590 gimple_call_set_nothrow (call, true);
9591 new_stmt = call;
9592 data_ref = NULL_TREE;
9594 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9596 tree final_len
9597 = vect_get_loop_len (loop_vinfo, loop_lens,
9598 vec_num * ncopies,
9599 vec_num * j + i);
9600 tree ptr = build_int_cst (ref_type,
9601 align * BITS_PER_UNIT);
9602 gcall *call
9603 = gimple_build_call_internal (IFN_LEN_LOAD, 3,
9604 dataref_ptr, ptr,
9605 final_len);
9606 gimple_call_set_nothrow (call, true);
9607 new_stmt = call;
9608 data_ref = NULL_TREE;
9610 /* Need conversion if it's wrapped with VnQI. */
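/* E.g. a V4SI load on a target whose IFN_LEN_LOAD only works on byte
   elements is emitted as a V16QI load and VIEW_CONVERTed back to V4SI
   below (modes here are illustrative).  */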
9611 machine_mode vmode = TYPE_MODE (vectype);
9612 opt_machine_mode new_ovmode
9613 = get_len_load_store_mode (vmode, true);
9614 machine_mode new_vmode = new_ovmode.require ();
9615 if (vmode != new_vmode)
9617 tree qi_type = unsigned_intQI_type_node;
9618 tree new_vtype
9619 = build_vector_type_for_mode (qi_type, new_vmode);
9620 tree var = vect_get_new_ssa_name (new_vtype,
9621 vect_simple_var);
9622 gimple_set_lhs (call, var);
9623 vect_finish_stmt_generation (vinfo, stmt_info, call,
9624 gsi);
9625 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9626 new_stmt
9627 = gimple_build_assign (vec_dest,
9628 VIEW_CONVERT_EXPR, op);
9631 else
9633 tree ltype = vectype;
9634 tree new_vtype = NULL_TREE;
9635 unsigned HOST_WIDE_INT gap
9636 = DR_GROUP_GAP (first_stmt_info);
9637 unsigned int vect_align
9638 = vect_known_alignment_in_bytes (first_dr_info);
9639 unsigned int scalar_dr_size
9640 = vect_get_scalar_dr_size (first_dr_info);
9641 /* If there's no peeling for gaps but we have a gap
9642 with slp loads then load the lower half of the
9643 vector only. See get_group_load_store_type for
9644 when we apply this optimization. */
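/* Hypothetical example: nunits == 4, group_size == 4 and a gap of 2
   (only the first two group elements are used) loads just the lower
   half, and the CONSTRUCTOR further below pads the result back to a
   full vector with zero in the unused half.  */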
9645 if (slp
9646 && loop_vinfo
9647 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9648 && gap != 0
9649 && known_eq (nunits, (group_size - gap) * 2)
9650 && known_eq (nunits, group_size)
9651 && gap >= (vect_align / scalar_dr_size))
9653 tree half_vtype;
9654 new_vtype
9655 = vector_vector_composition_type (vectype, 2,
9656 &half_vtype);
9657 if (new_vtype != NULL_TREE)
9658 ltype = half_vtype;
9660 tree offset
9661 = (dataref_offset ? dataref_offset
9662 : build_int_cst (ref_type, 0));
9663 if (ltype != vectype
9664 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9666 unsigned HOST_WIDE_INT gap_offset
9667 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9668 tree gapcst = build_int_cst (ref_type, gap_offset);
9669 offset = size_binop (PLUS_EXPR, offset, gapcst);
9671 data_ref
9672 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9673 if (alignment_support_scheme == dr_aligned)
9675 else
9676 TREE_TYPE (data_ref)
9677 = build_aligned_type (TREE_TYPE (data_ref),
9678 align * BITS_PER_UNIT);
9679 if (ltype != vectype)
9681 vect_copy_ref_info (data_ref,
9682 DR_REF (first_dr_info->dr));
9683 tree tem = make_ssa_name (ltype);
9684 new_stmt = gimple_build_assign (tem, data_ref);
9685 vect_finish_stmt_generation (vinfo, stmt_info,
9686 new_stmt, gsi);
9687 data_ref = NULL;
9688 vec<constructor_elt, va_gc> *v;
9689 vec_alloc (v, 2);
9690 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9692 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9693 build_zero_cst (ltype));
9694 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9696 else
9698 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9699 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9700 build_zero_cst (ltype));
9702 gcc_assert (new_vtype != NULL_TREE);
9703 if (new_vtype == vectype)
9704 new_stmt = gimple_build_assign (
9705 vec_dest, build_constructor (vectype, v));
9706 else
9708 tree new_vname = make_ssa_name (new_vtype);
9709 new_stmt = gimple_build_assign (
9710 new_vname, build_constructor (new_vtype, v));
9711 vect_finish_stmt_generation (vinfo, stmt_info,
9712 new_stmt, gsi);
9713 new_stmt = gimple_build_assign (
9714 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9715 new_vname));
9719 break;
9721 case dr_explicit_realign:
9723 tree ptr, bump;
9725 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9727 if (compute_in_loop)
9728 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9729 &realignment_token,
9730 dr_explicit_realign,
9731 dataref_ptr, NULL);
9733 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9734 ptr = copy_ssa_name (dataref_ptr);
9735 else
9736 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9737 // For explicit realign the target alignment should be
9738 // known at compile time.
9739 unsigned HOST_WIDE_INT align =
9740 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9741 new_stmt = gimple_build_assign
9742 (ptr, BIT_AND_EXPR, dataref_ptr,
9743 build_int_cst
9744 (TREE_TYPE (dataref_ptr),
9745 -(HOST_WIDE_INT) align));
9746 vect_finish_stmt_generation (vinfo, stmt_info,
9747 new_stmt, gsi);
9748 data_ref
9749 = build2 (MEM_REF, vectype, ptr,
9750 build_int_cst (ref_type, 0));
9751 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9752 vec_dest = vect_create_destination_var (scalar_dest,
9753 vectype);
9754 new_stmt = gimple_build_assign (vec_dest, data_ref);
9755 new_temp = make_ssa_name (vec_dest, new_stmt);
9756 gimple_assign_set_lhs (new_stmt, new_temp);
9757 gimple_move_vops (new_stmt, stmt_info->stmt);
9758 vect_finish_stmt_generation (vinfo, stmt_info,
9759 new_stmt, gsi);
9760 msq = new_temp;
9762 bump = size_binop (MULT_EXPR, vs,
9763 TYPE_SIZE_UNIT (elem_type));
9764 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9765 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9766 stmt_info, bump);
9767 new_stmt = gimple_build_assign
9768 (NULL_TREE, BIT_AND_EXPR, ptr,
9769 build_int_cst
9770 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9771 ptr = copy_ssa_name (ptr, new_stmt);
9772 gimple_assign_set_lhs (new_stmt, ptr);
9773 vect_finish_stmt_generation (vinfo, stmt_info,
9774 new_stmt, gsi);
9775 data_ref
9776 = build2 (MEM_REF, vectype, ptr,
9777 build_int_cst (ref_type, 0));
9778 break;
9780 case dr_explicit_realign_optimized:
9782 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9783 new_temp = copy_ssa_name (dataref_ptr);
9784 else
9785 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9786 // We should only be doing this if we know the target
9787 // alignment at compile time.
9788 unsigned HOST_WIDE_INT align =
9789 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9790 new_stmt = gimple_build_assign
9791 (new_temp, BIT_AND_EXPR, dataref_ptr,
9792 build_int_cst (TREE_TYPE (dataref_ptr),
9793 -(HOST_WIDE_INT) align));
9794 vect_finish_stmt_generation (vinfo, stmt_info,
9795 new_stmt, gsi);
9796 data_ref
9797 = build2 (MEM_REF, vectype, new_temp,
9798 build_int_cst (ref_type, 0));
9799 break;
9801 default:
9802 gcc_unreachable ();
9804 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9805 /* DATA_REF is null if we've already built the statement. */
9806 if (data_ref)
9808 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9809 new_stmt = gimple_build_assign (vec_dest, data_ref);
9811 new_temp = make_ssa_name (vec_dest, new_stmt);
9812 gimple_set_lhs (new_stmt, new_temp);
9813 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9815 /* 3. Handle explicit realignment if necessary/supported.
9816 Create in loop:
9817 vec_dest = realign_load (msq, lsq, realignment_token) */
9818 if (alignment_support_scheme == dr_explicit_realign_optimized
9819 || alignment_support_scheme == dr_explicit_realign)
9821 lsq = gimple_assign_lhs (new_stmt);
9822 if (!realignment_token)
9823 realignment_token = dataref_ptr;
9824 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9825 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9826 msq, lsq, realignment_token);
9827 new_temp = make_ssa_name (vec_dest, new_stmt);
9828 gimple_assign_set_lhs (new_stmt, new_temp);
9829 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9831 if (alignment_support_scheme == dr_explicit_realign_optimized)
9833 gcc_assert (phi);
9834 if (i == vec_num - 1 && j == ncopies - 1)
9835 add_phi_arg (phi, lsq,
9836 loop_latch_edge (containing_loop),
9837 UNKNOWN_LOCATION);
9838 msq = lsq;
9842 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9844 tree perm_mask = perm_mask_for_reverse (vectype);
9845 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9846 perm_mask, stmt_info, gsi);
9847 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9850 /* Collect vector loads and later create their permutation in
9851 vect_transform_grouped_load (). */
9852 if (grouped_load || slp_perm)
9853 dr_chain.quick_push (new_temp);
9855 /* Store vector loads in the corresponding SLP_NODE. */
9856 if (slp && !slp_perm)
9857 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9859 /* With an SLP permutation we load the gaps as well; without
9860 one we need to skip the gaps once we have fully loaded
9861 all the elements. group_gap_adj is DR_GROUP_SIZE here. */
9862 group_elt += nunits;
9863 if (maybe_ne (group_gap_adj, 0U)
9864 && !slp_perm
9865 && known_eq (group_elt, group_size - group_gap_adj))
9867 poly_wide_int bump_val
9868 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9869 * group_gap_adj);
9870 if (tree_int_cst_sgn
9871 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
9872 bump_val = -bump_val;
9873 tree bump = wide_int_to_tree (sizetype, bump_val);
9874 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9875 gsi, stmt_info, bump);
9876 group_elt = 0;
9879 /* Bump the vector pointer to account for a gap or for excess
9880 elements loaded for a permuted SLP load. */
9881 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9883 poly_wide_int bump_val
9884 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9885 * group_gap_adj);
9886 if (tree_int_cst_sgn
9887 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
9888 bump_val = -bump_val;
9889 tree bump = wide_int_to_tree (sizetype, bump_val);
9890 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9891 stmt_info, bump);
9895 if (slp && !slp_perm)
9896 continue;
9898 if (slp_perm)
9900 unsigned n_perms;
9901 /* For SLP we know we've seen all possible uses of dr_chain so
9902 direct vect_transform_slp_perm_load to DCE the unused parts.
9903 ??? This is a hack to prevent compile-time issues as seen
9904 in PR101120 and friends. */
9905 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9906 gsi, vf, false, &n_perms,
9907 nullptr, true);
9908 gcc_assert (ok);
9910 else
9912 if (grouped_load)
9914 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9915 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9916 group_size, gsi);
9917 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9919 else
9921 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9924 dr_chain.release ();
9926 if (!slp)
9927 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9929 return true;
9932 /* Function vect_is_simple_cond.
9934 Input:
9935 LOOP - the loop that is being vectorized.
9936 COND - Condition that is checked for simple use.
9938 Output:
9939 *COMP_VECTYPE - the vector type for the comparison.
9940 *DTS - The def types for the arguments of the comparison
9942 Returns whether a COND can be vectorized. Checks whether
9943 condition operands are supportable using vect_is_simple_use. */
9945 static bool
9946 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
9947 slp_tree slp_node, tree *comp_vectype,
9948 enum vect_def_type *dts, tree vectype)
9950 tree lhs, rhs;
9951 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9952 slp_tree slp_op;
9954 /* Mask case. */
9955 if (TREE_CODE (cond) == SSA_NAME
9956 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9958 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
9959 &slp_op, &dts[0], comp_vectype)
9960 || !*comp_vectype
9961 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9962 return false;
9963 return true;
9966 if (!COMPARISON_CLASS_P (cond))
9967 return false;
9969 lhs = TREE_OPERAND (cond, 0);
9970 rhs = TREE_OPERAND (cond, 1);
9972 if (TREE_CODE (lhs) == SSA_NAME)
9974 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
9975 &lhs, &slp_op, &dts[0], &vectype1))
9976 return false;
9978 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9979 || TREE_CODE (lhs) == FIXED_CST)
9980 dts[0] = vect_constant_def;
9981 else
9982 return false;
9984 if (TREE_CODE (rhs) == SSA_NAME)
9986 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
9987 &rhs, &slp_op, &dts[1], &vectype2))
9988 return false;
9990 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9991 || TREE_CODE (rhs) == FIXED_CST)
9992 dts[1] = vect_constant_def;
9993 else
9994 return false;
9996 if (vectype1 && vectype2
9997 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9998 TYPE_VECTOR_SUBPARTS (vectype2)))
9999 return false;
10001 *comp_vectype = vectype1 ? vectype1 : vectype2;
10002 /* Invariant comparison. */
10003 if (! *comp_vectype)
10005 tree scalar_type = TREE_TYPE (lhs);
10006 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10007 *comp_vectype = truth_type_for (vectype);
10008 else
10010 /* If we can widen the comparison to match vectype do so. */
10011 if (INTEGRAL_TYPE_P (scalar_type)
10012 && !slp_node
10013 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10014 TYPE_SIZE (TREE_TYPE (vectype))))
10015 scalar_type = build_nonstandard_integer_type
10016 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10017 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10018 slp_node);
10022 return true;
10025 /* vectorizable_condition.
10027 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10028 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10029 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10030 at GSI.
10032 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10034 Return true if STMT_INFO is vectorizable in this way. */
10036 static bool
10037 vectorizable_condition (vec_info *vinfo,
10038 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10039 gimple **vec_stmt,
10040 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10042 tree scalar_dest = NULL_TREE;
10043 tree vec_dest = NULL_TREE;
10044 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10045 tree then_clause, else_clause;
10046 tree comp_vectype = NULL_TREE;
10047 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10048 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10049 tree vec_compare;
10050 tree new_temp;
10051 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10052 enum vect_def_type dts[4]
10053 = {vect_unknown_def_type, vect_unknown_def_type,
10054 vect_unknown_def_type, vect_unknown_def_type};
10055 int ndts = 4;
10056 int ncopies;
10057 int vec_num;
10058 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10059 int i;
10060 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10061 vec<tree> vec_oprnds0 = vNULL;
10062 vec<tree> vec_oprnds1 = vNULL;
10063 vec<tree> vec_oprnds2 = vNULL;
10064 vec<tree> vec_oprnds3 = vNULL;
10065 tree vec_cmp_type;
10066 bool masked = false;
10068 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10069 return false;
10071 /* Is vectorizable conditional operation? */
10072 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10073 if (!stmt)
10074 return false;
10076 code = gimple_assign_rhs_code (stmt);
10077 if (code != COND_EXPR)
10078 return false;
10080 stmt_vec_info reduc_info = NULL;
10081 int reduc_index = -1;
10082 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10083 bool for_reduction
10084 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10085 if (for_reduction)
10087 if (STMT_SLP_TYPE (stmt_info))
10088 return false;
10089 reduc_info = info_for_reduction (vinfo, stmt_info);
10090 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10091 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10092 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10093 || reduc_index != -1);
10095 else
10097 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10098 return false;
10101 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10102 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10104 if (slp_node)
10106 ncopies = 1;
10107 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10109 else
10111 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10112 vec_num = 1;
10115 gcc_assert (ncopies >= 1);
10116 if (for_reduction && ncopies > 1)
10117 return false; /* FORNOW */
10119 cond_expr = gimple_assign_rhs1 (stmt);
10121 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10122 &comp_vectype, &dts[0], vectype)
10123 || !comp_vectype)
10124 return false;
10126 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10127 slp_tree then_slp_node, else_slp_node;
10128 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10129 &then_clause, &then_slp_node, &dts[2], &vectype1))
10130 return false;
10131 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10132 &else_clause, &else_slp_node, &dts[3], &vectype2))
10133 return false;
10135 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10136 return false;
10138 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10139 return false;
10141 masked = !COMPARISON_CLASS_P (cond_expr);
10142 vec_cmp_type = truth_type_for (comp_vectype);
10144 if (vec_cmp_type == NULL_TREE)
10145 return false;
10147 cond_code = TREE_CODE (cond_expr);
10148 if (!masked)
10150 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10151 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10154 /* For conditional reductions, the "then" value needs to be the candidate
10155 value calculated by this iteration while the "else" value needs to be
10156 the result carried over from previous iterations. If the COND_EXPR
10157 is the other way around, we need to swap it. */
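/* E.g. (illustrative GIMPLE) a conditional reduction written as

     res_1 = _cmp ? res_0 : val_2;

   carries the previous result in the "then" slot (reduc_index == 1),
   so below the comparison is inverted (or the mask negated) and the
   then/else clauses are swapped.  */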
10158 bool must_invert_cmp_result = false;
10159 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10161 if (masked)
10162 must_invert_cmp_result = true;
10163 else
10165 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10166 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10167 if (new_code == ERROR_MARK)
10168 must_invert_cmp_result = true;
10169 else
10171 cond_code = new_code;
10172 /* Make sure we don't accidentally use the old condition. */
10173 cond_expr = NULL_TREE;
10176 std::swap (then_clause, else_clause);
10179 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10181 /* Boolean values may have another representation in vectors
10182 and therefore we prefer bit operations over comparison for
10183 them (which also works for scalar masks). We store opcodes
10184 to use in bitop1 and bitop2. Statement is vectorized as
10185 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10186 depending on bitop1 and bitop2 arity. */
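/* Concretely, with the mapping below a GT_EXPR on mask operands is
   emitted as rhs1 & ~rhs2 (bitop1 == BIT_NOT_EXPR applied to rhs2,
   bitop2 == BIT_AND_EXPR), while LT_EXPR and LE_EXPR reuse the GT_EXPR
   and GE_EXPR opcodes with the comparison operands swapped.  */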
10187 switch (cond_code)
10189 case GT_EXPR:
10190 bitop1 = BIT_NOT_EXPR;
10191 bitop2 = BIT_AND_EXPR;
10192 break;
10193 case GE_EXPR:
10194 bitop1 = BIT_NOT_EXPR;
10195 bitop2 = BIT_IOR_EXPR;
10196 break;
10197 case LT_EXPR:
10198 bitop1 = BIT_NOT_EXPR;
10199 bitop2 = BIT_AND_EXPR;
10200 std::swap (cond_expr0, cond_expr1);
10201 break;
10202 case LE_EXPR:
10203 bitop1 = BIT_NOT_EXPR;
10204 bitop2 = BIT_IOR_EXPR;
10205 std::swap (cond_expr0, cond_expr1);
10206 break;
10207 case NE_EXPR:
10208 bitop1 = BIT_XOR_EXPR;
10209 break;
10210 case EQ_EXPR:
10211 bitop1 = BIT_XOR_EXPR;
10212 bitop2 = BIT_NOT_EXPR;
10213 break;
10214 default:
10215 return false;
10217 cond_code = SSA_NAME;
10220 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10221 && reduction_type == EXTRACT_LAST_REDUCTION
10222 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10224 if (dump_enabled_p ())
10225 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10226 "reduction comparison operation not supported.\n");
10227 return false;
10230 if (!vec_stmt)
10232 if (bitop1 != NOP_EXPR)
10234 machine_mode mode = TYPE_MODE (comp_vectype);
10235 optab optab;
10237 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10238 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10239 return false;
10241 if (bitop2 != NOP_EXPR)
10243 optab = optab_for_tree_code (bitop2, comp_vectype,
10244 optab_default);
10245 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10246 return false;
10250 vect_cost_for_stmt kind = vector_stmt;
10251 if (reduction_type == EXTRACT_LAST_REDUCTION)
10252 /* Count one reduction-like operation per vector. */
10253 kind = vec_to_scalar;
10254 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10255 return false;
10257 if (slp_node
10258 && (!vect_maybe_update_slp_op_vectype
10259 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10260 || (op_adjust == 1
10261 && !vect_maybe_update_slp_op_vectype
10262 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10263 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10264 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10266 if (dump_enabled_p ())
10267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10268 "incompatible vector types for invariants\n");
10269 return false;
10272 if (loop_vinfo && for_reduction
10273 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10275 if (reduction_type == EXTRACT_LAST_REDUCTION)
10276 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10277 ncopies * vec_num, vectype, NULL);
10278 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10279 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10281 if (dump_enabled_p ())
10282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10283 "conditional reduction prevents the use"
10284 " of partial vectors.\n");
10285 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10289 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10290 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10291 cost_vec, kind);
10292 return true;
10295 /* Transform. */
10297 /* Handle def. */
10298 scalar_dest = gimple_assign_lhs (stmt);
10299 if (reduction_type != EXTRACT_LAST_REDUCTION)
10300 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10302 bool swap_cond_operands = false;
10304 /* See whether another part of the vectorized code applies a loop
10305 mask to the condition, or to its inverse. */
10307 vec_loop_masks *masks = NULL;
10308 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10310 if (reduction_type == EXTRACT_LAST_REDUCTION)
10311 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10312 else
10314 scalar_cond_masked_key cond (cond_expr, ncopies);
10315 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10316 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10317 else
10319 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10320 cond.code = invert_tree_comparison (cond.code, honor_nans);
10321 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10323 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10324 cond_code = cond.code;
10325 swap_cond_operands = true;
10331 /* Handle cond expr. */
10332 if (masked)
10333 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10334 cond_expr, &vec_oprnds0, comp_vectype,
10335 then_clause, &vec_oprnds2, vectype,
10336 reduction_type != EXTRACT_LAST_REDUCTION
10337 ? else_clause : NULL, &vec_oprnds3, vectype);
10338 else
10339 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10340 cond_expr0, &vec_oprnds0, comp_vectype,
10341 cond_expr1, &vec_oprnds1, comp_vectype,
10342 then_clause, &vec_oprnds2, vectype,
10343 reduction_type != EXTRACT_LAST_REDUCTION
10344 ? else_clause : NULL, &vec_oprnds3, vectype);
10346 /* Arguments are ready. Create the new vector stmt. */
10347 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10349 vec_then_clause = vec_oprnds2[i];
10350 if (reduction_type != EXTRACT_LAST_REDUCTION)
10351 vec_else_clause = vec_oprnds3[i];
10353 if (swap_cond_operands)
10354 std::swap (vec_then_clause, vec_else_clause);
10356 if (masked)
10357 vec_compare = vec_cond_lhs;
10358 else
10360 vec_cond_rhs = vec_oprnds1[i];
10361 if (bitop1 == NOP_EXPR)
10363 gimple_seq stmts = NULL;
10364 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10365 vec_cond_lhs, vec_cond_rhs);
10366 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10368 else
10370 new_temp = make_ssa_name (vec_cmp_type);
10371 gassign *new_stmt;
10372 if (bitop1 == BIT_NOT_EXPR)
10373 new_stmt = gimple_build_assign (new_temp, bitop1,
10374 vec_cond_rhs);
10375 else
10376 new_stmt
10377 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10378 vec_cond_rhs);
10379 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10380 if (bitop2 == NOP_EXPR)
10381 vec_compare = new_temp;
10382 else if (bitop2 == BIT_NOT_EXPR)
10384 /* Instead of doing ~x ? y : z do x ? z : y. */
10385 vec_compare = new_temp;
10386 std::swap (vec_then_clause, vec_else_clause);
10388 else
10390 vec_compare = make_ssa_name (vec_cmp_type);
10391 new_stmt
10392 = gimple_build_assign (vec_compare, bitop2,
10393 vec_cond_lhs, new_temp);
10394 vect_finish_stmt_generation (vinfo, stmt_info,
10395 new_stmt, gsi);
10400 /* If we decided to apply a loop mask to the result of the vector
10401 comparison, AND the comparison with the mask now. Later passes
10402 should then be able to reuse the AND results between multiple
10403 vector statements.
10405 For example:
10406 for (int i = 0; i < 100; ++i)
10407 x[i] = y[i] ? z[i] : 10;
10409 results in following optimized GIMPLE:
10411 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10412 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10413 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10414 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10415 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10416 vect_iftmp.11_47, { 10, ... }>;
10418 instead of using separate masked and unmasked forms of
10419 vec != { 0, ... } (masked in the MASK_LOAD,
10420 unmasked in the VEC_COND_EXPR). */
10422 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10423 in cases where that's necessary. */
10425 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10427 if (!is_gimple_val (vec_compare))
10429 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10430 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10431 vec_compare);
10432 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10433 vec_compare = vec_compare_name;
10436 if (must_invert_cmp_result)
10438 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10439 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10440 BIT_NOT_EXPR,
10441 vec_compare);
10442 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10443 vec_compare = vec_compare_name;
10446 if (masks)
10448 tree loop_mask
10449 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10450 vectype, i);
10451 tree tmp2 = make_ssa_name (vec_cmp_type);
10452 gassign *g
10453 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10454 loop_mask);
10455 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10456 vec_compare = tmp2;
10460 gimple *new_stmt;
10461 if (reduction_type == EXTRACT_LAST_REDUCTION)
10463 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10464 tree lhs = gimple_get_lhs (old_stmt);
10465 new_stmt = gimple_build_call_internal
10466 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10467 vec_then_clause);
10468 gimple_call_set_lhs (new_stmt, lhs);
10469 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10470 if (old_stmt == gsi_stmt (*gsi))
10471 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10472 else
10474 /* In this case we're moving the definition to later in the
10475 block. That doesn't matter because the only uses of the
10476 lhs are in phi statements. */
10477 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10478 gsi_remove (&old_gsi, true);
10479 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10482 else
10484 new_temp = make_ssa_name (vec_dest);
10485 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10486 vec_then_clause, vec_else_clause);
10487 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10489 if (slp_node)
10490 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10491 else
10492 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10495 if (!slp_node)
10496 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10498 vec_oprnds0.release ();
10499 vec_oprnds1.release ();
10500 vec_oprnds2.release ();
10501 vec_oprnds3.release ();
10503 return true;
10506 /* vectorizable_comparison.
10508 Check if STMT_INFO is a comparison expression that can be vectorized.
10509 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10510 comparison, put it in VEC_STMT, and insert it at GSI.
10512 Return true if STMT_INFO is vectorizable in this way. */
10514 static bool
10515 vectorizable_comparison (vec_info *vinfo,
10516 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10517 gimple **vec_stmt,
10518 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10520 tree lhs, rhs1, rhs2;
10521 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10522 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10523 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10524 tree new_temp;
10525 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10526 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10527 int ndts = 2;
10528 poly_uint64 nunits;
10529 int ncopies;
10530 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10531 int i;
10532 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10533 vec<tree> vec_oprnds0 = vNULL;
10534 vec<tree> vec_oprnds1 = vNULL;
10535 tree mask_type;
10536 tree mask;
10538 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10539 return false;
10541 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10542 return false;
10544 mask_type = vectype;
10545 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10547 if (slp_node)
10548 ncopies = 1;
10549 else
10550 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10552 gcc_assert (ncopies >= 1);
10553 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10554 return false;
10556 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10557 if (!stmt)
10558 return false;
10560 code = gimple_assign_rhs_code (stmt);
10562 if (TREE_CODE_CLASS (code) != tcc_comparison)
10563 return false;
10565 slp_tree slp_rhs1, slp_rhs2;
10566 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10567 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10568 return false;
10570 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10571 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10572 return false;
10574 if (vectype1 && vectype2
10575 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10576 TYPE_VECTOR_SUBPARTS (vectype2)))
10577 return false;
10579 vectype = vectype1 ? vectype1 : vectype2;
10581 /* Invariant comparison. */
10582 if (!vectype)
10584 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10585 vectype = mask_type;
10586 else
10587 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10588 slp_node);
10589 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10590 return false;
10592 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10593 return false;
10595 /* Can't compare mask and non-mask types. */
10596 if (vectype1 && vectype2
10597 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10598 return false;
10600 /* Boolean values may have another representation in vectors
10601 and therefore we prefer bit operations over comparison for
10602 them (which also works for scalar masks). We store opcodes
10603 to use in bitop1 and bitop2. Statement is vectorized as
10604 BITOP2 (rhs1 BITOP1 rhs2) or
10605 rhs1 BITOP2 (BITOP1 rhs2)
10606 depending on bitop1 and bitop2 arity. */
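/* For instance, GE_EXPR on mask operands becomes rhs1 | ~rhs2 below
   (bitop1 == BIT_NOT_EXPR, bitop2 == BIT_IOR_EXPR), and LE_EXPR uses
   the same opcodes with swap_p set so the operands are exchanged.  */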
10607 bool swap_p = false;
10608 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10610 if (code == GT_EXPR)
10612 bitop1 = BIT_NOT_EXPR;
10613 bitop2 = BIT_AND_EXPR;
10615 else if (code == GE_EXPR)
10617 bitop1 = BIT_NOT_EXPR;
10618 bitop2 = BIT_IOR_EXPR;
10620 else if (code == LT_EXPR)
10622 bitop1 = BIT_NOT_EXPR;
10623 bitop2 = BIT_AND_EXPR;
10624 swap_p = true;
10626 else if (code == LE_EXPR)
10628 bitop1 = BIT_NOT_EXPR;
10629 bitop2 = BIT_IOR_EXPR;
10630 swap_p = true;
10632 else
10634 bitop1 = BIT_XOR_EXPR;
10635 if (code == EQ_EXPR)
10636 bitop2 = BIT_NOT_EXPR;
10640 if (!vec_stmt)
10642 if (bitop1 == NOP_EXPR)
10644 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10645 return false;
10647 else
10649 machine_mode mode = TYPE_MODE (vectype);
10650 optab optab;
10652 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10653 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10654 return false;
10656 if (bitop2 != NOP_EXPR)
10658 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10659 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10660 return false;
10664 /* Put types on constant and invariant SLP children. */
10665 if (slp_node
10666 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10667 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10669 if (dump_enabled_p ())
10670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10671 "incompatible vector types for invariants\n");
10672 return false;
10675 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10676 vect_model_simple_cost (vinfo, stmt_info,
10677 ncopies * (1 + (bitop2 != NOP_EXPR)),
10678 dts, ndts, slp_node, cost_vec);
10679 return true;
10682 /* Transform. */
10684 /* Handle def. */
10685 lhs = gimple_assign_lhs (stmt);
10686 mask = vect_create_destination_var (lhs, mask_type);
10688 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10689 rhs1, &vec_oprnds0, vectype,
10690 rhs2, &vec_oprnds1, vectype);
10691 if (swap_p)
10692 std::swap (vec_oprnds0, vec_oprnds1);
10694 /* Arguments are ready. Create the new vector stmt. */
10695 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10697 gimple *new_stmt;
10698 vec_rhs2 = vec_oprnds1[i];
10700 new_temp = make_ssa_name (mask);
10701 if (bitop1 == NOP_EXPR)
10703 new_stmt = gimple_build_assign (new_temp, code,
10704 vec_rhs1, vec_rhs2);
10705 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10707 else
10709 if (bitop1 == BIT_NOT_EXPR)
10710 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10711 else
10712 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10713 vec_rhs2);
10714 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10715 if (bitop2 != NOP_EXPR)
10717 tree res = make_ssa_name (mask);
10718 if (bitop2 == BIT_NOT_EXPR)
10719 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10720 else
10721 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10722 new_temp);
10723 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10726 if (slp_node)
10727 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10728 else
10729 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10732 if (!slp_node)
10733 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10735 vec_oprnds0.release ();
10736 vec_oprnds1.release ();
10738 return true;
10741 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10742 can handle all live statements in the node. Otherwise return true
10743 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10744 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10746 static bool
10747 can_vectorize_live_stmts (vec_info *vinfo,
10748 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10749 slp_tree slp_node, slp_instance slp_node_instance,
10750 bool vec_stmt_p,
10751 stmt_vector_for_cost *cost_vec)
10753 if (slp_node)
10755 stmt_vec_info slp_stmt_info;
10756 unsigned int i;
10757 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10759 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10760 && !vectorizable_live_operation (vinfo,
10761 slp_stmt_info, gsi, slp_node,
10762 slp_node_instance, i,
10763 vec_stmt_p, cost_vec))
10764 return false;
10767 else if (STMT_VINFO_LIVE_P (stmt_info)
10768 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
10769 slp_node, slp_node_instance, -1,
10770 vec_stmt_p, cost_vec))
10771 return false;
10773 return true;
10776 /* Make sure the statement is vectorizable. */
10778 opt_result
10779 vect_analyze_stmt (vec_info *vinfo,
10780 stmt_vec_info stmt_info, bool *need_to_vectorize,
10781 slp_tree node, slp_instance node_instance,
10782 stmt_vector_for_cost *cost_vec)
10784 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10785 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10786 bool ok;
10787 gimple_seq pattern_def_seq;
10789 if (dump_enabled_p ())
10790 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10791 stmt_info->stmt);
10793 if (gimple_has_volatile_ops (stmt_info->stmt))
10794 return opt_result::failure_at (stmt_info->stmt,
10795 "not vectorized:"
10796 " stmt has volatile operands: %G\n",
10797 stmt_info->stmt);
10799 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10800 && node == NULL
10801 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10803 gimple_stmt_iterator si;
10805 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10807 stmt_vec_info pattern_def_stmt_info
10808 = vinfo->lookup_stmt (gsi_stmt (si));
10809 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10810 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10812 /* Analyze def stmt of STMT if it's a pattern stmt. */
10813 if (dump_enabled_p ())
10814 dump_printf_loc (MSG_NOTE, vect_location,
10815 "==> examining pattern def statement: %G",
10816 pattern_def_stmt_info->stmt);
10818 opt_result res
10819 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10820 need_to_vectorize, node, node_instance,
10821 cost_vec);
10822 if (!res)
10823 return res;
10828 /* Skip stmts that do not need to be vectorized. In loops this is expected
10829 to include:
10830 - the COND_EXPR which is the loop exit condition
10831 - any LABEL_EXPRs in the loop
10832 - computations that are used only for array indexing or loop control.
10833 In basic blocks we only analyze statements that are a part of some SLP
10834 instance, therefore, all the statements are relevant.
10836 The pattern statement needs to be analyzed instead of the original statement
10837 if the original statement is not relevant. Otherwise, we analyze both
10838 statements. In basic blocks we are called from some SLP instance
10839 traversal; don't analyze pattern stmts instead, since the pattern stmts
10840 will already be part of an SLP instance. */
10842 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10843 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10844 && !STMT_VINFO_LIVE_P (stmt_info))
10846 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10847 && pattern_stmt_info
10848 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10849 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10851 /* Analyze PATTERN_STMT instead of the original stmt. */
10852 stmt_info = pattern_stmt_info;
10853 if (dump_enabled_p ())
10854 dump_printf_loc (MSG_NOTE, vect_location,
10855 "==> examining pattern statement: %G",
10856 stmt_info->stmt);
10858 else
10860 if (dump_enabled_p ())
10861 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10863 return opt_result::success ();
10866 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10867 && node == NULL
10868 && pattern_stmt_info
10869 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10870 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10872 /* Analyze PATTERN_STMT too. */
10873 if (dump_enabled_p ())
10874 dump_printf_loc (MSG_NOTE, vect_location,
10875 "==> examining pattern statement: %G",
10876 pattern_stmt_info->stmt);
10878 opt_result res
10879 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
10880 node_instance, cost_vec);
10881 if (!res)
10882 return res;
10885 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10887 case vect_internal_def:
10888 break;
10890 case vect_reduction_def:
10891 case vect_nested_cycle:
10892 gcc_assert (!bb_vinfo
10893 && (relevance == vect_used_in_outer
10894 || relevance == vect_used_in_outer_by_reduction
10895 || relevance == vect_used_by_reduction
10896 || relevance == vect_unused_in_scope
10897 || relevance == vect_used_only_live));
10898 break;
10900 case vect_induction_def:
10901 gcc_assert (!bb_vinfo);
10902 break;
10904 case vect_constant_def:
10905 case vect_external_def:
10906 case vect_unknown_def_type:
10907 default:
10908 gcc_unreachable ();
10911 if (STMT_VINFO_RELEVANT_P (stmt_info))
10913 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10914 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10915 || (call && gimple_call_lhs (call) == NULL_TREE));
10916 *need_to_vectorize = true;
10919 if (PURE_SLP_STMT (stmt_info) && !node)
10921 if (dump_enabled_p ())
10922 dump_printf_loc (MSG_NOTE, vect_location,
10923 "handled only by SLP analysis\n");
10924 return opt_result::success ();
10927 ok = true;
10928 if (!bb_vinfo
10929 && (STMT_VINFO_RELEVANT_P (stmt_info)
10930 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10931 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10932 -mveclibabi= takes preference over library functions with
10933 the simd attribute. */
10934 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10935 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
10936 cost_vec)
10937 || vectorizable_conversion (vinfo, stmt_info,
10938 NULL, NULL, node, cost_vec)
10939 || vectorizable_operation (vinfo, stmt_info,
10940 NULL, NULL, node, cost_vec)
10941 || vectorizable_assignment (vinfo, stmt_info,
10942 NULL, NULL, node, cost_vec)
10943 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10944 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10945 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10946 node, node_instance, cost_vec)
10947 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
10948 NULL, node, cost_vec)
10949 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10950 || vectorizable_condition (vinfo, stmt_info,
10951 NULL, NULL, node, cost_vec)
10952 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10953 cost_vec)
10954 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10955 stmt_info, NULL, node));
10956 else
10958 if (bb_vinfo)
10959 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10960 || vectorizable_simd_clone_call (vinfo, stmt_info,
10961 NULL, NULL, node, cost_vec)
10962 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
10963 cost_vec)
10964 || vectorizable_shift (vinfo, stmt_info,
10965 NULL, NULL, node, cost_vec)
10966 || vectorizable_operation (vinfo, stmt_info,
10967 NULL, NULL, node, cost_vec)
10968 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
10969 cost_vec)
10970 || vectorizable_load (vinfo, stmt_info,
10971 NULL, NULL, node, cost_vec)
10972 || vectorizable_store (vinfo, stmt_info,
10973 NULL, NULL, node, cost_vec)
10974 || vectorizable_condition (vinfo, stmt_info,
10975 NULL, NULL, node, cost_vec)
10976 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10977 cost_vec)
10978 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
10981 if (!ok)
10982 return opt_result::failure_at (stmt_info->stmt,
10983 "not vectorized:"
10984 " relevant stmt not supported: %G",
10985 stmt_info->stmt);
10987 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
10988 need extra handling, except for vectorizable reductions. */
10989 if (!bb_vinfo
10990 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10991 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
10992 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
10993 stmt_info, NULL, node, node_instance,
10994 false, cost_vec))
10995 return opt_result::failure_at (stmt_info->stmt,
10996 "not vectorized:"
10997 " live stmt not supported: %G",
10998 stmt_info->stmt);
11000 return opt_result::success ();
11004 /* Function vect_transform_stmt.
11006 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11008 bool
11009 vect_transform_stmt (vec_info *vinfo,
11010 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11011 slp_tree slp_node, slp_instance slp_node_instance)
11013 bool is_store = false;
11014 gimple *vec_stmt = NULL;
11015 bool done;
11017 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11019 switch (STMT_VINFO_TYPE (stmt_info))
11021 case type_demotion_vec_info_type:
11022 case type_promotion_vec_info_type:
11023 case type_conversion_vec_info_type:
11024 done = vectorizable_conversion (vinfo, stmt_info,
11025 gsi, &vec_stmt, slp_node, NULL);
11026 gcc_assert (done);
11027 break;
11029 case induc_vec_info_type:
11030 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11031 stmt_info, &vec_stmt, slp_node,
11032 NULL);
11033 gcc_assert (done);
11034 break;
11036 case shift_vec_info_type:
11037 done = vectorizable_shift (vinfo, stmt_info,
11038 gsi, &vec_stmt, slp_node, NULL);
11039 gcc_assert (done);
11040 break;
11042 case op_vec_info_type:
11043 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11044 NULL);
11045 gcc_assert (done);
11046 break;
11048 case assignment_vec_info_type:
11049 done = vectorizable_assignment (vinfo, stmt_info,
11050 gsi, &vec_stmt, slp_node, NULL);
11051 gcc_assert (done);
11052 break;
11054 case load_vec_info_type:
11055 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11056 NULL);
11057 gcc_assert (done);
11058 break;
11060 case store_vec_info_type:
11061 done = vectorizable_store (vinfo, stmt_info,
11062 gsi, &vec_stmt, slp_node, NULL);
11063 gcc_assert (done);
11064 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11066 /* In case of interleaving, the whole chain is vectorized when the
11067 last store in the chain is reached. Store stmts before the last
11068 one are skipped, and their vec_stmt_info shouldn't be freed
11069 meanwhile. */
11070 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11071 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11072 is_store = true;
11074 else
11075 is_store = true;
11076 break;
11078 case condition_vec_info_type:
11079 done = vectorizable_condition (vinfo, stmt_info,
11080 gsi, &vec_stmt, slp_node, NULL);
11081 gcc_assert (done);
11082 break;
11084 case comparison_vec_info_type:
11085 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11086 slp_node, NULL);
11087 gcc_assert (done);
11088 break;
11090 case call_vec_info_type:
11091 done = vectorizable_call (vinfo, stmt_info,
11092 gsi, &vec_stmt, slp_node, NULL);
11093 break;
11095 case call_simd_clone_vec_info_type:
11096 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11097 slp_node, NULL);
11098 break;
11100 case reduc_vec_info_type:
11101 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11102 gsi, &vec_stmt, slp_node);
11103 gcc_assert (done);
11104 break;
11106 case cycle_phi_info_type:
11107 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11108 &vec_stmt, slp_node, slp_node_instance);
11109 gcc_assert (done);
11110 break;
11112 case lc_phi_info_type:
11113 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11114 stmt_info, &vec_stmt, slp_node);
11115 gcc_assert (done);
11116 break;
11118 case phi_info_type:
11119 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11120 gcc_assert (done);
11121 break;
11123 default:
11124 if (!STMT_VINFO_LIVE_P (stmt_info))
11126 if (dump_enabled_p ())
11127 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11128 "stmt not supported.\n");
11129 gcc_unreachable ();
11131 done = true;
11134 if (!slp_node && vec_stmt)
11135 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11137 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
11138 return is_store;
11140 /* Handle stmts whose DEF is used outside the loop-nest that is
11141 being vectorized. */
11142 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11143 slp_node_instance, true, NULL);
11144 gcc_assert (done);
11146 return false;
11150 /* Remove a group of stores (for SLP or interleaving), free their
11151 stmt_vec_info. */
11153 void
11154 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11156 stmt_vec_info next_stmt_info = first_stmt_info;
11158 while (next_stmt_info)
11160 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11161 next_stmt_info = vect_orig_stmt (next_stmt_info);
11162 /* Free the attached stmt_vec_info and remove the stmt. */
11163 vinfo->remove_stmt (next_stmt_info);
11164 next_stmt_info = tmp;
11168 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11169 elements of type SCALAR_TYPE, or null if the target doesn't support
11170 such a type.
11172 If NUNITS is zero, return a vector type that contains elements of
11173 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11175 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11176 for this vectorization region and want to "autodetect" the best choice.
11177 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11178 and we want the new type to be interoperable with it. PREVAILING_MODE
11179 in this case can be a scalar integer mode or a vector mode; when it
11180 is a vector mode, the function acts like a tree-level version of
11181 related_vector_mode. */
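/* A purely illustrative example (mode and type names hypothetical): with a
   16-byte PREVAILING_MODE and SCALAR_TYPE "short int",

     tree natural = get_related_vectype_for_scalar_type (mode, short_type, 0);
     tree narrow  = get_related_vectype_for_scalar_type (mode, short_type, 4);

   the first call asks related_vector_mode for the natural element count
   (8 on such a target) while the second requests exactly 4 elements; either
   call returns NULL_TREE if the target has no suitable vector mode.  */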
11183 tree
11184 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11185 tree scalar_type, poly_uint64 nunits)
11187 tree orig_scalar_type = scalar_type;
11188 scalar_mode inner_mode;
11189 machine_mode simd_mode;
11190 tree vectype;
11192 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11193 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11194 return NULL_TREE;
11196 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11198 /* For vector types of elements whose mode precision doesn't
11199 match their type's precision we use an element type of mode
11200 precision. The vectorization routines will have to make sure
11201 they support the proper result truncation/extension.
11202 We also make sure to build vector types with INTEGER_TYPE
11203 component type only. */
11204 if (INTEGRAL_TYPE_P (scalar_type)
11205 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11206 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11207 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11208 TYPE_UNSIGNED (scalar_type));
11210 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11211 When the component mode passes the above test, simply use a type
11212 corresponding to that mode. The theory is that any use that
11213 would cause problems with this will disable vectorization anyway. */
11214 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11215 && !INTEGRAL_TYPE_P (scalar_type))
11216 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11218 /* We can't build a vector type of elements with alignment bigger than
11219 their size. */
11220 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11221 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11222 TYPE_UNSIGNED (scalar_type));
11224 /* If we fell back to using the mode, fail if there was
11225 no scalar type for it. */
11226 if (scalar_type == NULL_TREE)
11227 return NULL_TREE;
11229 /* If no prevailing mode was supplied, use the mode the target prefers.
11230 Otherwise lookup a vector mode based on the prevailing mode. */
11231 if (prevailing_mode == VOIDmode)
11233 gcc_assert (known_eq (nunits, 0U));
11234 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11235 if (SCALAR_INT_MODE_P (simd_mode))
11237 /* Traditional behavior is not to take the integer mode
11238 literally, but simply to use it as a way of determining
11239 the vector size. It is up to mode_for_vector to decide
11240 what the TYPE_MODE should be.
11242 Note that nunits == 1 is allowed in order to support single
11243 element vector types. */
11244 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11245 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11246 return NULL_TREE;
11249 else if (SCALAR_INT_MODE_P (prevailing_mode)
11250 || !related_vector_mode (prevailing_mode,
11251 inner_mode, nunits).exists (&simd_mode))
11253 /* Fall back to using mode_for_vector, mostly in the hope of being
11254 able to use an integer mode. */
11255 if (known_eq (nunits, 0U)
11256 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11257 return NULL_TREE;
11259 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11260 return NULL_TREE;
11263 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11265 /* In cases where the mode was chosen by mode_for_vector, check that
11266 the target actually supports the chosen mode, or that it at least
11267 allows the vector mode to be replaced by a like-sized integer. */
11268 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11269 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11270 return NULL_TREE;
11272 /* Re-attach the address-space qualifier if we canonicalized the scalar
11273 type. */
11274 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11275 return build_qualified_type
11276 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11278 return vectype;
11281 /* Function get_vectype_for_scalar_type.
11283 Returns the vector type corresponding to SCALAR_TYPE as supported
11284 by the target. If GROUP_SIZE is nonzero and we're performing BB
11285 vectorization, make sure that the number of elements in the vector
11286 is no bigger than GROUP_SIZE. */
11288 tree
11289 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11290 unsigned int group_size)
11292 /* For BB vectorization, we should always have a group size once we've
11293 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11294 are tentative requests during things like early data reference
11295 analysis and pattern recognition. */
11296 if (is_a <bb_vec_info> (vinfo))
11297 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11298 else
11299 group_size = 0;
11301 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11302 scalar_type);
11303 if (vectype && vinfo->vector_mode == VOIDmode)
11304 vinfo->vector_mode = TYPE_MODE (vectype);
11306 /* Register the natural choice of vector type, before the group size
11307 has been applied. */
11308 if (vectype)
11309 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11311 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11312 try again with an explicit number of elements. */
11313 if (vectype
11314 && group_size
11315 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11317 /* Start with the biggest number of units that fits within
11318 GROUP_SIZE and halve it until we find a valid vector type.
11319 Usually either the first attempt will succeed or all will
11320 fail (in the latter case because GROUP_SIZE is too small
11321 for the target), but it's possible that a target could have
11322 a hole between supported vector types.
11324 If GROUP_SIZE is not a power of 2, this has the effect of
11325 trying the largest power of 2 that fits within the group,
11326 even though the group is not a multiple of that vector size.
11327 The BB vectorizer will then try to carve up the group into
11328 smaller pieces. */
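      /* As a concrete illustration, with GROUP_SIZE == 6 the loop below first
	 tries nunits == 4 (1 << floor_log2 (6)) and, if no 4-element vector
	 type exists, retries with nunits == 2 before giving up.  */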
11329 unsigned int nunits = 1 << floor_log2 (group_size);
11332 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11333 scalar_type, nunits);
11334 nunits /= 2;
11336 while (nunits > 1 && !vectype);
11339 return vectype;
11342 /* Return the vector type corresponding to SCALAR_TYPE as supported
11343 by the target. NODE, if nonnull, is the SLP tree node that will
11344 use the returned vector type. */
11346 tree
11347 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11349 unsigned int group_size = 0;
11350 if (node)
11351 group_size = SLP_TREE_LANES (node);
11352 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11355 /* Function get_mask_type_for_scalar_type.
11357 Returns the mask type corresponding to a result of comparison
11358 of vectors of the specified SCALAR_TYPE as supported by the target.
11359 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11360 make sure that the number of elements in the vector is no bigger
11361 than GROUP_SIZE. */
11363 tree
11364 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11365 unsigned int group_size)
11367 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11369 if (!vectype)
11370 return NULL;
11372 return truth_type_for (vectype);
11375 /* Function get_same_sized_vectype
11377 Returns a vector type corresponding to SCALAR_TYPE of size
11378 VECTOR_TYPE if supported by the target. */
11380 tree
11381 get_same_sized_vectype (tree scalar_type, tree vector_type)
11383 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11384 return truth_type_for (vector_type);
11386 poly_uint64 nunits;
11387 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11388 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11389 return NULL_TREE;
11391 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11392 scalar_type, nunits);
11395 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11396 would not change the chosen vector modes. */
11398 bool
11399 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11401 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11402 i != vinfo->used_vector_modes.end (); ++i)
11403 if (!VECTOR_MODE_P (*i)
11404 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11405 return false;
11406 return true;
11409 /* Function vect_is_simple_use.
11411 Input:
11412 VINFO - the vect info of the loop or basic block that is being vectorized.
11413 OPERAND - operand in the loop or bb.
11414 Output:
11415 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11416 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11417 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11418 the definition could be anywhere in the function
11419 DT - the type of definition
11421 Returns whether a stmt with OPERAND can be vectorized.
11422 For loops, supportable operands are constants, loop invariants, and operands
11423 that are defined by the current iteration of the loop. Unsupportable
11424 operands are those that are defined by a previous iteration of the loop (as
11425 is the case in reduction/induction computations).
11426 For basic blocks, supportable operands are constants and bb invariants.
11427 For now, operands defined outside the basic block are not supported. */
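/* A hypothetical example of the classification: in

     for (i = 0; i < n; i++)
       a[i] = b[i] * c + 3;

   the constant 3 is vect_constant_def, the loop-invariant C (defined before
   the loop) is vect_external_def, and the value loaded from b[i] (defined by
   a statement inside the vectorizable region) is vect_internal_def.  */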
11429 bool
11430 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11431 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11433 if (def_stmt_info_out)
11434 *def_stmt_info_out = NULL;
11435 if (def_stmt_out)
11436 *def_stmt_out = NULL;
11437 *dt = vect_unknown_def_type;
11439 if (dump_enabled_p ())
11441 dump_printf_loc (MSG_NOTE, vect_location,
11442 "vect_is_simple_use: operand ");
11443 if (TREE_CODE (operand) == SSA_NAME
11444 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11445 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11446 else
11447 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11450 if (CONSTANT_CLASS_P (operand))
11451 *dt = vect_constant_def;
11452 else if (is_gimple_min_invariant (operand))
11453 *dt = vect_external_def;
11454 else if (TREE_CODE (operand) != SSA_NAME)
11455 *dt = vect_unknown_def_type;
11456 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11457 *dt = vect_external_def;
11458 else
11460 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11461 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11462 if (!stmt_vinfo)
11463 *dt = vect_external_def;
11464 else
11466 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11467 def_stmt = stmt_vinfo->stmt;
11468 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11469 if (def_stmt_info_out)
11470 *def_stmt_info_out = stmt_vinfo;
11472 if (def_stmt_out)
11473 *def_stmt_out = def_stmt;
11476 if (dump_enabled_p ())
11478 dump_printf (MSG_NOTE, ", type of def: ");
11479 switch (*dt)
11481 case vect_uninitialized_def:
11482 dump_printf (MSG_NOTE, "uninitialized\n");
11483 break;
11484 case vect_constant_def:
11485 dump_printf (MSG_NOTE, "constant\n");
11486 break;
11487 case vect_external_def:
11488 dump_printf (MSG_NOTE, "external\n");
11489 break;
11490 case vect_internal_def:
11491 dump_printf (MSG_NOTE, "internal\n");
11492 break;
11493 case vect_induction_def:
11494 dump_printf (MSG_NOTE, "induction\n");
11495 break;
11496 case vect_reduction_def:
11497 dump_printf (MSG_NOTE, "reduction\n");
11498 break;
11499 case vect_double_reduction_def:
11500 dump_printf (MSG_NOTE, "double reduction\n");
11501 break;
11502 case vect_nested_cycle:
11503 dump_printf (MSG_NOTE, "nested cycle\n");
11504 break;
11505 case vect_unknown_def_type:
11506 dump_printf (MSG_NOTE, "unknown\n");
11507 break;
11511 if (*dt == vect_unknown_def_type)
11513 if (dump_enabled_p ())
11514 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11515 "Unsupported pattern.\n");
11516 return false;
11519 return true;
11522 /* Function vect_is_simple_use.
11524 Same as vect_is_simple_use but also determines the vector operand
11525 type of OPERAND and stores it to *VECTYPE. If the definition of
11526 OPERAND is vect_uninitialized_def, vect_constant_def or
11527 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11528 is responsible for computing the best-suited vector type for the
11529 scalar operand. */
11531 bool
11532 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11533 tree *vectype, stmt_vec_info *def_stmt_info_out,
11534 gimple **def_stmt_out)
11536 stmt_vec_info def_stmt_info;
11537 gimple *def_stmt;
11538 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11539 return false;
11541 if (def_stmt_out)
11542 *def_stmt_out = def_stmt;
11543 if (def_stmt_info_out)
11544 *def_stmt_info_out = def_stmt_info;
11546 /* Now get a vector type if the def is internal, otherwise supply
11547 NULL_TREE and leave it up to the caller to figure out a proper
11548 type for the use stmt. */
11549 if (*dt == vect_internal_def
11550 || *dt == vect_induction_def
11551 || *dt == vect_reduction_def
11552 || *dt == vect_double_reduction_def
11553 || *dt == vect_nested_cycle)
11555 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11556 gcc_assert (*vectype != NULL_TREE);
11557 if (dump_enabled_p ())
11558 dump_printf_loc (MSG_NOTE, vect_location,
11559 "vect_is_simple_use: vectype %T\n", *vectype);
11561 else if (*dt == vect_uninitialized_def
11562 || *dt == vect_constant_def
11563 || *dt == vect_external_def)
11564 *vectype = NULL_TREE;
11565 else
11566 gcc_unreachable ();
11568 return true;
11571 /* Function vect_is_simple_use.
11573 Same as vect_is_simple_use but determines the operand by operand
11574 position OPERAND from either STMT or SLP_NODE, filling in *OP
11575 and *SLP_DEF (when SLP_NODE is not NULL). */
11577 bool
11578 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11579 unsigned operand, tree *op, slp_tree *slp_def,
11580 enum vect_def_type *dt,
11581 tree *vectype, stmt_vec_info *def_stmt_info_out)
11583 if (slp_node)
11585 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11586 *slp_def = child;
11587 *vectype = SLP_TREE_VECTYPE (child);
11588 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11590 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11591 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11593 else
11595 if (def_stmt_info_out)
11596 *def_stmt_info_out = NULL;
11597 *op = SLP_TREE_SCALAR_OPS (child)[0];
11598 *dt = SLP_TREE_DEF_TYPE (child);
11599 return true;
11602 else
11604 *slp_def = NULL;
11605 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11607 if (gimple_assign_rhs_code (ass) == COND_EXPR
11608 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11610 if (operand < 2)
11611 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11612 else
11613 *op = gimple_op (ass, operand);
11615 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11616 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11617 else
11618 *op = gimple_op (ass, operand + 1);
11620 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11621 *op = gimple_call_arg (call, operand);
11622 else
11623 gcc_unreachable ();
11624 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11628 /* If OP is not NULL and is external or constant update its vector
11629 type with VECTYPE. Returns true if successful or false if not,
11630 for example when conflicting vector types are present. */
11632 bool
11633 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11635 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11636 return true;
11637 if (SLP_TREE_VECTYPE (op))
11638 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11639 SLP_TREE_VECTYPE (op) = vectype;
11640 return true;
11643 /* Function supportable_widening_operation
11645 Check whether an operation represented by the code CODE is a
11646 widening operation that is supported by the target platform in
11647 vector form (i.e., when operating on arguments of type VECTYPE_IN
11648 producing a result of type VECTYPE_OUT).
11650 Widening operations we currently support are NOP (CONVERT), FLOAT,
11651 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11652 are supported by the target platform either directly (via vector
11653 tree-codes), or via target builtins.
11655 Output:
11656 - CODE1 and CODE2 are codes of vector operations to be used when
11657 vectorizing the operation, if available.
11658 - MULTI_STEP_CVT determines the number of required intermediate steps in
11659 case of multi-step conversion (like char->short->int - in that case
11660 MULTI_STEP_CVT will be 1).
11661 - INTERM_TYPES contains the intermediate type required to perform the
11662 widening operation (short in the above example). */
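/* An illustrative query (type names hypothetical), widening V16QI inputs to
   V4SI results on a little-endian target:

     if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
					 v4si_type, v16qi_type,
					 &code1, &code2, &steps, &types))
       ...

   On success CODE1/CODE2 are VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR (swapped
   for big endian), STEPS is 1 and TYPES holds the intermediate V8HI vector
   type, assuming the target supports each unpacking step.  */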
11664 bool
11665 supportable_widening_operation (vec_info *vinfo,
11666 enum tree_code code, stmt_vec_info stmt_info,
11667 tree vectype_out, tree vectype_in,
11668 enum tree_code *code1, enum tree_code *code2,
11669 int *multi_step_cvt,
11670 vec<tree> *interm_types)
11672 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11673 class loop *vect_loop = NULL;
11674 machine_mode vec_mode;
11675 enum insn_code icode1, icode2;
11676 optab optab1, optab2;
11677 tree vectype = vectype_in;
11678 tree wide_vectype = vectype_out;
11679 enum tree_code c1, c2;
11680 int i;
11681 tree prev_type, intermediate_type;
11682 machine_mode intermediate_mode, prev_mode;
11683 optab optab3, optab4;
11685 *multi_step_cvt = 0;
11686 if (loop_info)
11687 vect_loop = LOOP_VINFO_LOOP (loop_info);
11689 switch (code)
11691 case WIDEN_MULT_EXPR:
11692 /* The result of a vectorized widening operation usually requires
11693 two vectors (because the widened results do not fit into one vector).
11694 The generated vector results would normally be expected to appear
11695 in the same order as in the original scalar computation,
11696 i.e. if 8 results are generated in each vector iteration, they are
11697 to be organized as follows:
11698 vect1: [res1,res2,res3,res4],
11699 vect2: [res5,res6,res7,res8].
11701 However, in the special case that the result of the widening
11702 operation is used in a reduction computation only, the order doesn't
11703 matter (because when vectorizing a reduction we change the order of
11704 the computation). Some targets can take advantage of this and
11705 generate more efficient code. For example, targets like Altivec,
11706 that support widen_mult using a sequence of {mult_even,mult_odd}
11707 generate the following vectors:
11708 vect1: [res1,res3,res5,res7],
11709 vect2: [res2,res4,res6,res8].
11711 When vectorizing outer-loops, we execute the inner-loop sequentially
11712 (each vectorized inner-loop iteration contributes to VF outer-loop
11713 iterations in parallel). We therefore don't allow changing the
11714 order of the computation in the inner-loop during outer-loop
11715 vectorization. */
11716 /* TODO: Another case in which order doesn't *really* matter is when we
11717 widen and then contract again, e.g. (short)((int)x * y >> 8).
11718 Normally, pack_trunc performs an even/odd permute, whereas the
11719 repack from an even/odd expansion would be an interleave, which
11720 would be significantly simpler for e.g. AVX2. */
11721 /* In any case, in order to avoid duplicating the code below, recurse
11722 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11723 are properly set up for the caller. If we fail, we'll continue with
11724 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11725 if (vect_loop
11726 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11727 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11728 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11729 stmt_info, vectype_out,
11730 vectype_in, code1, code2,
11731 multi_step_cvt, interm_types))
11733 /* Elements in a vector with vect_used_by_reduction property cannot
11734 be reordered if the use chain with this property does not have the
11735 same operation. One such example is s += a * b, where elements
11736 in a and b cannot be reordered. Here we check if the vector defined
11737 by STMT is only directly used in the reduction statement. */
11738 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11739 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11740 if (use_stmt_info
11741 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11742 return true;
11744 c1 = VEC_WIDEN_MULT_LO_EXPR;
11745 c2 = VEC_WIDEN_MULT_HI_EXPR;
11746 break;
11748 case DOT_PROD_EXPR:
11749 c1 = DOT_PROD_EXPR;
11750 c2 = DOT_PROD_EXPR;
11751 break;
11753 case SAD_EXPR:
11754 c1 = SAD_EXPR;
11755 c2 = SAD_EXPR;
11756 break;
11758 case VEC_WIDEN_MULT_EVEN_EXPR:
11759 /* Support the recursion induced just above. */
11760 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11761 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11762 break;
11764 case WIDEN_LSHIFT_EXPR:
11765 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11766 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11767 break;
11769 case WIDEN_PLUS_EXPR:
11770 c1 = VEC_WIDEN_PLUS_LO_EXPR;
11771 c2 = VEC_WIDEN_PLUS_HI_EXPR;
11772 break;
11774 case WIDEN_MINUS_EXPR:
11775 c1 = VEC_WIDEN_MINUS_LO_EXPR;
11776 c2 = VEC_WIDEN_MINUS_HI_EXPR;
11777 break;
11779 CASE_CONVERT:
11780 c1 = VEC_UNPACK_LO_EXPR;
11781 c2 = VEC_UNPACK_HI_EXPR;
11782 break;
11784 case FLOAT_EXPR:
11785 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11786 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11787 break;
11789 case FIX_TRUNC_EXPR:
11790 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11791 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11792 break;
11794 default:
11795 gcc_unreachable ();
11798 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11799 std::swap (c1, c2);
11801 if (code == FIX_TRUNC_EXPR)
11803 /* The signedness is determined from output operand. */
11804 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11805 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11807 else if (CONVERT_EXPR_CODE_P (code)
11808 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11809 && VECTOR_BOOLEAN_TYPE_P (vectype)
11810 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11811 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11813 /* If the input and result modes are the same, a different optab
11814 is needed where we pass in the number of units in vectype. */
11815 optab1 = vec_unpacks_sbool_lo_optab;
11816 optab2 = vec_unpacks_sbool_hi_optab;
11818 else
11820 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11821 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11824 if (!optab1 || !optab2)
11825 return false;
11827 vec_mode = TYPE_MODE (vectype);
11828 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11829 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11830 return false;
11832 *code1 = c1;
11833 *code2 = c2;
11835 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11836 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11838 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11839 return true;
11840 /* For scalar masks we may have different boolean
11841 vector types having the same QImode. Thus we
11842 add an additional check on the number of elements. */
11843 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11844 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11845 return true;
11848 /* Check if it's a multi-step conversion that can be done using intermediate
11849 types. */
11851 prev_type = vectype;
11852 prev_mode = vec_mode;
11854 if (!CONVERT_EXPR_CODE_P (code))
11855 return false;
11857 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11858 intermediate steps in the promotion sequence. We try
11859 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11860 not. */
11861 interm_types->create (MAX_INTERM_CVT_STEPS);
11862 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11864 intermediate_mode = insn_data[icode1].operand[0].mode;
11865 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11866 intermediate_type
11867 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11868 else
11869 intermediate_type
11870 = lang_hooks.types.type_for_mode (intermediate_mode,
11871 TYPE_UNSIGNED (prev_type));
11873 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11874 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11875 && intermediate_mode == prev_mode
11876 && SCALAR_INT_MODE_P (prev_mode))
11878 /* If the input and result modes are the same, a different optab
11879 is needed where we pass in the number of units in vectype. */
11880 optab3 = vec_unpacks_sbool_lo_optab;
11881 optab4 = vec_unpacks_sbool_hi_optab;
11883 else
11885 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11886 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11889 if (!optab3 || !optab4
11890 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11891 || insn_data[icode1].operand[0].mode != intermediate_mode
11892 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11893 || insn_data[icode2].operand[0].mode != intermediate_mode
11894 || ((icode1 = optab_handler (optab3, intermediate_mode))
11895 == CODE_FOR_nothing)
11896 || ((icode2 = optab_handler (optab4, intermediate_mode))
11897 == CODE_FOR_nothing))
11898 break;
11900 interm_types->quick_push (intermediate_type);
11901 (*multi_step_cvt)++;
11903 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11904 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11906 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11907 return true;
11908 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11909 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11910 return true;
11913 prev_type = intermediate_type;
11914 prev_mode = intermediate_mode;
11917 interm_types->release ();
11918 return false;
11922 /* Function supportable_narrowing_operation
11924 Check whether an operation represented by the code CODE is a
11925 narrowing operation that is supported by the target platform in
11926 vector form (i.e., when operating on arguments of type VECTYPE_IN
11927 and producing a result of type VECTYPE_OUT).
11929 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11930 and FLOAT. This function checks if these operations are supported by
11931 the target platform directly via vector tree-codes.
11933 Output:
11934 - CODE1 is the code of a vector operation to be used when
11935 vectorizing the operation, if available.
11936 - MULTI_STEP_CVT determines the number of required intermediate steps in
11937 case of multi-step conversion (like int->short->char - in that case
11938 MULTI_STEP_CVT will be 1).
11939 - INTERM_TYPES contains the intermediate type required to perform the
11940 narrowing operation (short in the above example). */
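/* Sketch of the narrowing analogue of the example above: packing V4SI inputs
   down to a V16QI result goes through V8HI, so on success *CODE1 would be
   VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT would be 1 and INTERM_TYPES would hold
   the intermediate V8HI vector type, assuming the target supports both pack
   steps.  */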
11942 bool
11943 supportable_narrowing_operation (enum tree_code code,
11944 tree vectype_out, tree vectype_in,
11945 enum tree_code *code1, int *multi_step_cvt,
11946 vec<tree> *interm_types)
11948 machine_mode vec_mode;
11949 enum insn_code icode1;
11950 optab optab1, interm_optab;
11951 tree vectype = vectype_in;
11952 tree narrow_vectype = vectype_out;
11953 enum tree_code c1;
11954 tree intermediate_type, prev_type;
11955 machine_mode intermediate_mode, prev_mode;
11956 int i;
11957 bool uns;
11959 *multi_step_cvt = 0;
11960 switch (code)
11962 CASE_CONVERT:
11963 c1 = VEC_PACK_TRUNC_EXPR;
11964 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11965 && VECTOR_BOOLEAN_TYPE_P (vectype)
11966 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11967 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11968 optab1 = vec_pack_sbool_trunc_optab;
11969 else
11970 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11971 break;
11973 case FIX_TRUNC_EXPR:
11974 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11975 /* The signedness is determined from output operand. */
11976 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11977 break;
11979 case FLOAT_EXPR:
11980 c1 = VEC_PACK_FLOAT_EXPR;
11981 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11982 break;
11984 default:
11985 gcc_unreachable ();
11988 if (!optab1)
11989 return false;
11991 vec_mode = TYPE_MODE (vectype);
11992 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11993 return false;
11995 *code1 = c1;
11997 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11999 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12000 return true;
12001 /* For scalar masks we may have different boolean
12002 vector types having the same QImode. Thus we
12003 add an additional check on the number of elements. */
12004 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12005 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12006 return true;
12009 if (code == FLOAT_EXPR)
12010 return false;
12012 /* Check if it's a multi-step conversion that can be done using intermediate
12013 types. */
12014 prev_mode = vec_mode;
12015 prev_type = vectype;
12016 if (code == FIX_TRUNC_EXPR)
12017 uns = TYPE_UNSIGNED (vectype_out);
12018 else
12019 uns = TYPE_UNSIGNED (vectype);
12021 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12022 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12023 costly than signed. */
12024 if (code == FIX_TRUNC_EXPR && uns)
12026 enum insn_code icode2;
12028 intermediate_type
12029 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12030 interm_optab
12031 = optab_for_tree_code (c1, intermediate_type, optab_default);
12032 if (interm_optab != unknown_optab
12033 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12034 && insn_data[icode1].operand[0].mode
12035 == insn_data[icode2].operand[0].mode)
12037 uns = false;
12038 optab1 = interm_optab;
12039 icode1 = icode2;
12043 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12044 intermediate steps in the narrowing sequence. We try
12045 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12046 interm_types->create (MAX_INTERM_CVT_STEPS);
12047 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12049 intermediate_mode = insn_data[icode1].operand[0].mode;
12050 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12051 intermediate_type
12052 = vect_double_mask_nunits (prev_type, intermediate_mode);
12053 else
12054 intermediate_type
12055 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12056 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12057 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12058 && intermediate_mode == prev_mode
12059 && SCALAR_INT_MODE_P (prev_mode))
12060 interm_optab = vec_pack_sbool_trunc_optab;
12061 else
12062 interm_optab
12063 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12064 optab_default);
12065 if (!interm_optab
12066 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12067 || insn_data[icode1].operand[0].mode != intermediate_mode
12068 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12069 == CODE_FOR_nothing))
12070 break;
12072 interm_types->quick_push (intermediate_type);
12073 (*multi_step_cvt)++;
12075 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12077 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12078 return true;
12079 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12080 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12081 return true;
12084 prev_mode = intermediate_mode;
12085 prev_type = intermediate_type;
12086 optab1 = interm_optab;
12089 interm_types->release ();
12090 return false;
12093 /* Generate and return a vector mask of MASK_TYPE such that
12094 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12095 Add the statements to SEQ. */
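/* A small worked example (values purely illustrative): with START_INDEX == 13,
   END_INDEX == 16 and an 8-lane MASK_TYPE, the IFN_WHILE_ULT call built below
   yields the mask { 1, 1, 1, 0, 0, 0, 0, 0 }: lanes 0..2 satisfy
   13 + J < 16, the remaining lanes do not.  */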
12097 tree
12098 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12099 tree end_index, const char *name)
12101 tree cmp_type = TREE_TYPE (start_index);
12102 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12103 cmp_type, mask_type,
12104 OPTIMIZE_FOR_SPEED));
12105 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12106 start_index, end_index,
12107 build_zero_cst (mask_type));
12108 tree tmp;
12109 if (name)
12110 tmp = make_temp_ssa_name (mask_type, NULL, name);
12111 else
12112 tmp = make_ssa_name (mask_type);
12113 gimple_call_set_lhs (call, tmp);
12114 gimple_seq_add_stmt (seq, call);
12115 return tmp;
12118 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12119 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12121 tree
12122 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12123 tree end_index)
12125 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12126 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12129 /* Try to compute the vector types required to vectorize STMT_INFO,
12130 returning true on success and false if vectorization isn't possible.
12131 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12132 make sure that the number of elements in the vectors is no bigger
12133 than GROUP_SIZE.
12135 On success:
12137 - Set *STMT_VECTYPE_OUT to:
12138 - NULL_TREE if the statement doesn't need to be vectorized;
12139 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12141 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12142 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12143 statement does not help to determine the overall number of units. */
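/* A hypothetical example: for a widening statement such as
   int_x = (int) short_y, *STMT_VECTYPE_OUT would be the int vector type
   (say V4SI) while *NUNITS_VECTYPE_OUT would be derived from the smallest
   scalar type involved (short, giving V8HI), so the vectorization factor
   accounts for the narrower elements.  */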
12145 opt_result
12146 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12147 tree *stmt_vectype_out,
12148 tree *nunits_vectype_out,
12149 unsigned int group_size)
12151 gimple *stmt = stmt_info->stmt;
12153 /* For BB vectorization, we should always have a group size once we've
12154 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12155 are tentative requests during things like early data reference
12156 analysis and pattern recognition. */
12157 if (is_a <bb_vec_info> (vinfo))
12158 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12159 else
12160 group_size = 0;
12162 *stmt_vectype_out = NULL_TREE;
12163 *nunits_vectype_out = NULL_TREE;
12165 if (gimple_get_lhs (stmt) == NULL_TREE
12166 /* MASK_STORE has no lhs, but is ok. */
12167 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12169 if (is_a <gcall *> (stmt))
12171 /* Ignore calls with no lhs. These must be calls to
12172 #pragma omp simd functions, and what vectorization factor
12173 it really needs can't be determined until
12174 vectorizable_simd_clone_call. */
12175 if (dump_enabled_p ())
12176 dump_printf_loc (MSG_NOTE, vect_location,
12177 "defer to SIMD clone analysis.\n");
12178 return opt_result::success ();
12181 return opt_result::failure_at (stmt,
12182 "not vectorized: irregular stmt.%G", stmt);
12185 tree vectype;
12186 tree scalar_type = NULL_TREE;
12187 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12189 vectype = STMT_VINFO_VECTYPE (stmt_info);
12190 if (dump_enabled_p ())
12191 dump_printf_loc (MSG_NOTE, vect_location,
12192 "precomputed vectype: %T\n", vectype);
12194 else if (vect_use_mask_type_p (stmt_info))
12196 unsigned int precision = stmt_info->mask_precision;
12197 scalar_type = build_nonstandard_integer_type (precision, 1);
12198 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12199 if (!vectype)
12200 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12201 " data-type %T\n", scalar_type);
12202 if (dump_enabled_p ())
12203 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12205 else
12207 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12208 scalar_type = TREE_TYPE (DR_REF (dr));
12209 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12210 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12211 else
12212 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12214 if (dump_enabled_p ())
12216 if (group_size)
12217 dump_printf_loc (MSG_NOTE, vect_location,
12218 "get vectype for scalar type (group size %d):"
12219 " %T\n", group_size, scalar_type);
12220 else
12221 dump_printf_loc (MSG_NOTE, vect_location,
12222 "get vectype for scalar type: %T\n", scalar_type);
12224 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12225 if (!vectype)
12226 return opt_result::failure_at (stmt,
12227 "not vectorized:"
12228 " unsupported data-type %T\n",
12229 scalar_type);
12231 if (dump_enabled_p ())
12232 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12235 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12236 return opt_result::failure_at (stmt,
12237 "not vectorized: vector stmt in loop:%G",
12238 stmt);
12240 *stmt_vectype_out = vectype;
12242 /* Don't try to compute scalar types if the stmt produces a boolean
12243 vector; use the existing vector type instead. */
12244 tree nunits_vectype = vectype;
12245 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12247 /* The number of units is set according to the smallest scalar
12248 type (or the largest vector size, but we only support one
12249 vector size per vectorization). */
12250 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12251 TREE_TYPE (vectype));
12252 if (scalar_type != TREE_TYPE (vectype))
12254 if (dump_enabled_p ())
12255 dump_printf_loc (MSG_NOTE, vect_location,
12256 "get vectype for smallest scalar type: %T\n",
12257 scalar_type);
12258 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12259 group_size);
12260 if (!nunits_vectype)
12261 return opt_result::failure_at
12262 (stmt, "not vectorized: unsupported data-type %T\n",
12263 scalar_type);
12264 if (dump_enabled_p ())
12265 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12266 nunits_vectype);
12270 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12271 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12272 return opt_result::failure_at (stmt,
12273 "Not vectorized: Incompatible number "
12274 "of vector subparts between %T and %T\n",
12275 nunits_vectype, *stmt_vectype_out);
12277 if (dump_enabled_p ())
12279 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12280 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12281 dump_printf (MSG_NOTE, "\n");
12284 *nunits_vectype_out = nunits_vectype;
12285 return opt_result::success ();
12288 /* Generate and return statement sequence that sets vector length LEN that is:
12290 min_of_start_and_end = min (START_INDEX, END_INDEX);
12291 left_len = END_INDEX - min_of_start_and_end;
12292 rhs = min (left_len, LEN_LIMIT);
12293 LEN = rhs;
12295 Note: the cost of the code generated by this function is modeled
12296 by vect_estimate_min_profitable_iters, so changes here may need
12297 corresponding changes there. */
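/* A worked example (values purely illustrative): with START_INDEX == 13,
   END_INDEX == 16 and LEN_LIMIT == 8 the generated sequence computes
     min_of_start_and_end = min (13, 16) = 13
     left_len = 16 - 13 = 3
     LEN = min (3, 8) = 3
   i.e. only three lanes remain to be processed.  */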
12299 gimple_seq
12300 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12302 gimple_seq stmts = NULL;
12303 tree len_type = TREE_TYPE (len);
12304 gcc_assert (TREE_TYPE (start_index) == len_type);
12306 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12307 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12308 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12309 gimple* stmt = gimple_build_assign (len, rhs);
12310 gimple_seq_add_stmt (&stmts, stmt);
12312 return stmts;