[official-gcc.git] / gcc / tree-vect-stmts.cc
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
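/* Illustrative example (not part of the original source): under
   outer-loop vectorization of the I loop in

       for (i = 0; i < n; i++)        <-- LOOP_VINFO_LOOP
         for (j = 0; j < m; j++)      <-- loop->inner
           a[i][j] = 0;

   the store belongs to the inner J loop, so stmt_in_inner_loop_p
   returns true for it, while statements placed directly in the I loop
   body return false.  */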
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 static unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind,
95 stmt_vec_info stmt_info, slp_tree node,
96 tree vectype, int misalign,
97 enum vect_cost_model_location where)
99 if ((kind == vector_load || kind == unaligned_load)
100 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
101 kind = vector_gather_load;
102 if ((kind == vector_store || kind == unaligned_store)
103 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
104 kind = vector_scatter_store;
106 stmt_info_for_cost si
107 = { count, kind, where, stmt_info, node, vectype, misalign };
108 body_cost_vec->safe_push (si);
110 return (unsigned)
111 (builtin_vectorization_cost (kind, vectype, misalign) * count);
114 unsigned
115 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
116 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
117 tree vectype, int misalign,
118 enum vect_cost_model_location where)
120 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
121 vectype, misalign, where);
124 unsigned
125 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
126 enum vect_cost_for_stmt kind, slp_tree node,
127 tree vectype, int misalign,
128 enum vect_cost_model_location where)
130 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
131 vectype, misalign, where);
134 unsigned
135 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
136 enum vect_cost_for_stmt kind,
137 enum vect_cost_model_location where)
139 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
140 || kind == scalar_stmt);
141 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
142 NULL_TREE, 0, where);
145 /* Return a variable of type ELEM_TYPE[NELEMS]. */
147 static tree
148 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
150 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
151 "vect_array");
154 /* ARRAY is an array of vectors created by create_vector_array.
155 Return an SSA_NAME for the vector in index N. The reference
156 is part of the vectorization of STMT_INFO and the vector is associated
157 with scalar destination SCALAR_DEST. */
159 static tree
160 read_vector_array (vec_info *vinfo,
161 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
162 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
164 tree vect_type, vect, vect_name, array_ref;
165 gimple *new_stmt;
167 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
168 vect_type = TREE_TYPE (TREE_TYPE (array));
169 vect = vect_create_destination_var (scalar_dest, vect_type);
170 array_ref = build4 (ARRAY_REF, vect_type, array,
171 build_int_cst (size_type_node, n),
172 NULL_TREE, NULL_TREE);
174 new_stmt = gimple_build_assign (vect, array_ref);
175 vect_name = make_ssa_name (vect, new_stmt);
176 gimple_assign_set_lhs (new_stmt, vect_name);
177 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
179 return vect_name;
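/* Illustrative sketch of the statement emitted above (SSA names
   hypothetical):

     vect_dest_1 = vect_array[n];

   an ARRAY_REF load of element N into a fresh SSA name based on
   SCALAR_DEST.  */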
182 /* ARRAY is an array of vectors created by create_vector_array.
183 Emit code to store SSA_NAME VECT in index N of the array.
184 The store is part of the vectorization of STMT_INFO. */
186 static void
187 write_vector_array (vec_info *vinfo,
188 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
189 tree vect, tree array, unsigned HOST_WIDE_INT n)
191 tree array_ref;
192 gimple *new_stmt;
194 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
195 build_int_cst (size_type_node, n),
196 NULL_TREE, NULL_TREE);
198 new_stmt = gimple_build_assign (array_ref, vect);
199 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
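/* Illustrative sketch of the statement emitted above:

     vect_array[n] = vect_1;

   the store counterpart of read_vector_array.  */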
202 /* PTR is a pointer to an array of type TYPE. Return a representation
203 of *PTR. The memory reference replaces those in FIRST_DR
204 (and its group). */
206 static tree
207 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
209 tree mem_ref;
211 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
212 /* Arrays have the same alignment as their type. */
213 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
214 return mem_ref;
217 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
218 Emit the clobber before *GSI. */
220 static void
221 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
222 gimple_stmt_iterator *gsi, tree var)
224 tree clobber = build_clobber (TREE_TYPE (var));
225 gimple *new_stmt = gimple_build_assign (var, clobber);
226 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
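/* Illustrative sketch of the statement emitted above:

     var = {CLOBBER};

   which tells later passes that VAR's value is no longer meaningful
   past this point.  */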
229 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
231 /* Function vect_mark_relevant.
233 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
235 static void
236 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
237 enum vect_relevant relevant, bool live_p)
239 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
240 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
242 if (dump_enabled_p ())
243 dump_printf_loc (MSG_NOTE, vect_location,
244 "mark relevant %d, live %d: %G", relevant, live_p,
245 stmt_info->stmt);
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
249 may have their own uses that are not in any pattern, in such cases the
250 stmt itself should be marked. */
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
253 /* This is the last stmt in a sequence that was detected as a
254 pattern that can potentially be vectorized. Don't mark the stmt
255 as relevant/live because it's not going to be vectorized.
256 Instead mark the pattern-stmt that replaces it. */
258 if (dump_enabled_p ())
259 dump_printf_loc (MSG_NOTE, vect_location,
260 "last stmt in pattern. don't mark"
261 " relevant/live.\n");
262 stmt_vec_info old_stmt_info = stmt_info;
263 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
264 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
265 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
266 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
269 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
270 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
271 STMT_VINFO_RELEVANT (stmt_info) = relevant;
273 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
274 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
276 if (dump_enabled_p ())
277 dump_printf_loc (MSG_NOTE, vect_location,
278 "already marked relevant/live.\n");
279 return;
282 worklist->safe_push (stmt_info);
286 /* Function is_simple_and_all_uses_invariant
288 Return true if STMT_INFO is simple and all uses of it are invariant. */
290 bool
291 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
292 loop_vec_info loop_vinfo)
294 tree op;
295 ssa_op_iter iter;
297 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
298 if (!stmt)
299 return false;
301 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
303 enum vect_def_type dt = vect_uninitialized_def;
305 if (!vect_is_simple_use (op, loop_vinfo, &dt))
307 if (dump_enabled_p ())
308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
309 "use not simple.\n");
310 return false;
313 if (dt != vect_external_def && dt != vect_constant_def)
314 return false;
316 return true;
319 /* Function vect_stmt_relevant_p.
321 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
322 is "relevant for vectorization".
324 A stmt is considered "relevant for vectorization" if:
325 - it has uses outside the loop.
326 - it has vdefs (it alters memory).
327 - control stmts in the loop (except for the exit condition).
329 CHECKME: what other side effects would the vectorizer allow? */
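/* Illustrative example (not part of the original source):

     for (i = 0; i < n; i++)
       {
         tmp_1 = a[i] * 2;
         b[i] = tmp_1;        <-- has a vdef: marked vect_used_in_scope
         last_2 = tmp_1;      <-- used after the loop: marked live
       }
     ... = last_2;

   The store is relevant because it alters memory; the definition of
   last_2 is live because its only out-of-loop use reaches it through
   the loop-exit phi.  */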
331 static bool
332 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
333 enum vect_relevant *relevant, bool *live_p)
335 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
336 ssa_op_iter op_iter;
337 imm_use_iterator imm_iter;
338 use_operand_p use_p;
339 def_operand_p def_p;
341 *relevant = vect_unused_in_scope;
342 *live_p = false;
344 /* cond stmt other than loop exit cond. */
345 if (is_ctrl_stmt (stmt_info->stmt)
346 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
347 *relevant = vect_used_in_scope;
349 /* changing memory. */
350 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
351 if (gimple_vdef (stmt_info->stmt)
352 && !gimple_clobber_p (stmt_info->stmt))
354 if (dump_enabled_p ())
355 dump_printf_loc (MSG_NOTE, vect_location,
356 "vec_stmt_relevant_p: stmt has vdefs.\n");
357 *relevant = vect_used_in_scope;
360 /* uses outside the loop. */
361 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
363 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
365 basic_block bb = gimple_bb (USE_STMT (use_p));
366 if (!flow_bb_inside_loop_p (loop, bb))
368 if (is_gimple_debug (USE_STMT (use_p)))
369 continue;
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE, vect_location,
373 "vec_stmt_relevant_p: used out of loop.\n");
375 /* We expect all such uses to be in the loop exit phis
376 (because of loop closed form)  */
377 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
378 gcc_assert (bb == single_exit (loop)->dest);
380 *live_p = true;
385 if (*live_p && *relevant == vect_unused_in_scope
386 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE, vect_location,
390 "vec_stmt_relevant_p: stmt live but not relevant.\n");
391 *relevant = vect_used_only_live;
394 return (*live_p || *relevant);
398 /* Function exist_non_indexing_operands_for_use_p
400 USE is one of the uses attached to STMT_INFO. Check if USE is
401 used in STMT_INFO for anything other than indexing an array. */
403 static bool
404 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
406 tree operand;
408 /* USE corresponds to some operand in STMT. If there is no data
409 reference in STMT, then any operand that corresponds to USE
410 is not indexing an array. */
411 if (!STMT_VINFO_DATA_REF (stmt_info))
412 return true;
414 /* STMT has a data_ref. FORNOW this means that it's one of
415 the following forms:
416 -1- ARRAY_REF = var
417 -2- var = ARRAY_REF
418 (This should have been verified in analyze_data_refs).
420 'var' in the second case corresponds to a def, not a use,
421 so USE cannot correspond to any operands that are not used
422 for array indexing.
424 Therefore, all we need to check is if STMT falls into the
425 first case, and whether var corresponds to USE. */
427 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
428 if (!assign || !gimple_assign_copy_p (assign))
430 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
431 if (call && gimple_call_internal_p (call))
433 internal_fn ifn = gimple_call_internal_fn (call);
434 int mask_index = internal_fn_mask_index (ifn);
435 if (mask_index >= 0
436 && use == gimple_call_arg (call, mask_index))
437 return true;
438 int stored_value_index = internal_fn_stored_value_index (ifn);
439 if (stored_value_index >= 0
440 && use == gimple_call_arg (call, stored_value_index))
441 return true;
442 if (internal_gather_scatter_fn_p (ifn)
443 && use == gimple_call_arg (call, 1))
444 return true;
446 return false;
449 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
450 return false;
451 operand = gimple_assign_rhs1 (assign);
452 if (TREE_CODE (operand) != SSA_NAME)
453 return false;
455 if (operand == use)
456 return true;
458 return false;
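/* Illustrative example (not part of the original source): for the
   statement

     a[i_1] = x_2;

   this function returns true for USE == x_2 (the stored value) and
   false for USE == i_1, which only participates in the address
   computation.  */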
463 Function process_use.
465 Inputs:
466 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
467 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
468 that defined USE. This is done by calling mark_relevant and passing it
469 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
470 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
471 be performed.
473 Outputs:
474 Generally, LIVE_P and RELEVANT are used to define the liveness and
475 relevance info of the DEF_STMT of this USE:
476 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
477 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
478 Exceptions:
479 - case 1: If USE is used only for address computations (e.g. array indexing),
480 which does not need to be directly vectorized, then the liveness/relevance
481 of the respective DEF_STMT is left unchanged.
482 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
483 we skip DEF_STMT because it has already been processed.
484 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
485 "relevant" will be modified accordingly.
487 Return true if everything is as expected. Return false otherwise. */
489 static opt_result
490 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
491 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
492 bool force)
494 stmt_vec_info dstmt_vinfo;
495 enum vect_def_type dt;
497 /* case 1: we are only interested in uses that need to be vectorized. Uses
498 that are used for address computation are not considered relevant. */
499 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
500 return opt_result::success ();
502 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
503 return opt_result::failure_at (stmt_vinfo->stmt,
504 "not vectorized:"
505 " unsupported use in stmt.\n");
507 if (!dstmt_vinfo)
508 return opt_result::success ();
510 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
511 basic_block bb = gimple_bb (stmt_vinfo->stmt);
513 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
514 We have to force the stmt live since the epilogue loop needs it to
515 continue computing the reduction. */
516 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
517 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
518 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
519 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
520 && bb->loop_father == def_bb->loop_father)
522 if (dump_enabled_p ())
523 dump_printf_loc (MSG_NOTE, vect_location,
524 "reduc-stmt defining reduc-phi in the same nest.\n");
525 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
526 return opt_result::success ();
529 /* case 3a: outer-loop stmt defining an inner-loop stmt:
530 outer-loop-header-bb:
531 d = dstmt_vinfo
532 inner-loop:
533 stmt # use (d)
534 outer-loop-tail-bb:
535 ... */
536 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
538 if (dump_enabled_p ())
539 dump_printf_loc (MSG_NOTE, vect_location,
540 "outer-loop def-stmt defining inner-loop stmt.\n");
542 switch (relevant)
544 case vect_unused_in_scope:
545 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
546 vect_used_in_scope : vect_unused_in_scope;
547 break;
549 case vect_used_in_outer_by_reduction:
550 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
551 relevant = vect_used_by_reduction;
552 break;
554 case vect_used_in_outer:
555 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
556 relevant = vect_used_in_scope;
557 break;
559 case vect_used_in_scope:
560 break;
562 default:
563 gcc_unreachable ();
567 /* case 3b: inner-loop stmt defining an outer-loop stmt:
568 outer-loop-header-bb:
570 inner-loop:
571 d = dstmt_vinfo
572 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
573 stmt # use (d) */
574 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
576 if (dump_enabled_p ())
577 dump_printf_loc (MSG_NOTE, vect_location,
578 "inner-loop def-stmt defining outer-loop stmt.\n");
580 switch (relevant)
582 case vect_unused_in_scope:
583 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
584 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
585 vect_used_in_outer_by_reduction : vect_unused_in_scope;
586 break;
588 case vect_used_by_reduction:
589 case vect_used_only_live:
590 relevant = vect_used_in_outer_by_reduction;
591 break;
593 case vect_used_in_scope:
594 relevant = vect_used_in_outer;
595 break;
597 default:
598 gcc_unreachable ();
601 /* We are also not interested in uses on loop PHI backedges that are
602 inductions. Otherwise we'll needlessly vectorize the IV increment
603 and cause hybrid SLP for SLP inductions. Unless the PHI is live
604 of course. */
605 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
606 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
607 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
608 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
609 loop_latch_edge (bb->loop_father))
610 == use))
612 if (dump_enabled_p ())
613 dump_printf_loc (MSG_NOTE, vect_location,
614 "induction value on backedge.\n");
615 return opt_result::success ();
619 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
620 return opt_result::success ();
624 /* Function vect_mark_stmts_to_be_vectorized.
626 Not all stmts in the loop need to be vectorized. For example:
628 for i...
629 for j...
630 1. T0 = i + j
631 2. T1 = a[T0]
633 3. j = j + 1
635 Stmts 1 and 3 do not need to be vectorized, because loop control and
636 addressing of vectorized data-refs are handled differently.
638 This pass detects such stmts. */
640 opt_result
641 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
643 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
644 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
645 unsigned int nbbs = loop->num_nodes;
646 gimple_stmt_iterator si;
647 unsigned int i;
648 basic_block bb;
649 bool live_p;
650 enum vect_relevant relevant;
652 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
654 auto_vec<stmt_vec_info, 64> worklist;
656 /* 1. Init worklist. */
657 for (i = 0; i < nbbs; i++)
659 bb = bbs[i];
660 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
662 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
663 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
665 phi_info->stmt);
667 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
668 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
670 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
672 if (is_gimple_debug (gsi_stmt (si)))
673 continue;
674 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
675 if (dump_enabled_p ())
676 dump_printf_loc (MSG_NOTE, vect_location,
677 "init: stmt relevant? %G", stmt_info->stmt);
679 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
680 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
684 /* 2. Process_worklist */
685 while (worklist.length () > 0)
687 use_operand_p use_p;
688 ssa_op_iter iter;
690 stmt_vec_info stmt_vinfo = worklist.pop ();
691 if (dump_enabled_p ())
692 dump_printf_loc (MSG_NOTE, vect_location,
693 "worklist: examine stmt: %G", stmt_vinfo->stmt);
695 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
696 (DEF_STMT) as relevant/irrelevant according to the relevance property
697 of STMT. */
698 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
700 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
701 propagated as is to the DEF_STMTs of its USEs.
703 One exception is when STMT has been identified as defining a reduction
704 variable; in this case we set the relevance to vect_used_by_reduction.
705 This is because we distinguish between two kinds of relevant stmts -
706 those that are used by a reduction computation, and those that are
707 (also) used by a regular computation. This allows us later on to
708 identify stmts that are used solely by a reduction, and therefore the
709 order of the results that they produce does not have to be kept. */
711 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
713 case vect_reduction_def:
714 gcc_assert (relevant != vect_unused_in_scope);
715 if (relevant != vect_unused_in_scope
716 && relevant != vect_used_in_scope
717 && relevant != vect_used_by_reduction
718 && relevant != vect_used_only_live)
719 return opt_result::failure_at
720 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
721 break;
723 case vect_nested_cycle:
724 if (relevant != vect_unused_in_scope
725 && relevant != vect_used_in_outer_by_reduction
726 && relevant != vect_used_in_outer)
727 return opt_result::failure_at
728 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
729 break;
731 case vect_double_reduction_def:
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_by_reduction
734 && relevant != vect_used_only_live)
735 return opt_result::failure_at
736 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
737 break;
739 default:
740 break;
743 if (is_pattern_stmt_p (stmt_vinfo))
745 /* Pattern statements are not inserted into the code, so
746 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
747 have to scan the RHS or function arguments instead. */
748 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
750 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
751 tree op = gimple_assign_rhs1 (assign);
753 i = 1;
754 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
756 opt_result res
757 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
758 loop_vinfo, relevant, &worklist, false);
759 if (!res)
760 return res;
761 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
762 loop_vinfo, relevant, &worklist, false);
763 if (!res)
764 return res;
765 i = 2;
767 for (; i < gimple_num_ops (assign); i++)
769 op = gimple_op (assign, i);
770 if (TREE_CODE (op) == SSA_NAME)
772 opt_result res
773 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
774 &worklist, false);
775 if (!res)
776 return res;
780 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
782 for (i = 0; i < gimple_call_num_args (call); i++)
784 tree arg = gimple_call_arg (call, i);
785 opt_result res
786 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
787 &worklist, false);
788 if (!res)
789 return res;
793 else
794 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
796 tree op = USE_FROM_PTR (use_p);
797 opt_result res
798 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
799 &worklist, false);
800 if (!res)
801 return res;
804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
806 gather_scatter_info gs_info;
807 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
808 gcc_unreachable ();
809 opt_result res
810 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
811 &worklist, true);
812 if (!res)
814 if (fatal)
815 *fatal = false;
816 return res;
819 } /* while worklist */
821 return opt_result::success ();
824 /* Function vect_model_simple_cost.
826 Models cost for simple operations, i.e. those that only emit ncopies of a
827 single op. Right now, this does not account for multiple insns that could
828 be generated for the single vector op. We will handle that shortly. */
830 static void
831 vect_model_simple_cost (vec_info *,
832 stmt_vec_info stmt_info, int ncopies,
833 enum vect_def_type *dt,
834 int ndts,
835 slp_tree node,
836 stmt_vector_for_cost *cost_vec,
837 vect_cost_for_stmt kind = vector_stmt)
839 int inside_cost = 0, prologue_cost = 0;
841 gcc_assert (cost_vec != NULL);
843 /* ??? Somehow we need to fix this at the callers. */
844 if (node)
845 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
847 if (!node)
848 /* Cost the "broadcast" of a scalar operand in to a vector operand.
849 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
850 cost model. */
851 for (int i = 0; i < ndts; i++)
852 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
853 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
854 stmt_info, 0, vect_prologue);
856 /* Pass the inside-of-loop statements to the target-specific cost model. */
857 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
858 stmt_info, 0, vect_body);
860 if (dump_enabled_p ())
861 dump_printf_loc (MSG_NOTE, vect_location,
862 "vect_model_simple_cost: inside_cost = %d, "
863 "prologue_cost = %d .\n", inside_cost, prologue_cost);
867 /* Model cost for type demotion and promotion operations. PWR is
868 normally zero for single-step promotions and demotions. It will be
869 one if two-step promotion/demotion is required, and so on. NCOPIES
870 is the number of vector results (and thus number of instructions)
871 for the narrowest end of the operation chain. Each additional
872 step doubles the number of instructions required. If WIDEN_ARITH
873 is true the stmt is doing widening arithmetic. */
875 static void
876 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
877 enum vect_def_type *dt,
878 unsigned int ncopies, int pwr,
879 stmt_vector_for_cost *cost_vec,
880 bool widen_arith)
882 int i;
883 int inside_cost = 0, prologue_cost = 0;
885 for (i = 0; i < pwr + 1; i++)
887 inside_cost += record_stmt_cost (cost_vec, ncopies,
888 widen_arith
889 ? vector_stmt : vec_promote_demote,
890 stmt_info, 0, vect_body);
891 ncopies *= 2;
894 /* FORNOW: Assuming maximum 2 args per stmt. */
895 for (i = 0; i < 2; i++)
896 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
897 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
898 stmt_info, 0, vect_prologue);
900 if (dump_enabled_p ())
901 dump_printf_loc (MSG_NOTE, vect_location,
902 "vect_model_promotion_demotion_cost: inside_cost = %d, "
903 "prologue_cost = %d .\n", inside_cost, prologue_cost);
906 /* Returns true if the current function returns DECL. */
908 static bool
909 cfun_returns (tree decl)
911 edge_iterator ei;
912 edge e;
913 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
915 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
916 if (!ret)
917 continue;
918 if (gimple_return_retval (ret) == decl)
919 return true;
920 /* We often end up with an aggregate copy to the result decl,
921 handle that case as well. First skip intermediate clobbers
922 though. */
923 gimple *def = ret;
926 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
928 while (gimple_clobber_p (def));
929 if (is_a <gassign *> (def)
930 && gimple_assign_lhs (def) == gimple_return_retval (ret)
931 && gimple_assign_rhs1 (def) == decl)
932 return true;
934 return false;
937 /* Function vect_model_store_cost
939 Models cost for stores. In the case of grouped accesses, one access
940 has the overhead of the grouped access attributed to it. */
942 static void
943 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
944 vect_memory_access_type memory_access_type,
945 dr_alignment_support alignment_support_scheme,
946 int misalignment,
947 vec_load_store_type vls_type, slp_tree slp_node,
948 stmt_vector_for_cost *cost_vec)
950 unsigned int inside_cost = 0, prologue_cost = 0;
951 stmt_vec_info first_stmt_info = stmt_info;
952 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
954 /* ??? Somehow we need to fix this at the callers. */
955 if (slp_node)
956 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
958 if (vls_type == VLS_STORE_INVARIANT)
960 if (!slp_node)
961 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
962 stmt_info, 0, vect_prologue);
965 /* Grouped stores update all elements in the group at once,
966 so we want the DR for the first statement. */
967 if (!slp_node && grouped_access_p)
968 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
970 /* True if we should include any once-per-group costs as well as
971 the cost of the statement itself. For SLP we only get called
972 once per group anyhow. */
973 bool first_stmt_p = (first_stmt_info == stmt_info);
975 /* We assume that the cost of a single store-lanes instruction is
976 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
977 access is instead being provided by a permute-and-store operation,
978 include the cost of the permutes. */
979 if (first_stmt_p
980 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
982 /* Uses high and low interleave or shuffle operations for each
983 needed permute. */
984 int group_size = DR_GROUP_SIZE (first_stmt_info);
985 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
986 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
987 stmt_info, 0, vect_body);
989 if (dump_enabled_p ())
990 dump_printf_loc (MSG_NOTE, vect_location,
991 "vect_model_store_cost: strided group_size = %d .\n",
992 group_size);
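/* Illustrative worked example (hypothetical values): for a
   VMAT_CONTIGUOUS_PERMUTE store with GROUP_SIZE = 4 and NCOPIES = 2,
   the formula above gives 2 * ceil_log2 (4) * 4 = 16 vec_perm
   statements.  */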
995 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
996 /* Costs of the stores. */
997 if (memory_access_type == VMAT_ELEMENTWISE
998 || memory_access_type == VMAT_GATHER_SCATTER)
1000 /* N scalar stores plus extracting the elements. */
1001 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1002 inside_cost += record_stmt_cost (cost_vec,
1003 ncopies * assumed_nunits,
1004 scalar_store, stmt_info, 0, vect_body);
1006 else
1007 vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
1008 misalignment, &inside_cost, cost_vec);
1010 if (memory_access_type == VMAT_ELEMENTWISE
1011 || memory_access_type == VMAT_STRIDED_SLP)
1013 /* N scalar stores plus extracting the elements. */
1014 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1015 inside_cost += record_stmt_cost (cost_vec,
1016 ncopies * assumed_nunits,
1017 vec_to_scalar, stmt_info, 0, vect_body);
1020 /* When vectorizing a store into the function result assign
1021 a penalty if the function returns in a multi-register location.
1022 In this case we assume we'll end up with having to spill the
1023 vector result and do piecewise loads as a conservative estimate. */
1024 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1025 if (base
1026 && (TREE_CODE (base) == RESULT_DECL
1027 || (DECL_P (base) && cfun_returns (base)))
1028 && !aggregate_value_p (base, cfun->decl))
1030 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1031 /* ??? Handle PARALLEL in some way. */
1032 if (REG_P (reg))
1034 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1035 /* Assume that a single reg-reg move is possible and cheap,
1036 do not account for vector to gp register move cost. */
1037 if (nregs > 1)
1039 /* Spill. */
1040 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1041 vector_store,
1042 stmt_info, 0, vect_epilogue);
1043 /* Loads. */
1044 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1045 scalar_load,
1046 stmt_info, 0, vect_epilogue);
1051 if (dump_enabled_p ())
1052 dump_printf_loc (MSG_NOTE, vect_location,
1053 "vect_model_store_cost: inside_cost = %d, "
1054 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1058 /* Calculate cost of DR's memory access. */
1059 void
1060 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1061 dr_alignment_support alignment_support_scheme,
1062 int misalignment,
1063 unsigned int *inside_cost,
1064 stmt_vector_for_cost *body_cost_vec)
1066 switch (alignment_support_scheme)
1068 case dr_aligned:
1070 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1071 vector_store, stmt_info, 0,
1072 vect_body);
1074 if (dump_enabled_p ())
1075 dump_printf_loc (MSG_NOTE, vect_location,
1076 "vect_model_store_cost: aligned.\n");
1077 break;
1080 case dr_unaligned_supported:
1082 /* Here, we assign an additional cost for the unaligned store. */
1083 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1084 unaligned_store, stmt_info,
1085 misalignment, vect_body);
1086 if (dump_enabled_p ())
1087 dump_printf_loc (MSG_NOTE, vect_location,
1088 "vect_model_store_cost: unaligned supported by "
1089 "hardware.\n");
1090 break;
1093 case dr_unaligned_unsupported:
1095 *inside_cost = VECT_MAX_COST;
1097 if (dump_enabled_p ())
1098 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1099 "vect_model_store_cost: unsupported access.\n");
1100 break;
1103 default:
1104 gcc_unreachable ();
1109 /* Function vect_model_load_cost
1111 Models cost for loads. In the case of grouped accesses, one access has
1112 the overhead of the grouped access attributed to it. Since unaligned
1113 accesses are supported for loads, we also account for the costs of the
1114 access scheme chosen. */
1116 static void
1117 vect_model_load_cost (vec_info *vinfo,
1118 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1119 vect_memory_access_type memory_access_type,
1120 dr_alignment_support alignment_support_scheme,
1121 int misalignment,
1122 gather_scatter_info *gs_info,
1123 slp_tree slp_node,
1124 stmt_vector_for_cost *cost_vec)
1126 unsigned int inside_cost = 0, prologue_cost = 0;
1127 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1129 gcc_assert (cost_vec);
1131 /* ??? Somehow we need to fix this at the callers. */
1132 if (slp_node)
1133 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1135 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1137 /* If the load is permuted then the alignment is determined by
1138 the first group element not by the first scalar stmt DR. */
1139 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1140 /* Record the cost for the permutation. */
1141 unsigned n_perms, n_loads;
1142 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1143 vf, true, &n_perms, &n_loads);
1144 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1145 first_stmt_info, 0, vect_body);
1147 /* And adjust the number of loads performed. This handles
1148 redundancies as well as loads that are later dead. */
1149 ncopies = n_loads;
1152 /* Grouped loads read all elements in the group at once,
1153 so we want the DR for the first statement. */
1154 stmt_vec_info first_stmt_info = stmt_info;
1155 if (!slp_node && grouped_access_p)
1156 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1158 /* True if we should include any once-per-group costs as well as
1159 the cost of the statement itself. For SLP we only get called
1160 once per group anyhow. */
1161 bool first_stmt_p = (first_stmt_info == stmt_info);
1163 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1164 ones we actually need. Account for the cost of unused results. */
1165 if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1167 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1168 stmt_vec_info next_stmt_info = first_stmt_info;
1171 gaps -= 1;
1172 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1174 while (next_stmt_info);
1175 if (gaps)
1177 if (dump_enabled_p ())
1178 dump_printf_loc (MSG_NOTE, vect_location,
1179 "vect_model_load_cost: %d unused vectors.\n",
1180 gaps);
1181 vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
1182 alignment_support_scheme, misalignment, false,
1183 &inside_cost, &prologue_cost,
1184 cost_vec, cost_vec, true);
1188 /* We assume that the cost of a single load-lanes instruction is
1189 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1190 access is instead being provided by a load-and-permute operation,
1191 include the cost of the permutes. */
1192 if (first_stmt_p
1193 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1195 /* Uses even and odd extract operations or shuffle operations
1196 for each needed permute. */
1197 int group_size = DR_GROUP_SIZE (first_stmt_info);
1198 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1199 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1200 stmt_info, 0, vect_body);
1202 if (dump_enabled_p ())
1203 dump_printf_loc (MSG_NOTE, vect_location,
1204 "vect_model_load_cost: strided group_size = %d .\n",
1205 group_size);
1208 /* The loads themselves. */
1209 if (memory_access_type == VMAT_ELEMENTWISE
1210 || memory_access_type == VMAT_GATHER_SCATTER)
1212 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1213 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1214 if (memory_access_type == VMAT_GATHER_SCATTER
1215 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1216 /* For emulated gathers N offset vector element extracts
1217 (we assume the scalar scaling and ptr + offset add is consumed by
1218 the load).  */
1219 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1220 vec_to_scalar, stmt_info, 0,
1221 vect_body);
1222 /* N scalar loads plus gathering them into a vector. */
1223 inside_cost += record_stmt_cost (cost_vec,
1224 ncopies * assumed_nunits,
1225 scalar_load, stmt_info, 0, vect_body);
1227 else if (memory_access_type == VMAT_INVARIANT)
1229 /* Invariant loads will ideally be hoisted and splat to a vector. */
1230 prologue_cost += record_stmt_cost (cost_vec, 1,
1231 scalar_load, stmt_info, 0,
1232 vect_prologue);
1233 prologue_cost += record_stmt_cost (cost_vec, 1,
1234 scalar_to_vec, stmt_info, 0,
1235 vect_prologue);
1237 else
1238 vect_get_load_cost (vinfo, stmt_info, ncopies,
1239 alignment_support_scheme, misalignment, first_stmt_p,
1240 &inside_cost, &prologue_cost,
1241 cost_vec, cost_vec, true);
1242 if (memory_access_type == VMAT_ELEMENTWISE
1243 || memory_access_type == VMAT_STRIDED_SLP
1244 || (memory_access_type == VMAT_GATHER_SCATTER
1245 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1246 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1247 stmt_info, 0, vect_body);
1249 if (dump_enabled_p ())
1250 dump_printf_loc (MSG_NOTE, vect_location,
1251 "vect_model_load_cost: inside_cost = %d, "
1252 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1256 /* Calculate cost of DR's memory access. */
1257 void
1258 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1259 dr_alignment_support alignment_support_scheme,
1260 int misalignment,
1261 bool add_realign_cost, unsigned int *inside_cost,
1262 unsigned int *prologue_cost,
1263 stmt_vector_for_cost *prologue_cost_vec,
1264 stmt_vector_for_cost *body_cost_vec,
1265 bool record_prologue_costs)
1267 switch (alignment_support_scheme)
1269 case dr_aligned:
1271 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1272 stmt_info, 0, vect_body);
1274 if (dump_enabled_p ())
1275 dump_printf_loc (MSG_NOTE, vect_location,
1276 "vect_model_load_cost: aligned.\n");
1278 break;
1280 case dr_unaligned_supported:
1282 /* Here, we assign an additional cost for the unaligned load. */
1283 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1284 unaligned_load, stmt_info,
1285 misalignment, vect_body);
1287 if (dump_enabled_p ())
1288 dump_printf_loc (MSG_NOTE, vect_location,
1289 "vect_model_load_cost: unaligned supported by "
1290 "hardware.\n");
1292 break;
1294 case dr_explicit_realign:
1296 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1297 vector_load, stmt_info, 0, vect_body);
1298 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1299 vec_perm, stmt_info, 0, vect_body);
1301 /* FIXME: If the misalignment remains fixed across the iterations of
1302 the containing loop, the following cost should be added to the
1303 prologue costs. */
1304 if (targetm.vectorize.builtin_mask_for_load)
1305 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1306 stmt_info, 0, vect_body);
1308 if (dump_enabled_p ())
1309 dump_printf_loc (MSG_NOTE, vect_location,
1310 "vect_model_load_cost: explicit realign\n");
1312 break;
1314 case dr_explicit_realign_optimized:
1316 if (dump_enabled_p ())
1317 dump_printf_loc (MSG_NOTE, vect_location,
1318 "vect_model_load_cost: unaligned software "
1319 "pipelined.\n");
1321 /* Unaligned software pipeline has a load of an address, an initial
1322 load, and possibly a mask operation to "prime" the loop. However,
1323 if this is an access in a group of loads, which provide grouped
1324 access, then the above cost should only be considered for one
1325 access in the group. Inside the loop, there is a load op
1326 and a realignment op. */
1328 if (add_realign_cost && record_prologue_costs)
1330 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1331 vector_stmt, stmt_info,
1332 0, vect_prologue);
1333 if (targetm.vectorize.builtin_mask_for_load)
1334 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1335 vector_stmt, stmt_info,
1336 0, vect_prologue);
1339 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1340 stmt_info, 0, vect_body);
1341 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1342 stmt_info, 0, vect_body);
1344 if (dump_enabled_p ())
1345 dump_printf_loc (MSG_NOTE, vect_location,
1346 "vect_model_load_cost: explicit realign optimized"
1347 "\n");
1349 break;
1352 case dr_unaligned_unsupported:
1354 *inside_cost = VECT_MAX_COST;
1356 if (dump_enabled_p ())
1357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1358 "vect_model_load_cost: unsupported access.\n");
1359 break;
1362 default:
1363 gcc_unreachable ();
1367 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1368 the loop preheader for the vectorized stmt STMT_VINFO. */
1370 static void
1371 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1372 gimple_stmt_iterator *gsi)
1374 if (gsi)
1375 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1376 else
1377 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1379 if (dump_enabled_p ())
1380 dump_printf_loc (MSG_NOTE, vect_location,
1381 "created new init_stmt: %G", new_stmt);
1384 /* Function vect_init_vector.
1386 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1387 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1388 vector type a vector with all elements equal to VAL is created first.
1389 Place the initialization at GSI if it is not NULL. Otherwise, place the
1390 initialization at the loop preheader.
1391 Return the DEF of INIT_STMT.
1392 It will be used in the vectorization of STMT_INFO. */
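/* Illustrative sketch (names hypothetical): for VAL = 5 and a V4SI
   vector TYPE, the function emits, either at GSI or on the preheader
   edge,

     cst_1 = { 5, 5, 5, 5 };

   and returns cst_1.  */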
1394 tree
1395 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1396 gimple_stmt_iterator *gsi)
1398 gimple *init_stmt;
1399 tree new_temp;
1401 /* We abuse this function to push something to an SSA name with initial 'val'. */
1402 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1404 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1405 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1407 /* Scalar boolean value should be transformed into
1408 all zeros or all ones value before building a vector. */
1409 if (VECTOR_BOOLEAN_TYPE_P (type))
1411 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1412 tree false_val = build_zero_cst (TREE_TYPE (type));
1414 if (CONSTANT_CLASS_P (val))
1415 val = integer_zerop (val) ? false_val : true_val;
1416 else
1418 new_temp = make_ssa_name (TREE_TYPE (type));
1419 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1420 val, true_val, false_val);
1421 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1422 val = new_temp;
1425 else
1427 gimple_seq stmts = NULL;
1428 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1429 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1430 TREE_TYPE (type), val);
1431 else
1432 /* ??? Condition vectorization expects us to do
1433 promotion of invariant/external defs. */
1434 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1435 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1436 !gsi_end_p (gsi2); )
1438 init_stmt = gsi_stmt (gsi2);
1439 gsi_remove (&gsi2, false);
1440 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1444 val = build_vector_from_val (type, val);
1447 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1448 init_stmt = gimple_build_assign (new_temp, val);
1449 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1450 return new_temp;
1454 /* Function vect_get_vec_defs_for_operand.
1456 OP is an operand in STMT_VINFO. This function returns a vector of
1457 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1459 In the case that OP is an SSA_NAME which is defined in the loop, then
1460 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1462 In case OP is an invariant or constant, a new stmt that creates a vector def
1463 needs to be introduced. VECTYPE may be used to specify a required type for
1464 vector invariant. */
1466 void
1467 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1468 unsigned ncopies,
1469 tree op, vec<tree> *vec_oprnds, tree vectype)
1471 gimple *def_stmt;
1472 enum vect_def_type dt;
1473 bool is_simple_use;
1474 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1476 if (dump_enabled_p ())
1477 dump_printf_loc (MSG_NOTE, vect_location,
1478 "vect_get_vec_defs_for_operand: %T\n", op);
1480 stmt_vec_info def_stmt_info;
1481 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1482 &def_stmt_info, &def_stmt);
1483 gcc_assert (is_simple_use);
1484 if (def_stmt && dump_enabled_p ())
1485 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1487 vec_oprnds->create (ncopies);
1488 if (dt == vect_constant_def || dt == vect_external_def)
1490 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1491 tree vector_type;
1493 if (vectype)
1494 vector_type = vectype;
1495 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1496 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1497 vector_type = truth_type_for (stmt_vectype);
1498 else
1499 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1501 gcc_assert (vector_type);
1502 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1503 while (ncopies--)
1504 vec_oprnds->quick_push (vop);
1506 else
1508 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1509 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1510 for (unsigned i = 0; i < ncopies; ++i)
1511 vec_oprnds->quick_push (gimple_get_lhs
1512 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1517 /* Get vectorized definitions for OP0 and OP1. */
1519 void
1520 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1521 unsigned ncopies,
1522 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1523 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1524 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1525 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1527 if (slp_node)
1529 if (op0)
1530 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1531 if (op1)
1532 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1533 if (op2)
1534 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1535 if (op3)
1536 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1538 else
1540 if (op0)
1541 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1542 op0, vec_oprnds0, vectype0);
1543 if (op1)
1544 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1545 op1, vec_oprnds1, vectype1);
1546 if (op2)
1547 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1548 op2, vec_oprnds2, vectype2);
1549 if (op3)
1550 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1551 op3, vec_oprnds3, vectype3);
1555 void
1556 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1557 unsigned ncopies,
1558 tree op0, vec<tree> *vec_oprnds0,
1559 tree op1, vec<tree> *vec_oprnds1,
1560 tree op2, vec<tree> *vec_oprnds2,
1561 tree op3, vec<tree> *vec_oprnds3)
1563 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1564 op0, vec_oprnds0, NULL_TREE,
1565 op1, vec_oprnds1, NULL_TREE,
1566 op2, vec_oprnds2, NULL_TREE,
1567 op3, vec_oprnds3, NULL_TREE);
1570 /* Helper function called by vect_finish_replace_stmt and
1571 vect_finish_stmt_generation. Set the location of the new
1572 statement and create and return a stmt_vec_info for it. */
1574 static void
1575 vect_finish_stmt_generation_1 (vec_info *,
1576 stmt_vec_info stmt_info, gimple *vec_stmt)
1578 if (dump_enabled_p ())
1579 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1581 if (stmt_info)
1583 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1585 /* While EH edges will generally prevent vectorization, stmt might
1586 e.g. be in a must-not-throw region. Ensure newly created stmts
1587 that could throw are part of the same region. */
1588 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1589 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1590 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1592 else
1593 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1596 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1597 which sets the same scalar result as STMT_INFO did. Create and return a
1598 stmt_vec_info for VEC_STMT. */
1600 void
1601 vect_finish_replace_stmt (vec_info *vinfo,
1602 stmt_vec_info stmt_info, gimple *vec_stmt)
1604 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1605 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1607 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1608 gsi_replace (&gsi, vec_stmt, true);
1610 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1613 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1614 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1616 void
1617 vect_finish_stmt_generation (vec_info *vinfo,
1618 stmt_vec_info stmt_info, gimple *vec_stmt,
1619 gimple_stmt_iterator *gsi)
1621 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1623 if (!gsi_end_p (*gsi)
1624 && gimple_has_mem_ops (vec_stmt))
1626 gimple *at_stmt = gsi_stmt (*gsi);
1627 tree vuse = gimple_vuse (at_stmt);
1628 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1630 tree vdef = gimple_vdef (at_stmt);
1631 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1632 gimple_set_modified (vec_stmt, true);
1633 /* If we have an SSA vuse and insert a store, update virtual
1634 SSA form to avoid triggering the renamer. Do so only
1635 if we can easily see all uses - which is what almost always
1636 happens with the way vectorized stmts are inserted. */
1637 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1638 && ((is_gimple_assign (vec_stmt)
1639 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1640 || (is_gimple_call (vec_stmt)
1641 && (!(gimple_call_flags (vec_stmt)
1642 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1643 || (gimple_call_lhs (vec_stmt)
1644 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1646 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1647 gimple_set_vdef (vec_stmt, new_vdef);
1648 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1652 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1653 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1656 /* We want to vectorize a call to combined function CFN with function
1657 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1658 as the types of all inputs. Check whether this is possible using
1659 an internal function, returning its code if so or IFN_LAST if not. */
1661 static internal_fn
1662 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1663 tree vectype_out, tree vectype_in)
1665 internal_fn ifn;
1666 if (internal_fn_p (cfn))
1667 ifn = as_internal_fn (cfn);
1668 else
1669 ifn = associated_internal_fn (fndecl);
1670 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1672 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1673 if (info.vectorizable)
1675 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1676 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1677 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1678 OPTIMIZE_FOR_SPEED))
1679 return ifn;
1682 return IFN_LAST;
1686 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1687 gimple_stmt_iterator *);
1689 /* Check whether a load or store statement in the loop described by
1690 LOOP_VINFO is possible in a loop using partial vectors. This is
1691 testing whether the vectorizer pass has the appropriate support,
1692 as well as whether the target does.
1694 VLS_TYPE says whether the statement is a load or store and VECTYPE
1695 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1696 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1697 says how the load or store is going to be implemented and GROUP_SIZE
1698 is the number of load or store statements in the containing group.
1699 If the access is a gather load or scatter store, GS_INFO describes
1700 its arguments. If the load or store is conditional, SCALAR_MASK is the
1701 condition under which it occurs.
1703 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1704 vectors is not supported, otherwise record the required rgroup control
1705 types. */
1707 static void
1708 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1709 slp_tree slp_node,
1710 vec_load_store_type vls_type,
1711 int group_size,
1712 vect_memory_access_type
1713 memory_access_type,
1714 gather_scatter_info *gs_info,
1715 tree scalar_mask)
1717 /* Invariant loads need no special support. */
1718 if (memory_access_type == VMAT_INVARIANT)
1719 return;
1721 unsigned int nvectors;
1722 if (slp_node)
1723 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1724 else
1725 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1727 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1728 machine_mode vecmode = TYPE_MODE (vectype);
1729 bool is_load = (vls_type == VLS_LOAD);
1730 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1732 if (is_load
1733 ? !vect_load_lanes_supported (vectype, group_size, true)
1734 : !vect_store_lanes_supported (vectype, group_size, true))
1736 if (dump_enabled_p ())
1737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1738 "can't operate on partial vectors because"
1739 " the target doesn't have an appropriate"
1740 " load/store-lanes instruction.\n");
1741 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1742 return;
1744 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1745 scalar_mask);
1746 return;
1749 if (memory_access_type == VMAT_GATHER_SCATTER)
1751 internal_fn ifn = (is_load
1752 ? IFN_MASK_GATHER_LOAD
1753 : IFN_MASK_SCATTER_STORE);
1754 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1755 gs_info->memory_type,
1756 gs_info->offset_vectype,
1757 gs_info->scale))
1759 if (dump_enabled_p ())
1760 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1761 "can't operate on partial vectors because"
1762 " the target doesn't have an appropriate"
1763 " gather load or scatter store instruction.\n");
1764 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1765 return;
1767 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1768 scalar_mask);
1769 return;
1772 if (memory_access_type != VMAT_CONTIGUOUS
1773 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1775 /* Element X of the data must come from iteration i * VF + X of the
1776 scalar loop. We need more work to support other mappings. */
1777 if (dump_enabled_p ())
1778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1779 "can't operate on partial vectors because an"
1780 " access isn't contiguous.\n");
1781 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1782 return;
1785 if (!VECTOR_MODE_P (vecmode))
1787 if (dump_enabled_p ())
1788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1789 "can't operate on partial vectors when emulating"
1790 " vector operations.\n");
1791 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1792 return;
1795 /* We might load more scalars than we need for permuting SLP loads.
1796 We checked in get_group_load_store_type that the extra elements
1797 don't leak into a new vector. */
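   /* For example (illustrative numbers only): a load group of size 3 with
      VF 2 and V4SI vectors gives SIZE 6 and NUNITS 4 below, so the
      division rounds away from zero to 2 vectors.  */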
1798 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1800 unsigned int nvectors;
1801 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1802 return nvectors;
1803 gcc_unreachable ();
1806 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1807 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1808 machine_mode mask_mode;
1809 bool using_partial_vectors_p = false;
1810 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1811 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1813 nvectors = group_memory_nvectors (group_size * vf, nunits);
1814 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1815 using_partial_vectors_p = true;
1818 machine_mode vmode;
1819 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1821 nvectors = group_memory_nvectors (group_size * vf, nunits);
1822 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
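      /* E.g. if VECMODE is V4SI but the target only provides len_load/len_store
	 for a same-sized byte-element mode, the recorded lengths must be
	 scaled by the element size (here 4), hence the FACTOR below.  */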
1823 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1824 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1825 using_partial_vectors_p = true;
1828 if (!using_partial_vectors_p)
1830 if (dump_enabled_p ())
1831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1832 "can't operate on partial vectors because the"
1833 " target doesn't have the appropriate partial"
1834 " vectorization load or store.\n");
1835 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1839 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1840 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1841 that needs to be applied to all loads and stores in a vectorized loop.
1842 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1843 otherwise return VEC_MASK & LOOP_MASK.
1845 MASK_TYPE is the type of both masks. If new statements are needed,
1846 insert them before GSI. */
1848 static tree
1849 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1850 tree vec_mask, gimple_stmt_iterator *gsi)
1852 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1853 if (!loop_mask)
1854 return vec_mask;
1856 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1858 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1859 return vec_mask;
1861 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1862 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1863 vec_mask, loop_mask);
1865 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1866 return and_res;
1869 /* Determine whether we can use a gather load or scatter store to vectorize
1870 strided load or store STMT_INFO by truncating the current offset to a
1871 smaller width. We need to be able to construct an offset vector:
1873 { 0, X, X*2, X*3, ... }
1875 without loss of precision, where X is STMT_INFO's DR_STEP.
1877 Return true if this is possible, describing the gather load or scatter
1878 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1880 static bool
1881 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1882 loop_vec_info loop_vinfo, bool masked_p,
1883 gather_scatter_info *gs_info)
1885 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1886 data_reference *dr = dr_info->dr;
1887 tree step = DR_STEP (dr);
1888 if (TREE_CODE (step) != INTEGER_CST)
1890 /* ??? Perhaps we could use range information here? */
1891 if (dump_enabled_p ())
1892 dump_printf_loc (MSG_NOTE, vect_location,
1893 "cannot truncate variable step.\n");
1894 return false;
1897 /* Get the number of bits in an element. */
1898 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1899 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1900 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1902 /* Set COUNT to one less than the upper limit on the number of elements.
1903 Start with the maximum vectorization factor. */
1904 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1906 /* Try lowering COUNT to the number of scalar latch iterations. */
1907 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1908 widest_int max_iters;
1909 if (max_loop_iterations (loop, &max_iters)
1910 && max_iters < count)
1911 count = max_iters.to_shwi ();
1913 /* Try scales of 1 and the element size. */
1914 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1915 wi::overflow_type overflow = wi::OVF_NONE;
1916 for (int i = 0; i < 2; ++i)
1918 int scale = scales[i];
1919 widest_int factor;
1920 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1921 continue;
1923 /* Determine the minimum precision of COUNT * STEP / SCALE. */
1924 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1925 if (overflow)
1926 continue;
1927 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1928 unsigned int min_offset_bits = wi::min_precision (range, sign);
1930 /* Find the narrowest viable offset type. */
1931 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1932 tree offset_type = build_nonstandard_integer_type (offset_bits,
1933 sign == UNSIGNED);
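      /* For example (illustrative numbers only): COUNT 1023 with STEP 4 and
	 SCALE 4 gives FACTOR 1, so RANGE is 1023, MIN_OFFSET_BITS is 10 and
	 a 16-bit unsigned OFFSET_TYPE is tried.  */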
1935 /* See whether the target supports the operation with an offset
1936 no narrower than OFFSET_TYPE. */
1937 tree memory_type = TREE_TYPE (DR_REF (dr));
1938 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1939 vectype, memory_type, offset_type, scale,
1940 &gs_info->ifn, &gs_info->offset_vectype)
1941 || gs_info->ifn == IFN_LAST)
1942 continue;
1944 gs_info->decl = NULL_TREE;
1945 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1946 but we don't need to store that here. */
1947 gs_info->base = NULL_TREE;
1948 gs_info->element_type = TREE_TYPE (vectype);
1949 gs_info->offset = fold_convert (offset_type, step);
1950 gs_info->offset_dt = vect_constant_def;
1951 gs_info->scale = scale;
1952 gs_info->memory_type = memory_type;
1953 return true;
1956 if (overflow && dump_enabled_p ())
1957 dump_printf_loc (MSG_NOTE, vect_location,
1958 "truncating gather/scatter offset to %d bits"
1959 " might change its value.\n", element_bits);
1961 return false;
1964 /* Return true if we can use gather/scatter internal functions to
1965 vectorize STMT_INFO, which is a grouped or strided load or store.
1966 MASKED_P is true if the load or store is conditional. When returning
1967 true, fill in GS_INFO with the information required to perform the
1968 operation. */
1970 static bool
1971 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1972 loop_vec_info loop_vinfo, bool masked_p,
1973 gather_scatter_info *gs_info)
1975 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1976 || gs_info->ifn == IFN_LAST)
1977 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1978 masked_p, gs_info);
1980 tree old_offset_type = TREE_TYPE (gs_info->offset);
1981 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1983 gcc_assert (TYPE_PRECISION (new_offset_type)
1984 >= TYPE_PRECISION (old_offset_type));
1985 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1987 if (dump_enabled_p ())
1988 dump_printf_loc (MSG_NOTE, vect_location,
1989 "using gather/scatter for strided/grouped access,"
1990 " scale = %d\n", gs_info->scale);
1992 return true;
1995 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1996 elements with a known constant step. Return -1 if that step
1997 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1999 static int
2000 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
2002 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2003 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
2004 size_zero_node);
2007 /* If the target supports a permute mask that reverses the elements in
2008 a vector of type VECTYPE, return that mask, otherwise return null. */
2010 static tree
2011 perm_mask_for_reverse (tree vectype)
2013 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2015 /* The encoding has a single stepped pattern. */
2016 vec_perm_builder sel (nunits, 1, 3);
2017 for (int i = 0; i < 3; ++i)
2018 sel.quick_push (nunits - 1 - i);
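  /* For V8SI, for instance, this encodes { 7, 6, 5, ... }, which expands
     to the full reversal selector { 7, 6, 5, 4, 3, 2, 1, 0 }.  */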
2020 vec_perm_indices indices (sel, 1, nunits);
2021 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
2022 indices))
2023 return NULL_TREE;
2024 return vect_gen_perm_mask_checked (vectype, indices);
2027 /* A subroutine of get_load_store_type, with a subset of the same
2028 arguments. Handle the case where STMT_INFO is a load or store that
2029 accesses consecutive elements with a negative step. Sets *POFFSET
2030 to the offset to be applied to the DR for the first access. */
2032 static vect_memory_access_type
2033 get_negative_load_store_type (vec_info *vinfo,
2034 stmt_vec_info stmt_info, tree vectype,
2035 vec_load_store_type vls_type,
2036 unsigned int ncopies, poly_int64 *poffset)
2038 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2039 dr_alignment_support alignment_support_scheme;
2041 if (ncopies > 1)
2043 if (dump_enabled_p ())
2044 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2045 "multiple types with negative step.\n");
2046 return VMAT_ELEMENTWISE;
2049 /* For backward running DRs the first access in vectype actually is
2050 N-1 elements before the address of the DR. */
2051 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2052 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
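  /* For V4SI, for instance, this is (-4 + 1) * 4 == -12 bytes, i.e. the
     vector access starts three elements before the DR address.  */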
2054 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
2055 alignment_support_scheme
2056 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
2057 if (alignment_support_scheme != dr_aligned
2058 && alignment_support_scheme != dr_unaligned_supported)
2060 if (dump_enabled_p ())
2061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2062 "negative step but alignment required.\n");
2063 *poffset = 0;
2064 return VMAT_ELEMENTWISE;
2067 if (vls_type == VLS_STORE_INVARIANT)
2069 if (dump_enabled_p ())
2070 dump_printf_loc (MSG_NOTE, vect_location,
2071 "negative step with invariant source;"
2072 " no permute needed.\n");
2073 return VMAT_CONTIGUOUS_DOWN;
2076 if (!perm_mask_for_reverse (vectype))
2078 if (dump_enabled_p ())
2079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2080 "negative step and reversing not supported.\n");
2081 *poffset = 0;
2082 return VMAT_ELEMENTWISE;
2085 return VMAT_CONTIGUOUS_REVERSE;
2088 /* STMT_INFO is either a masked or unconditional store. Return the value
2089 being stored. */
2091 tree
2092 vect_get_store_rhs (stmt_vec_info stmt_info)
2094 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2096 gcc_assert (gimple_assign_single_p (assign));
2097 return gimple_assign_rhs1 (assign);
2099 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2101 internal_fn ifn = gimple_call_internal_fn (call);
2102 int index = internal_fn_stored_value_index (ifn);
2103 gcc_assert (index >= 0);
2104 return gimple_call_arg (call, index);
2106 gcc_unreachable ();
2109 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2111 This function returns a vector type which can be composed from NELTS pieces,
2112 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
2113 same vector size as the return vector. It first checks whether the target
2114 supports a vector mode of the piece size for the construction; if not, it
2115 checks whether a scalar mode of the piece size can be used instead. It
2116 returns NULL_TREE if no suitable composition can be found.
2118 For example, for (vtype=V16QI, nelts=4), we can probably get:
2119 - V16QI with PTYPE V4QI.
2120 - V4SI with PTYPE SI.
2121 - NULL_TREE. */
2123 static tree
2124 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2126 gcc_assert (VECTOR_TYPE_P (vtype));
2127 gcc_assert (known_gt (nelts, 0U));
2129 machine_mode vmode = TYPE_MODE (vtype);
2130 if (!VECTOR_MODE_P (vmode))
2131 return NULL_TREE;
2133 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2134 unsigned int pbsize;
2135 if (constant_multiple_p (vbsize, nelts, &pbsize))
2137 /* First check if vec_init optab supports construction from
2138 vector pieces directly. */
2139 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2140 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2141 machine_mode rmode;
2142 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2143 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2144 != CODE_FOR_nothing))
2146 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2147 return vtype;
2150 /* Otherwise check whether an integer type of the same piece size exists
2151 and whether the vec_init optab supports construction from it directly. */
2152 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2153 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2154 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2155 != CODE_FOR_nothing))
2157 *ptype = build_nonstandard_integer_type (pbsize, 1);
2158 return build_vector_type (*ptype, nelts);
2162 return NULL_TREE;
2165 /* A subroutine of get_load_store_type, with a subset of the same
2166 arguments. Handle the case where STMT_INFO is part of a grouped load
2167 or store.
2169 For stores, the statements in the group are all consecutive
2170 and there is no gap at the end. For loads, the statements in the
2171 group might not be consecutive; there can be gaps between statements
2172 as well as at the end. */
2174 static bool
2175 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2176 tree vectype, slp_tree slp_node,
2177 bool masked_p, vec_load_store_type vls_type,
2178 vect_memory_access_type *memory_access_type,
2179 poly_int64 *poffset,
2180 dr_alignment_support *alignment_support_scheme,
2181 int *misalignment,
2182 gather_scatter_info *gs_info)
2184 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2185 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2186 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2187 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2188 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2189 bool single_element_p = (stmt_info == first_stmt_info
2190 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2191 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2192 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2194 /* True if the vectorized statements would access beyond the last
2195 statement in the group. */
2196 bool overrun_p = false;
2198 /* True if we can cope with such overrun by peeling for gaps, so that
2199 there is at least one final scalar iteration after the vector loop. */
2200 bool can_overrun_p = (!masked_p
2201 && vls_type == VLS_LOAD
2202 && loop_vinfo
2203 && !loop->inner);
2205 /* There can only be a gap at the end of the group if the stride is
2206 known at compile time. */
2207 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2209 /* Stores can't yet have gaps. */
2210 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2212 if (slp_node)
2214 /* For SLP vectorization we directly vectorize a subchain
2215 without permutation. */
2216 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2217 first_dr_info
2218 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2219 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2221 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2222 separated by the stride, until we have a complete vector.
2223 Fall back to scalar accesses if that isn't possible. */
2224 if (multiple_p (nunits, group_size))
2225 *memory_access_type = VMAT_STRIDED_SLP;
2226 else
2227 *memory_access_type = VMAT_ELEMENTWISE;
2229 else
2231 overrun_p = loop_vinfo && gap != 0;
2232 if (overrun_p && vls_type != VLS_LOAD)
2234 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2235 "Grouped store with gaps requires"
2236 " non-consecutive accesses\n");
2237 return false;
2239 /* An overrun is fine if the trailing elements are smaller
2240 than the alignment boundary B. Every vector access will
2241 be a multiple of B and so we are guaranteed to access a
2242 non-gap element in the same B-sized block. */
2243 if (overrun_p
2244 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2245 vectype)
2246 / vect_get_scalar_dr_size (first_dr_info)))
2247 overrun_p = false;
2249 /* If the gap splits the vector in half and the target
2250 can do half-vector operations, avoid the epilogue peeling
2251 by simply loading only half of the vector. Usually
2252 the construction with an upper zero half will be elided. */
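	      /* E.g. GROUP_SIZE 8 with GAP 4 and V8HI: only a V4HI half needs
		 to be loaded, so the trailing gap elements are never touched
		 and no peeling for gaps is required.  */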
2253 dr_alignment_support alss;
2254 int misalign = dr_misalignment (first_dr_info, vectype);
2255 tree half_vtype;
2256 if (overrun_p
2257 && !masked_p
2258 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2259 vectype, misalign)))
2260 == dr_aligned
2261 || alss == dr_unaligned_supported)
2262 && known_eq (nunits, (group_size - gap) * 2)
2263 && known_eq (nunits, group_size)
2264 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2265 != NULL_TREE))
2266 overrun_p = false;
2268 if (overrun_p && !can_overrun_p)
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2272 "Peeling for outer loop is not supported\n");
2273 return false;
2275 int cmp = compare_step_with_zero (vinfo, stmt_info);
2276 if (cmp < 0)
2278 if (single_element_p)
2279 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2280 only correct for single element "interleaving" SLP. */
2281 *memory_access_type = get_negative_load_store_type
2282 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2283 else
2285 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2286 separated by the stride, until we have a complete vector.
2287 Fall back to scalar accesses if that isn't possible. */
2288 if (multiple_p (nunits, group_size))
2289 *memory_access_type = VMAT_STRIDED_SLP;
2290 else
2291 *memory_access_type = VMAT_ELEMENTWISE;
2294 else
2296 gcc_assert (!loop_vinfo || cmp > 0);
2297 *memory_access_type = VMAT_CONTIGUOUS;
2300 /* When we have a contiguous access across loop iterations
2301 but the access in the loop doesn't cover the full vector
2302 we can end up with no gap recorded but still excess
2303 elements accessed, see PR103116. Make sure we peel for
2304 gaps if necessary and sufficient and give up if not. */
2305 if (loop_vinfo
2306 && *memory_access_type == VMAT_CONTIGUOUS
2307 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2308 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2309 nunits))
2311 unsigned HOST_WIDE_INT cnunits, cvf;
2312 if (!can_overrun_p
2313 || !nunits.is_constant (&cnunits)
2314 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2315 /* Peeling for gaps assumes that a single scalar iteration
2316 is enough to make sure the last vector iteration doesn't
2317 access excess elements.
2318 ??? Enhancements include peeling multiple iterations
2319 or using masked loads with a static mask. */
2320 || (group_size * cvf) % cnunits + group_size < cnunits)
2322 if (dump_enabled_p ())
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2324 "peeling for gaps insufficient for "
2325 "access\n");
2326 return false;
2328 overrun_p = true;
2332 else
2334 /* We can always handle this case using elementwise accesses,
2335 but see if something more efficient is available. */
2336 *memory_access_type = VMAT_ELEMENTWISE;
2338 /* If there is a gap at the end of the group then these optimizations
2339 would access excess elements in the last iteration. */
2340 bool would_overrun_p = (gap != 0);
2341 /* An overrun is fine if the trailing elements are smaller than the
2342 alignment boundary B. Every vector access will be a multiple of B
2343 and so we are guaranteed to access a non-gap element in the
2344 same B-sized block. */
2345 if (would_overrun_p
2346 && !masked_p
2347 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2348 / vect_get_scalar_dr_size (first_dr_info)))
2349 would_overrun_p = false;
2351 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2352 && (can_overrun_p || !would_overrun_p)
2353 && compare_step_with_zero (vinfo, stmt_info) > 0)
2355 /* First cope with the degenerate case of a single-element
2356 vector. */
2357 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2360 /* Otherwise try using LOAD/STORE_LANES. */
2361 else if (vls_type == VLS_LOAD
2362 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2363 : vect_store_lanes_supported (vectype, group_size,
2364 masked_p))
2366 *memory_access_type = VMAT_LOAD_STORE_LANES;
2367 overrun_p = would_overrun_p;
2370 /* If that fails, try using permuting loads. */
2371 else if (vls_type == VLS_LOAD
2372 ? vect_grouped_load_supported (vectype, single_element_p,
2373 group_size)
2374 : vect_grouped_store_supported (vectype, group_size))
2376 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2377 overrun_p = would_overrun_p;
2381 /* As a last resort, try using a gather load or scatter store.
2383 ??? Although the code can handle all group sizes correctly,
2384 it probably isn't a win to use separate strided accesses based
2385 on nearby locations. Or, even if it's a win over scalar code,
2386 it might not be a win over vectorizing at a lower VF, if that
2387 allows us to use contiguous accesses. */
2388 if (*memory_access_type == VMAT_ELEMENTWISE
2389 && single_element_p
2390 && loop_vinfo
2391 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2392 masked_p, gs_info))
2393 *memory_access_type = VMAT_GATHER_SCATTER;
2396 if (*memory_access_type == VMAT_GATHER_SCATTER
2397 || *memory_access_type == VMAT_ELEMENTWISE)
2399 *alignment_support_scheme = dr_unaligned_supported;
2400 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2402 else
2404 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2405 *alignment_support_scheme
2406 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2407 *misalignment);
2410 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2412 /* STMT is the leader of the group. Check the operands of all the
2413 stmts of the group. */
2414 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2415 while (next_stmt_info)
2417 tree op = vect_get_store_rhs (next_stmt_info);
2418 enum vect_def_type dt;
2419 if (!vect_is_simple_use (op, vinfo, &dt))
2421 if (dump_enabled_p ())
2422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2423 "use not simple.\n");
2424 return false;
2426 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2430 if (overrun_p)
2432 gcc_assert (can_overrun_p);
2433 if (dump_enabled_p ())
2434 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2435 "Data access with gaps requires scalar "
2436 "epilogue loop\n");
2437 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2440 return true;
2443 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2444 if there is a memory access type that the vectorized form can use,
2445 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2446 or scatters, fill in GS_INFO accordingly. In addition
2447 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2448 the target does not support the alignment scheme. *MISALIGNMENT
2449 is set according to the alignment of the access (including
2450 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2452 SLP_NODE says whether we're performing SLP rather than loop vectorization.
2453 MASKED_P is true if the statement is conditional on a vectorized mask.
2454 VECTYPE is the vector type that the vectorized statements will use.
2455 NCOPIES is the number of vector statements that will be needed. */
2457 static bool
2458 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2459 tree vectype, slp_tree slp_node,
2460 bool masked_p, vec_load_store_type vls_type,
2461 unsigned int ncopies,
2462 vect_memory_access_type *memory_access_type,
2463 poly_int64 *poffset,
2464 dr_alignment_support *alignment_support_scheme,
2465 int *misalignment,
2466 gather_scatter_info *gs_info)
2468 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2469 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2470 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2471 *poffset = 0;
2472 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2474 *memory_access_type = VMAT_GATHER_SCATTER;
2475 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2476 gcc_unreachable ();
2477 /* When using internal functions, we rely on pattern recognition
2478 to convert the type of the offset to the type that the target
2479 requires, with the result being a call to an internal function.
2480 If that failed for some reason (e.g. because another pattern
2481 took priority), just handle cases in which the offset already
2482 has the right type. */
2483 else if (gs_info->ifn != IFN_LAST
2484 && !is_gimple_call (stmt_info->stmt)
2485 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2486 TREE_TYPE (gs_info->offset_vectype)))
2488 if (dump_enabled_p ())
2489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2490 "%s offset requires a conversion\n",
2491 vls_type == VLS_LOAD ? "gather" : "scatter");
2492 return false;
2494 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2495 &gs_info->offset_dt,
2496 &gs_info->offset_vectype))
2498 if (dump_enabled_p ())
2499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2500 "%s index use not simple.\n",
2501 vls_type == VLS_LOAD ? "gather" : "scatter");
2502 return false;
2504 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2506 if (vls_type != VLS_LOAD)
2508 if (dump_enabled_p ())
2509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2510 "unsupported emulated scatter.\n");
2511 return false;
2513 else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2514 || !TYPE_VECTOR_SUBPARTS
2515 (gs_info->offset_vectype).is_constant ()
2516 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2517 (gs_info->offset_vectype),
2518 TYPE_VECTOR_SUBPARTS (vectype)))
2520 if (dump_enabled_p ())
2521 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2522 "unsupported vector types for emulated "
2523 "gather.\n");
2524 return false;
2527 /* Gather-scatter accesses perform only component accesses, alignment
2528 is irrelevant for them. */
2529 *alignment_support_scheme = dr_unaligned_supported;
2531 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2533 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2534 masked_p,
2535 vls_type, memory_access_type, poffset,
2536 alignment_support_scheme,
2537 misalignment, gs_info))
2538 return false;
2540 else if (STMT_VINFO_STRIDED_P (stmt_info))
2542 gcc_assert (!slp_node);
2543 if (loop_vinfo
2544 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2545 masked_p, gs_info))
2546 *memory_access_type = VMAT_GATHER_SCATTER;
2547 else
2548 *memory_access_type = VMAT_ELEMENTWISE;
2549 /* Alignment is irrelevant here. */
2550 *alignment_support_scheme = dr_unaligned_supported;
2552 else
2554 int cmp = compare_step_with_zero (vinfo, stmt_info);
2555 if (cmp == 0)
2557 gcc_assert (vls_type == VLS_LOAD);
2558 *memory_access_type = VMAT_INVARIANT;
2559 /* Invariant accesses perform only component accesses, alignment
2560 is irrelevant for them. */
2561 *alignment_support_scheme = dr_unaligned_supported;
2563 else
2565 if (cmp < 0)
2566 *memory_access_type = get_negative_load_store_type
2567 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2568 else
2569 *memory_access_type = VMAT_CONTIGUOUS;
2570 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2571 vectype, *poffset);
2572 *alignment_support_scheme
2573 = vect_supportable_dr_alignment (vinfo,
2574 STMT_VINFO_DR_INFO (stmt_info),
2575 vectype, *misalignment);
2579 if ((*memory_access_type == VMAT_ELEMENTWISE
2580 || *memory_access_type == VMAT_STRIDED_SLP)
2581 && !nunits.is_constant ())
2583 if (dump_enabled_p ())
2584 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2585 "Not using elementwise accesses due to variable "
2586 "vectorization factor.\n");
2587 return false;
2590 if (*alignment_support_scheme == dr_unaligned_unsupported)
2592 if (dump_enabled_p ())
2593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2594 "unsupported unaligned access\n");
2595 return false;
2598 /* FIXME: At the moment the cost model seems to underestimate the
2599 cost of using elementwise accesses. This check preserves the
2600 traditional behavior until that can be fixed. */
2601 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2602 if (!first_stmt_info)
2603 first_stmt_info = stmt_info;
2604 if (*memory_access_type == VMAT_ELEMENTWISE
2605 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2606 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2607 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2608 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2610 if (dump_enabled_p ())
2611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2612 "not falling back to elementwise accesses\n");
2613 return false;
2615 return true;
2618 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2619 conditional operation STMT_INFO. When returning true, store the mask
2620 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2621 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2622 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2624 static bool
2625 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2626 slp_tree slp_node, unsigned mask_index,
2627 tree *mask, slp_tree *mask_node,
2628 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2630 enum vect_def_type mask_dt;
2631 tree mask_vectype;
2632 slp_tree mask_node_1;
2633 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2634 mask, &mask_node_1, &mask_dt, &mask_vectype))
2636 if (dump_enabled_p ())
2637 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2638 "mask use not simple.\n");
2639 return false;
2642 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2644 if (dump_enabled_p ())
2645 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2646 "mask argument is not a boolean.\n");
2647 return false;
2650 /* If the caller is not prepared to adjust an external/constant
2651 SLP mask vector type, fail. */
2652 if (slp_node
2653 && !mask_node
2654 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2656 if (dump_enabled_p ())
2657 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2658 "SLP mask argument is not vectorized.\n");
2659 return false;
2662 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2663 if (!mask_vectype)
2664 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2666 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2668 if (dump_enabled_p ())
2669 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2670 "could not find an appropriate vector mask type.\n");
2671 return false;
2674 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2675 TYPE_VECTOR_SUBPARTS (vectype)))
2677 if (dump_enabled_p ())
2678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2679 "vector mask type %T"
2680 " does not match vector data type %T.\n",
2681 mask_vectype, vectype);
2683 return false;
2686 *mask_dt_out = mask_dt;
2687 *mask_vectype_out = mask_vectype;
2688 if (mask_node)
2689 *mask_node = mask_node_1;
2690 return true;
2693 /* Return true if stored value RHS is suitable for vectorizing store
2694 statement STMT_INFO. When returning true, store the type of the
2695 definition in *RHS_DT_OUT, the type of the vectorized store value in
2696 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2698 static bool
2699 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2700 slp_tree slp_node, tree rhs,
2701 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2702 vec_load_store_type *vls_type_out)
2704 /* In case this is a store from a constant, make sure
2705 native_encode_expr can handle it. */
2706 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2708 if (dump_enabled_p ())
2709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2710 "cannot encode constant as a byte sequence.\n");
2711 return false;
2714 unsigned op_no = 0;
2715 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2717 if (gimple_call_internal_p (call)
2718 && internal_store_fn_p (gimple_call_internal_fn (call)))
2719 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2722 enum vect_def_type rhs_dt;
2723 tree rhs_vectype;
2724 slp_tree slp_op;
2725 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2726 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2728 if (dump_enabled_p ())
2729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2730 "use not simple.\n");
2731 return false;
2734 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2735 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2737 if (dump_enabled_p ())
2738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2739 "incompatible vector types.\n");
2740 return false;
2743 *rhs_dt_out = rhs_dt;
2744 *rhs_vectype_out = rhs_vectype;
2745 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2746 *vls_type_out = VLS_STORE_INVARIANT;
2747 else
2748 *vls_type_out = VLS_STORE;
2749 return true;
2752 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2753 Note that we support masks with floating-point type, in which case the
2754 floats are interpreted as a bitmask. */
2756 static tree
2757 vect_build_all_ones_mask (vec_info *vinfo,
2758 stmt_vec_info stmt_info, tree masktype)
2760 if (TREE_CODE (masktype) == INTEGER_TYPE)
2761 return build_int_cst (masktype, -1);
2762 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2764 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2765 mask = build_vector_from_val (masktype, mask);
2766 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2768 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2770 REAL_VALUE_TYPE r;
2771 long tmp[6];
2772 for (int j = 0; j < 6; ++j)
2773 tmp[j] = -1;
2774 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2775 tree mask = build_real (TREE_TYPE (masktype), r);
2776 mask = build_vector_from_val (masktype, mask);
2777 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2779 gcc_unreachable ();
2782 /* Build an all-zero merge value of type VECTYPE while vectorizing
2783 STMT_INFO as a gather load. */
2785 static tree
2786 vect_build_zero_merge_argument (vec_info *vinfo,
2787 stmt_vec_info stmt_info, tree vectype)
2789 tree merge;
2790 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2791 merge = build_int_cst (TREE_TYPE (vectype), 0);
2792 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2794 REAL_VALUE_TYPE r;
2795 long tmp[6];
2796 for (int j = 0; j < 6; ++j)
2797 tmp[j] = 0;
2798 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2799 merge = build_real (TREE_TYPE (vectype), r);
2801 else
2802 gcc_unreachable ();
2803 merge = build_vector_from_val (vectype, merge);
2804 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2807 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2808 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2809 the gather load operation. If the load is conditional, MASK is the
2810 unvectorized condition and MASK_DT is its definition type, otherwise
2811 MASK is null. */
2813 static void
2814 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2815 gimple_stmt_iterator *gsi,
2816 gimple **vec_stmt,
2817 gather_scatter_info *gs_info,
2818 tree mask)
2820 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2821 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2822 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2823 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2824 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2825 edge pe = loop_preheader_edge (loop);
2826 enum { NARROW, NONE, WIDEN } modifier;
2827 poly_uint64 gather_off_nunits
2828 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2830 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2831 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2832 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2833 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2834 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2835 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2836 tree scaletype = TREE_VALUE (arglist);
2837 tree real_masktype = masktype;
2838 gcc_checking_assert (types_compatible_p (srctype, rettype)
2839 && (!mask
2840 || TREE_CODE (masktype) == INTEGER_TYPE
2841 || types_compatible_p (srctype, masktype)));
2842 if (mask)
2843 masktype = truth_type_for (srctype);
2845 tree mask_halftype = masktype;
2846 tree perm_mask = NULL_TREE;
2847 tree mask_perm_mask = NULL_TREE;
2848 if (known_eq (nunits, gather_off_nunits))
2849 modifier = NONE;
2850 else if (known_eq (nunits * 2, gather_off_nunits))
2852 modifier = WIDEN;
2854 /* Currently widening gathers and scatters are only supported for
2855 fixed-length vectors. */
2856 int count = gather_off_nunits.to_constant ();
2857 vec_perm_builder sel (count, count, 1);
2858 for (int i = 0; i < count; ++i)
2859 sel.quick_push (i | (count / 2));
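      /* For COUNT == 8, for instance, the selector is
	 { 4, 5, 6, 7, 4, 5, 6, 7 }, which moves the upper half of the offset
	 vector into the lower lanes for the odd-numbered copies below.  */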
2861 vec_perm_indices indices (sel, 1, count);
2862 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2863 indices);
2865 else if (known_eq (nunits, gather_off_nunits * 2))
2867 modifier = NARROW;
2869 /* Currently narrowing gathers and scatters are only supported for
2870 fixed-length vectors. */
2871 int count = nunits.to_constant ();
2872 vec_perm_builder sel (count, count, 1);
2873 sel.quick_grow (count);
2874 for (int i = 0; i < count; ++i)
2875 sel[i] = i < count / 2 ? i : i + count / 2;
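      /* For COUNT == 8, for instance, the selector is
	 { 0, 1, 2, 3, 8, 9, 10, 11 }, which concatenates the low halves of
	 two gather results.  */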
2876 vec_perm_indices indices (sel, 2, count);
2877 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2879 ncopies *= 2;
2881 if (mask && VECTOR_TYPE_P (real_masktype))
2883 for (int i = 0; i < count; ++i)
2884 sel[i] = i | (count / 2);
2885 indices.new_vector (sel, 2, count);
2886 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2888 else if (mask)
2889 mask_halftype = truth_type_for (gs_info->offset_vectype);
2891 else
2892 gcc_unreachable ();
2894 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2895 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2897 tree ptr = fold_convert (ptrtype, gs_info->base);
2898 if (!is_gimple_min_invariant (ptr))
2900 gimple_seq seq;
2901 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2902 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2903 gcc_assert (!new_bb);
2906 tree scale = build_int_cst (scaletype, gs_info->scale);
2908 tree vec_oprnd0 = NULL_TREE;
2909 tree vec_mask = NULL_TREE;
2910 tree src_op = NULL_TREE;
2911 tree mask_op = NULL_TREE;
2912 tree prev_res = NULL_TREE;
2914 if (!mask)
2916 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2917 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2920 auto_vec<tree> vec_oprnds0;
2921 auto_vec<tree> vec_masks;
2922 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2923 modifier == WIDEN ? ncopies / 2 : ncopies,
2924 gs_info->offset, &vec_oprnds0);
2925 if (mask)
2926 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2927 modifier == NARROW ? ncopies / 2 : ncopies,
2928 mask, &vec_masks, masktype);
2929 for (int j = 0; j < ncopies; ++j)
2931 tree op, var;
2932 if (modifier == WIDEN && (j & 1))
2933 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2934 perm_mask, stmt_info, gsi);
2935 else
2936 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2938 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2940 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2941 TYPE_VECTOR_SUBPARTS (idxtype)));
2942 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2943 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2944 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2945 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2946 op = var;
2949 if (mask)
2951 if (mask_perm_mask && (j & 1))
2952 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2953 mask_perm_mask, stmt_info, gsi);
2954 else
2956 if (modifier == NARROW)
2958 if ((j & 1) == 0)
2959 vec_mask = vec_masks[j / 2];
2961 else
2962 vec_mask = vec_masks[j];
2964 mask_op = vec_mask;
2965 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2967 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2968 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2969 gcc_assert (known_eq (sub1, sub2));
2970 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2971 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2972 gassign *new_stmt
2973 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2974 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2975 mask_op = var;
2978 if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
2980 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2981 gassign *new_stmt
2982 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2983 : VEC_UNPACK_LO_EXPR,
2984 mask_op);
2985 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2986 mask_op = var;
2988 src_op = mask_op;
2991 tree mask_arg = mask_op;
2992 if (masktype != real_masktype)
2994 tree utype, optype = TREE_TYPE (mask_op);
2995 if (VECTOR_TYPE_P (real_masktype)
2996 || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2997 utype = real_masktype;
2998 else
2999 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
3000 var = vect_get_new_ssa_name (utype, vect_scalar_var);
3001 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
3002 gassign *new_stmt
3003 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
3004 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3005 mask_arg = var;
3006 if (!useless_type_conversion_p (real_masktype, utype))
3008 gcc_assert (TYPE_PRECISION (utype)
3009 <= TYPE_PRECISION (real_masktype));
3010 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
3011 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
3012 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3013 mask_arg = var;
3015 src_op = build_zero_cst (srctype);
3017 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
3018 mask_arg, scale);
3020 if (!useless_type_conversion_p (vectype, rettype))
3022 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
3023 TYPE_VECTOR_SUBPARTS (rettype)));
3024 op = vect_get_new_ssa_name (rettype, vect_simple_var);
3025 gimple_call_set_lhs (new_stmt, op);
3026 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3027 var = make_ssa_name (vec_dest);
3028 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
3029 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
3030 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3032 else
3034 var = make_ssa_name (vec_dest, new_stmt);
3035 gimple_call_set_lhs (new_stmt, var);
3036 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3039 if (modifier == NARROW)
3041 if ((j & 1) == 0)
3043 prev_res = var;
3044 continue;
3046 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
3047 stmt_info, gsi);
3048 new_stmt = SSA_NAME_DEF_STMT (var);
3051 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3053 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3056 /* Prepare the base and offset in GS_INFO for vectorization.
3057 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3058 to the vectorized offset argument for the first copy of STMT_INFO.
3059 STMT_INFO is the statement described by GS_INFO and LOOP is the
3060 containing loop. */
3062 static void
3063 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
3064 class loop *loop, stmt_vec_info stmt_info,
3065 slp_tree slp_node, gather_scatter_info *gs_info,
3066 tree *dataref_ptr, vec<tree> *vec_offset)
3068 gimple_seq stmts = NULL;
3069 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3070 if (stmts != NULL)
3072 basic_block new_bb;
3073 edge pe = loop_preheader_edge (loop);
3074 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3075 gcc_assert (!new_bb);
3077 if (slp_node)
3078 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
3079 else
3081 unsigned ncopies
3082 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
3083 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
3084 gs_info->offset, vec_offset,
3085 gs_info->offset_vectype);
3089 /* Prepare to implement a grouped or strided load or store using
3090 the gather load or scatter store operation described by GS_INFO.
3091 STMT_INFO is the load or store statement.
3093 Set *DATAREF_BUMP to the amount that should be added to the base
3094 address after each copy of the vectorized statement. Set *VEC_OFFSET
3095 to an invariant offset vector in which element I has the value
3096 I * DR_STEP / SCALE. */
3098 static void
3099 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3100 loop_vec_info loop_vinfo,
3101 gather_scatter_info *gs_info,
3102 tree *dataref_bump, tree *vec_offset)
3104 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3105 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3107 tree bump = size_binop (MULT_EXPR,
3108 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3109 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3110 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
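  /* E.g. a 16-byte DR_STEP with V4SI gives a bump of 64 bytes per copy.  */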
3112 /* The offset given in GS_INFO can have pointer type, so use the element
3113 type of the vector instead. */
3114 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3116 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3117 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3118 ssize_int (gs_info->scale));
3119 step = fold_convert (offset_type, step);
3121 /* Create {0, X, X*2, X*3, ...}. */
3122 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3123 build_zero_cst (offset_type), step);
3124 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
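  /* For instance, a DR_STEP of 32 bytes with SCALE 8 gives X == 4 and the
     invariant offset vector { 0, 4, 8, 12, ... }.  */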
3127 /* Return the amount that should be added to a vector pointer to move
3128 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3129 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3130 vectorization. */
3132 static tree
3133 vect_get_data_ptr_increment (vec_info *vinfo,
3134 dr_vec_info *dr_info, tree aggr_type,
3135 vect_memory_access_type memory_access_type)
3137 if (memory_access_type == VMAT_INVARIANT)
3138 return size_zero_node;
3140 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3141 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3142 if (tree_int_cst_sgn (step) == -1)
3143 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3144 return iv_step;
3147 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3149 static bool
3150 vectorizable_bswap (vec_info *vinfo,
3151 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3152 gimple **vec_stmt, slp_tree slp_node,
3153 slp_tree *slp_op,
3154 tree vectype_in, stmt_vector_for_cost *cost_vec)
3156 tree op, vectype;
3157 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3158 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3159 unsigned ncopies;
3161 op = gimple_call_arg (stmt, 0);
3162 vectype = STMT_VINFO_VECTYPE (stmt_info);
3163 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3165 /* Multiple types in SLP are handled by creating the appropriate number of
3166 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3167 case of SLP. */
3168 if (slp_node)
3169 ncopies = 1;
3170 else
3171 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3173 gcc_assert (ncopies >= 1);
3175 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3176 if (! char_vectype)
3177 return false;
3179 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3180 unsigned word_bytes;
3181 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3182 return false;
3184 /* The encoding uses one stepped pattern for each byte in the word. */
3185 vec_perm_builder elts (num_bytes, word_bytes, 3);
3186 for (unsigned i = 0; i < 3; ++i)
3187 for (unsigned j = 0; j < word_bytes; ++j)
3188 elts.quick_push ((i + 1) * word_bytes - j - 1);
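  /* For bswap32 on V4SI (WORD_BYTES == 4), for instance, the encoded
     selector starts { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 }, reversing
     the bytes within each 32-bit word.  */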
3190 vec_perm_indices indices (elts, 1, num_bytes);
3191 machine_mode vmode = TYPE_MODE (char_vectype);
3192 if (!can_vec_perm_const_p (vmode, vmode, indices))
3193 return false;
3195 if (! vec_stmt)
3197 if (slp_node
3198 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3200 if (dump_enabled_p ())
3201 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3202 "incompatible vector types for invariants\n");
3203 return false;
3206 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3207 DUMP_VECT_SCOPE ("vectorizable_bswap");
3208 record_stmt_cost (cost_vec,
3209 1, vector_stmt, stmt_info, 0, vect_prologue);
3210 record_stmt_cost (cost_vec,
3211 slp_node
3212 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3213 vec_perm, stmt_info, 0, vect_body);
3214 return true;
3217 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3219 /* Transform. */
3220 vec<tree> vec_oprnds = vNULL;
3221 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3222 op, &vec_oprnds);
3223 /* Arguments are ready. Create the new vector stmt. */
3224 unsigned i;
3225 tree vop;
3226 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3228 gimple *new_stmt;
3229 tree tem = make_ssa_name (char_vectype);
3230 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3231 char_vectype, vop));
3232 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3233 tree tem2 = make_ssa_name (char_vectype);
3234 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3235 tem, tem, bswap_vconst);
3236 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3237 tem = make_ssa_name (vectype);
3238 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3239 vectype, tem2));
3240 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3241 if (slp_node)
3242 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3243 else
3244 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3247 if (!slp_node)
3248 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3250 vec_oprnds.release ();
3251 return true;
3254 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3255 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3256 in a single step. On success, store the binary pack code in
3257 *CONVERT_CODE. */
3259 static bool
3260 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3261 tree_code *convert_code)
3263 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3264 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3265 return false;
3267 tree_code code;
3268 int multi_step_cvt = 0;
3269 auto_vec <tree, 8> interm_types;
3270 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3271 &code, &multi_step_cvt, &interm_types)
3272 || multi_step_cvt)
3273 return false;
3275 *convert_code = code;
3276 return true;
3279 /* Function vectorizable_call.
3281 Check if STMT_INFO performs a function call that can be vectorized.
3282 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3283 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3284 Return true if STMT_INFO is vectorizable in this way. */
3286 static bool
3287 vectorizable_call (vec_info *vinfo,
3288 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3289 gimple **vec_stmt, slp_tree slp_node,
3290 stmt_vector_for_cost *cost_vec)
3292 gcall *stmt;
3293 tree vec_dest;
3294 tree scalar_dest;
3295 tree op;
3296 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3297 tree vectype_out, vectype_in;
3298 poly_uint64 nunits_in;
3299 poly_uint64 nunits_out;
3300 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3301 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3302 tree fndecl, new_temp, rhs_type;
3303 enum vect_def_type dt[4]
3304 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3305 vect_unknown_def_type };
3306 tree vectypes[ARRAY_SIZE (dt)] = {};
3307 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3308 int ndts = ARRAY_SIZE (dt);
3309 int ncopies, j;
3310 auto_vec<tree, 8> vargs;
3311 enum { NARROW, NONE, WIDEN } modifier;
3312 size_t i, nargs;
3313 tree lhs;
3315 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3316 return false;
3318 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3319 && ! vec_stmt)
3320 return false;
3322 /* Is STMT_INFO a vectorizable call? */
3323 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3324 if (!stmt)
3325 return false;
3327 if (gimple_call_internal_p (stmt)
3328 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3329 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3330 /* Handled by vectorizable_load and vectorizable_store. */
3331 return false;
3333 if (gimple_call_lhs (stmt) == NULL_TREE
3334 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3335 return false;
3337 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3339 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3341 /* Process function arguments. */
3342 rhs_type = NULL_TREE;
3343 vectype_in = NULL_TREE;
3344 nargs = gimple_call_num_args (stmt);
3346 /* Bail out if the function has more than four arguments; we do not have
3347 interesting builtin functions to vectorize with more than two arguments
3348 except for fma. Having no arguments is also not good. */
3349 if (nargs == 0 || nargs > 4)
3350 return false;
3352 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3353 combined_fn cfn = gimple_call_combined_fn (stmt);
3354 if (cfn == CFN_GOMP_SIMD_LANE)
3356 nargs = 0;
3357 rhs_type = unsigned_type_node;
3360 int mask_opno = -1;
3361 if (internal_fn_p (cfn))
3362 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3364 for (i = 0; i < nargs; i++)
3366 if ((int) i == mask_opno)
3368 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3369 &op, &slp_op[i], &dt[i], &vectypes[i]))
3370 return false;
3371 continue;
3374 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3375 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3377 if (dump_enabled_p ())
3378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3379 "use not simple.\n");
3380 return false;
3383 /* We can only handle calls with arguments of the same type. */
3384 if (rhs_type
3385 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3387 if (dump_enabled_p ())
3388 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3389 "argument types differ.\n");
3390 return false;
3392 if (!rhs_type)
3393 rhs_type = TREE_TYPE (op);
3395 if (!vectype_in)
3396 vectype_in = vectypes[i];
3397 else if (vectypes[i]
3398 && !types_compatible_p (vectypes[i], vectype_in))
3400 if (dump_enabled_p ())
3401 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3402 "argument vector types differ.\n");
3403 return false;
3406 /* If all arguments are external or constant defs, infer the vector type
3407 from the scalar type. */
3408 if (!vectype_in)
3409 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3410 if (vec_stmt)
3411 gcc_assert (vectype_in);
3412 if (!vectype_in)
3414 if (dump_enabled_p ())
3415 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3416 "no vectype for scalar type %T\n", rhs_type);
3418 return false;
3420 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3421 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3422 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3423 by a pack of the two vectors into an SI vector. We would need
3424 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3425 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3427 if (dump_enabled_p ())
3428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3429 "mismatched vector sizes %T and %T\n",
3430 vectype_in, vectype_out);
3431 return false;
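/* Illustrative sketch, not part of this file's logic (kept under #if 0 so it
   cannot affect the translation unit): the strategy described in the FORNOW
   comment above, written out in scalar terms.  Assuming 4-lane "VnDI" inputs
   and an 8-lane "VnSI" result, a DI->SI ctz is emulated by two full-width
   VnDI->VnDI ctz operations whose results are then packed (truncated) into
   one SI vector.  */
#if 0
#include <cstdint>

static void
di_to_si_ctz_by_pack (const uint64_t lo[4], const uint64_t hi[4],
                      uint32_t out[8])
{
  uint64_t tmp[8];
  for (int i = 0; i < 4; i++)   /* first VnDI->VnDI IFN_CTZ */
    tmp[i] = lo[i] ? __builtin_ctzll (lo[i]) : 64;
  for (int i = 0; i < 4; i++)   /* second VnDI->VnDI IFN_CTZ */
    tmp[4 + i] = hi[i] ? __builtin_ctzll (hi[i]) : 64;
  for (int i = 0; i < 8; i++)   /* pack the two results into the SI vector */
    out[i] = (uint32_t) tmp[i];
}
#endif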
3434 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3435 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3437 if (dump_enabled_p ())
3438 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3439 "mixed mask and nonmask vector types\n");
3440 return false;
3443 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3445 if (dump_enabled_p ())
3446 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3447 "use emulated vector type for call\n");
3448 return false;
3451 /* FORNOW */
3452 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3453 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3454 if (known_eq (nunits_in * 2, nunits_out))
3455 modifier = NARROW;
3456 else if (known_eq (nunits_out, nunits_in))
3457 modifier = NONE;
3458 else if (known_eq (nunits_out * 2, nunits_in))
3459 modifier = WIDEN;
3460 else
3461 return false;
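/* Illustrative sketch, not GCC code (kept under #if 0): how the lane-count
   comparison above classifies a call for constant lane counts.  The vector
   type names in the comments are examples only.  */
#if 0
enum call_modifier_sketch { CM_NARROW, CM_NONE, CM_WIDEN, CM_UNSUPPORTED };

static call_modifier_sketch
classify_call_modifier (unsigned nunits_in, unsigned nunits_out)
{
  if (nunits_in * 2 == nunits_out)      /* e.g. V4DI arguments, V8SI result */
    return CM_NARROW;
  if (nunits_out == nunits_in)          /* same lane count in and out */
    return CM_NONE;
  if (nunits_out * 2 == nunits_in)      /* e.g. V8SI arguments, V4DI result */
    return CM_WIDEN;
  return CM_UNSUPPORTED;                /* any other ratio is rejected */
}
#endif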
3463 /* We only handle functions that do not read or clobber memory. */
3464 if (gimple_vuse (stmt))
3466 if (dump_enabled_p ())
3467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3468 "function reads from or writes to memory.\n");
3469 return false;
3472 /* For now, we only vectorize functions if a target specific builtin
3473 is available. TODO -- in some cases, it might be profitable to
3474 insert the calls for pieces of the vector, in order to be able
3475 to vectorize other operations in the loop. */
3476 fndecl = NULL_TREE;
3477 internal_fn ifn = IFN_LAST;
3478 tree callee = gimple_call_fndecl (stmt);
3480 /* First try using an internal function. */
3481 tree_code convert_code = ERROR_MARK;
3482 if (cfn != CFN_LAST
3483 && (modifier == NONE
3484 || (modifier == NARROW
3485 && simple_integer_narrowing (vectype_out, vectype_in,
3486 &convert_code))))
3487 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3488 vectype_in);
3490 /* If that fails, try asking for a target-specific built-in function. */
3491 if (ifn == IFN_LAST)
3493 if (cfn != CFN_LAST)
3494 fndecl = targetm.vectorize.builtin_vectorized_function
3495 (cfn, vectype_out, vectype_in);
3496 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3497 fndecl = targetm.vectorize.builtin_md_vectorized_function
3498 (callee, vectype_out, vectype_in);
3501 if (ifn == IFN_LAST && !fndecl)
3503 if (cfn == CFN_GOMP_SIMD_LANE
3504 && !slp_node
3505 && loop_vinfo
3506 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3507 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3508 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3509 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3511 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3512 { 0, 1, 2, ... vf - 1 } vector. */
3513 gcc_assert (nargs == 0);
3515 else if (modifier == NONE
3516 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3517 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3518 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3519 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3520 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3521 slp_op, vectype_in, cost_vec);
3522 else
3524 if (dump_enabled_p ())
3525 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3526 "function is not vectorizable.\n");
3527 return false;
3531 if (slp_node)
3532 ncopies = 1;
3533 else if (modifier == NARROW && ifn == IFN_LAST)
3534 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3535 else
3536 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3538 /* Sanity check: make sure that at least one copy of the vectorized stmt
3539 needs to be generated. */
3540 gcc_assert (ncopies >= 1);
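/* Illustrative sketch (an assumption about vect_get_num_copies, not a
   quotation of it; kept under #if 0): the number of copies is the loop's
   vectorization factor divided by the lane count of the type the copies are
   counted against.  That is why the NARROW/builtin case above counts against
   VECTYPE_OUT (one call per packed output vector), while the other cases
   count against VECTYPE_IN.  */
#if 0
static unsigned
num_copies_sketch (unsigned vf, unsigned nunits)
{
  return vf / nunits;   /* e.g. VF 8: V4DI -> 2 copies, V8SI -> 1 copy */
}
#endif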
3542 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3543 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3544 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3545 if (!vec_stmt) /* transformation not required. */
3547 if (slp_node)
3548 for (i = 0; i < nargs; ++i)
3549 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3550 vectypes[i]
3551 ? vectypes[i] : vectype_in))
3553 if (dump_enabled_p ())
3554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3555 "incompatible vector types for invariants\n");
3556 return false;
3558 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3559 DUMP_VECT_SCOPE ("vectorizable_call");
3560 vect_model_simple_cost (vinfo, stmt_info,
3561 ncopies, dt, ndts, slp_node, cost_vec);
3562 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3563 record_stmt_cost (cost_vec, ncopies / 2,
3564 vec_promote_demote, stmt_info, 0, vect_body);
3566 if (loop_vinfo
3567 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3568 && (reduc_idx >= 0 || mask_opno >= 0))
3570 if (reduc_idx >= 0
3571 && (cond_fn == IFN_LAST
3572 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3573 OPTIMIZE_FOR_SPEED)))
3575 if (dump_enabled_p ())
3576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3577 "can't use a fully-masked loop because no"
3578 " conditional operation is available.\n");
3579 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3581 else
3583 unsigned int nvectors
3584 = (slp_node
3585 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3586 : ncopies);
3587 tree scalar_mask = NULL_TREE;
3588 if (mask_opno >= 0)
3589 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3590 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3591 vectype_out, scalar_mask);
3594 return true;
3597 /* Transform. */
3599 if (dump_enabled_p ())
3600 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3602 /* Handle def. */
3603 scalar_dest = gimple_call_lhs (stmt);
3604 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3606 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3607 unsigned int vect_nargs = nargs;
3608 if (masked_loop_p && reduc_idx >= 0)
3610 ifn = cond_fn;
3611 vect_nargs += 2;
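/* Illustrative sketch, not GCC code (kept under #if 0): the two extra
   arguments added above are the loop mask (prepended) and a fallback value
   (appended), giving the conditional internal function the per-lane
   semantics
     COND_FN (mask, args..., else_val) == mask ? FN (args...) : else_val,
   which is what the transform below builds.  std::fma stands in for an
   arbitrary 3-operand FN.  */
#if 0
#include <cmath>

static double
cond_call_lane_sketch (bool mask, double a, double b, double c,
                       double else_val)
{
  return mask ? std::fma (a, b, c) : else_val;
}
#endif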
3614 if (modifier == NONE || ifn != IFN_LAST)
3616 tree prev_res = NULL_TREE;
3617 vargs.safe_grow (vect_nargs, true);
3618 auto_vec<vec<tree> > vec_defs (nargs);
3619 for (j = 0; j < ncopies; ++j)
3621 /* Build argument list for the vectorized call. */
3622 if (slp_node)
3624 vec<tree> vec_oprnds0;
3626 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3627 vec_oprnds0 = vec_defs[0];
3629 /* Arguments are ready. Create the new vector stmt. */
3630 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3632 int varg = 0;
3633 if (masked_loop_p && reduc_idx >= 0)
3635 unsigned int vec_num = vec_oprnds0.length ();
3636 /* Always true for SLP. */
3637 gcc_assert (ncopies == 1);
3638 vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
3639 vectype_out, i);
3641 size_t k;
3642 for (k = 0; k < nargs; k++)
3644 vec<tree> vec_oprndsk = vec_defs[k];
3645 vargs[varg++] = vec_oprndsk[i];
3647 if (masked_loop_p && reduc_idx >= 0)
3648 vargs[varg++] = vargs[reduc_idx + 1];
3649 gimple *new_stmt;
3650 if (modifier == NARROW)
3652 /* We don't define any narrowing conditional functions
3653 at present. */
3654 gcc_assert (mask_opno < 0);
3655 tree half_res = make_ssa_name (vectype_in);
3656 gcall *call
3657 = gimple_build_call_internal_vec (ifn, vargs);
3658 gimple_call_set_lhs (call, half_res);
3659 gimple_call_set_nothrow (call, true);
3660 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3661 if ((i & 1) == 0)
3663 prev_res = half_res;
3664 continue;
3666 new_temp = make_ssa_name (vec_dest);
3667 new_stmt = gimple_build_assign (new_temp, convert_code,
3668 prev_res, half_res);
3669 vect_finish_stmt_generation (vinfo, stmt_info,
3670 new_stmt, gsi);
3672 else
3674 if (mask_opno >= 0 && masked_loop_p)
3676 unsigned int vec_num = vec_oprnds0.length ();
3677 /* Always true for SLP. */
3678 gcc_assert (ncopies == 1);
3679 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3680 vectype_out, i);
3681 vargs[mask_opno] = prepare_vec_mask
3682 (loop_vinfo, TREE_TYPE (mask), mask,
3683 vargs[mask_opno], gsi);
3686 gcall *call;
3687 if (ifn != IFN_LAST)
3688 call = gimple_build_call_internal_vec (ifn, vargs);
3689 else
3690 call = gimple_build_call_vec (fndecl, vargs);
3691 new_temp = make_ssa_name (vec_dest, call);
3692 gimple_call_set_lhs (call, new_temp);
3693 gimple_call_set_nothrow (call, true);
3694 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3695 new_stmt = call;
3697 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3699 continue;
3702 int varg = 0;
3703 if (masked_loop_p && reduc_idx >= 0)
3704 vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
3705 vectype_out, j);
3706 for (i = 0; i < nargs; i++)
3708 op = gimple_call_arg (stmt, i);
3709 if (j == 0)
3711 vec_defs.quick_push (vNULL);
3712 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3713 op, &vec_defs[i],
3714 vectypes[i]);
3716 vargs[varg++] = vec_defs[i][j];
3718 if (masked_loop_p && reduc_idx >= 0)
3719 vargs[varg++] = vargs[reduc_idx + 1];
3721 if (mask_opno >= 0 && masked_loop_p)
3723 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3724 vectype_out, j);
3725 vargs[mask_opno]
3726 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3727 vargs[mask_opno], gsi);
3730 gimple *new_stmt;
3731 if (cfn == CFN_GOMP_SIMD_LANE)
3733 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3734 tree new_var
3735 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3736 gimple *init_stmt = gimple_build_assign (new_var, cst);
3737 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3738 new_temp = make_ssa_name (vec_dest);
3739 new_stmt = gimple_build_assign (new_temp, new_var);
3740 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3742 else if (modifier == NARROW)
3744 /* We don't define any narrowing conditional functions at
3745 present. */
3746 gcc_assert (mask_opno < 0);
3747 tree half_res = make_ssa_name (vectype_in);
3748 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3749 gimple_call_set_lhs (call, half_res);
3750 gimple_call_set_nothrow (call, true);
3751 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3752 if ((j & 1) == 0)
3754 prev_res = half_res;
3755 continue;
3757 new_temp = make_ssa_name (vec_dest);
3758 new_stmt = gimple_build_assign (new_temp, convert_code,
3759 prev_res, half_res);
3760 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3762 else
3764 gcall *call;
3765 if (ifn != IFN_LAST)
3766 call = gimple_build_call_internal_vec (ifn, vargs);
3767 else
3768 call = gimple_build_call_vec (fndecl, vargs);
3769 new_temp = make_ssa_name (vec_dest, call);
3770 gimple_call_set_lhs (call, new_temp);
3771 gimple_call_set_nothrow (call, true);
3772 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3773 new_stmt = call;
3776 if (j == (modifier == NARROW ? 1 : 0))
3777 *vec_stmt = new_stmt;
3778 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3780 for (i = 0; i < nargs; i++)
3782 vec<tree> vec_oprndsi = vec_defs[i];
3783 vec_oprndsi.release ();
3786 else if (modifier == NARROW)
3788 auto_vec<vec<tree> > vec_defs (nargs);
3789 /* We don't define any narrowing conditional functions at present. */
3790 gcc_assert (mask_opno < 0);
3791 for (j = 0; j < ncopies; ++j)
3793 /* Build argument list for the vectorized call. */
3794 if (j == 0)
3795 vargs.create (nargs * 2);
3796 else
3797 vargs.truncate (0);
3799 if (slp_node)
3801 vec<tree> vec_oprnds0;
3803 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3804 vec_oprnds0 = vec_defs[0];
3806 /* Arguments are ready. Create the new vector stmt. */
3807 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3809 size_t k;
3810 vargs.truncate (0);
3811 for (k = 0; k < nargs; k++)
3813 vec<tree> vec_oprndsk = vec_defs[k];
3814 vargs.quick_push (vec_oprndsk[i]);
3815 vargs.quick_push (vec_oprndsk[i + 1]);
3817 gcall *call;
3818 if (ifn != IFN_LAST)
3819 call = gimple_build_call_internal_vec (ifn, vargs);
3820 else
3821 call = gimple_build_call_vec (fndecl, vargs);
3822 new_temp = make_ssa_name (vec_dest, call);
3823 gimple_call_set_lhs (call, new_temp);
3824 gimple_call_set_nothrow (call, true);
3825 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3826 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3828 continue;
3831 for (i = 0; i < nargs; i++)
3833 op = gimple_call_arg (stmt, i);
3834 if (j == 0)
3836 vec_defs.quick_push (vNULL);
3837 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3838 op, &vec_defs[i], vectypes[i]);
3840 vec_oprnd0 = vec_defs[i][2*j];
3841 vec_oprnd1 = vec_defs[i][2*j+1];
3843 vargs.quick_push (vec_oprnd0);
3844 vargs.quick_push (vec_oprnd1);
3847 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3848 new_temp = make_ssa_name (vec_dest, new_stmt);
3849 gimple_call_set_lhs (new_stmt, new_temp);
3850 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3852 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3855 if (!slp_node)
3856 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3858 for (i = 0; i < nargs; i++)
3860 vec<tree> vec_oprndsi = vec_defs[i];
3861 vec_oprndsi.release ();
3864 else
3865 /* No current target implements this case. */
3866 return false;
3868 vargs.release ();
3870 /* The call in STMT might prevent it from being removed in DCE.
3871 We cannot remove it here, however, because of the way the SSA name
3872 it defines is mapped to the new definition. So just replace the
3873 rhs of the statement with something harmless. */
3875 if (slp_node)
3876 return true;
3878 stmt_info = vect_orig_stmt (stmt_info);
3879 lhs = gimple_get_lhs (stmt_info->stmt);
3881 gassign *new_stmt
3882 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3883 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3885 return true;
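/* Illustrative sketch, not GCC code (kept under #if 0): the
   CFN_GOMP_SIMD_LANE handling in vectorizable_call materialises, for copy
   number J of a vector type with NUNITS lanes, the constant index vector
   { J*NUNITS, J*NUNITS + 1, ..., J*NUNITS + NUNITS - 1 }, i.e. the lane
   numbers 0 .. vf-1 distributed across the copies.  */
#if 0
static void
simd_lane_vector_sketch (unsigned j, unsigned nunits, unsigned out[])
{
  for (unsigned lane = 0; lane < nunits; lane++)
    out[lane] = j * nunits + lane;  /* build_index_vector (_, j*nunits, 1) */
}
#endif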
3889 struct simd_call_arg_info
3891 tree vectype;
3892 tree op;
3893 HOST_WIDE_INT linear_step;
3894 enum vect_def_type dt;
3895 unsigned int align;
3896 bool simd_lane_linear;
3899 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3900 is linear within simd lane (but not within whole loop), note it in
3901 *ARGINFO. */
3903 static void
3904 vect_simd_lane_linear (tree op, class loop *loop,
3905 struct simd_call_arg_info *arginfo)
3907 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3909 if (!is_gimple_assign (def_stmt)
3910 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3911 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3912 return;
3914 tree base = gimple_assign_rhs1 (def_stmt);
3915 HOST_WIDE_INT linear_step = 0;
3916 tree v = gimple_assign_rhs2 (def_stmt);
3917 while (TREE_CODE (v) == SSA_NAME)
3919 tree t;
3920 def_stmt = SSA_NAME_DEF_STMT (v);
3921 if (is_gimple_assign (def_stmt))
3922 switch (gimple_assign_rhs_code (def_stmt))
3924 case PLUS_EXPR:
3925 t = gimple_assign_rhs2 (def_stmt);
3926 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3927 return;
3928 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3929 v = gimple_assign_rhs1 (def_stmt);
3930 continue;
3931 case MULT_EXPR:
3932 t = gimple_assign_rhs2 (def_stmt);
3933 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3934 return;
3935 linear_step = tree_to_shwi (t);
3936 v = gimple_assign_rhs1 (def_stmt);
3937 continue;
3938 CASE_CONVERT:
3939 t = gimple_assign_rhs1 (def_stmt);
3940 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3941 || (TYPE_PRECISION (TREE_TYPE (v))
3942 < TYPE_PRECISION (TREE_TYPE (t))))
3943 return;
3944 if (!linear_step)
3945 linear_step = 1;
3946 v = t;
3947 continue;
3948 default:
3949 return;
3951 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3952 && loop->simduid
3953 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3954 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3955 == loop->simduid))
3957 if (!linear_step)
3958 linear_step = 1;
3959 arginfo->linear_step = linear_step;
3960 arginfo->op = base;
3961 arginfo->simd_lane_linear = true;
3962 return;
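/* Illustrative sketch, not GCC code (kept under #if 0): the pattern
   recognised above.  An address is "linear within a simd lane" when it is an
   invariant base plus the lane id (IFN_GOMP_SIMD_LANE) scaled by a constant
   step, as for a per-lane scratch slot.  current_simd_lane is a hypothetical
   stand-in for the lane id an OpenMP simd body would see; ARGINFO then
   records the base pointer and the step.  */
#if 0
#include <cstddef>

extern unsigned current_simd_lane (void);       /* hypothetical helper */

static float *
per_lane_pointer_sketch (float *scratch, std::size_t step)
{
  /* base + lane * step: linear within the simd lane.  */
  return scratch + current_simd_lane () * step;
}
#endif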
3967 /* Return the number of elements in vector type VECTYPE, which is associated
3968 with a SIMD clone. At present these vectors always have a constant
3969 length. */
3971 static unsigned HOST_WIDE_INT
3972 simd_clone_subparts (tree vectype)
3974 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3977 /* Function vectorizable_simd_clone_call.
3979 Check if STMT_INFO performs a function call that can be vectorized
3980 by calling a simd clone of the function.
3981 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3982 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3983 Return true if STMT_INFO is vectorizable in this way. */
3985 static bool
3986 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3987 gimple_stmt_iterator *gsi,
3988 gimple **vec_stmt, slp_tree slp_node,
3989 stmt_vector_for_cost *)
3991 tree vec_dest;
3992 tree scalar_dest;
3993 tree op, type;
3994 tree vec_oprnd0 = NULL_TREE;
3995 tree vectype;
3996 poly_uint64 nunits;
3997 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3998 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3999 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
4000 tree fndecl, new_temp;
4001 int ncopies, j;
4002 auto_vec<simd_call_arg_info> arginfo;
4003 vec<tree> vargs = vNULL;
4004 size_t i, nargs;
4005 tree lhs, rtype, ratype;
4006 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
4007 int arg_offset = 0;
4009 /* Is STMT a vectorizable call? */
4010 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4011 if (!stmt)
4012 return false;
4014 fndecl = gimple_call_fndecl (stmt);
4015 if (fndecl == NULL_TREE
4016 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
4018 fndecl = gimple_call_arg (stmt, 0);
4019 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
4020 fndecl = TREE_OPERAND (fndecl, 0);
4021 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
4022 arg_offset = 1;
4024 if (fndecl == NULL_TREE)
4025 return false;
4027 struct cgraph_node *node = cgraph_node::get (fndecl);
4028 if (node == NULL || node->simd_clones == NULL)
4029 return false;
4031 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4032 return false;
4034 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4035 && ! vec_stmt)
4036 return false;
4038 if (gimple_call_lhs (stmt)
4039 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4040 return false;
4042 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4044 vectype = STMT_VINFO_VECTYPE (stmt_info);
4046 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4047 return false;
4049 /* FORNOW */
4050 if (slp_node)
4051 return false;
4053 /* Process function arguments. */
4054 nargs = gimple_call_num_args (stmt) - arg_offset;
4056 /* Bail out if the function has zero arguments. */
4057 if (nargs == 0)
4058 return false;
4060 arginfo.reserve (nargs, true);
4062 for (i = 0; i < nargs; i++)
4064 simd_call_arg_info thisarginfo;
4065 affine_iv iv;
4067 thisarginfo.linear_step = 0;
4068 thisarginfo.align = 0;
4069 thisarginfo.op = NULL_TREE;
4070 thisarginfo.simd_lane_linear = false;
4072 op = gimple_call_arg (stmt, i + arg_offset);
4073 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
4074 &thisarginfo.vectype)
4075 || thisarginfo.dt == vect_uninitialized_def)
4077 if (dump_enabled_p ())
4078 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4079 "use not simple.\n");
4080 return false;
4083 if (thisarginfo.dt == vect_constant_def
4084 || thisarginfo.dt == vect_external_def)
4085 gcc_assert (thisarginfo.vectype == NULL_TREE);
4086 else
4087 gcc_assert (thisarginfo.vectype != NULL_TREE);
4089 /* For linear arguments, the analyze phase should have saved
4090 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4091 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4092 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4094 gcc_assert (vec_stmt);
4095 thisarginfo.linear_step
4096 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4097 thisarginfo.op
4098 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4099 thisarginfo.simd_lane_linear
4100 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4101 == boolean_true_node);
4102 /* If the loop has been peeled for alignment, we need to adjust it. */
4103 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4104 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4105 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4107 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4108 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4109 tree opt = TREE_TYPE (thisarginfo.op);
4110 bias = fold_convert (TREE_TYPE (step), bias);
4111 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4112 thisarginfo.op
4113 = fold_build2 (POINTER_TYPE_P (opt)
4114 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4115 thisarginfo.op, bias);
4118 else if (!vec_stmt
4119 && thisarginfo.dt != vect_constant_def
4120 && thisarginfo.dt != vect_external_def
4121 && loop_vinfo
4122 && TREE_CODE (op) == SSA_NAME
4123 && simple_iv (loop, loop_containing_stmt (stmt), op,
4124 &iv, false)
4125 && tree_fits_shwi_p (iv.step))
4127 thisarginfo.linear_step = tree_to_shwi (iv.step);
4128 thisarginfo.op = iv.base;
4130 else if ((thisarginfo.dt == vect_constant_def
4131 || thisarginfo.dt == vect_external_def)
4132 && POINTER_TYPE_P (TREE_TYPE (op)))
4133 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4134 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4135 linear too. */
4136 if (POINTER_TYPE_P (TREE_TYPE (op))
4137 && !thisarginfo.linear_step
4138 && !vec_stmt
4139 && thisarginfo.dt != vect_constant_def
4140 && thisarginfo.dt != vect_external_def
4141 && loop_vinfo
4142 && !slp_node
4143 && TREE_CODE (op) == SSA_NAME)
4144 vect_simd_lane_linear (op, loop, &thisarginfo);
4146 arginfo.quick_push (thisarginfo);
4149 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4150 if (!vf.is_constant ())
4152 if (dump_enabled_p ())
4153 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4154 "not considering SIMD clones; not yet supported"
4155 " for variable-width vectors.\n");
4156 return false;
4159 unsigned int badness = 0;
4160 struct cgraph_node *bestn = NULL;
4161 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4162 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4163 else
4164 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4165 n = n->simdclone->next_clone)
4167 unsigned int this_badness = 0;
4168 unsigned int num_calls;
4169 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4170 || n->simdclone->nargs != nargs)
4171 continue;
4172 if (num_calls != 1)
4173 this_badness += exact_log2 (num_calls) * 4096;
4174 if (n->simdclone->inbranch)
4175 this_badness += 8192;
4176 int target_badness = targetm.simd_clone.usable (n);
4177 if (target_badness < 0)
4178 continue;
4179 this_badness += target_badness * 512;
4180 for (i = 0; i < nargs; i++)
4182 switch (n->simdclone->args[i].arg_type)
4184 case SIMD_CLONE_ARG_TYPE_VECTOR:
4185 if (!useless_type_conversion_p
4186 (n->simdclone->args[i].orig_type,
4187 TREE_TYPE (gimple_call_arg (stmt, i + arg_offset))))
4188 i = -1;
4189 else if (arginfo[i].dt == vect_constant_def
4190 || arginfo[i].dt == vect_external_def
4191 || arginfo[i].linear_step)
4192 this_badness += 64;
4193 break;
4194 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4195 if (arginfo[i].dt != vect_constant_def
4196 && arginfo[i].dt != vect_external_def)
4197 i = -1;
4198 break;
4199 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4200 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4201 if (arginfo[i].dt == vect_constant_def
4202 || arginfo[i].dt == vect_external_def
4203 || (arginfo[i].linear_step
4204 != n->simdclone->args[i].linear_step))
4205 i = -1;
4206 break;
4207 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4208 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4209 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4210 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4211 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4212 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4213 /* FORNOW */
4214 i = -1;
4215 break;
4216 case SIMD_CLONE_ARG_TYPE_MASK:
4217 break;
4219 if (i == (size_t) -1)
4220 break;
4221 if (n->simdclone->args[i].alignment > arginfo[i].align)
4223 i = -1;
4224 break;
4226 if (arginfo[i].align)
4227 this_badness += (exact_log2 (arginfo[i].align)
4228 - exact_log2 (n->simdclone->args[i].alignment));
4230 if (i == (size_t) -1)
4231 continue;
4232 if (bestn == NULL || this_badness < badness)
4234 bestn = n;
4235 badness = this_badness;
4239 if (bestn == NULL)
4240 return false;
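/* Illustrative sketch, not GCC code (kept under #if 0): the core of the
   clone-selection heuristic above, with the same weights.  Needing several
   calls per vector iteration, being an inbranch (masked) clone, and any
   target reservation all make a clone less attractive; the per-argument
   adjustments (the +64 and alignment terms) are omitted here.  */
#if 0
static unsigned
clone_badness_sketch (unsigned num_calls, bool inbranch, int target_badness)
{
  unsigned badness = 0;
  unsigned log2_calls = 0;
  while ((1u << (log2_calls + 1)) <= num_calls)
    log2_calls++;                       /* integer log2 of num_calls */
  if (num_calls != 1)
    badness += log2_calls * 4096;       /* prefer clones needing fewer calls */
  if (inbranch)
    badness += 8192;                    /* prefer unconditional clones */
  badness += (unsigned) target_badness * 512;  /* target's own preference */
  return badness;                       /* the lowest badness wins */
}
#endif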
4242 for (i = 0; i < nargs; i++)
4244 if ((arginfo[i].dt == vect_constant_def
4245 || arginfo[i].dt == vect_external_def)
4246 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4248 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i + arg_offset));
4249 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4250 slp_node);
4251 if (arginfo[i].vectype == NULL
4252 || !constant_multiple_p (bestn->simdclone->simdlen,
4253 simd_clone_subparts (arginfo[i].vectype)))
4254 return false;
4257 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4258 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4260 if (dump_enabled_p ())
4261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4262 "vector mask arguments are not supported.\n");
4263 return false;
4266 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
4267 && bestn->simdclone->mask_mode == VOIDmode
4268 && (simd_clone_subparts (bestn->simdclone->args[i].vector_type)
4269 != simd_clone_subparts (arginfo[i].vectype)))
4271 /* FORNOW we only have partial support for vector-type masks that
4272 can't hold all of simdlen. */
4273 if (dump_enabled_p ())
4274 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4275 vect_location,
4276 "in-branch vector clones are not yet"
4277 " supported for mismatched vector sizes.\n");
4278 return false;
4280 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
4281 && bestn->simdclone->mask_mode != VOIDmode)
4283 /* FORNOW don't support integer-type masks. */
4284 if (dump_enabled_p ())
4285 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4286 vect_location,
4287 "in-branch vector clones are not yet"
4288 " supported for integer mask modes.\n");
4289 return false;
4293 fndecl = bestn->decl;
4294 nunits = bestn->simdclone->simdlen;
4295 ncopies = vector_unroll_factor (vf, nunits);
4297 /* If the function isn't const, only allow it in simd loops where the user
4298 has asserted that at least nunits consecutive iterations can be
4299 performed using SIMD instructions. */
4300 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4301 && gimple_vuse (stmt))
4302 return false;
4304 /* Sanity check: make sure that at least one copy of the vectorized stmt
4305 needs to be generated. */
4306 gcc_assert (ncopies >= 1);
4308 if (!vec_stmt) /* transformation not required. */
4310 /* When the original call is pure or const but the SIMD ABI dictates
4311 an aggregate return, we will have to use a virtual definition and
4312 in a loop may eventually even need to add a virtual PHI. That's
4313 not straightforward, so allow this to be fixed up via renaming. */
4314 if (gimple_call_lhs (stmt)
4315 && !gimple_vdef (stmt)
4316 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4317 vinfo->any_known_not_updated_vssa = true;
4318 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4319 for (i = 0; i < nargs; i++)
4320 if ((bestn->simdclone->args[i].arg_type
4321 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4322 || (bestn->simdclone->args[i].arg_type
4323 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4325 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4326 + 1,
4327 true);
4328 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4329 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4330 ? size_type_node : TREE_TYPE (arginfo[i].op);
4331 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4332 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4333 tree sll = arginfo[i].simd_lane_linear
4334 ? boolean_true_node : boolean_false_node;
4335 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4337 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4338 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4339 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4340 dt, slp_node, cost_vec); */
4341 return true;
4344 /* Transform. */
4346 if (dump_enabled_p ())
4347 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4349 /* Handle def. */
4350 scalar_dest = gimple_call_lhs (stmt);
4351 vec_dest = NULL_TREE;
4352 rtype = NULL_TREE;
4353 ratype = NULL_TREE;
4354 if (scalar_dest)
4356 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4357 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4358 if (TREE_CODE (rtype) == ARRAY_TYPE)
4360 ratype = rtype;
4361 rtype = TREE_TYPE (ratype);
4365 auto_vec<vec<tree> > vec_oprnds;
4366 auto_vec<unsigned> vec_oprnds_i;
4367 vec_oprnds.safe_grow_cleared (nargs, true);
4368 vec_oprnds_i.safe_grow_cleared (nargs, true);
4369 for (j = 0; j < ncopies; ++j)
4371 /* Build argument list for the vectorized call. */
4372 if (j == 0)
4373 vargs.create (nargs);
4374 else
4375 vargs.truncate (0);
4377 for (i = 0; i < nargs; i++)
4379 unsigned int k, l, m, o;
4380 tree atype;
4381 op = gimple_call_arg (stmt, i + arg_offset);
4382 switch (bestn->simdclone->args[i].arg_type)
4384 case SIMD_CLONE_ARG_TYPE_VECTOR:
4385 atype = bestn->simdclone->args[i].vector_type;
4386 o = vector_unroll_factor (nunits,
4387 simd_clone_subparts (atype));
4388 for (m = j * o; m < (j + 1) * o; m++)
4390 if (simd_clone_subparts (atype)
4391 < simd_clone_subparts (arginfo[i].vectype))
4393 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4394 k = (simd_clone_subparts (arginfo[i].vectype)
4395 / simd_clone_subparts (atype));
4396 gcc_assert ((k & (k - 1)) == 0);
4397 if (m == 0)
4399 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4400 ncopies * o / k, op,
4401 &vec_oprnds[i]);
4402 vec_oprnds_i[i] = 0;
4403 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4405 else
4407 vec_oprnd0 = arginfo[i].op;
4408 if ((m & (k - 1)) == 0)
4409 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4411 arginfo[i].op = vec_oprnd0;
4412 vec_oprnd0
4413 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4414 bitsize_int (prec),
4415 bitsize_int ((m & (k - 1)) * prec));
4416 gassign *new_stmt
4417 = gimple_build_assign (make_ssa_name (atype),
4418 vec_oprnd0);
4419 vect_finish_stmt_generation (vinfo, stmt_info,
4420 new_stmt, gsi);
4421 vargs.safe_push (gimple_assign_lhs (new_stmt));
4423 else
4425 k = (simd_clone_subparts (atype)
4426 / simd_clone_subparts (arginfo[i].vectype));
4427 gcc_assert ((k & (k - 1)) == 0);
4428 vec<constructor_elt, va_gc> *ctor_elts;
4429 if (k != 1)
4430 vec_alloc (ctor_elts, k);
4431 else
4432 ctor_elts = NULL;
4433 for (l = 0; l < k; l++)
4435 if (m == 0 && l == 0)
4437 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4438 k * o * ncopies,
4440 &vec_oprnds[i]);
4441 vec_oprnds_i[i] = 0;
4442 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4444 else
4445 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4446 arginfo[i].op = vec_oprnd0;
4447 if (k == 1)
4448 break;
4449 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4450 vec_oprnd0);
4452 if (k == 1)
4453 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4454 atype))
4456 vec_oprnd0
4457 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4458 gassign *new_stmt
4459 = gimple_build_assign (make_ssa_name (atype),
4460 vec_oprnd0);
4461 vect_finish_stmt_generation (vinfo, stmt_info,
4462 new_stmt, gsi);
4463 vargs.safe_push (gimple_assign_lhs (new_stmt));
4465 else
4466 vargs.safe_push (vec_oprnd0);
4467 else
4469 vec_oprnd0 = build_constructor (atype, ctor_elts);
4470 gassign *new_stmt
4471 = gimple_build_assign (make_ssa_name (atype),
4472 vec_oprnd0);
4473 vect_finish_stmt_generation (vinfo, stmt_info,
4474 new_stmt, gsi);
4475 vargs.safe_push (gimple_assign_lhs (new_stmt));
4479 break;
4480 case SIMD_CLONE_ARG_TYPE_MASK:
4481 atype = bestn->simdclone->args[i].vector_type;
4482 if (bestn->simdclone->mask_mode != VOIDmode)
4484 /* FORNOW: this is disabled above. */
4485 gcc_unreachable ();
4487 else
4489 tree elt_type = TREE_TYPE (atype);
4490 tree one = fold_convert (elt_type, integer_one_node);
4491 tree zero = fold_convert (elt_type, integer_zero_node);
4492 o = vector_unroll_factor (nunits,
4493 simd_clone_subparts (atype));
4494 for (m = j * o; m < (j + 1) * o; m++)
4496 if (simd_clone_subparts (atype)
4497 < simd_clone_subparts (arginfo[i].vectype))
4499 /* The mask type has fewer elements than simdlen. */
4501 /* FORNOW */
4502 gcc_unreachable ();
4504 else if (simd_clone_subparts (atype)
4505 == simd_clone_subparts (arginfo[i].vectype))
4507 /* The SIMD clone function has the same number of
4508 elements as the current function. */
4509 if (m == 0)
4511 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4512 o * ncopies,
4514 &vec_oprnds[i]);
4515 vec_oprnds_i[i] = 0;
4517 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4518 vec_oprnd0
4519 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4520 build_vector_from_val (atype, one),
4521 build_vector_from_val (atype, zero));
4522 gassign *new_stmt
4523 = gimple_build_assign (make_ssa_name (atype),
4524 vec_oprnd0);
4525 vect_finish_stmt_generation (vinfo, stmt_info,
4526 new_stmt, gsi);
4527 vargs.safe_push (gimple_assign_lhs (new_stmt));
4529 else
4531 /* The mask type has more elements than simdlen. */
4533 /* FORNOW */
4534 gcc_unreachable ();
4538 break;
4539 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4540 vargs.safe_push (op);
4541 break;
4542 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4543 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4544 if (j == 0)
4546 gimple_seq stmts;
4547 arginfo[i].op
4548 = force_gimple_operand (unshare_expr (arginfo[i].op),
4549 &stmts, true, NULL_TREE);
4550 if (stmts != NULL)
4552 basic_block new_bb;
4553 edge pe = loop_preheader_edge (loop);
4554 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4555 gcc_assert (!new_bb);
4557 if (arginfo[i].simd_lane_linear)
4559 vargs.safe_push (arginfo[i].op);
4560 break;
4562 tree phi_res = copy_ssa_name (op);
4563 gphi *new_phi = create_phi_node (phi_res, loop->header);
4564 add_phi_arg (new_phi, arginfo[i].op,
4565 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4566 enum tree_code code
4567 = POINTER_TYPE_P (TREE_TYPE (op))
4568 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4569 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4570 ? sizetype : TREE_TYPE (op);
4571 poly_widest_int cst
4572 = wi::mul (bestn->simdclone->args[i].linear_step,
4573 ncopies * nunits);
4574 tree tcst = wide_int_to_tree (type, cst);
4575 tree phi_arg = copy_ssa_name (op);
4576 gassign *new_stmt
4577 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4578 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4579 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4580 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4581 UNKNOWN_LOCATION);
4582 arginfo[i].op = phi_res;
4583 vargs.safe_push (phi_res);
4585 else
4587 enum tree_code code
4588 = POINTER_TYPE_P (TREE_TYPE (op))
4589 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4590 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4591 ? sizetype : TREE_TYPE (op);
4592 poly_widest_int cst
4593 = wi::mul (bestn->simdclone->args[i].linear_step,
4594 j * nunits);
4595 tree tcst = wide_int_to_tree (type, cst);
4596 new_temp = make_ssa_name (TREE_TYPE (op));
4597 gassign *new_stmt
4598 = gimple_build_assign (new_temp, code,
4599 arginfo[i].op, tcst);
4600 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4601 vargs.safe_push (new_temp);
4603 break;
4604 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4605 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4606 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4607 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4608 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4609 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4610 default:
4611 gcc_unreachable ();
4615 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4616 if (vec_dest)
4618 gcc_assert (ratype
4619 || known_eq (simd_clone_subparts (rtype), nunits));
4620 if (ratype)
4621 new_temp = create_tmp_var (ratype);
4622 else if (useless_type_conversion_p (vectype, rtype))
4623 new_temp = make_ssa_name (vec_dest, new_call);
4624 else
4625 new_temp = make_ssa_name (rtype, new_call);
4626 gimple_call_set_lhs (new_call, new_temp);
4628 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4629 gimple *new_stmt = new_call;
4631 if (vec_dest)
4633 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4635 unsigned int k, l;
4636 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4637 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4638 k = vector_unroll_factor (nunits,
4639 simd_clone_subparts (vectype));
4640 gcc_assert ((k & (k - 1)) == 0);
4641 for (l = 0; l < k; l++)
4643 tree t;
4644 if (ratype)
4646 t = build_fold_addr_expr (new_temp);
4647 t = build2 (MEM_REF, vectype, t,
4648 build_int_cst (TREE_TYPE (t), l * bytes));
4650 else
4651 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4652 bitsize_int (prec), bitsize_int (l * prec));
4653 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4654 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4656 if (j == 0 && l == 0)
4657 *vec_stmt = new_stmt;
4658 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4661 if (ratype)
4662 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4663 continue;
4665 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4667 unsigned int k = (simd_clone_subparts (vectype)
4668 / simd_clone_subparts (rtype));
4669 gcc_assert ((k & (k - 1)) == 0);
4670 if ((j & (k - 1)) == 0)
4671 vec_alloc (ret_ctor_elts, k);
4672 if (ratype)
4674 unsigned int m, o;
4675 o = vector_unroll_factor (nunits,
4676 simd_clone_subparts (rtype));
4677 for (m = 0; m < o; m++)
4679 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4680 size_int (m), NULL_TREE, NULL_TREE);
4681 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4682 tem);
4683 vect_finish_stmt_generation (vinfo, stmt_info,
4684 new_stmt, gsi);
4685 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4686 gimple_assign_lhs (new_stmt));
4688 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4690 else
4691 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4692 if ((j & (k - 1)) != k - 1)
4693 continue;
4694 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4695 new_stmt
4696 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4697 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4699 if ((unsigned) j == k - 1)
4700 *vec_stmt = new_stmt;
4701 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4702 continue;
4704 else if (ratype)
4706 tree t = build_fold_addr_expr (new_temp);
4707 t = build2 (MEM_REF, vectype, t,
4708 build_int_cst (TREE_TYPE (t), 0));
4709 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4710 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4711 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4713 else if (!useless_type_conversion_p (vectype, rtype))
4715 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4716 new_stmt
4717 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4718 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4722 if (j == 0)
4723 *vec_stmt = new_stmt;
4724 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4727 for (i = 0; i < nargs; ++i)
4729 vec<tree> oprndsi = vec_oprnds[i];
4730 oprndsi.release ();
4732 vargs.release ();
4734 /* Mark the clone as no longer being a candidate for GC. */
4735 bestn->gc_candidate = false;
4737 /* The call in STMT might prevent it from being removed in DCE.
4738 We cannot remove it here, however, because of the way the SSA name
4739 it defines is mapped to the new definition. So just replace the
4740 rhs of the statement with something harmless. */
4742 if (slp_node)
4743 return true;
4745 gimple *new_stmt;
4746 if (scalar_dest)
4748 type = TREE_TYPE (scalar_dest);
4749 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4750 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4752 else
4753 new_stmt = gimple_build_nop ();
4754 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4755 unlink_stmt_vdef (stmt);
4757 return true;
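/* Illustrative sketch, not GCC code (kept under #if 0): how the linear-step
   arguments handled above advance across the vectorized copies.  For copy J
   of a clone with SIMDLEN lanes and a per-lane step STEP, the clone receives
   BASE + J * SIMDLEN * STEP; inside a loop the running base is additionally
   bumped by NCOPIES * SIMDLEN * STEP once per iteration via the PHI built
   above.  */
#if 0
static long
linear_arg_for_copy_sketch (long base, long step, unsigned simdlen, unsigned j)
{
  return base + (long) j * simdlen * step;
}
#endif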
4761 /* Function vect_gen_widened_results_half
4763 Create a vector stmt whose code, operand count, and result variable
4764 are CODE, OP_TYPE, and VEC_DEST, and whose arguments are VEC_OPRND0
4765 and VEC_OPRND1. VEC_OPRND1 is used only when OP_TYPE is binary_op;
4766 for unary operations it is ignored.
4767 The new vector stmt is to be inserted at GSI.
4768 STMT_INFO is the original scalar stmt that we are vectorizing. */
4770 static gimple *
4771 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4772 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4773 tree vec_dest, gimple_stmt_iterator *gsi,
4774 stmt_vec_info stmt_info)
4776 gimple *new_stmt;
4777 tree new_temp;
4779 /* Generate half of the widened result: */
4780 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4781 if (op_type != binary_op)
4782 vec_oprnd1 = NULL;
4783 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4784 new_temp = make_ssa_name (vec_dest, new_stmt);
4785 gimple_assign_set_lhs (new_stmt, new_temp);
4786 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4788 return new_stmt;
4792 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4793 For multi-step conversions store the resulting vectors and call the function
4794 recursively. */
4796 static void
4797 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4798 int multi_step_cvt,
4799 stmt_vec_info stmt_info,
4800 vec<tree> &vec_dsts,
4801 gimple_stmt_iterator *gsi,
4802 slp_tree slp_node, enum tree_code code)
4804 unsigned int i;
4805 tree vop0, vop1, new_tmp, vec_dest;
4807 vec_dest = vec_dsts.pop ();
4809 for (i = 0; i < vec_oprnds->length (); i += 2)
4811 /* Create demotion operation. */
4812 vop0 = (*vec_oprnds)[i];
4813 vop1 = (*vec_oprnds)[i + 1];
4814 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4815 new_tmp = make_ssa_name (vec_dest, new_stmt);
4816 gimple_assign_set_lhs (new_stmt, new_tmp);
4817 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4819 if (multi_step_cvt)
4820 /* Store the resulting vector for next recursive call. */
4821 (*vec_oprnds)[i/2] = new_tmp;
4822 else
4824 /* This is the last step of the conversion sequence. Store the
4825 vectors in SLP_NODE or in vector info of the scalar statement
4826 (or in STMT_VINFO_RELATED_STMT chain). */
4827 if (slp_node)
4828 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4829 else
4830 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4834 /* For multi-step demotion operations we first generate demotion operations
4835 from the source type to the intermediate types, and then combine the
4836 results (stored in VEC_OPRNDS) with a demotion operation to the
4837 destination type. */
4838 if (multi_step_cvt)
4840 /* At each level of recursion we have half of the operands we had at the
4841 previous level. */
4842 vec_oprnds->truncate ((i+1)/2);
4843 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4844 multi_step_cvt - 1,
4845 stmt_info, vec_dsts, gsi,
4846 slp_node, VEC_PACK_TRUNC_EXPR);
4849 vec_dsts.quick_push (vec_dest);
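/* Illustrative sketch, not GCC code (kept under #if 0): each recursion level
   above packs adjacent pairs of vectors, halving their number, until a
   single vector of the final narrow type remains.  Element-wise, a two-step
   int64 -> int16 demotion of four 4-lane inputs looks like this.  */
#if 0
#include <cstdint>

static void
two_step_demote_sketch (const int64_t in[4][4], int16_t out[16])
{
  int32_t mid[2][8];
  for (int v = 0; v < 2; v++)   /* level 1: pack pairs of V4DI -> V8SI */
    for (int i = 0; i < 8; i++)
      mid[v][i] = (int32_t) in[2 * v + i / 4][i % 4];
  for (int i = 0; i < 16; i++)  /* level 2: pack the two V8SI -> V16HI */
    out[i] = (int16_t) mid[i / 8][i % 8];
}
#endif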
4853 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4854 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4855 STMT_INFO. For multi-step conversions store the resulting vectors and
4856 call the function recursively. */
4858 static void
4859 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4860 vec<tree> *vec_oprnds0,
4861 vec<tree> *vec_oprnds1,
4862 stmt_vec_info stmt_info, tree vec_dest,
4863 gimple_stmt_iterator *gsi,
4864 enum tree_code code1,
4865 enum tree_code code2, int op_type)
4867 int i;
4868 tree vop0, vop1, new_tmp1, new_tmp2;
4869 gimple *new_stmt1, *new_stmt2;
4870 vec<tree> vec_tmp = vNULL;
4872 vec_tmp.create (vec_oprnds0->length () * 2);
4873 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4875 if (op_type == binary_op)
4876 vop1 = (*vec_oprnds1)[i];
4877 else
4878 vop1 = NULL_TREE;
4880 /* Generate the two halves of promotion operation. */
4881 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4882 op_type, vec_dest, gsi,
4883 stmt_info);
4884 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4885 op_type, vec_dest, gsi,
4886 stmt_info);
4887 if (is_gimple_call (new_stmt1))
4889 new_tmp1 = gimple_call_lhs (new_stmt1);
4890 new_tmp2 = gimple_call_lhs (new_stmt2);
4892 else
4894 new_tmp1 = gimple_assign_lhs (new_stmt1);
4895 new_tmp2 = gimple_assign_lhs (new_stmt2);
4898 /* Store the results for the next step. */
4899 vec_tmp.quick_push (new_tmp1);
4900 vec_tmp.quick_push (new_tmp2);
4903 vec_oprnds0->release ();
4904 *vec_oprnds0 = vec_tmp;
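/* Illustrative sketch, not GCC code (kept under #if 0): each input vector
   above yields two widened output vectors, one from each half (CODE1/CODE2,
   e.g. VEC_UNPACK_LO_EXPR and VEC_UNPACK_HI_EXPR).  Which elements form the
   "low" half is target-dependent; the split shown here is only for
   illustration.  */
#if 0
#include <cstdint>

static void
unpack_lo_hi_sketch (const int16_t in[8], int32_t lo[4], int32_t hi[4])
{
  for (int i = 0; i < 4; i++)
    {
      lo[i] = in[i];        /* one half, widened */
      hi[i] = in[4 + i];    /* the other half, widened */
    }
}
#endif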
4907 /* Create vectorized promotion stmts for widening stmts using only half the
4908 potential vector size for input. */
4909 static void
4910 vect_create_half_widening_stmts (vec_info *vinfo,
4911 vec<tree> *vec_oprnds0,
4912 vec<tree> *vec_oprnds1,
4913 stmt_vec_info stmt_info, tree vec_dest,
4914 gimple_stmt_iterator *gsi,
4915 enum tree_code code1,
4916 int op_type)
4918 int i;
4919 tree vop0, vop1;
4920 gimple *new_stmt1;
4921 gimple *new_stmt2;
4922 gimple *new_stmt3;
4923 vec<tree> vec_tmp = vNULL;
4925 vec_tmp.create (vec_oprnds0->length ());
4926 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4928 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4930 gcc_assert (op_type == binary_op);
4931 vop1 = (*vec_oprnds1)[i];
4933 /* Widen the first vector input. */
4934 out_type = TREE_TYPE (vec_dest);
4935 new_tmp1 = make_ssa_name (out_type);
4936 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4937 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4938 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4940 /* Widen the second vector input. */
4941 new_tmp2 = make_ssa_name (out_type);
4942 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4943 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4944 /* Perform the operation, with both vector inputs widened. */
4945 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4947 else
4949 /* Perform the operation, with the single vector input widened. */
4950 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4953 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4954 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4955 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4957 /* Store the results for the next step. */
4958 vec_tmp.quick_push (new_tmp3);
4961 vec_oprnds0->release ();
4962 *vec_oprnds0 = vec_tmp;
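/* Illustrative sketch, not GCC code (kept under #if 0): the half-widening
   scheme above first converts the narrow inputs to the wider element type
   and then performs the operation at full width, instead of using an
   unpack-lo/unpack-hi pair.  For a widening multiply:  */
#if 0
#include <cstdint>

static void
half_widen_mult_sketch (const int16_t a[4], const int16_t b[4], int32_t out[4])
{
  for (int i = 0; i < 4; i++)
    {
      int32_t wa = a[i];    /* widen the first input (NOP_EXPR) */
      int32_t wb = b[i];    /* widen the second input (NOP_EXPR) */
      out[i] = wa * wb;     /* operate on the widened values */
    }
}
#endif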
4966 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4967 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4968 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4969 Return true if STMT_INFO is vectorizable in this way. */
4971 static bool
4972 vectorizable_conversion (vec_info *vinfo,
4973 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4974 gimple **vec_stmt, slp_tree slp_node,
4975 stmt_vector_for_cost *cost_vec)
4977 tree vec_dest;
4978 tree scalar_dest;
4979 tree op0, op1 = NULL_TREE;
4980 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4981 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4982 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4983 tree new_temp;
4984 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4985 int ndts = 2;
4986 poly_uint64 nunits_in;
4987 poly_uint64 nunits_out;
4988 tree vectype_out, vectype_in;
4989 int ncopies, i;
4990 tree lhs_type, rhs_type;
4991 enum { NARROW, NONE, WIDEN } modifier;
4992 vec<tree> vec_oprnds0 = vNULL;
4993 vec<tree> vec_oprnds1 = vNULL;
4994 tree vop0;
4995 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4996 int multi_step_cvt = 0;
4997 vec<tree> interm_types = vNULL;
4998 tree intermediate_type, cvt_type = NULL_TREE;
4999 int op_type;
5000 unsigned short fltsz;
5002 /* Is STMT a vectorizable conversion? */
5004 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5005 return false;
5007 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5008 && ! vec_stmt)
5009 return false;
5011 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5012 if (!stmt)
5013 return false;
5015 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5016 return false;
5018 code = gimple_assign_rhs_code (stmt);
5019 if (!CONVERT_EXPR_CODE_P (code)
5020 && code != FIX_TRUNC_EXPR
5021 && code != FLOAT_EXPR
5022 && code != WIDEN_PLUS_EXPR
5023 && code != WIDEN_MINUS_EXPR
5024 && code != WIDEN_MULT_EXPR
5025 && code != WIDEN_LSHIFT_EXPR)
5026 return false;
5028 bool widen_arith = (code == WIDEN_PLUS_EXPR
5029 || code == WIDEN_MINUS_EXPR
5030 || code == WIDEN_MULT_EXPR
5031 || code == WIDEN_LSHIFT_EXPR);
5032 op_type = TREE_CODE_LENGTH (code);
5034 /* Check types of lhs and rhs. */
5035 scalar_dest = gimple_assign_lhs (stmt);
5036 lhs_type = TREE_TYPE (scalar_dest);
5037 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5039 /* Check the operands of the operation. */
5040 slp_tree slp_op0, slp_op1 = NULL;
5041 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5042 0, &op0, &slp_op0, &dt[0], &vectype_in))
5044 if (dump_enabled_p ())
5045 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5046 "use not simple.\n");
5047 return false;
5050 rhs_type = TREE_TYPE (op0);
5051 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5052 && !((INTEGRAL_TYPE_P (lhs_type)
5053 && INTEGRAL_TYPE_P (rhs_type))
5054 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5055 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5056 return false;
5058 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5059 && ((INTEGRAL_TYPE_P (lhs_type)
5060 && !type_has_mode_precision_p (lhs_type))
5061 || (INTEGRAL_TYPE_P (rhs_type)
5062 && !type_has_mode_precision_p (rhs_type))))
5064 if (dump_enabled_p ())
5065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5066 "type conversion to/from bit-precision unsupported."
5067 "\n");
5068 return false;
5071 if (op_type == binary_op)
5073 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
5074 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
5076 op1 = gimple_assign_rhs2 (stmt);
5077 tree vectype1_in;
5078 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5079 &op1, &slp_op1, &dt[1], &vectype1_in))
5081 if (dump_enabled_p ())
5082 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5083 "use not simple.\n");
5084 return false;
5086 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5087 OP1. */
5088 if (!vectype_in)
5089 vectype_in = vectype1_in;
5092 /* If op0 is an external or constant def, infer the vector type
5093 from the scalar type. */
5094 if (!vectype_in)
5095 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5096 if (vec_stmt)
5097 gcc_assert (vectype_in);
5098 if (!vectype_in)
5100 if (dump_enabled_p ())
5101 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5102 "no vectype for scalar type %T\n", rhs_type);
5104 return false;
5107 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5108 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5110 if (dump_enabled_p ())
5111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5112 "can't convert between boolean and non "
5113 "boolean vectors %T\n", rhs_type);
5115 return false;
5118 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5119 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5120 if (known_eq (nunits_out, nunits_in))
5121 if (widen_arith)
5122 modifier = WIDEN;
5123 else
5124 modifier = NONE;
5125 else if (multiple_p (nunits_out, nunits_in))
5126 modifier = NARROW;
5127 else
5129 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5130 modifier = WIDEN;
5133 /* Multiple types in SLP are handled by creating the appropriate number of
5134 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5135 case of SLP. */
5136 if (slp_node)
5137 ncopies = 1;
5138 else if (modifier == NARROW)
5139 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5140 else
5141 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5143 /* Sanity check: make sure that at least one copy of the vectorized stmt
5144 needs to be generated. */
5145 gcc_assert (ncopies >= 1);
5147 bool found_mode = false;
5148 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5149 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5150 opt_scalar_mode rhs_mode_iter;
5152 /* Supportable by target? */
5153 switch (modifier)
5155 case NONE:
5156 if (code != FIX_TRUNC_EXPR
5157 && code != FLOAT_EXPR
5158 && !CONVERT_EXPR_CODE_P (code))
5159 return false;
5160 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
5161 break;
5162 /* FALLTHRU */
5163 unsupported:
5164 if (dump_enabled_p ())
5165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5166 "conversion not supported by target.\n");
5167 return false;
5169 case WIDEN:
5170 if (known_eq (nunits_in, nunits_out))
5172 if (!supportable_half_widening_operation (code, vectype_out,
5173 vectype_in, &code1))
5174 goto unsupported;
5175 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5176 break;
5178 if (supportable_widening_operation (vinfo, code, stmt_info,
5179 vectype_out, vectype_in, &code1,
5180 &code2, &multi_step_cvt,
5181 &interm_types))
5183 /* A binary widening operation can only be supported directly by the
5184 architecture. */
5185 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5186 break;
5189 if (code != FLOAT_EXPR
5190 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5191 goto unsupported;
5193 fltsz = GET_MODE_SIZE (lhs_mode);
5194 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5196 rhs_mode = rhs_mode_iter.require ();
5197 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5198 break;
5200 cvt_type
5201 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5202 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5203 if (cvt_type == NULL_TREE)
5204 goto unsupported;
5206 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5208 if (!supportable_convert_operation (code, vectype_out,
5209 cvt_type, &codecvt1))
5210 goto unsupported;
5212 else if (!supportable_widening_operation (vinfo, code, stmt_info,
5213 vectype_out, cvt_type,
5214 &codecvt1, &codecvt2,
5215 &multi_step_cvt,
5216 &interm_types))
5217 continue;
5218 else
5219 gcc_assert (multi_step_cvt == 0);
5221 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5222 cvt_type,
5223 vectype_in, &code1, &code2,
5224 &multi_step_cvt, &interm_types))
5226 found_mode = true;
5227 break;
5231 if (!found_mode)
5232 goto unsupported;
5234 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5235 codecvt2 = ERROR_MARK;
5236 else
5238 multi_step_cvt++;
5239 interm_types.safe_push (cvt_type);
5240 cvt_type = NULL_TREE;
5242 break;
5244 case NARROW:
5245 gcc_assert (op_type == unary_op);
5246 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5247 &code1, &multi_step_cvt,
5248 &interm_types))
5249 break;
5251 if (code != FIX_TRUNC_EXPR
5252 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5253 goto unsupported;
5255 cvt_type
5256 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5257 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5258 if (cvt_type == NULL_TREE)
5259 goto unsupported;
5260 if (!supportable_convert_operation (code, cvt_type, vectype_in,
5261 &codecvt1))
5262 goto unsupported;
5263 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5264 &code1, &multi_step_cvt,
5265 &interm_types))
5266 break;
5267 goto unsupported;
5269 default:
5270 gcc_unreachable ();
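/* Illustrative sketch, not GCC code (kept under #if 0): the multi-step WIDEN
   handling probed above for FLOAT_EXPR.  A short -> double conversion, for
   example, is split into an integer widening to an intermediate type
   (CVT_TYPE, shown here as int) followed by the actual integer -> double
   conversion.  */
#if 0
static void
short_to_double_two_step_sketch (const short in[4], double out[4])
{
  for (int i = 0; i < 4; i++)
    {
      int widened = in[i];          /* step 1: widen within integer types */
      out[i] = (double) widened;    /* step 2: FLOAT_EXPR from CVT_TYPE */
    }
}
#endif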
5273 if (!vec_stmt) /* transformation not required. */
5275 if (slp_node
5276 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5277 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5279 if (dump_enabled_p ())
5280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5281 "incompatible vector types for invariants\n");
5282 return false;
5284 DUMP_VECT_SCOPE ("vectorizable_conversion");
5285 if (modifier == NONE)
5287 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5288 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5289 cost_vec);
5291 else if (modifier == NARROW)
5293 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5294 /* The final packing step produces one vector result per copy. */
5295 unsigned int nvectors
5296 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5297 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5298 multi_step_cvt, cost_vec,
5299 widen_arith);
5301 else
5303 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5304 /* The initial unpacking step produces two vector results
5305 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5306 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5307 unsigned int nvectors
5308 = (slp_node
5309 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5310 : ncopies * 2);
5311 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5312 multi_step_cvt, cost_vec,
5313 widen_arith);
5315 interm_types.release ();
5316 return true;
5319 /* Transform. */
5320 if (dump_enabled_p ())
5321 dump_printf_loc (MSG_NOTE, vect_location,
5322 "transform conversion. ncopies = %d.\n", ncopies);
5324 if (op_type == binary_op)
5326 if (CONSTANT_CLASS_P (op0))
5327 op0 = fold_convert (TREE_TYPE (op1), op0);
5328 else if (CONSTANT_CLASS_P (op1))
5329 op1 = fold_convert (TREE_TYPE (op0), op1);
5332 /* In case of multi-step conversion, we first generate conversion operations
5333 to the intermediate types, and then from those types to the final one.
5334 We create vector destinations for the intermediate type (TYPES) received
5335 from supportable_*_operation, and store them in the correct order
5336 for future use in vect_create_vectorized_*_stmts (). */
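/* Note that VEC_DSTS[0] is the destination pushed first (for the final type,
   or for CVT_TYPE in the WIDEN case); the WIDEN transform below walks
   VEC_DSTS from index MULTI_STEP_CVT down to 0, so the intermediate-type
   destinations are consumed before that final one. */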
5337 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5338 vec_dest = vect_create_destination_var (scalar_dest,
5339 (cvt_type && modifier == WIDEN)
5340 ? cvt_type : vectype_out);
5341 vec_dsts.quick_push (vec_dest);
5343 if (multi_step_cvt)
5345 for (i = interm_types.length () - 1;
5346 interm_types.iterate (i, &intermediate_type); i--)
5348 vec_dest = vect_create_destination_var (scalar_dest,
5349 intermediate_type);
5350 vec_dsts.quick_push (vec_dest);
5354 if (cvt_type)
5355 vec_dest = vect_create_destination_var (scalar_dest,
5356 modifier == WIDEN
5357 ? vectype_out : cvt_type);
5359 int ninputs = 1;
5360 if (!slp_node)
5362 if (modifier == WIDEN)
5364 else if (modifier == NARROW)
5366 if (multi_step_cvt)
5367 ninputs = vect_pow2 (multi_step_cvt);
5368 ninputs *= 2;
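/* Each narrowing step halves the number of vectors, so with MULTI_STEP_CVT
   extra steps a single result consumes 2^(MULTI_STEP_CVT + 1) input vectors;
   for WIDEN, NINPUTS stays 1 and the inputs are read one vector at a time. */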
5372 switch (modifier)
5374 case NONE:
5375 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5376 op0, &vec_oprnds0);
5377 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5379 /* Arguments are ready. Create the new vector stmt. */
5380 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5381 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5382 new_temp = make_ssa_name (vec_dest, new_stmt);
5383 gimple_assign_set_lhs (new_stmt, new_temp);
5384 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5386 if (slp_node)
5387 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5388 else
5389 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5391 break;
5393 case WIDEN:
5394 /* In case the vectorization factor (VF) is bigger than the number
5395 of elements that we can fit in a vectype (nunits), we have to
5396 generate more than one vector stmt - i.e - we need to "unroll"
5397 the vector stmt by a factor VF/nunits. */
5398 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5399 op0, &vec_oprnds0,
5400 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5401 &vec_oprnds1);
5402 if (code == WIDEN_LSHIFT_EXPR)
5404 int oprnds_size = vec_oprnds0.length ();
5405 vec_oprnds1.create (oprnds_size);
5406 for (i = 0; i < oprnds_size; ++i)
5407 vec_oprnds1.quick_push (op1);
5409 /* Arguments are ready. Create the new vector stmts. */
5410 for (i = multi_step_cvt; i >= 0; i--)
5412 tree this_dest = vec_dsts[i];
5413 enum tree_code c1 = code1, c2 = code2;
5414 if (i == 0 && codecvt2 != ERROR_MARK)
5416 c1 = codecvt1;
5417 c2 = codecvt2;
5419 if (known_eq (nunits_out, nunits_in))
5420 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5421 &vec_oprnds1, stmt_info,
5422 this_dest, gsi,
5423 c1, op_type);
5424 else
5425 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5426 &vec_oprnds1, stmt_info,
5427 this_dest, gsi,
5428 c1, c2, op_type);
5431 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5433 gimple *new_stmt;
5434 if (cvt_type)
5436 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5437 new_temp = make_ssa_name (vec_dest);
5438 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5439 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5441 else
5442 new_stmt = SSA_NAME_DEF_STMT (vop0);
5444 if (slp_node)
5445 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5446 else
5447 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5449 break;
5451 case NARROW:
5452 /* In case the vectorization factor (VF) is bigger than the number
5453 of elements that we can fit in a vectype (nunits), we have to
5454 generate more than one vector stmt - i.e - we need to "unroll"
5455 the vector stmt by a factor VF/nunits. */
5456 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5457 op0, &vec_oprnds0);
5458 /* Arguments are ready. Create the new vector stmts. */
5459 if (cvt_type)
5460 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5462 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5463 new_temp = make_ssa_name (vec_dest);
5464 gassign *new_stmt
5465 = gimple_build_assign (new_temp, codecvt1, vop0);
5466 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5467 vec_oprnds0[i] = new_temp;
5470 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5471 multi_step_cvt,
5472 stmt_info, vec_dsts, gsi,
5473 slp_node, code1);
5474 break;
5476 if (!slp_node)
5477 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5479 vec_oprnds0.release ();
5480 vec_oprnds1.release ();
5481 interm_types.release ();
5483 return true;
5486 /* Return true if we can assume from the scalar form of STMT_INFO that
5487 neither the scalar nor the vector forms will generate code. STMT_INFO
5488 is known not to involve a data reference. */
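/* For example, a VIEW_CONVERT_EXPR or a cast between signed and unsigned
   variants of the same mode (e.g. int -> unsigned int) generates no code in
   either scalar or vector form, whereas a widening or narrowing cast does. */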
5490 bool
5491 vect_nop_conversion_p (stmt_vec_info stmt_info)
5493 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5494 if (!stmt)
5495 return false;
5497 tree lhs = gimple_assign_lhs (stmt);
5498 tree_code code = gimple_assign_rhs_code (stmt);
5499 tree rhs = gimple_assign_rhs1 (stmt);
5501 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5502 return true;
5504 if (CONVERT_EXPR_CODE_P (code))
5505 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5507 return false;
5510 /* Function vectorizable_assignment.
5512 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5513 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5514 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5515 Return true if STMT_INFO is vectorizable in this way. */
5517 static bool
5518 vectorizable_assignment (vec_info *vinfo,
5519 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5520 gimple **vec_stmt, slp_tree slp_node,
5521 stmt_vector_for_cost *cost_vec)
5523 tree vec_dest;
5524 tree scalar_dest;
5525 tree op;
5526 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5527 tree new_temp;
5528 enum vect_def_type dt[1] = {vect_unknown_def_type};
5529 int ndts = 1;
5530 int ncopies;
5531 int i;
5532 vec<tree> vec_oprnds = vNULL;
5533 tree vop;
5534 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5535 enum tree_code code;
5536 tree vectype_in;
5538 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5539 return false;
5541 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5542 && ! vec_stmt)
5543 return false;
5545 /* Is vectorizable assignment? */
5546 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5547 if (!stmt)
5548 return false;
5550 scalar_dest = gimple_assign_lhs (stmt);
5551 if (TREE_CODE (scalar_dest) != SSA_NAME)
5552 return false;
5554 if (STMT_VINFO_DATA_REF (stmt_info))
5555 return false;
5557 code = gimple_assign_rhs_code (stmt);
5558 if (!(gimple_assign_single_p (stmt)
5559 || code == PAREN_EXPR
5560 || CONVERT_EXPR_CODE_P (code)))
5561 return false;
5563 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5564 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5566 /* Multiple types in SLP are handled by creating the appropriate number of
5567 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5568 case of SLP. */
5569 if (slp_node)
5570 ncopies = 1;
5571 else
5572 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5574 gcc_assert (ncopies >= 1);
5576 slp_tree slp_op;
5577 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5578 &dt[0], &vectype_in))
5580 if (dump_enabled_p ())
5581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5582 "use not simple.\n");
5583 return false;
5585 if (!vectype_in)
5586 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5588 /* We can handle NOP_EXPR conversions that do not change the number
5589 of elements or the vector size. */
5590 if ((CONVERT_EXPR_CODE_P (code)
5591 || code == VIEW_CONVERT_EXPR)
5592 && (!vectype_in
5593 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5594 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5595 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5596 return false;
5598 if (VECTOR_BOOLEAN_TYPE_P (vectype)
5599 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5601 if (dump_enabled_p ())
5602 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5603 "can't convert between boolean and non "
5604 "boolean vectors %T\n", TREE_TYPE (op));
5606 return false;
5609 /* We do not handle bit-precision changes. */
5610 if ((CONVERT_EXPR_CODE_P (code)
5611 || code == VIEW_CONVERT_EXPR)
5612 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5613 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5614 || !type_has_mode_precision_p (TREE_TYPE (op)))
5615 /* But a conversion that does not change the bit-pattern is ok. */
5616 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5617 > TYPE_PRECISION (TREE_TYPE (op)))
5618 && TYPE_UNSIGNED (TREE_TYPE (op))))
5620 if (dump_enabled_p ())
5621 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5622 "type conversion to/from bit-precision "
5623 "unsupported.\n");
5624 return false;
5627 if (!vec_stmt) /* transformation not required. */
5629 if (slp_node
5630 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5632 if (dump_enabled_p ())
5633 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5634 "incompatible vector types for invariants\n");
5635 return false;
5637 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5638 DUMP_VECT_SCOPE ("vectorizable_assignment");
5639 if (!vect_nop_conversion_p (stmt_info))
5640 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5641 cost_vec);
5642 return true;
5645 /* Transform. */
5646 if (dump_enabled_p ())
5647 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5649 /* Handle def. */
5650 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5652 /* Handle use. */
5653 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5655 /* Arguments are ready. Create the new vector stmt. */
5656 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5658 if (CONVERT_EXPR_CODE_P (code)
5659 || code == VIEW_CONVERT_EXPR)
5660 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5661 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5662 new_temp = make_ssa_name (vec_dest, new_stmt);
5663 gimple_assign_set_lhs (new_stmt, new_temp);
5664 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5665 if (slp_node)
5666 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5667 else
5668 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5670 if (!slp_node)
5671 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5673 vec_oprnds.release ();
5674 return true;
5678 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5679 either as shift by a scalar or by a vector. */
5681 bool
5682 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5685 machine_mode vec_mode;
5686 optab optab;
5687 int icode;
5688 tree vectype;
5690 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5691 if (!vectype)
5692 return false;
5694 optab = optab_for_tree_code (code, vectype, optab_scalar);
5695 if (!optab
5696 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5698 optab = optab_for_tree_code (code, vectype, optab_vector);
5699 if (!optab
5700 || (optab_handler (optab, TYPE_MODE (vectype))
5701 == CODE_FOR_nothing))
5702 return false;
5705 vec_mode = TYPE_MODE (vectype);
5706 icode = (int) optab_handler (optab, vec_mode);
5707 if (icode == CODE_FOR_nothing)
5708 return false;
5710 return true;
5714 /* Function vectorizable_shift.
5716 Check if STMT_INFO performs a shift operation that can be vectorized.
5717 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5718 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5719 Return true if STMT_INFO is vectorizable in this way. */
5721 static bool
5722 vectorizable_shift (vec_info *vinfo,
5723 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5724 gimple **vec_stmt, slp_tree slp_node,
5725 stmt_vector_for_cost *cost_vec)
5727 tree vec_dest;
5728 tree scalar_dest;
5729 tree op0, op1 = NULL;
5730 tree vec_oprnd1 = NULL_TREE;
5731 tree vectype;
5732 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5733 enum tree_code code;
5734 machine_mode vec_mode;
5735 tree new_temp;
5736 optab optab;
5737 int icode;
5738 machine_mode optab_op2_mode;
5739 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5740 int ndts = 2;
5741 poly_uint64 nunits_in;
5742 poly_uint64 nunits_out;
5743 tree vectype_out;
5744 tree op1_vectype;
5745 int ncopies;
5746 int i;
5747 vec<tree> vec_oprnds0 = vNULL;
5748 vec<tree> vec_oprnds1 = vNULL;
5749 tree vop0, vop1;
5750 unsigned int k;
5751 bool scalar_shift_arg = true;
5752 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5753 bool incompatible_op1_vectype_p = false;
5755 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5756 return false;
5758 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5759 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5760 && ! vec_stmt)
5761 return false;
5763 /* Is STMT a vectorizable binary/unary operation? */
5764 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5765 if (!stmt)
5766 return false;
5768 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5769 return false;
5771 code = gimple_assign_rhs_code (stmt);
5773 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5774 || code == RROTATE_EXPR))
5775 return false;
5777 scalar_dest = gimple_assign_lhs (stmt);
5778 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5779 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5781 if (dump_enabled_p ())
5782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5783 "bit-precision shifts not supported.\n");
5784 return false;
5787 slp_tree slp_op0;
5788 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5789 0, &op0, &slp_op0, &dt[0], &vectype))
5791 if (dump_enabled_p ())
5792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5793 "use not simple.\n");
5794 return false;
5796 /* If op0 is an external or constant def, infer the vector type
5797 from the scalar type. */
5798 if (!vectype)
5799 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5800 if (vec_stmt)
5801 gcc_assert (vectype);
5802 if (!vectype)
5804 if (dump_enabled_p ())
5805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5806 "no vectype for scalar type\n");
5807 return false;
5810 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5811 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5812 if (maybe_ne (nunits_out, nunits_in))
5813 return false;
5815 stmt_vec_info op1_def_stmt_info;
5816 slp_tree slp_op1;
5817 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5818 &dt[1], &op1_vectype, &op1_def_stmt_info))
5820 if (dump_enabled_p ())
5821 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5822 "use not simple.\n");
5823 return false;
5826 /* Multiple types in SLP are handled by creating the appropriate number of
5827 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5828 case of SLP. */
5829 if (slp_node)
5830 ncopies = 1;
5831 else
5832 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5834 gcc_assert (ncopies >= 1);
5836 /* Determine whether the shift amount is a vector, or scalar. If the
5837 shift/rotate amount is a vector, use the vector/vector shift optabs. */
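/* Illustrative examples: for 'x[i] << 3', or 'x[i] << n' with loop-invariant
   N, the amount is treated as a scalar and the vector-by-scalar optab can be
   used; for 'x[i] << y[i]' the amount varies per lane and the
   vector-by-vector optab is required. */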
5839 if ((dt[1] == vect_internal_def
5840 || dt[1] == vect_induction_def
5841 || dt[1] == vect_nested_cycle)
5842 && !slp_node)
5843 scalar_shift_arg = false;
5844 else if (dt[1] == vect_constant_def
5845 || dt[1] == vect_external_def
5846 || dt[1] == vect_internal_def)
5848 /* In SLP we need to check whether the shift count is the same in all
5849 scalar stmts; in loops, if it is a constant or invariant, it is
5850 always a scalar shift. */
5851 if (slp_node)
5853 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5854 stmt_vec_info slpstmt_info;
5856 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5858 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5859 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5860 scalar_shift_arg = false;
5863 /* For internal SLP defs we have to make sure we see scalar stmts
5864 for all vector elements.
5865 ??? For different vectors we could resort to a different
5866 scalar shift operand but code-generation below simply always
5867 takes the first. */
5868 if (dt[1] == vect_internal_def
5869 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5870 stmts.length ()))
5871 scalar_shift_arg = false;
5874 /* If the shift amount is computed by a pattern stmt we cannot
5875 use the scalar amount directly thus give up and use a vector
5876 shift. */
5877 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5878 scalar_shift_arg = false;
5880 else
5882 if (dump_enabled_p ())
5883 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5884 "operand mode requires invariant argument.\n");
5885 return false;
5888 /* Vector shifted by vector. */
5889 bool was_scalar_shift_arg = scalar_shift_arg;
5890 if (!scalar_shift_arg)
5892 optab = optab_for_tree_code (code, vectype, optab_vector);
5893 if (dump_enabled_p ())
5894 dump_printf_loc (MSG_NOTE, vect_location,
5895 "vector/vector shift/rotate found.\n");
5897 if (!op1_vectype)
5898 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5899 slp_op1);
5900 incompatible_op1_vectype_p
5901 = (op1_vectype == NULL_TREE
5902 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5903 TYPE_VECTOR_SUBPARTS (vectype))
5904 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5905 if (incompatible_op1_vectype_p
5906 && (!slp_node
5907 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5908 || slp_op1->refcnt != 1))
5910 if (dump_enabled_p ())
5911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5912 "unusable type for last operand in"
5913 " vector/vector shift/rotate.\n");
5914 return false;
5917 /* See if the machine has a vector shifted by scalar insn and if not
5918 then see if it has a vector shifted by vector insn. */
5919 else
5921 optab = optab_for_tree_code (code, vectype, optab_scalar);
5922 if (optab
5923 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5925 if (dump_enabled_p ())
5926 dump_printf_loc (MSG_NOTE, vect_location,
5927 "vector/scalar shift/rotate found.\n");
5929 else
5931 optab = optab_for_tree_code (code, vectype, optab_vector);
5932 if (optab
5933 && (optab_handler (optab, TYPE_MODE (vectype))
5934 != CODE_FOR_nothing))
5936 scalar_shift_arg = false;
5938 if (dump_enabled_p ())
5939 dump_printf_loc (MSG_NOTE, vect_location,
5940 "vector/vector shift/rotate found.\n");
5942 if (!op1_vectype)
5943 op1_vectype = get_vectype_for_scalar_type (vinfo,
5944 TREE_TYPE (op1),
5945 slp_op1);
5947 /* Unlike the other binary operators, shifts/rotates have
5948 the rhs being int, instead of the same type as the lhs,
5949 so make sure the scalar is the right type if we are
5950 dealing with vectors of long long/long/short/char. */
5951 incompatible_op1_vectype_p
5952 = (!op1_vectype
5953 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5954 TREE_TYPE (op1)));
5955 if (incompatible_op1_vectype_p
5956 && dt[1] == vect_internal_def)
5958 if (dump_enabled_p ())
5959 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5960 "unusable type for last operand in"
5961 " vector/vector shift/rotate.\n");
5962 return false;
5968 /* Supportable by target? */
5969 if (!optab)
5971 if (dump_enabled_p ())
5972 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5973 "no optab.\n");
5974 return false;
5976 vec_mode = TYPE_MODE (vectype);
5977 icode = (int) optab_handler (optab, vec_mode);
5978 if (icode == CODE_FOR_nothing)
5980 if (dump_enabled_p ())
5981 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5982 "op not supported by target.\n");
5983 return false;
5985 /* vector lowering cannot optimize vector shifts using word arithmetic. */
5986 if (vect_emulated_vector_p (vectype))
5987 return false;
5989 if (!vec_stmt) /* transformation not required. */
5991 if (slp_node
5992 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5993 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5994 && (!incompatible_op1_vectype_p
5995 || dt[1] == vect_constant_def)
5996 && !vect_maybe_update_slp_op_vectype
5997 (slp_op1,
5998 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6000 if (dump_enabled_p ())
6001 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6002 "incompatible vector types for invariants\n");
6003 return false;
6005 /* Now adjust the constant shift amount in place. */
6006 if (slp_node
6007 && incompatible_op1_vectype_p
6008 && dt[1] == vect_constant_def)
6010 for (unsigned i = 0;
6011 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6013 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6014 = fold_convert (TREE_TYPE (vectype),
6015 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6016 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6017 == INTEGER_CST));
6020 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6021 DUMP_VECT_SCOPE ("vectorizable_shift");
6022 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6023 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6024 return true;
6027 /* Transform. */
6029 if (dump_enabled_p ())
6030 dump_printf_loc (MSG_NOTE, vect_location,
6031 "transform binary/unary operation.\n");
6033 if (incompatible_op1_vectype_p && !slp_node)
6035 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6036 op1 = fold_convert (TREE_TYPE (vectype), op1);
6037 if (dt[1] != vect_constant_def)
6038 op1 = vect_init_vector (vinfo, stmt_info, op1,
6039 TREE_TYPE (vectype), NULL);
6042 /* Handle def. */
6043 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6045 if (scalar_shift_arg && dt[1] != vect_internal_def)
6047 /* Vector shl and shr insn patterns can be defined with scalar
6048 operand 2 (shift operand). In this case, use constant or loop
6049 invariant op1 directly, without extending it to vector mode
6050 first. */
6051 optab_op2_mode = insn_data[icode].operand[2].mode;
6052 if (!VECTOR_MODE_P (optab_op2_mode))
6054 if (dump_enabled_p ())
6055 dump_printf_loc (MSG_NOTE, vect_location,
6056 "operand 1 using scalar mode.\n");
6057 vec_oprnd1 = op1;
6058 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6059 vec_oprnds1.quick_push (vec_oprnd1);
6060 /* Store vec_oprnd1 for every vector stmt to be created.
6061 We check during the analysis that all the shift arguments
6062 are the same.
6063 TODO: Allow different constants for different vector
6064 stmts generated for an SLP instance. */
6065 for (k = 0;
6066 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6067 vec_oprnds1.quick_push (vec_oprnd1);
6070 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6072 if (was_scalar_shift_arg)
6074 /* If the argument was the same in all lanes create
6075 the correctly typed vector shift amount directly. */
6076 op1 = fold_convert (TREE_TYPE (vectype), op1);
6077 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6078 !loop_vinfo ? gsi : NULL);
6079 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6080 !loop_vinfo ? gsi : NULL);
6081 vec_oprnds1.create (slp_node->vec_stmts_size);
6082 for (k = 0; k < slp_node->vec_stmts_size; k++)
6083 vec_oprnds1.quick_push (vec_oprnd1);
6085 else if (dt[1] == vect_constant_def)
6086 /* The constant shift amount has been adjusted in place. */
6088 else
6089 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6092 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6093 (a special case for certain kinds of vector shifts); otherwise,
6094 operand 1 should be of a vector type (the usual case). */
6095 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6096 op0, &vec_oprnds0,
6097 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6099 /* Arguments are ready. Create the new vector stmt. */
6100 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6102 /* For internal defs where we need to use a scalar shift arg,
6103 extract the first lane. */
6104 if (scalar_shift_arg && dt[1] == vect_internal_def)
6106 vop1 = vec_oprnds1[0];
6107 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6108 gassign *new_stmt
6109 = gimple_build_assign (new_temp,
6110 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6111 vop1,
6112 TYPE_SIZE (TREE_TYPE (new_temp)),
6113 bitsize_zero_node));
6114 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6115 vop1 = new_temp;
6117 else
6118 vop1 = vec_oprnds1[i];
6119 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6120 new_temp = make_ssa_name (vec_dest, new_stmt);
6121 gimple_assign_set_lhs (new_stmt, new_temp);
6122 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6123 if (slp_node)
6124 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6125 else
6126 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6129 if (!slp_node)
6130 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6132 vec_oprnds0.release ();
6133 vec_oprnds1.release ();
6135 return true;
6139 /* Function vectorizable_operation.
6141 Check if STMT_INFO performs a binary, unary or ternary operation that can
6142 be vectorized.
6143 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6144 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6145 Return true if STMT_INFO is vectorizable in this way. */
6147 static bool
6148 vectorizable_operation (vec_info *vinfo,
6149 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6150 gimple **vec_stmt, slp_tree slp_node,
6151 stmt_vector_for_cost *cost_vec)
6153 tree vec_dest;
6154 tree scalar_dest;
6155 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6156 tree vectype;
6157 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6158 enum tree_code code, orig_code;
6159 machine_mode vec_mode;
6160 tree new_temp;
6161 int op_type;
6162 optab optab;
6163 bool target_support_p;
6164 enum vect_def_type dt[3]
6165 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6166 int ndts = 3;
6167 poly_uint64 nunits_in;
6168 poly_uint64 nunits_out;
6169 tree vectype_out;
6170 int ncopies, vec_num;
6171 int i;
6172 vec<tree> vec_oprnds0 = vNULL;
6173 vec<tree> vec_oprnds1 = vNULL;
6174 vec<tree> vec_oprnds2 = vNULL;
6175 tree vop0, vop1, vop2;
6176 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6178 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6179 return false;
6181 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6182 && ! vec_stmt)
6183 return false;
6185 /* Is STMT a vectorizable binary/unary operation? */
6186 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6187 if (!stmt)
6188 return false;
6190 /* Loads and stores are handled in vectorizable_{load,store}. */
6191 if (STMT_VINFO_DATA_REF (stmt_info))
6192 return false;
6194 orig_code = code = gimple_assign_rhs_code (stmt);
6196 /* Shifts are handled in vectorizable_shift. */
6197 if (code == LSHIFT_EXPR
6198 || code == RSHIFT_EXPR
6199 || code == LROTATE_EXPR
6200 || code == RROTATE_EXPR)
6201 return false;
6203 /* Comparisons are handled in vectorizable_comparison. */
6204 if (TREE_CODE_CLASS (code) == tcc_comparison)
6205 return false;
6207 /* Conditions are handled in vectorizable_condition. */
6208 if (code == COND_EXPR)
6209 return false;
6211 /* For pointer addition and subtraction, we should use the normal
6212 plus and minus for the vector operation. */
6213 if (code == POINTER_PLUS_EXPR)
6214 code = PLUS_EXPR;
6215 if (code == POINTER_DIFF_EXPR)
6216 code = MINUS_EXPR;
6218 /* Support only unary or binary operations. */
6219 op_type = TREE_CODE_LENGTH (code);
6220 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6222 if (dump_enabled_p ())
6223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6224 "num. args = %d (not unary/binary/ternary op).\n",
6225 op_type);
6226 return false;
6229 scalar_dest = gimple_assign_lhs (stmt);
6230 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6232 /* Most operations cannot handle bit-precision types without extra
6233 truncations. */
6234 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6235 if (!mask_op_p
6236 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6237 /* Exceptions are bitwise binary operations. */
6238 && code != BIT_IOR_EXPR
6239 && code != BIT_XOR_EXPR
6240 && code != BIT_AND_EXPR)
6242 if (dump_enabled_p ())
6243 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6244 "bit-precision arithmetic not supported.\n");
6245 return false;
6248 slp_tree slp_op0;
6249 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6250 0, &op0, &slp_op0, &dt[0], &vectype))
6252 if (dump_enabled_p ())
6253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6254 "use not simple.\n");
6255 return false;
6257 bool is_invariant = (dt[0] == vect_external_def
6258 || dt[0] == vect_constant_def);
6259 /* If op0 is an external or constant def, infer the vector type
6260 from the scalar type. */
6261 if (!vectype)
6263 /* For a boolean type we cannot determine the vectype from an
6264 invariant value (we don't know whether it is a vector
6265 of booleans or a vector of integers). We use the output
6266 vectype because operations on booleans don't change
6267 the type. */
6268 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6270 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6272 if (dump_enabled_p ())
6273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6274 "not supported operation on bool value.\n");
6275 return false;
6277 vectype = vectype_out;
6279 else
6280 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6281 slp_node);
6283 if (vec_stmt)
6284 gcc_assert (vectype);
6285 if (!vectype)
6287 if (dump_enabled_p ())
6288 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6289 "no vectype for scalar type %T\n",
6290 TREE_TYPE (op0));
6292 return false;
6295 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6296 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6297 if (maybe_ne (nunits_out, nunits_in))
6298 return false;
6300 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6301 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6302 if (op_type == binary_op || op_type == ternary_op)
6304 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6305 1, &op1, &slp_op1, &dt[1], &vectype2))
6307 if (dump_enabled_p ())
6308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6309 "use not simple.\n");
6310 return false;
6312 is_invariant &= (dt[1] == vect_external_def
6313 || dt[1] == vect_constant_def);
6314 if (vectype2
6315 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6316 return false;
6318 if (op_type == ternary_op)
6320 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6321 2, &op2, &slp_op2, &dt[2], &vectype3))
6323 if (dump_enabled_p ())
6324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6325 "use not simple.\n");
6326 return false;
6328 is_invariant &= (dt[2] == vect_external_def
6329 || dt[2] == vect_constant_def);
6330 if (vectype3
6331 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6332 return false;
6335 /* Multiple types in SLP are handled by creating the appropriate number of
6336 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6337 case of SLP. */
6338 if (slp_node)
6340 ncopies = 1;
6341 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6343 else
6345 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6346 vec_num = 1;
6349 gcc_assert (ncopies >= 1);
6351 /* Reject attempts to combine mask types with nonmask types, e.g. if
6352 we have an AND between a (nonmask) boolean loaded from memory and
6353 a (mask) boolean result of a comparison.
6355 TODO: We could easily fix these cases up using pattern statements. */
6356 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6357 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6358 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6360 if (dump_enabled_p ())
6361 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6362 "mixed mask and nonmask vector types\n");
6363 return false;
6366 /* Supportable by target? */
6368 vec_mode = TYPE_MODE (vectype);
6369 if (code == MULT_HIGHPART_EXPR)
6370 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6371 else
6373 optab = optab_for_tree_code (code, vectype, optab_default);
6374 if (!optab)
6376 if (dump_enabled_p ())
6377 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6378 "no optab.\n");
6379 return false;
6381 target_support_p = (optab_handler (optab, vec_mode)
6382 != CODE_FOR_nothing);
6385 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6386 if (!target_support_p)
6388 if (dump_enabled_p ())
6389 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6390 "op not supported by target.\n");
6391 /* Check only during analysis. */
6392 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6393 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6394 return false;
6395 if (dump_enabled_p ())
6396 dump_printf_loc (MSG_NOTE, vect_location,
6397 "proceeding using word mode.\n");
6398 using_emulated_vectors_p = true;
6401 if (using_emulated_vectors_p
6402 && !vect_can_vectorize_without_simd_p (code))
6404 if (dump_enabled_p ())
6405 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6406 return false;
6409 /* ??? We should instead expand the operations here, instead of
6410 relying on vector lowering, which performs elementwise operations
6411 whenever the number of vector elements is below its hard cap. */
6412 if (using_emulated_vectors_p
6413 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6414 && ((BITS_PER_WORD / vector_element_bits (vectype)) < 4
6415 || maybe_lt (nunits_out, 4U)))
6417 if (dump_enabled_p ())
6418 dump_printf (MSG_NOTE, "not using word mode for +- and less than "
6419 "four vector elements\n");
6420 return false;
6423 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6424 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6425 internal_fn cond_fn = get_conditional_internal_fn (code);
6427 /* If operating on inactive elements could generate spurious traps,
6428 we need to restrict the operation to active lanes. Note that this
6429 specifically doesn't apply to unhoisted invariants, since they
6430 operate on the same value for every lane.
6432 Similarly, if this operation is part of a reduction, a fully-masked
6433 loop should only change the active lanes of the reduction chain,
6434 keeping the inactive lanes as-is. */
6435 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6436 || reduc_idx >= 0);
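/* For instance (illustrative), an integer division whose divisor is loaded
   inside the loop could trap on an inactive lane, so in a partially-masked
   loop it has to be emitted as the corresponding conditional internal
   function under the loop mask; if no such conditional operation exists,
   the loop cannot use partial vectors (see below). */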
6438 if (!vec_stmt) /* transformation not required. */
6440 if (loop_vinfo
6441 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6442 && mask_out_inactive)
6444 if (cond_fn == IFN_LAST
6445 || !direct_internal_fn_supported_p (cond_fn, vectype,
6446 OPTIMIZE_FOR_SPEED))
6448 if (dump_enabled_p ())
6449 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6450 "can't use a fully-masked loop because no"
6451 " conditional operation is available.\n");
6452 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6454 else
6455 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6456 vectype, NULL);
6459 /* Put types on constant and invariant SLP children. */
6460 if (slp_node
6461 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6462 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6463 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6465 if (dump_enabled_p ())
6466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6467 "incompatible vector types for invariants\n");
6468 return false;
6471 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6472 DUMP_VECT_SCOPE ("vectorizable_operation");
6473 vect_model_simple_cost (vinfo, stmt_info,
6474 ncopies, dt, ndts, slp_node, cost_vec);
6475 if (using_emulated_vectors_p)
6477 /* The above vect_model_simple_cost call handles constants
6478 in the prologue and (mis-)costs one of the stmts as a
6479 vector stmt. See tree-vect-generic.cc:do_plus_minus/do_negate
6480 for the actual lowering that will be applied. */
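/* The multipliers below are heuristic: they approximate how many scalar
   word-mode stmts that lowering emits per emulated vector add, subtract or
   negate, rather than being exact instruction counts. */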
6481 unsigned n
6482 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6483 switch (code)
6485 case PLUS_EXPR:
6486 n *= 5;
6487 break;
6488 case MINUS_EXPR:
6489 n *= 6;
6490 break;
6491 case NEGATE_EXPR:
6492 n *= 4;
6493 break;
6494 default:;
6496 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
6498 return true;
6501 /* Transform. */
6503 if (dump_enabled_p ())
6504 dump_printf_loc (MSG_NOTE, vect_location,
6505 "transform binary/unary operation.\n");
6507 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6509 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6510 vectors with unsigned elements, but the result is signed. So, we
6511 need to compute the MINUS_EXPR into vectype temporary and
6512 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6513 tree vec_cvt_dest = NULL_TREE;
6514 if (orig_code == POINTER_DIFF_EXPR)
6516 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6517 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6519 /* Handle def. */
6520 else
6521 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6523 /* In case the vectorization factor (VF) is bigger than the number
6524 of elements that we can fit in a vectype (nunits), we have to generate
6525 more than one vector stmt - i.e - we need to "unroll" the
6526 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6527 from one copy of the vector stmt to the next, in the field
6528 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6529 stages to find the correct vector defs to be used when vectorizing
6530 stmts that use the defs of the current stmt. The example below
6531 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6532 we need to create 4 vectorized stmts):
6534 before vectorization:
6535 RELATED_STMT VEC_STMT
6536 S1: x = memref - -
6537 S2: z = x + 1 - -
6539 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6540 there):
6541 RELATED_STMT VEC_STMT
6542 VS1_0: vx0 = memref0 VS1_1 -
6543 VS1_1: vx1 = memref1 VS1_2 -
6544 VS1_2: vx2 = memref2 VS1_3 -
6545 VS1_3: vx3 = memref3 - -
6546 S1: x = load - VS1_0
6547 S2: z = x + 1 - -
6549 step2: vectorize stmt S2 (done here):
6550 To vectorize stmt S2 we first need to find the relevant vector
6551 def for the first operand 'x'. This is, as usual, obtained from
6552 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6553 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6554 relevant vector def 'vx0'. Having found 'vx0' we can generate
6555 the vector stmt VS2_0, and as usual, record it in the
6556 STMT_VINFO_VEC_STMT of stmt S2.
6557 When creating the second copy (VS2_1), we obtain the relevant vector
6558 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6559 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6560 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6561 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6562 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6563 chain of stmts and pointers:
6564 RELATED_STMT VEC_STMT
6565 VS1_0: vx0 = memref0 VS1_1 -
6566 VS1_1: vx1 = memref1 VS1_2 -
6567 VS1_2: vx2 = memref2 VS1_3 -
6568 VS1_3: vx3 = memref3 - -
6569 S1: x = load - VS1_0
6570 VS2_0: vz0 = vx0 + v1 VS2_1 -
6571 VS2_1: vz1 = vx1 + v1 VS2_2 -
6572 VS2_2: vz2 = vx2 + v1 VS2_3 -
6573 VS2_3: vz3 = vx3 + v1 - -
6574 S2: z = x + 1 - VS2_0 */
6576 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6577 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6578 /* Arguments are ready. Create the new vector stmt. */
6579 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6581 gimple *new_stmt = NULL;
6582 vop1 = ((op_type == binary_op || op_type == ternary_op)
6583 ? vec_oprnds1[i] : NULL_TREE);
6584 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6585 if (masked_loop_p && mask_out_inactive)
6587 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6588 vectype, i);
6589 auto_vec<tree> vops (5);
6590 vops.quick_push (mask);
6591 vops.quick_push (vop0);
6592 if (vop1)
6593 vops.quick_push (vop1);
6594 if (vop2)
6595 vops.quick_push (vop2);
6596 if (reduc_idx >= 0)
6598 /* Perform the operation on active elements only and take
6599 inactive elements from the reduction chain input. */
6600 gcc_assert (!vop2);
6601 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
6603 else
6605 auto else_value = targetm.preferred_else_value
6606 (cond_fn, vectype, vops.length () - 1, &vops[1]);
6607 vops.quick_push (else_value);
6609 gcall *call = gimple_build_call_internal_vec (cond_fn, vops);
6610 new_temp = make_ssa_name (vec_dest, call);
6611 gimple_call_set_lhs (call, new_temp);
6612 gimple_call_set_nothrow (call, true);
6613 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6614 new_stmt = call;
6616 else
6618 tree mask = NULL_TREE;
6619 /* When combining two masks, check if either of them is already
6620 combined with a loop mask elsewhere; if so, we can mark the new
6621 combined mask as not needing to be combined with a loop mask again. */
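/* Illustrative scenario: when vectorizing a condition like
   'a[i] < x && b[i] < y', the two comparison masks are ANDed here; if one
   of them is already ANDed with the loop mask somewhere else, folding the
   loop mask into this AND lets the result be reused without a further AND. */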
6622 if (masked_loop_p
6623 && code == BIT_AND_EXPR
6624 && VECTOR_BOOLEAN_TYPE_P (vectype))
6626 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
6627 ncopies}))
6629 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6630 vectype, i);
6632 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6633 vop0, gsi);
6636 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
6637 ncopies }))
6639 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6640 vectype, i);
6642 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6643 vop1, gsi);
6647 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6648 new_temp = make_ssa_name (vec_dest, new_stmt);
6649 gimple_assign_set_lhs (new_stmt, new_temp);
6650 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6651 if (using_emulated_vectors_p)
6652 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
6654 /* Enter the combined value into the vector cond hash so we don't
6655 AND it with a loop mask again. */
6656 if (mask)
6657 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
6659 if (vec_cvt_dest)
6661 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6662 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6663 new_temp);
6664 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6665 gimple_assign_set_lhs (new_stmt, new_temp);
6666 vect_finish_stmt_generation (vinfo, stmt_info,
6667 new_stmt, gsi);
6670 if (slp_node)
6671 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6672 else
6673 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6676 if (!slp_node)
6677 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6679 vec_oprnds0.release ();
6680 vec_oprnds1.release ();
6681 vec_oprnds2.release ();
6683 return true;
6686 /* A helper function to ensure data reference DR_INFO's base alignment. */
6688 static void
6689 ensure_base_align (dr_vec_info *dr_info)
6691 /* Alignment is only analyzed for the first element of a DR group;
6692 use that element to determine the base alignment we need to enforce. */
6693 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
6694 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
6696 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
6698 if (dr_info->base_misaligned)
6700 tree base_decl = dr_info->base_decl;
6702 // We should only be able to increase the alignment of a base object if
6703 // we know what its new alignment should be at compile time.
6704 unsigned HOST_WIDE_INT align_base_to =
6705 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6707 if (decl_in_symtab_p (base_decl))
6708 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6709 else if (DECL_ALIGN (base_decl) < align_base_to)
6711 SET_DECL_ALIGN (base_decl, align_base_to);
6712 DECL_USER_ALIGN (base_decl) = 1;
6714 dr_info->base_misaligned = false;
6719 /* Function get_group_alias_ptr_type.
6721 Return the alias type for the group starting at FIRST_STMT_INFO. */
6723 static tree
6724 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6726 struct data_reference *first_dr, *next_dr;
6728 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6729 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6730 while (next_stmt_info)
6732 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6733 if (get_alias_set (DR_REF (first_dr))
6734 != get_alias_set (DR_REF (next_dr)))
6736 if (dump_enabled_p ())
6737 dump_printf_loc (MSG_NOTE, vect_location,
6738 "conflicting alias set types.\n");
6739 return ptr_type_node;
6741 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6743 return reference_alias_ptr_type (DR_REF (first_dr));
6747 /* Function scan_operand_equal_p.
6749 Helper function for check_scan_store. Compare two references
6750 with .GOMP_SIMD_LANE bases. */
6752 static bool
6753 scan_operand_equal_p (tree ref1, tree ref2)
6755 tree ref[2] = { ref1, ref2 };
6756 poly_int64 bitsize[2], bitpos[2];
6757 tree offset[2], base[2];
6758 for (int i = 0; i < 2; ++i)
6760 machine_mode mode;
6761 int unsignedp, reversep, volatilep = 0;
6762 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6763 &offset[i], &mode, &unsignedp,
6764 &reversep, &volatilep);
6765 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6766 return false;
6767 if (TREE_CODE (base[i]) == MEM_REF
6768 && offset[i] == NULL_TREE
6769 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6771 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6772 if (is_gimple_assign (def_stmt)
6773 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6774 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6775 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6777 if (maybe_ne (mem_ref_offset (base[i]), 0))
6778 return false;
6779 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6780 offset[i] = gimple_assign_rhs2 (def_stmt);
6785 if (!operand_equal_p (base[0], base[1], 0))
6786 return false;
6787 if (maybe_ne (bitsize[0], bitsize[1]))
6788 return false;
6789 if (offset[0] != offset[1])
6791 if (!offset[0] || !offset[1])
6792 return false;
6793 if (!operand_equal_p (offset[0], offset[1], 0))
6795 tree step[2];
6796 for (int i = 0; i < 2; ++i)
6798 step[i] = integer_one_node;
6799 if (TREE_CODE (offset[i]) == SSA_NAME)
6801 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6802 if (is_gimple_assign (def_stmt)
6803 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6804 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6805 == INTEGER_CST))
6807 step[i] = gimple_assign_rhs2 (def_stmt);
6808 offset[i] = gimple_assign_rhs1 (def_stmt);
6811 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6813 step[i] = TREE_OPERAND (offset[i], 1);
6814 offset[i] = TREE_OPERAND (offset[i], 0);
6816 tree rhs1 = NULL_TREE;
6817 if (TREE_CODE (offset[i]) == SSA_NAME)
6819 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6820 if (gimple_assign_cast_p (def_stmt))
6821 rhs1 = gimple_assign_rhs1 (def_stmt);
6823 else if (CONVERT_EXPR_P (offset[i]))
6824 rhs1 = TREE_OPERAND (offset[i], 0);
6825 if (rhs1
6826 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6827 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6828 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6829 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6830 offset[i] = rhs1;
6832 if (!operand_equal_p (offset[0], offset[1], 0)
6833 || !operand_equal_p (step[0], step[1], 0))
6834 return false;
6837 return true;
6841 enum scan_store_kind {
6842 /* Normal permutation. */
6843 scan_store_kind_perm,
6845 /* Whole vector left shift permutation with zero init. */
6846 scan_store_kind_lshift_zero,
6848 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6849 scan_store_kind_lshift_cond
6852 /* Function scan_store_can_perm_p.
6854 Verify if we can perform the needed permutations or whole vector shifts.
6855 Return -1 on failure, otherwise the exact log2 of VECTYPE's nunits.
6856 USE_WHOLE_VECTOR is a vector of enum scan_store_kind recording which
6857 operation to do at each step. */
6859 static int
6860 scan_store_can_perm_p (tree vectype, tree init,
6861 vec<enum scan_store_kind> *use_whole_vector = NULL)
6863 enum machine_mode vec_mode = TYPE_MODE (vectype);
6864 unsigned HOST_WIDE_INT nunits;
6865 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6866 return -1;
6867 int units_log2 = exact_log2 (nunits);
6868 if (units_log2 <= 0)
6869 return -1;
6871 int i;
6872 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6873 for (i = 0; i <= units_log2; ++i)
6875 unsigned HOST_WIDE_INT j, k;
6876 enum scan_store_kind kind = scan_store_kind_perm;
6877 vec_perm_builder sel (nunits, nunits, 1);
6878 sel.quick_grow (nunits);
6879 if (i == units_log2)
6881 for (j = 0; j < nunits; ++j)
6882 sel[j] = nunits - 1;
6884 else
6886 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6887 sel[j] = j;
6888 for (k = 0; j < nunits; ++j, ++k)
6889 sel[j] = nunits + k;
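/* Worked example (illustrative): for nunits == 8 and i == 1 the selector
   built above is { 0, 1, 8, 9, 10, 11, 12, 13 }, i.e. the low two lanes come
   from the first input vector and the remaining lanes from the second; for
   i == units_log2 every lane selects element nunits - 1. */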
6891 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6892 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
6894 if (i == units_log2)
6895 return -1;
6897 if (whole_vector_shift_kind == scan_store_kind_perm)
6899 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6900 return -1;
6901 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6902 /* Whole vector shifts shift in zeros, so if init is an all-zeros
6903 constant, there is no need to do anything further. */
6904 if ((TREE_CODE (init) != INTEGER_CST
6905 && TREE_CODE (init) != REAL_CST)
6906 || !initializer_zerop (init))
6908 tree masktype = truth_type_for (vectype);
6909 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6910 return -1;
6911 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6914 kind = whole_vector_shift_kind;
6916 if (use_whole_vector)
6918 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6919 use_whole_vector->safe_grow_cleared (i, true);
6920 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6921 use_whole_vector->safe_push (kind);
6925 return units_log2;
6929 /* Function check_scan_store.
6931 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6933 static bool
6934 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6935 enum vect_def_type rhs_dt, bool slp, tree mask,
6936 vect_memory_access_type memory_access_type)
6938 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6939 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6940 tree ref_type;
6942 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6943 if (slp
6944 || mask
6945 || memory_access_type != VMAT_CONTIGUOUS
6946 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6947 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6948 || loop_vinfo == NULL
6949 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6950 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6951 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6952 || !integer_zerop (DR_INIT (dr_info->dr))
6953 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6954 || !alias_sets_conflict_p (get_alias_set (vectype),
6955 get_alias_set (TREE_TYPE (ref_type))))
6957 if (dump_enabled_p ())
6958 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6959 "unsupported OpenMP scan store.\n");
6960 return false;
6963 /* We need to pattern match code built by OpenMP lowering and simplified
6964 by subsequent optimizations into something we can handle.
6965 #pragma omp simd reduction(inscan,+:r)
6966 for (...)
6968 r += something ();
6969 #pragma omp scan inclusive (r)
6970 use (r);
6972 shall have body with:
6973 // Initialization for input phase, store the reduction initializer:
6974 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6975 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6976 D.2042[_21] = 0;
6977 // Actual input phase:
6979 r.0_5 = D.2042[_20];
6980 _6 = _4 + r.0_5;
6981 D.2042[_20] = _6;
6982 // Initialization for scan phase:
6983 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6984 _26 = D.2043[_25];
6985 _27 = D.2042[_25];
6986 _28 = _26 + _27;
6987 D.2043[_25] = _28;
6988 D.2042[_25] = _28;
6989 // Actual scan phase:
6991 r.1_8 = D.2042[_20];
6993 The "omp simd array" variable D.2042 holds the privatized copy used
6994 inside the loop, and D.2043 is another one that holds copies of
6995 the current original list item. The separate GOMP_SIMD_LANE ifn
6996 kinds are there in order to allow optimizing the initializer store
6997 and combiner sequence, e.g. if it is originally some C++-ish
6998 user-defined reduction, while still allowing the vectorizer to
6999 pattern recognize it and turn it into the appropriate vectorized scan.
7001 For exclusive scan, this is slightly different:
7002 #pragma omp simd reduction(inscan,+:r)
7003 for (...)
7005 use (r);
7006 #pragma omp scan exclusive (r)
7007 r += something ();
7009 shall have body with:
7010 // Initialization for input phase, store the reduction initializer:
7011 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7012 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7013 D.2042[_21] = 0;
7014 // Actual input phase:
7016 r.0_5 = D.2042[_20];
7017 _6 = _4 + r.0_5;
7018 D.2042[_20] = _6;
7019 // Initialization for scan phase:
7020 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7021 _26 = D.2043[_25];
7022 D.2044[_25] = _26;
7023 _27 = D.2042[_25];
7024 _28 = _26 + _27;
7025 D.2043[_25] = _28;
7026 // Actual scan phase:
7028 r.1_8 = D.2044[_20];
7029 ... */
7031 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7033 /* Match the D.2042[_21] = 0; store above. Just require that
7034 it is a constant or external definition store. */
7035 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7037 fail_init:
7038 if (dump_enabled_p ())
7039 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7040 "unsupported OpenMP scan initializer store.\n");
7041 return false;
7044 if (! loop_vinfo->scan_map)
7045 loop_vinfo->scan_map = new hash_map<tree, tree>;
7046 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7047 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7048 if (cached)
7049 goto fail_init;
7050 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7052 /* These stores can be vectorized normally. */
7053 return true;
7056 if (rhs_dt != vect_internal_def)
7058 fail:
7059 if (dump_enabled_p ())
7060 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7061 "unsupported OpenMP scan combiner pattern.\n");
7062 return false;
7065 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7066 tree rhs = gimple_assign_rhs1 (stmt);
7067 if (TREE_CODE (rhs) != SSA_NAME)
7068 goto fail;
7070 gimple *other_store_stmt = NULL;
7071 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7072 bool inscan_var_store
7073 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7075 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7077 if (!inscan_var_store)
7079 use_operand_p use_p;
7080 imm_use_iterator iter;
7081 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7083 gimple *use_stmt = USE_STMT (use_p);
7084 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7085 continue;
7086 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7087 || !is_gimple_assign (use_stmt)
7088 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7089 || other_store_stmt
7090 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7091 goto fail;
7092 other_store_stmt = use_stmt;
7094 if (other_store_stmt == NULL)
7095 goto fail;
7096 rhs = gimple_assign_lhs (other_store_stmt);
7097 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7098 goto fail;
7101 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7103 use_operand_p use_p;
7104 imm_use_iterator iter;
7105 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7107 gimple *use_stmt = USE_STMT (use_p);
7108 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7109 continue;
7110 if (other_store_stmt)
7111 goto fail;
7112 other_store_stmt = use_stmt;
7115 else
7116 goto fail;
7118 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7119 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7120 || !is_gimple_assign (def_stmt)
7121 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7122 goto fail;
7124 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7125 /* For pointer addition, we should use the normal plus for the vector
7126 operation. */
7127 switch (code)
7129 case POINTER_PLUS_EXPR:
7130 code = PLUS_EXPR;
7131 break;
7132 case MULT_HIGHPART_EXPR:
7133 goto fail;
7134 default:
7135 break;
7137 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7138 goto fail;
7140 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7141 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7142 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7143 goto fail;
7145 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7146 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7147 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7148 || !gimple_assign_load_p (load1_stmt)
7149 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7150 || !gimple_assign_load_p (load2_stmt))
7151 goto fail;
7153 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7154 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7155 if (load1_stmt_info == NULL
7156 || load2_stmt_info == NULL
7157 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7158 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7159 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7160 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7161 goto fail;
7163 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7165 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7166 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7167 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7168 goto fail;
7169 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7170 tree lrhs;
7171 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7172 lrhs = rhs1;
7173 else
7174 lrhs = rhs2;
7175 use_operand_p use_p;
7176 imm_use_iterator iter;
7177 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7179 gimple *use_stmt = USE_STMT (use_p);
7180 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7181 continue;
7182 if (other_store_stmt)
7183 goto fail;
7184 other_store_stmt = use_stmt;
7188 if (other_store_stmt == NULL)
7189 goto fail;
7190 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7191 || !gimple_store_p (other_store_stmt))
7192 goto fail;
7194 stmt_vec_info other_store_stmt_info
7195 = loop_vinfo->lookup_stmt (other_store_stmt);
7196 if (other_store_stmt_info == NULL
7197 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7198 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7199 goto fail;
7201 gimple *stmt1 = stmt;
7202 gimple *stmt2 = other_store_stmt;
7203 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7204 std::swap (stmt1, stmt2);
7205 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7206 gimple_assign_rhs1 (load2_stmt)))
7208 std::swap (rhs1, rhs2);
7209 std::swap (load1_stmt, load2_stmt);
7210 std::swap (load1_stmt_info, load2_stmt_info);
7212 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7213 gimple_assign_rhs1 (load1_stmt)))
7214 goto fail;
7216 tree var3 = NULL_TREE;
7217 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7218 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7219 gimple_assign_rhs1 (load2_stmt)))
7220 goto fail;
7221 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7223 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7224 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7225 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7226 goto fail;
7227 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7228 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7229 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7230 || lookup_attribute ("omp simd inscan exclusive",
7231 DECL_ATTRIBUTES (var3)))
7232 goto fail;
7235 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7236 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7237 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7238 goto fail;
7240 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7241 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7242 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7243 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7244 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7245 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7246 goto fail;
7248 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7249 std::swap (var1, var2);
7251 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7253 if (!lookup_attribute ("omp simd inscan exclusive",
7254 DECL_ATTRIBUTES (var1)))
7255 goto fail;
7256 var1 = var3;
7259 if (loop_vinfo->scan_map == NULL)
7260 goto fail;
7261 tree *init = loop_vinfo->scan_map->get (var1);
7262 if (init == NULL)
7263 goto fail;
7265 /* The IL is as expected; now check if we can actually vectorize it.
7266 Inclusive scan:
7267 _26 = D.2043[_25];
7268 _27 = D.2042[_25];
7269 _28 = _26 + _27;
7270 D.2043[_25] = _28;
7271 D.2042[_25] = _28;
7272 should be vectorized as (where _40 is the vectorized rhs
7273 from the D.2042[_21] = 0; store):
7274 _30 = MEM <vector(8) int> [(int *)&D.2043];
7275 _31 = MEM <vector(8) int> [(int *)&D.2042];
7276 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7277 _33 = _31 + _32;
7278 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7279 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7280 _35 = _33 + _34;
7281 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7282 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7283 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7284 _37 = _35 + _36;
7285 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7286 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7287 _38 = _30 + _37;
7288 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7289 MEM <vector(8) int> [(int *)&D.2043] = _39;
7290 MEM <vector(8) int> [(int *)&D.2042] = _38;
7291 Exclusive scan:
7292 _26 = D.2043[_25];
7293 D.2044[_25] = _26;
7294 _27 = D.2042[_25];
7295 _28 = _26 + _27;
7296 D.2043[_25] = _28;
7297 should be vectorized as (where _40 is the vectorized rhs
7298 from the D.2042[_21] = 0; store):
7299 _30 = MEM <vector(8) int> [(int *)&D.2043];
7300 _31 = MEM <vector(8) int> [(int *)&D.2042];
7301 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7302 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7303 _34 = _32 + _33;
7304 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7305 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7306 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7307 _36 = _34 + _35;
7308 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7309 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7310 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7311 _38 = _36 + _37;
7312 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7313 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7314 _39 = _30 + _38;
7315 _50 = _31 + _39;
7316 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7317 MEM <vector(8) int> [(int *)&D.2044] = _39;
7318 MEM <vector(8) int> [(int *)&D.2042] = _51; */
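   /* Viewed more abstractly, the sequence above is the usual
      log2 (nunits) shift-and-add prefix sum.  A scalar sketch of the
      inclusive case, assuming nunits == 8 and ignoring the *init/_40
      handling:

	for (int shift = 1; shift < 8; shift <<= 1)
	  for (int k = 7; k >= shift; k--)
	    v[k] += v[k - shift];

      after which v[k] holds the sum of the original v[0] .. v[k].  The
      final VEC_PERM_EXPR then broadcasts the last lane (the running
      total) so that it can be stored back into D.2043 as the carry for
      the next chunk of iterations.  */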
7319 enum machine_mode vec_mode = TYPE_MODE (vectype);
7320 optab optab = optab_for_tree_code (code, vectype, optab_default);
7321 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7322 goto fail;
7324 int units_log2 = scan_store_can_perm_p (vectype, *init);
7325 if (units_log2 == -1)
7326 goto fail;
7328 return true;
7332 /* Function vectorizable_scan_store.
7334 Helper of vectorizable_store, with arguments like those of vectorizable_store.
7335 Handle only the transformation; checking is done in check_scan_store. */
7337 static bool
7338 vectorizable_scan_store (vec_info *vinfo,
7339 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7340 gimple **vec_stmt, int ncopies)
7342 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7343 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7344 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7345 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7347 if (dump_enabled_p ())
7348 dump_printf_loc (MSG_NOTE, vect_location,
7349 "transform scan store. ncopies = %d\n", ncopies);
7351 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7352 tree rhs = gimple_assign_rhs1 (stmt);
7353 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7355 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7356 bool inscan_var_store
7357 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7359 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7361 use_operand_p use_p;
7362 imm_use_iterator iter;
7363 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7365 gimple *use_stmt = USE_STMT (use_p);
7366 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7367 continue;
7368 rhs = gimple_assign_lhs (use_stmt);
7369 break;
7373 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7374 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7375 if (code == POINTER_PLUS_EXPR)
7376 code = PLUS_EXPR;
7377 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7378 && commutative_tree_code (code));
7379 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7380 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7381 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7382 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7383 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7384 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7385 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7386 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7387 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7388 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7389 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7391 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7393 std::swap (rhs1, rhs2);
7394 std::swap (var1, var2);
7395 std::swap (load1_dr_info, load2_dr_info);
7398 tree *init = loop_vinfo->scan_map->get (var1);
7399 gcc_assert (init);
7401 unsigned HOST_WIDE_INT nunits;
7402 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7403 gcc_unreachable ();
7404 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7405 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7406 gcc_assert (units_log2 > 0);
7407 auto_vec<tree, 16> perms;
7408 perms.quick_grow (units_log2 + 1);
7409 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
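  /* Illustrative only: with nunits == 8 (so units_log2 == 3, matching
     the example in the comment above) the loop below builds

       perms[0] = { 0, 8, 9, 10, 11, 12, 13, 14 }
       perms[1] = { 0, 1, 8,  9, 10, 11, 12, 13 }
       perms[2] = { 0, 1, 2,  3,  8,  9, 10, 11 }
       perms[3] = { 7, 7, 7,  7,  7,  7,  7,  7 }

     where an index K < 8 selects lane K of the first operand and
     8 + K selects lane K of the second operand; perms[3] is the final
     single-operand broadcast of the last lane.  */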
7410 for (int i = 0; i <= units_log2; ++i)
7412 unsigned HOST_WIDE_INT j, k;
7413 vec_perm_builder sel (nunits, nunits, 1);
7414 sel.quick_grow (nunits);
7415 if (i == units_log2)
7416 for (j = 0; j < nunits; ++j)
7417 sel[j] = nunits - 1;
7418 else
7420 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7421 sel[j] = j;
7422 for (k = 0; j < nunits; ++j, ++k)
7423 sel[j] = nunits + k;
7425 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7426 if (!use_whole_vector.is_empty ()
7427 && use_whole_vector[i] != scan_store_kind_perm)
7429 if (zero_vec == NULL_TREE)
7430 zero_vec = build_zero_cst (vectype);
7431 if (masktype == NULL_TREE
7432 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7433 masktype = truth_type_for (vectype);
7434 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7436 else
7437 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7440 tree vec_oprnd1 = NULL_TREE;
7441 tree vec_oprnd2 = NULL_TREE;
7442 tree vec_oprnd3 = NULL_TREE;
7443 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7444 tree dataref_offset = build_int_cst (ref_type, 0);
7445 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7446 vectype, VMAT_CONTIGUOUS);
7447 tree ldataref_ptr = NULL_TREE;
7448 tree orig = NULL_TREE;
7449 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7450 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7451 auto_vec<tree> vec_oprnds1;
7452 auto_vec<tree> vec_oprnds2;
7453 auto_vec<tree> vec_oprnds3;
7454 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7455 *init, &vec_oprnds1,
7456 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7457 rhs2, &vec_oprnds3);
7458 for (int j = 0; j < ncopies; j++)
7460 vec_oprnd1 = vec_oprnds1[j];
7461 if (ldataref_ptr == NULL)
7462 vec_oprnd2 = vec_oprnds2[j];
7463 vec_oprnd3 = vec_oprnds3[j];
7464 if (j == 0)
7465 orig = vec_oprnd3;
7466 else if (!inscan_var_store)
7467 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7469 if (ldataref_ptr)
7471 vec_oprnd2 = make_ssa_name (vectype);
7472 tree data_ref = fold_build2 (MEM_REF, vectype,
7473 unshare_expr (ldataref_ptr),
7474 dataref_offset);
7475 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7476 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7477 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7478 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7479 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7482 tree v = vec_oprnd2;
7483 for (int i = 0; i < units_log2; ++i)
7485 tree new_temp = make_ssa_name (vectype);
7486 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7487 (zero_vec
7488 && (use_whole_vector[i]
7489 != scan_store_kind_perm))
7490 ? zero_vec : vec_oprnd1, v,
7491 perms[i]);
7492 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7493 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7494 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7496 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7498 /* The whole-vector shift shifted in zeros, but if *init
7499 is not initializer_zerop, we need to replace those elements
7500 with elements from vec_oprnd1. */
7501 tree_vector_builder vb (masktype, nunits, 1);
7502 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7503 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7504 ? boolean_false_node : boolean_true_node);
7506 tree new_temp2 = make_ssa_name (vectype);
7507 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7508 new_temp, vec_oprnd1);
7509 vect_finish_stmt_generation (vinfo, stmt_info,
7510 g, gsi);
7511 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7512 new_temp = new_temp2;
7515 /* For exclusive scan, perform the perms[i] permutation once
7516 more. */
7517 if (i == 0
7518 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7519 && v == vec_oprnd2)
7521 v = new_temp;
7522 --i;
7523 continue;
7526 tree new_temp2 = make_ssa_name (vectype);
7527 g = gimple_build_assign (new_temp2, code, v, new_temp);
7528 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7529 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7531 v = new_temp2;
7534 tree new_temp = make_ssa_name (vectype);
7535 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7536 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7537 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7539 tree last_perm_arg = new_temp;
7540 /* For exclusive scan, new_temp computed above is the exclusive scan
7541 prefix sum. Turn it into an inclusive prefix sum for the broadcast
7542 of the last element into orig. */
7543 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7545 last_perm_arg = make_ssa_name (vectype);
7546 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7547 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7548 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7551 orig = make_ssa_name (vectype);
7552 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7553 last_perm_arg, perms[units_log2]);
7554 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7555 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7557 if (!inscan_var_store)
7559 tree data_ref = fold_build2 (MEM_REF, vectype,
7560 unshare_expr (dataref_ptr),
7561 dataref_offset);
7562 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7563 g = gimple_build_assign (data_ref, new_temp);
7564 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7565 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7569 if (inscan_var_store)
7570 for (int j = 0; j < ncopies; j++)
7572 if (j != 0)
7573 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7575 tree data_ref = fold_build2 (MEM_REF, vectype,
7576 unshare_expr (dataref_ptr),
7577 dataref_offset);
7578 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7579 gimple *g = gimple_build_assign (data_ref, orig);
7580 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7581 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7583 return true;
7587 /* Function vectorizable_store.
7589 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7590 that can be vectorized.
7591 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7592 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7593 Return true if STMT_INFO is vectorizable in this way. */
7595 static bool
7596 vectorizable_store (vec_info *vinfo,
7597 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7598 gimple **vec_stmt, slp_tree slp_node,
7599 stmt_vector_for_cost *cost_vec)
7601 tree data_ref;
7602 tree op;
7603 tree vec_oprnd = NULL_TREE;
7604 tree elem_type;
7605 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7606 class loop *loop = NULL;
7607 machine_mode vec_mode;
7608 tree dummy;
7609 enum vect_def_type rhs_dt = vect_unknown_def_type;
7610 enum vect_def_type mask_dt = vect_unknown_def_type;
7611 tree dataref_ptr = NULL_TREE;
7612 tree dataref_offset = NULL_TREE;
7613 gimple *ptr_incr = NULL;
7614 int ncopies;
7615 int j;
7616 stmt_vec_info first_stmt_info;
7617 bool grouped_store;
7618 unsigned int group_size, i;
7619 vec<tree> oprnds = vNULL;
7620 vec<tree> result_chain = vNULL;
7621 vec<tree> vec_oprnds = vNULL;
7622 bool slp = (slp_node != NULL);
7623 unsigned int vec_num;
7624 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7625 tree aggr_type;
7626 gather_scatter_info gs_info;
7627 poly_uint64 vf;
7628 vec_load_store_type vls_type;
7629 tree ref_type;
7631 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7632 return false;
7634 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7635 && ! vec_stmt)
7636 return false;
7638 /* Is vectorizable store? */
7640 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7641 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7643 tree scalar_dest = gimple_assign_lhs (assign);
7644 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7645 && is_pattern_stmt_p (stmt_info))
7646 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7647 if (TREE_CODE (scalar_dest) != ARRAY_REF
7648 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7649 && TREE_CODE (scalar_dest) != INDIRECT_REF
7650 && TREE_CODE (scalar_dest) != COMPONENT_REF
7651 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7652 && TREE_CODE (scalar_dest) != REALPART_EXPR
7653 && TREE_CODE (scalar_dest) != MEM_REF)
7654 return false;
7656 else
7658 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7659 if (!call || !gimple_call_internal_p (call))
7660 return false;
7662 internal_fn ifn = gimple_call_internal_fn (call);
7663 if (!internal_store_fn_p (ifn))
7664 return false;
7666 if (slp_node != NULL)
7668 if (dump_enabled_p ())
7669 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7670 "SLP of masked stores not supported.\n");
7671 return false;
7674 int mask_index = internal_fn_mask_index (ifn);
7675 if (mask_index >= 0
7676 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7677 &mask, NULL, &mask_dt, &mask_vectype))
7678 return false;
7681 op = vect_get_store_rhs (stmt_info);
7683 /* Cannot have hybrid store SLP -- that would mean storing to the
7684 same location twice. */
7685 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7687 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7688 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7690 if (loop_vinfo)
7692 loop = LOOP_VINFO_LOOP (loop_vinfo);
7693 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7695 else
7696 vf = 1;
7698 /* Multiple types in SLP are handled by creating the appropriate number of
7699 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7700 case of SLP. */
7701 if (slp)
7702 ncopies = 1;
7703 else
7704 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7706 gcc_assert (ncopies >= 1);
7708 /* FORNOW. This restriction should be relaxed. */
7709 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7711 if (dump_enabled_p ())
7712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7713 "multiple types in nested loop.\n");
7714 return false;
7717 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7718 op, &rhs_dt, &rhs_vectype, &vls_type))
7719 return false;
7721 elem_type = TREE_TYPE (vectype);
7722 vec_mode = TYPE_MODE (vectype);
7724 if (!STMT_VINFO_DATA_REF (stmt_info))
7725 return false;
7727 vect_memory_access_type memory_access_type;
7728 enum dr_alignment_support alignment_support_scheme;
7729 int misalignment;
7730 poly_int64 poffset;
7731 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7732 ncopies, &memory_access_type, &poffset,
7733 &alignment_support_scheme, &misalignment, &gs_info))
7734 return false;
7736 if (mask)
7738 if (memory_access_type == VMAT_CONTIGUOUS)
7740 if (!VECTOR_MODE_P (vec_mode)
7741 || !can_vec_mask_load_store_p (vec_mode,
7742 TYPE_MODE (mask_vectype), false))
7743 return false;
7745 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7746 && (memory_access_type != VMAT_GATHER_SCATTER
7747 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7749 if (dump_enabled_p ())
7750 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7751 "unsupported access type for masked store.\n");
7752 return false;
7755 else
7757 /* FORNOW. In some cases we can vectorize even if the data type is not
7758 supported (e.g. array initialization with 0). */
7759 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7760 return false;
7763 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7764 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7765 && memory_access_type != VMAT_GATHER_SCATTER
7766 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7767 if (grouped_store)
7769 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7770 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7771 group_size = DR_GROUP_SIZE (first_stmt_info);
7773 else
7775 first_stmt_info = stmt_info;
7776 first_dr_info = dr_info;
7777 group_size = vec_num = 1;
7780 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7782 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7783 memory_access_type))
7784 return false;
7787 if (!vec_stmt) /* transformation not required. */
7789 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7791 if (loop_vinfo
7792 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7793 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
7794 vls_type, group_size,
7795 memory_access_type, &gs_info,
7796 mask);
7798 if (slp_node
7799 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7800 vectype))
7802 if (dump_enabled_p ())
7803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7804 "incompatible vector types for invariants\n");
7805 return false;
7808 if (dump_enabled_p ()
7809 && memory_access_type != VMAT_ELEMENTWISE
7810 && memory_access_type != VMAT_GATHER_SCATTER
7811 && alignment_support_scheme != dr_aligned)
7812 dump_printf_loc (MSG_NOTE, vect_location,
7813 "Vectorizing an unaligned access.\n");
7815 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7816 vect_model_store_cost (vinfo, stmt_info, ncopies,
7817 memory_access_type, alignment_support_scheme,
7818 misalignment, vls_type, slp_node, cost_vec);
7819 return true;
7821 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7823 /* Transform. */
7825 ensure_base_align (dr_info);
7827 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7829 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7830 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7831 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7832 tree ptr, var, scale, vec_mask;
7833 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7834 tree mask_halfvectype = mask_vectype;
7835 edge pe = loop_preheader_edge (loop);
7836 gimple_seq seq;
7837 basic_block new_bb;
7838 enum { NARROW, NONE, WIDEN } modifier;
7839 poly_uint64 scatter_off_nunits
7840 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7842 if (known_eq (nunits, scatter_off_nunits))
7843 modifier = NONE;
7844 else if (known_eq (nunits * 2, scatter_off_nunits))
7846 modifier = WIDEN;
7848 /* Currently gathers and scatters are only supported for
7849 fixed-length vectors. */
7850 unsigned int count = scatter_off_nunits.to_constant ();
7851 vec_perm_builder sel (count, count, 1);
7852 for (i = 0; i < (unsigned int) count; ++i)
7853 sel.quick_push (i | (count / 2));
7855 vec_perm_indices indices (sel, 1, count);
7856 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7857 indices);
7858 gcc_assert (perm_mask != NULL_TREE);
7860 else if (known_eq (nunits, scatter_off_nunits * 2))
7862 modifier = NARROW;
7864 /* Currently gathers and scatters are only supported for
7865 fixed-length vectors. */
7866 unsigned int count = nunits.to_constant ();
7867 vec_perm_builder sel (count, count, 1);
7868 for (i = 0; i < (unsigned int) count; ++i)
7869 sel.quick_push (i | (count / 2));
7871 vec_perm_indices indices (sel, 2, count);
7872 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7873 gcc_assert (perm_mask != NULL_TREE);
7874 ncopies *= 2;
7876 if (mask)
7877 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7879 else
7880 gcc_unreachable ();
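  /* Illustrative only, assuming 8 lanes: the selector built above is
     { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. it moves the upper half of the
     vector into the lower lanes.  It is applied to the offset vector on
     odd copies in the WIDEN case and to the rhs vector on odd copies in
     the NARROW case (see the loop below).  */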
7882 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7883 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7884 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7885 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7886 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7887 scaletype = TREE_VALUE (arglist);
7889 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7890 && TREE_CODE (rettype) == VOID_TYPE);
7892 ptr = fold_convert (ptrtype, gs_info.base);
7893 if (!is_gimple_min_invariant (ptr))
7895 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7896 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7897 gcc_assert (!new_bb);
7900 if (mask == NULL_TREE)
7902 mask_arg = build_int_cst (masktype, -1);
7903 mask_arg = vect_init_vector (vinfo, stmt_info,
7904 mask_arg, masktype, NULL);
7907 scale = build_int_cst (scaletype, gs_info.scale);
7909 auto_vec<tree> vec_oprnds0;
7910 auto_vec<tree> vec_oprnds1;
7911 auto_vec<tree> vec_masks;
7912 if (mask)
7914 tree mask_vectype = truth_type_for (vectype);
7915 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7916 modifier == NARROW
7917 ? ncopies / 2 : ncopies,
7918 mask, &vec_masks, mask_vectype);
7920 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7921 modifier == WIDEN
7922 ? ncopies / 2 : ncopies,
7923 gs_info.offset, &vec_oprnds0);
7924 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7925 modifier == NARROW
7926 ? ncopies / 2 : ncopies,
7927 op, &vec_oprnds1);
7928 for (j = 0; j < ncopies; ++j)
7930 if (modifier == WIDEN)
7932 if (j & 1)
7933 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7934 perm_mask, stmt_info, gsi);
7935 else
7936 op = vec_oprnd0 = vec_oprnds0[j / 2];
7937 src = vec_oprnd1 = vec_oprnds1[j];
7938 if (mask)
7939 mask_op = vec_mask = vec_masks[j];
7941 else if (modifier == NARROW)
7943 if (j & 1)
7944 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7945 perm_mask, stmt_info, gsi);
7946 else
7947 src = vec_oprnd1 = vec_oprnds1[j / 2];
7948 op = vec_oprnd0 = vec_oprnds0[j];
7949 if (mask)
7950 mask_op = vec_mask = vec_masks[j / 2];
7952 else
7954 op = vec_oprnd0 = vec_oprnds0[j];
7955 src = vec_oprnd1 = vec_oprnds1[j];
7956 if (mask)
7957 mask_op = vec_mask = vec_masks[j];
7960 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7962 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7963 TYPE_VECTOR_SUBPARTS (srctype)));
7964 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7965 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7966 gassign *new_stmt
7967 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7968 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7969 src = var;
7972 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7974 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7975 TYPE_VECTOR_SUBPARTS (idxtype)));
7976 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7977 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7978 gassign *new_stmt
7979 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7980 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7981 op = var;
7984 if (mask)
7986 tree utype;
7987 mask_arg = mask_op;
7988 if (modifier == NARROW)
7990 var = vect_get_new_ssa_name (mask_halfvectype,
7991 vect_simple_var);
7992 gassign *new_stmt
7993 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7994 : VEC_UNPACK_LO_EXPR,
7995 mask_op);
7996 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7997 mask_arg = var;
7999 tree optype = TREE_TYPE (mask_arg);
8000 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
8001 utype = masktype;
8002 else
8003 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
8004 var = vect_get_new_ssa_name (utype, vect_scalar_var);
8005 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
8006 gassign *new_stmt
8007 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
8008 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8009 mask_arg = var;
8010 if (!useless_type_conversion_p (masktype, utype))
8012 gcc_assert (TYPE_PRECISION (utype)
8013 <= TYPE_PRECISION (masktype));
8014 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
8015 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
8016 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8017 mask_arg = var;
8021 gcall *new_stmt
8022 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
8023 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8025 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8027 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8028 return true;
8030 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8031 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8033 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8034 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
8036 if (grouped_store)
8038 /* FORNOW */
8039 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8041 /* We vectorize all the stmts of the interleaving group when we
8042 reach the last stmt in the group. */
8043 if (DR_GROUP_STORE_COUNT (first_stmt_info)
8044 < DR_GROUP_SIZE (first_stmt_info)
8045 && !slp)
8047 *vec_stmt = NULL;
8048 return true;
8051 if (slp)
8053 grouped_store = false;
8054 /* VEC_NUM is the number of vect stmts to be created for this
8055 group. */
8056 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8057 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8058 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8059 == first_stmt_info);
8060 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8061 op = vect_get_store_rhs (first_stmt_info);
8063 else
8064 /* VEC_NUM is the number of vect stmts to be created for this
8065 group. */
8066 vec_num = group_size;
8068 ref_type = get_group_alias_ptr_type (first_stmt_info);
8070 else
8071 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8073 if (dump_enabled_p ())
8074 dump_printf_loc (MSG_NOTE, vect_location,
8075 "transform store. ncopies = %d\n", ncopies);
8077 if (memory_access_type == VMAT_ELEMENTWISE
8078 || memory_access_type == VMAT_STRIDED_SLP)
8080 gimple_stmt_iterator incr_gsi;
8081 bool insert_after;
8082 gimple *incr;
8083 tree offvar;
8084 tree ivstep;
8085 tree running_off;
8086 tree stride_base, stride_step, alias_off;
8087 tree vec_oprnd;
8088 tree dr_offset;
8089 unsigned int g;
8090 /* Checked by get_load_store_type. */
8091 unsigned int const_nunits = nunits.to_constant ();
8093 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8094 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8096 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8097 stride_base
8098 = fold_build_pointer_plus
8099 (DR_BASE_ADDRESS (first_dr_info->dr),
8100 size_binop (PLUS_EXPR,
8101 convert_to_ptrofftype (dr_offset),
8102 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8103 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8105 /* For a store with loop-invariant (but other than power-of-2)
8106 stride (i.e. not a grouped access) like so:
8108 for (i = 0; i < n; i += stride)
8109 array[i] = ...;
8111 we generate a new induction variable and new stores from
8112 the components of the (vectorized) rhs:
8114 for (j = 0; ; j += VF*stride)
8115 vectemp = ...;
8116 tmp1 = vectemp[0];
8117 array[j] = tmp1;
8118 tmp2 = vectemp[1];
8119 array[j + stride] = tmp2;
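	 and so on for the remaining lanes.

     For illustration, with a hypothetical stride of 3 and VF of 4, the
     four lanes of vectemp end up in array[j], array[j + 3], array[j + 6]
     and array[j + 9], and j advances by VF * stride == 12 each vector
     iteration.  */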
8123 unsigned nstores = const_nunits;
8124 unsigned lnel = 1;
8125 tree ltype = elem_type;
8126 tree lvectype = vectype;
8127 if (slp)
8129 if (group_size < const_nunits
8130 && const_nunits % group_size == 0)
8132 nstores = const_nunits / group_size;
8133 lnel = group_size;
8134 ltype = build_vector_type (elem_type, group_size);
8135 lvectype = vectype;
8137 /* First check whether the vec_extract optab can extract the vector
8138 elts directly; if it cannot, try the integer punning below. */
8139 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8140 machine_mode vmode;
8141 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8142 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8143 group_size).exists (&vmode)
8144 || (convert_optab_handler (vec_extract_optab,
8145 TYPE_MODE (vectype), vmode)
8146 == CODE_FOR_nothing))
8148 /* Try to avoid emitting an extract of vector elements
8149 by performing the extracts using an integer type of the
8150 same size, extracting from a vector of those and then
8151 re-interpreting it as the original vector type if
8152 supported. */
8153 unsigned lsize
8154 = group_size * GET_MODE_BITSIZE (elmode);
8155 unsigned int lnunits = const_nunits / group_size;
8156 /* If we can't construct such a vector fall back to
8157 element extracts from the original vector type and
8158 element size stores. */
8159 if (int_mode_for_size (lsize, 0).exists (&elmode)
8160 && VECTOR_MODE_P (TYPE_MODE (vectype))
8161 && related_vector_mode (TYPE_MODE (vectype), elmode,
8162 lnunits).exists (&vmode)
8163 && (convert_optab_handler (vec_extract_optab,
8164 vmode, elmode)
8165 != CODE_FOR_nothing))
8167 nstores = lnunits;
8168 lnel = group_size;
8169 ltype = build_nonstandard_integer_type (lsize, 1);
8170 lvectype = build_vector_type (ltype, nstores);
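		  /* Illustrative only (hypothetical types, and assuming the
		     direct extraction above is not available): for a group of
		     two floats stored from a V8SF rhs, lsize is 64 and
		     lnunits is 4, so ltype becomes an unsigned 64-bit integer
		     type and lvectype a 4-element vector of it; each
		     extracted 64-bit lane is then stored as one two-float
		     group.  */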
8172 /* Else fall back to vector extraction anyway.
8173 Fewer stores are more important than avoiding spilling
8174 of the vector we extract from. Compared to the
8175 construction case in vectorizable_load, no store-forwarding
8176 issue exists here for reasonable archs. */
8179 else if (group_size >= const_nunits
8180 && group_size % const_nunits == 0)
8182 nstores = 1;
8183 lnel = const_nunits;
8184 ltype = vectype;
8185 lvectype = vectype;
8187 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8188 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8191 ivstep = stride_step;
8192 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8193 build_int_cst (TREE_TYPE (ivstep), vf));
8195 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8197 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8198 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8199 create_iv (stride_base, ivstep, NULL,
8200 loop, &incr_gsi, insert_after,
8201 &offvar, NULL);
8202 incr = gsi_stmt (incr_gsi);
8204 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8206 alias_off = build_int_cst (ref_type, 0);
8207 stmt_vec_info next_stmt_info = first_stmt_info;
8208 for (g = 0; g < group_size; g++)
8210 running_off = offvar;
8211 if (g)
8213 tree size = TYPE_SIZE_UNIT (ltype);
8214 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
8215 size);
8216 tree newoff = copy_ssa_name (running_off, NULL);
8217 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8218 running_off, pos);
8219 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8220 running_off = newoff;
8222 if (!slp)
8223 op = vect_get_store_rhs (next_stmt_info);
8224 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
8225 op, &vec_oprnds);
8226 unsigned int group_el = 0;
8227 unsigned HOST_WIDE_INT
8228 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8229 for (j = 0; j < ncopies; j++)
8231 vec_oprnd = vec_oprnds[j];
8232 /* Pun the vector to extract from if necessary. */
8233 if (lvectype != vectype)
8235 tree tem = make_ssa_name (lvectype);
8236 gimple *pun
8237 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8238 lvectype, vec_oprnd));
8239 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8240 vec_oprnd = tem;
8242 for (i = 0; i < nstores; i++)
8244 tree newref, newoff;
8245 gimple *incr, *assign;
8246 tree size = TYPE_SIZE (ltype);
8247 /* Extract the i'th component. */
8248 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8249 bitsize_int (i), size);
8250 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8251 size, pos);
8253 elem = force_gimple_operand_gsi (gsi, elem, true,
8254 NULL_TREE, true,
8255 GSI_SAME_STMT);
8257 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8258 group_el * elsz);
8259 newref = build2 (MEM_REF, ltype,
8260 running_off, this_off);
8261 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8263 /* And store it to *running_off. */
8264 assign = gimple_build_assign (newref, elem);
8265 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8267 group_el += lnel;
8268 if (! slp
8269 || group_el == group_size)
8271 newoff = copy_ssa_name (running_off, NULL);
8272 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8273 running_off, stride_step);
8274 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8276 running_off = newoff;
8277 group_el = 0;
8279 if (g == group_size - 1
8280 && !slp)
8282 if (j == 0 && i == 0)
8283 *vec_stmt = assign;
8284 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8288 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8289 vec_oprnds.release ();
8290 if (slp)
8291 break;
8294 return true;
8297 auto_vec<tree> dr_chain (group_size);
8298 oprnds.create (group_size);
8300 gcc_assert (alignment_support_scheme);
8301 vec_loop_masks *loop_masks
8302 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8303 ? &LOOP_VINFO_MASKS (loop_vinfo)
8304 : NULL);
8305 vec_loop_lens *loop_lens
8306 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8307 ? &LOOP_VINFO_LENS (loop_vinfo)
8308 : NULL);
8310 /* We shouldn't use the length-based approach if the loop is fully masked. */
8311 gcc_assert (!loop_lens || !loop_masks);
8313 /* Targets with store-lane instructions must not require explicit
8314 realignment. vect_supportable_dr_alignment always returns either
8315 dr_aligned or dr_unaligned_supported for masked operations. */
8316 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8317 && !mask
8318 && !loop_masks)
8319 || alignment_support_scheme == dr_aligned
8320 || alignment_support_scheme == dr_unaligned_supported);
8322 tree offset = NULL_TREE;
8323 if (!known_eq (poffset, 0))
8324 offset = size_int (poffset);
8326 tree bump;
8327 tree vec_offset = NULL_TREE;
8328 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8330 aggr_type = NULL_TREE;
8331 bump = NULL_TREE;
8333 else if (memory_access_type == VMAT_GATHER_SCATTER)
8335 aggr_type = elem_type;
8336 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8337 &bump, &vec_offset);
8339 else
8341 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8342 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8343 else
8344 aggr_type = vectype;
8345 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8346 memory_access_type);
8349 if (mask)
8350 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8352 /* In case the vectorization factor (VF) is bigger than the number
8353 of elements that we can fit in a vectype (nunits), we have to generate
8354 more than one vector stmt, i.e. we need to "unroll" the
8355 vector stmt by a factor VF/nunits. */
8357 /* In case of interleaving (non-unit grouped access):
8359 S1: &base + 2 = x2
8360 S2: &base = x0
8361 S3: &base + 1 = x1
8362 S4: &base + 3 = x3
8364 We create vectorized stores starting from the base address (the access of
8365 the first stmt in the chain, S2 in the above example), when the last store stmt
8366 of the chain (S4) is reached:
8368 VS1: &base = vx2
8369 VS2: &base + vec_size*1 = vx0
8370 VS3: &base + vec_size*2 = vx1
8371 VS4: &base + vec_size*3 = vx3
8373 Then permutation statements are generated:
8375 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8376 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8379 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8380 (the order of the data-refs in the output of vect_permute_store_chain
8381 corresponds to the order of scalar stmts in the interleaving chain - see
8382 the documentation of vect_permute_store_chain()).
8384 In case of both multiple types and interleaving, above vector stores and
8385 permutation stmts are created for every copy. The result vector stmts are
8386 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8387 STMT_VINFO_RELATED_STMT for the next copies.
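     For illustration (hypothetical lane values): with vx0 = { a0, ..., a7 }
     and vx3 = { b0, ..., b7 }, VS5 above produces
     { a0, b0, a1, b1, a2, b2, a3, b3 } and VS6 produces
     { a4, b4, a5, b5, a6, b6, a7, b7 }.  */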
8390 auto_vec<tree> vec_masks;
8391 tree vec_mask = NULL;
8392 auto_vec<tree> vec_offsets;
8393 auto_vec<vec<tree> > gvec_oprnds;
8394 gvec_oprnds.safe_grow_cleared (group_size, true);
8395 for (j = 0; j < ncopies; j++)
8397 gimple *new_stmt;
8398 if (j == 0)
8400 if (slp)
8402 /* Get vectorized arguments for SLP_NODE. */
8403 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8404 op, &vec_oprnds);
8405 vec_oprnd = vec_oprnds[0];
8407 else
8409 /* For interleaved stores we collect vectorized defs for all the
8410 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8411 used as an input to vect_permute_store_chain().
8413 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8414 and OPRNDS are of size 1. */
8415 stmt_vec_info next_stmt_info = first_stmt_info;
8416 for (i = 0; i < group_size; i++)
8418 /* Since gaps are not supported for interleaved stores,
8419 DR_GROUP_SIZE is the exact number of stmts in the chain.
8420 Therefore, NEXT_STMT_INFO can't be NULL_TREE. If there
8421 is no interleaving, DR_GROUP_SIZE is 1,
8422 and only one iteration of the loop will be executed. */
8423 op = vect_get_store_rhs (next_stmt_info);
8424 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8425 ncopies, op, &gvec_oprnds[i]);
8426 vec_oprnd = gvec_oprnds[i][0];
8427 dr_chain.quick_push (gvec_oprnds[i][0]);
8428 oprnds.quick_push (gvec_oprnds[i][0]);
8429 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8431 if (mask)
8433 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8434 mask, &vec_masks, mask_vectype);
8435 vec_mask = vec_masks[0];
8439 /* We should have caught mismatched types earlier. */
8440 gcc_assert (useless_type_conversion_p (vectype,
8441 TREE_TYPE (vec_oprnd)));
8442 bool simd_lane_access_p
8443 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8444 if (simd_lane_access_p
8445 && !loop_masks
8446 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8447 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8448 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8449 && integer_zerop (DR_INIT (first_dr_info->dr))
8450 && alias_sets_conflict_p (get_alias_set (aggr_type),
8451 get_alias_set (TREE_TYPE (ref_type))))
8453 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8454 dataref_offset = build_int_cst (ref_type, 0);
8456 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8458 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8459 slp_node, &gs_info, &dataref_ptr,
8460 &vec_offsets);
8461 vec_offset = vec_offsets[0];
8463 else
8464 dataref_ptr
8465 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8466 simd_lane_access_p ? loop : NULL,
8467 offset, &dummy, gsi, &ptr_incr,
8468 simd_lane_access_p, bump);
8470 else
8472 /* For interleaved stores we created vectorized defs for all the
8473 defs stored in OPRNDS in the previous iteration (previous copy).
8474 DR_CHAIN is then used as an input to vect_permute_store_chain().
8475 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8476 OPRNDS are of size 1. */
8477 for (i = 0; i < group_size; i++)
8479 vec_oprnd = gvec_oprnds[i][j];
8480 dr_chain[i] = gvec_oprnds[i][j];
8481 oprnds[i] = gvec_oprnds[i][j];
8483 if (mask)
8484 vec_mask = vec_masks[j];
8485 if (dataref_offset)
8486 dataref_offset
8487 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8488 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8489 vec_offset = vec_offsets[j];
8490 else
8491 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8492 stmt_info, bump);
8495 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8497 tree vec_array;
8499 /* Get an array into which we can store the individual vectors. */
8500 vec_array = create_vector_array (vectype, vec_num);
8502 /* Invalidate the current contents of VEC_ARRAY. This should
8503 become an RTL clobber too, which prevents the vector registers
8504 from being upward-exposed. */
8505 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8507 /* Store the individual vectors into the array. */
8508 for (i = 0; i < vec_num; i++)
8510 vec_oprnd = dr_chain[i];
8511 write_vector_array (vinfo, stmt_info,
8512 gsi, vec_oprnd, vec_array, i);
8515 tree final_mask = NULL;
8516 if (loop_masks)
8517 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8518 vectype, j);
8519 if (vec_mask)
8520 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8521 final_mask, vec_mask, gsi);
8523 gcall *call;
8524 if (final_mask)
8526 /* Emit:
8527 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8528 VEC_ARRAY). */
8529 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8530 tree alias_ptr = build_int_cst (ref_type, align);
8531 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8532 dataref_ptr, alias_ptr,
8533 final_mask, vec_array);
8535 else
8537 /* Emit:
8538 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8539 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8540 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8541 vec_array);
8542 gimple_call_set_lhs (call, data_ref);
8544 gimple_call_set_nothrow (call, true);
8545 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8546 new_stmt = call;
8548 /* Record that VEC_ARRAY is now dead. */
8549 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8551 else
8553 new_stmt = NULL;
8554 if (grouped_store)
8556 if (j == 0)
8557 result_chain.create (group_size);
8558 /* Permute. */
8559 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8560 gsi, &result_chain);
8563 stmt_vec_info next_stmt_info = first_stmt_info;
8564 for (i = 0; i < vec_num; i++)
8566 unsigned misalign;
8567 unsigned HOST_WIDE_INT align;
8569 tree final_mask = NULL_TREE;
8570 if (loop_masks)
8571 final_mask = vect_get_loop_mask (gsi, loop_masks,
8572 vec_num * ncopies,
8573 vectype, vec_num * j + i);
8574 if (vec_mask)
8575 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8576 final_mask, vec_mask, gsi);
8578 if (memory_access_type == VMAT_GATHER_SCATTER)
8580 tree scale = size_int (gs_info.scale);
8581 gcall *call;
8582 if (final_mask)
8583 call = gimple_build_call_internal
8584 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8585 scale, vec_oprnd, final_mask);
8586 else
8587 call = gimple_build_call_internal
8588 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8589 scale, vec_oprnd);
8590 gimple_call_set_nothrow (call, true);
8591 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8592 new_stmt = call;
8593 break;
8596 if (i > 0)
8597 /* Bump the vector pointer. */
8598 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8599 gsi, stmt_info, bump);
8601 if (slp)
8602 vec_oprnd = vec_oprnds[i];
8603 else if (grouped_store)
8604 /* For grouped stores vectorized defs are interleaved in
8605 vect_permute_store_chain(). */
8606 vec_oprnd = result_chain[i];
8608 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8609 if (alignment_support_scheme == dr_aligned)
8610 misalign = 0;
8611 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8613 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8614 misalign = 0;
8616 else
8617 misalign = misalignment;
8618 if (dataref_offset == NULL_TREE
8619 && TREE_CODE (dataref_ptr) == SSA_NAME)
8620 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8621 misalign);
8622 align = least_bit_hwi (misalign | align);
8624 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8626 tree perm_mask = perm_mask_for_reverse (vectype);
8627 tree perm_dest = vect_create_destination_var
8628 (vect_get_store_rhs (stmt_info), vectype);
8629 tree new_temp = make_ssa_name (perm_dest);
8631 /* Generate the permute statement. */
8632 gimple *perm_stmt
8633 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8634 vec_oprnd, perm_mask);
8635 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8637 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8638 vec_oprnd = new_temp;
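	      /* E.g. for a 4-lane vector the mask from
		 perm_mask_for_reverse is { 3, 2, 1, 0 }, so the lanes are
		 stored in reverse order to match the negative-step
		 access.  */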
8641 /* Arguments are ready. Create the new vector stmt. */
8642 if (final_mask)
8644 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8645 gcall *call
8646 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8647 dataref_ptr, ptr,
8648 final_mask, vec_oprnd);
8649 gimple_call_set_nothrow (call, true);
8650 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8651 new_stmt = call;
8653 else if (loop_lens)
8655 tree final_len
8656 = vect_get_loop_len (loop_vinfo, loop_lens,
8657 vec_num * ncopies, vec_num * j + i);
8658 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8659 machine_mode vmode = TYPE_MODE (vectype);
8660 opt_machine_mode new_ovmode
8661 = get_len_load_store_mode (vmode, false);
8662 machine_mode new_vmode = new_ovmode.require ();
8663 /* Need conversion if it's wrapped with VnQI. */
8664 if (vmode != new_vmode)
8666 tree new_vtype
8667 = build_vector_type_for_mode (unsigned_intQI_type_node,
8668 new_vmode);
8669 tree var
8670 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8671 vec_oprnd
8672 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8673 gassign *new_stmt
8674 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8675 vec_oprnd);
8676 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8677 gsi);
8678 vec_oprnd = var;
8681 signed char biasval =
8682 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8684 tree bias = build_int_cst (intQI_type_node, biasval);
8685 gcall *call
8686 = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
8687 ptr, final_len, vec_oprnd,
8688 bias);
8689 gimple_call_set_nothrow (call, true);
8690 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8691 new_stmt = call;
8693 else
8695 data_ref = fold_build2 (MEM_REF, vectype,
8696 dataref_ptr,
8697 dataref_offset
8698 ? dataref_offset
8699 : build_int_cst (ref_type, 0));
8700 if (alignment_support_scheme == dr_aligned)
8702 else
8703 TREE_TYPE (data_ref)
8704 = build_aligned_type (TREE_TYPE (data_ref),
8705 align * BITS_PER_UNIT);
8706 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8707 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8708 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8711 if (slp)
8712 continue;
8714 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8715 if (!next_stmt_info)
8716 break;
8719 if (!slp)
8721 if (j == 0)
8722 *vec_stmt = new_stmt;
8723 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8727 for (i = 0; i < group_size; ++i)
8729 vec<tree> oprndsi = gvec_oprnds[i];
8730 oprndsi.release ();
8732 oprnds.release ();
8733 result_chain.release ();
8734 vec_oprnds.release ();
8736 return true;
8739 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8740 VECTOR_CST mask. No checks are made that the target platform supports the
8741 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8742 vect_gen_perm_mask_checked. */
8744 tree
8745 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8747 tree mask_type;
8749 poly_uint64 nunits = sel.length ();
8750 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8752 mask_type = build_vector_type (ssizetype, nunits);
8753 return vec_perm_indices_to_tree (mask_type, sel);
8756 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8757 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8759 tree
8760 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8762 machine_mode vmode = TYPE_MODE (vectype);
8763 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
8764 return vect_gen_perm_mask_any (vectype, sel);
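/* A minimal usage sketch of the two helpers above (hypothetical caller;
   VECTYPE stands for some 4-lane vector type):

     vec_perm_builder sel (4, 4, 1);
     for (unsigned int i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   builds the lane-reversing selector { 3, 2, 1, 0 }; MASK is a
   VECTOR_CST suitable as the selector operand of a VEC_PERM_EXPR,
   provided the target supports the permutation (which the checked
   variant asserts).  */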
8767 /* Given vector variables X and Y that were generated for the scalar
8768 STMT_INFO, generate instructions to permute the vector elements of X and Y
8769 using permutation mask MASK_VEC, insert them at *GSI and return the
8770 permuted vector variable. */
8772 static tree
8773 permute_vec_elements (vec_info *vinfo,
8774 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8775 gimple_stmt_iterator *gsi)
8777 tree vectype = TREE_TYPE (x);
8778 tree perm_dest, data_ref;
8779 gimple *perm_stmt;
8781 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8782 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8783 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8784 else
8785 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8786 data_ref = make_ssa_name (perm_dest);
8788 /* Generate the permute statement. */
8789 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8790 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8792 return data_ref;
8795 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8796 inserting them on the loop's preheader edge. Returns true if we
8797 were successful in doing so (and thus STMT_INFO can then be moved),
8798 otherwise returns false. */
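/* For illustration of the above (hypothetical IL): if STMT_INFO is
   _3 = MEM[base_10] and base_10 is defined inside LOOP by
   base_10 = &a + _9 with _9 defined outside of LOOP, the definition of
   base_10 is moved to the preheader (so the load itself can then be
   hoisted by the caller); if base_10 instead depended on another
   statement defined inside LOOP, or were a PHI, we give up rather than
   recurse.  */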
8800 static bool
8801 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8803 ssa_op_iter i;
8804 tree op;
8805 bool any = false;
8807 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8809 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8810 if (!gimple_nop_p (def_stmt)
8811 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8813 /* Make sure we don't need to recurse. While we could do
8814 so in simple cases, for more complex use webs we don't
8815 have an easy way to preserve stmt order to fulfil
8816 dependencies within them. */
8817 tree op2;
8818 ssa_op_iter i2;
8819 if (gimple_code (def_stmt) == GIMPLE_PHI)
8820 return false;
8821 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8823 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8824 if (!gimple_nop_p (def_stmt2)
8825 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8826 return false;
8828 any = true;
8832 if (!any)
8833 return true;
8835 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8837 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8838 if (!gimple_nop_p (def_stmt)
8839 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8841 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8842 gsi_remove (&gsi, false);
8843 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8847 return true;
8850 /* vectorizable_load.
8852 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8853 that can be vectorized.
8854 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8855 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8856 Return true if STMT_INFO is vectorizable in this way. */
8858 static bool
8859 vectorizable_load (vec_info *vinfo,
8860 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8861 gimple **vec_stmt, slp_tree slp_node,
8862 stmt_vector_for_cost *cost_vec)
8864 tree scalar_dest;
8865 tree vec_dest = NULL;
8866 tree data_ref = NULL;
8867 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8868 class loop *loop = NULL;
8869 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8870 bool nested_in_vect_loop = false;
8871 tree elem_type;
8872 tree new_temp;
8873 machine_mode mode;
8874 tree dummy;
8875 tree dataref_ptr = NULL_TREE;
8876 tree dataref_offset = NULL_TREE;
8877 gimple *ptr_incr = NULL;
8878 int ncopies;
8879 int i, j;
8880 unsigned int group_size;
8881 poly_uint64 group_gap_adj;
8882 tree msq = NULL_TREE, lsq;
8883 tree realignment_token = NULL_TREE;
8884 gphi *phi = NULL;
8885 vec<tree> dr_chain = vNULL;
8886 bool grouped_load = false;
8887 stmt_vec_info first_stmt_info;
8888 stmt_vec_info first_stmt_info_for_drptr = NULL;
8889 bool compute_in_loop = false;
8890 class loop *at_loop;
8891 int vec_num;
8892 bool slp = (slp_node != NULL);
8893 bool slp_perm = false;
8894 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8895 poly_uint64 vf;
8896 tree aggr_type;
8897 gather_scatter_info gs_info;
8898 tree ref_type;
8899 enum vect_def_type mask_dt = vect_unknown_def_type;
8901 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8902 return false;
8904 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8905 && ! vec_stmt)
8906 return false;
8908 if (!STMT_VINFO_DATA_REF (stmt_info))
8909 return false;
8911 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8912 int mask_index = -1;
8913 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8915 scalar_dest = gimple_assign_lhs (assign);
8916 if (TREE_CODE (scalar_dest) != SSA_NAME)
8917 return false;
8919 tree_code code = gimple_assign_rhs_code (assign);
8920 if (code != ARRAY_REF
8921 && code != BIT_FIELD_REF
8922 && code != INDIRECT_REF
8923 && code != COMPONENT_REF
8924 && code != IMAGPART_EXPR
8925 && code != REALPART_EXPR
8926 && code != MEM_REF
8927 && TREE_CODE_CLASS (code) != tcc_declaration)
8928 return false;
8930 else
8932 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8933 if (!call || !gimple_call_internal_p (call))
8934 return false;
8936 internal_fn ifn = gimple_call_internal_fn (call);
8937 if (!internal_load_fn_p (ifn))
8938 return false;
8940 scalar_dest = gimple_call_lhs (call);
8941 if (!scalar_dest)
8942 return false;
8944 mask_index = internal_fn_mask_index (ifn);
8945 /* ??? For SLP the mask operand is always last. */
8946 if (mask_index >= 0 && slp_node)
8947 mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
8948 if (mask_index >= 0
8949 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8950 &mask, NULL, &mask_dt, &mask_vectype))
8951 return false;
8954 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8955 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8957 if (loop_vinfo)
8959 loop = LOOP_VINFO_LOOP (loop_vinfo);
8960 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8961 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8963 else
8964 vf = 1;
8966 /* Multiple types in SLP are handled by creating the appropriate number of
8967 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8968 case of SLP. */
8969 if (slp)
8970 ncopies = 1;
8971 else
8972 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8974 gcc_assert (ncopies >= 1);
8976 /* FORNOW. This restriction should be relaxed. */
8977 if (nested_in_vect_loop && ncopies > 1)
8979 if (dump_enabled_p ())
8980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8981 "multiple types in nested loop.\n");
8982 return false;
8985 /* Invalidate assumptions made by dependence analysis when vectorization
8986 on the unrolled body effectively re-orders stmts. */
8987 if (ncopies > 1
8988 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8989 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8990 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8992 if (dump_enabled_p ())
8993 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8994 "cannot perform implicit CSE when unrolling "
8995 "with negative dependence distance\n");
8996 return false;
8999 elem_type = TREE_TYPE (vectype);
9000 mode = TYPE_MODE (vectype);
9002 /* FORNOW. In some cases can vectorize even if data-type not supported
9003 (e.g. - data copies). */
9004 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
9006 if (dump_enabled_p ())
9007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9008 "Aligned load, but unsupported type.\n");
9009 return false;
9012 /* Check if the load is a part of an interleaving chain. */
9013 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9015 grouped_load = true;
9016 /* FORNOW */
9017 gcc_assert (!nested_in_vect_loop);
9018 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9020 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9021 group_size = DR_GROUP_SIZE (first_stmt_info);
9023 /* Refuse non-SLP vectorization of SLP-only groups. */
9024 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9026 if (dump_enabled_p ())
9027 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9028 "cannot vectorize load in non-SLP mode.\n");
9029 return false;
9032 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9034 slp_perm = true;
9036 if (!loop_vinfo)
9038 /* In BB vectorization we may not actually use a loaded vector
9039 accessing elements in excess of DR_GROUP_SIZE. */
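/* For example (illustrative values): with DR_GROUP_SIZE 3 and a 2-lane
   vector type, element index 2 can only be reached by a second vector
   load that also reads one element past the group; if the permutation
   uses that element (maxk >= 2 below), the trailing gap makes the load
   unsupported here.  */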
9040 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9041 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
9042 unsigned HOST_WIDE_INT nunits;
9043 unsigned j, k, maxk = 0;
9044 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
9045 if (k > maxk)
9046 maxk = k;
9047 tree vectype = SLP_TREE_VECTYPE (slp_node);
9048 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
9049 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
9051 if (dump_enabled_p ())
9052 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9053 "BB vectorization with gaps at the end of "
9054 "a load is not supported\n");
9055 return false;
9059 auto_vec<tree> tem;
9060 unsigned n_perms;
9061 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
9062 true, &n_perms))
9064 if (dump_enabled_p ())
9065 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
9066 vect_location,
9067 "unsupported load permutation\n");
9068 return false;
9072 /* Invalidate assumptions made by dependence analysis when vectorization
9073 on the unrolled body effectively re-orders stmts. */
9074 if (!PURE_SLP_STMT (stmt_info)
9075 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9076 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9077 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9079 if (dump_enabled_p ())
9080 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9081 "cannot perform implicit CSE when performing "
9082 "group loads with negative dependence distance\n");
9083 return false;
9086 else
9087 group_size = 1;
9089 vect_memory_access_type memory_access_type;
9090 enum dr_alignment_support alignment_support_scheme;
9091 int misalignment;
9092 poly_int64 poffset;
9093 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
9094 ncopies, &memory_access_type, &poffset,
9095 &alignment_support_scheme, &misalignment, &gs_info))
9096 return false;
9098 if (mask)
9100 if (memory_access_type == VMAT_CONTIGUOUS)
9102 machine_mode vec_mode = TYPE_MODE (vectype);
9103 if (!VECTOR_MODE_P (vec_mode)
9104 || !can_vec_mask_load_store_p (vec_mode,
9105 TYPE_MODE (mask_vectype), true))
9106 return false;
9108 else if (memory_access_type != VMAT_LOAD_STORE_LANES
9109 && memory_access_type != VMAT_GATHER_SCATTER)
9111 if (dump_enabled_p ())
9112 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9113 "unsupported access type for masked load.\n");
9114 return false;
9116 else if (memory_access_type == VMAT_GATHER_SCATTER
9117 && gs_info.ifn == IFN_LAST
9118 && !gs_info.decl)
9120 if (dump_enabled_p ())
9121 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9122 "unsupported masked emulated gather.\n");
9123 return false;
9127 if (!vec_stmt) /* transformation not required. */
9129 if (slp_node
9130 && mask
9131 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
9132 mask_vectype))
9134 if (dump_enabled_p ())
9135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9136 "incompatible vector types for invariants\n");
9137 return false;
9140 if (!slp)
9141 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
9143 if (loop_vinfo
9144 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
9145 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
9146 VLS_LOAD, group_size,
9147 memory_access_type, &gs_info,
9148 mask);
9150 if (dump_enabled_p ()
9151 && memory_access_type != VMAT_ELEMENTWISE
9152 && memory_access_type != VMAT_GATHER_SCATTER
9153 && alignment_support_scheme != dr_aligned)
9154 dump_printf_loc (MSG_NOTE, vect_location,
9155 "Vectorizing an unaligned access.\n");
9157 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9158 vinfo->any_known_not_updated_vssa = true;
9160 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
9161 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
9162 alignment_support_scheme, misalignment,
9163 &gs_info, slp_node, cost_vec);
9164 return true;
9167 if (!slp)
9168 gcc_assert (memory_access_type
9169 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
9171 if (dump_enabled_p ())
9172 dump_printf_loc (MSG_NOTE, vect_location,
9173 "transform load. ncopies = %d\n", ncopies);
9175 /* Transform. */
9177 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
9178 ensure_base_align (dr_info);
9180 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
9182 vect_build_gather_load_calls (vinfo,
9183 stmt_info, gsi, vec_stmt, &gs_info, mask);
9184 return true;
9187 if (memory_access_type == VMAT_INVARIANT)
9189 gcc_assert (!grouped_load && !mask && !bb_vinfo);
9190 /* If we have versioned for aliasing or the loop doesn't
9191 have any data dependencies that would preclude this,
9192 then we are sure this is a loop invariant load and
9193 thus we can insert it on the preheader edge. */
9194 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
9195 && !nested_in_vect_loop
9196 && hoist_defs_of_uses (stmt_info, loop));
9197 if (hoist_p)
9199 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
9200 if (dump_enabled_p ())
9201 dump_printf_loc (MSG_NOTE, vect_location,
9202 "hoisting out of the vectorized loop: %G",
9203 (gimple *) stmt);
9204 scalar_dest = copy_ssa_name (scalar_dest);
9205 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
9206 edge pe = loop_preheader_edge (loop);
9207 gphi *vphi = get_virtual_phi (loop->header);
9208 tree vuse;
9209 if (vphi)
9210 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
9211 else
9212 vuse = gimple_vuse (gsi_stmt (*gsi));
9213 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
9214 gimple_set_vuse (new_stmt, vuse);
9215 gsi_insert_on_edge_immediate (pe, new_stmt);
9217 /* These copies are all equivalent, but currently the representation
9218 requires a separate STMT_VINFO_VEC_STMT for each one. */
9219 gimple_stmt_iterator gsi2 = *gsi;
9220 gsi_next (&gsi2);
9221 for (j = 0; j < ncopies; j++)
9223 if (hoist_p)
9224 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9225 vectype, NULL);
9226 else
9227 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9228 vectype, &gsi2);
9229 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
9230 if (slp)
9231 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9232 else
9234 if (j == 0)
9235 *vec_stmt = new_stmt;
9236 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9239 return true;
9242 if (memory_access_type == VMAT_ELEMENTWISE
9243 || memory_access_type == VMAT_STRIDED_SLP)
9245 gimple_stmt_iterator incr_gsi;
9246 bool insert_after;
9247 tree offvar;
9248 tree ivstep;
9249 tree running_off;
9250 vec<constructor_elt, va_gc> *v = NULL;
9251 tree stride_base, stride_step, alias_off;
9252 /* Checked by get_load_store_type. */
9253 unsigned int const_nunits = nunits.to_constant ();
9254 unsigned HOST_WIDE_INT cst_offset = 0;
9255 tree dr_offset;
9257 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
9258 gcc_assert (!nested_in_vect_loop);
9260 if (grouped_load)
9262 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9263 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9265 else
9267 first_stmt_info = stmt_info;
9268 first_dr_info = dr_info;
9270 if (slp && grouped_load)
9272 group_size = DR_GROUP_SIZE (first_stmt_info);
9273 ref_type = get_group_alias_ptr_type (first_stmt_info);
9275 else
9277 if (grouped_load)
9278 cst_offset
9279 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
9280 * vect_get_place_in_interleaving_chain (stmt_info,
9281 first_stmt_info));
9282 group_size = 1;
9283 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9286 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9287 stride_base
9288 = fold_build_pointer_plus
9289 (DR_BASE_ADDRESS (first_dr_info->dr),
9290 size_binop (PLUS_EXPR,
9291 convert_to_ptrofftype (dr_offset),
9292 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9293 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9295 /* For a load with loop-invariant (but other than power-of-2)
9296 stride (i.e. not a grouped access) like so:
9298 for (i = 0; i < n; i += stride)
9299 ... = array[i];
9301 we generate a new induction variable and new accesses to
9302 form a new vector (or vectors, depending on ncopies):
9304 for (j = 0; ; j += VF*stride)
9305 tmp1 = array[j];
9306 tmp2 = array[j + stride];
9308 vectemp = {tmp1, tmp2, ...}
9311 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9312 build_int_cst (TREE_TYPE (stride_step), vf));
9314 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9316 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9317 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9318 create_iv (stride_base, ivstep, NULL,
9319 loop, &incr_gsi, insert_after,
9320 &offvar, NULL);
9322 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9324 running_off = offvar;
9325 alias_off = build_int_cst (ref_type, 0);
9326 int nloads = const_nunits;
9327 int lnel = 1;
9328 tree ltype = TREE_TYPE (vectype);
9329 tree lvectype = vectype;
9330 auto_vec<tree> dr_chain;
9331 if (memory_access_type == VMAT_STRIDED_SLP)
9333 if (group_size < const_nunits)
9335 /* First check if vec_init optab supports construction from vector
9336 elts directly. Otherwise avoid emitting a constructor of
9337 vector elements by performing the loads using an integer type
9338 of the same size, constructing a vector of those and then
9339 re-interpreting it as the original vector type. This avoids a
9340 huge runtime penalty due to the general inability to perform
9341 store forwarding from smaller stores to a larger load. */
9342 tree ptype;
9343 tree vtype
9344 = vector_vector_composition_type (vectype,
9345 const_nunits / group_size,
9346 &ptype);
9347 if (vtype != NULL_TREE)
9349 nloads = const_nunits / group_size;
9350 lnel = group_size;
9351 lvectype = vtype;
9352 ltype = ptype;
9355 else
9357 nloads = 1;
9358 lnel = const_nunits;
9359 ltype = vectype;
9361 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9363 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
9364 else if (nloads == 1)
9365 ltype = vectype;
9367 if (slp)
9369 /* For SLP permutation support we need to load the whole group,
9370 not only the number of vector stmts the permutation result
9371 fits in. */
9372 if (slp_perm)
9374 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9375 variable VF. */
9376 unsigned int const_vf = vf.to_constant ();
9377 ncopies = CEIL (group_size * const_vf, const_nunits);
9378 dr_chain.create (ncopies);
9380 else
9381 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9383 unsigned int group_el = 0;
9384 unsigned HOST_WIDE_INT
9385 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9386 unsigned int n_groups = 0;
9387 for (j = 0; j < ncopies; j++)
9389 if (nloads > 1)
9390 vec_alloc (v, nloads);
9391 gimple *new_stmt = NULL;
9392 for (i = 0; i < nloads; i++)
9394 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9395 group_el * elsz + cst_offset);
9396 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9397 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9398 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9399 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9400 if (nloads > 1)
9401 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9402 gimple_assign_lhs (new_stmt));
9404 group_el += lnel;
9405 if (! slp
9406 || group_el == group_size)
9408 n_groups++;
9409 /* When doing SLP make sure not to load elements from
9410 the next vector iteration; those will not be accessed,
9411 so just use the last element again. See PR107451. */
9412 if (!slp || known_lt (n_groups, vf))
9414 tree newoff = copy_ssa_name (running_off);
9415 gimple *incr
9416 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9417 running_off, stride_step);
9418 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9419 running_off = newoff;
9421 group_el = 0;
9424 if (nloads > 1)
9426 tree vec_inv = build_constructor (lvectype, v);
9427 new_temp = vect_init_vector (vinfo, stmt_info,
9428 vec_inv, lvectype, gsi);
9429 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9430 if (lvectype != vectype)
9432 new_stmt = gimple_build_assign (make_ssa_name (vectype),
9433 VIEW_CONVERT_EXPR,
9434 build1 (VIEW_CONVERT_EXPR,
9435 vectype, new_temp));
9436 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9440 if (slp)
9442 if (slp_perm)
9443 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9444 else
9445 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9447 else
9449 if (j == 0)
9450 *vec_stmt = new_stmt;
9451 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9454 if (slp_perm)
9456 unsigned n_perms;
9457 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9458 false, &n_perms);
9460 return true;
9463 if (memory_access_type == VMAT_GATHER_SCATTER
9464 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9465 grouped_load = false;
9467 if (grouped_load)
9469 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9470 group_size = DR_GROUP_SIZE (first_stmt_info);
9471 /* For SLP vectorization we directly vectorize a subchain
9472 without permutation. */
9473 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9474 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9475 /* For BB vectorization always use the first stmt to base
9476 the data ref pointer on. */
9477 if (bb_vinfo)
9478 first_stmt_info_for_drptr
9479 = vect_find_first_scalar_stmt_in_slp (slp_node);
9481 /* Check if the chain of loads is already vectorized. */
9482 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9483 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9484 ??? But we can only do so if there is exactly one
9485 as we have no way to get at the rest. Leave the CSE
9486 opportunity alone.
9487 ??? With the group load eventually participating
9488 in multiple different permutations (having multiple
9489 slp nodes which refer to the same group) the CSE
9490 would even produce wrong code. See PR56270. */
9491 && !slp)
9493 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9494 return true;
9496 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9497 group_gap_adj = 0;
9499 /* VEC_NUM is the number of vect stmts to be created for this group. */
9500 if (slp)
9502 grouped_load = false;
9503 /* If an SLP permutation is from N elements to N elements,
9504 and if one vector holds a whole number of N, we can load
9505 the inputs to the permutation in the same way as an
9506 unpermuted sequence. In other cases we need to load the
9507 whole group, not only the number of vector stmts the
9508 permutation result fits in. */
9509 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9510 if (slp_perm
9511 && (group_size != scalar_lanes
9512 || !multiple_p (nunits, group_size)))
9514 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9515 variable VF; see vect_transform_slp_perm_load. */
9516 unsigned int const_vf = vf.to_constant ();
9517 unsigned int const_nunits = nunits.to_constant ();
9518 vec_num = CEIL (group_size * const_vf, const_nunits);
9519 group_gap_adj = vf * group_size - nunits * vec_num;
9521 else
9523 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9524 group_gap_adj
9525 = group_size - scalar_lanes;
9528 else
9529 vec_num = group_size;
9531 ref_type = get_group_alias_ptr_type (first_stmt_info);
9533 else
9535 first_stmt_info = stmt_info;
9536 first_dr_info = dr_info;
9537 group_size = vec_num = 1;
9538 group_gap_adj = 0;
9539 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9540 if (slp)
9541 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9544 gcc_assert (alignment_support_scheme);
9545 vec_loop_masks *loop_masks
9546 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9547 ? &LOOP_VINFO_MASKS (loop_vinfo)
9548 : NULL);
9549 vec_loop_lens *loop_lens
9550 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9551 ? &LOOP_VINFO_LENS (loop_vinfo)
9552 : NULL);
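/* LOOP_MASKS is nonnull when the loop uses fully-masked (predicated)
   partial vectors, LOOP_LENS when it uses length-based partial vectors
   (e.g. targets providing len_load/len_store); the two schemes are
   mutually exclusive.  */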
9554 /* Shouldn't go with length-based approach if fully masked. */
9555 gcc_assert (!loop_lens || !loop_masks);
9557 /* Targets with store-lane instructions must not require explicit
9558 realignment. vect_supportable_dr_alignment always returns either
9559 dr_aligned or dr_unaligned_supported for masked operations. */
9560 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9561 && !mask
9562 && !loop_masks)
9563 || alignment_support_scheme == dr_aligned
9564 || alignment_support_scheme == dr_unaligned_supported);
9566 /* In case the vectorization factor (VF) is bigger than the number
9567 of elements that we can fit in a vectype (nunits), we have to generate
9568 more than one vector stmt - i.e - we need to "unroll" the
9569 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9570 from one copy of the vector stmt to the next, in the field
9571 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9572 stages to find the correct vector defs to be used when vectorizing
9573 stmts that use the defs of the current stmt. The example below
9574 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9575 need to create 4 vectorized stmts):
9577 before vectorization:
9578 RELATED_STMT VEC_STMT
9579 S1: x = memref - -
9580 S2: z = x + 1 - -
9582 step 1: vectorize stmt S1:
9583 We first create the vector stmt VS1_0, and, as usual, record a
9584 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9585 Next, we create the vector stmt VS1_1, and record a pointer to
9586 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9587 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9588 stmts and pointers:
9589 RELATED_STMT VEC_STMT
9590 VS1_0: vx0 = memref0 VS1_1 -
9591 VS1_1: vx1 = memref1 VS1_2 -
9592 VS1_2: vx2 = memref2 VS1_3 -
9593 VS1_3: vx3 = memref3 - -
9594 S1: x = load - VS1_0
9595 S2: z = x + 1 - -
9598 /* In case of interleaving (non-unit grouped access):
9600 S1: x2 = &base + 2
9601 S2: x0 = &base
9602 S3: x1 = &base + 1
9603 S4: x3 = &base + 3
9605 Vectorized loads are created in the order of memory accesses
9606 starting from the access of the first stmt of the chain:
9608 VS1: vx0 = &base
9609 VS2: vx1 = &base + vec_size*1
9610 VS3: vx3 = &base + vec_size*2
9611 VS4: vx4 = &base + vec_size*3
9613 Then permutation statements are generated:
9615 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9616 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9619 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9620 (the order of the data-refs in the output of vect_permute_load_chain
9621 corresponds to the order of scalar stmts in the interleaving chain - see
9622 the documentation of vect_permute_load_chain()).
9623 The generation of permutation stmts and recording them in
9624 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9626 In case of both multiple types and interleaving, the vector loads and
9627 permutation stmts above are created for every copy. The result vector
9628 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9629 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9631 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9632 on a target that supports unaligned accesses (dr_unaligned_supported)
9633 we generate the following code:
9634 p = initial_addr;
9635 indx = 0;
9636 loop {
9637 p = p + indx * vectype_size;
9638 vec_dest = *(p);
9639 indx = indx + 1;
9642 Otherwise, the data reference is potentially unaligned on a target that
9643 does not support unaligned accesses (dr_explicit_realign_optimized) -
9644 then generate the following code, in which the data in each iteration is
9645 obtained by two vector loads, one from the previous iteration, and one
9646 from the current iteration:
9647 p1 = initial_addr;
9648 msq_init = *(floor(p1))
9649 p2 = initial_addr + VS - 1;
9650 realignment_token = call target_builtin;
9651 indx = 0;
9652 loop {
9653 p2 = p2 + indx * vectype_size
9654 lsq = *(floor(p2))
9655 vec_dest = realign_load (msq, lsq, realignment_token)
9656 indx = indx + 1;
9657 msq = lsq;
9658 } */
9660 /* If the misalignment remains the same throughout the execution of the
9661 loop, we can create the init_addr and permutation mask at the loop
9662 preheader. Otherwise, it needs to be created inside the loop.
9663 This can only occur when vectorizing memory accesses in the inner-loop
9664 nested within an outer-loop that is being vectorized. */
9666 if (nested_in_vect_loop
9667 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9668 GET_MODE_SIZE (TYPE_MODE (vectype))))
9670 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9671 compute_in_loop = true;
9674 bool diff_first_stmt_info
9675 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9677 tree offset = NULL_TREE;
9678 if ((alignment_support_scheme == dr_explicit_realign_optimized
9679 || alignment_support_scheme == dr_explicit_realign)
9680 && !compute_in_loop)
9682 /* If we have different first_stmt_info, we can't set up realignment
9683 here, since we can't guarantee first_stmt_info DR has been
9684 initialized yet; instead use the first_stmt_info_for_drptr DR,
9685 bumping by the distance from the first_stmt_info DR as below. */
9686 if (!diff_first_stmt_info)
9687 msq = vect_setup_realignment (vinfo,
9688 first_stmt_info, gsi, &realignment_token,
9689 alignment_support_scheme, NULL_TREE,
9690 &at_loop);
9691 if (alignment_support_scheme == dr_explicit_realign_optimized)
9693 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9694 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9695 size_one_node);
9696 gcc_assert (!first_stmt_info_for_drptr);
9699 else
9700 at_loop = loop;
9702 if (!known_eq (poffset, 0))
9703 offset = (offset
9704 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
9705 : size_int (poffset));
9707 tree bump;
9708 tree vec_offset = NULL_TREE;
9709 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9711 aggr_type = NULL_TREE;
9712 bump = NULL_TREE;
9714 else if (memory_access_type == VMAT_GATHER_SCATTER)
9716 aggr_type = elem_type;
9717 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9718 &bump, &vec_offset);
9720 else
9722 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9723 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9724 else
9725 aggr_type = vectype;
9726 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9727 memory_access_type);
9730 auto_vec<tree> vec_offsets;
9731 auto_vec<tree> vec_masks;
9732 if (mask)
9734 if (slp_node)
9735 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
9736 &vec_masks);
9737 else
9738 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
9739 &vec_masks, mask_vectype);
9741 tree vec_mask = NULL_TREE;
9742 poly_uint64 group_elt = 0;
9743 for (j = 0; j < ncopies; j++)
9745 /* 1. Create the vector or array pointer update chain. */
9746 if (j == 0)
9748 bool simd_lane_access_p
9749 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9750 if (simd_lane_access_p
9751 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9752 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9753 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9754 && integer_zerop (DR_INIT (first_dr_info->dr))
9755 && alias_sets_conflict_p (get_alias_set (aggr_type),
9756 get_alias_set (TREE_TYPE (ref_type)))
9757 && (alignment_support_scheme == dr_aligned
9758 || alignment_support_scheme == dr_unaligned_supported))
9760 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9761 dataref_offset = build_int_cst (ref_type, 0);
9763 else if (diff_first_stmt_info)
9765 dataref_ptr
9766 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9767 aggr_type, at_loop, offset, &dummy,
9768 gsi, &ptr_incr, simd_lane_access_p,
9769 bump);
9770 /* Adjust the pointer by the difference to first_stmt. */
9771 data_reference_p ptrdr
9772 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9773 tree diff
9774 = fold_convert (sizetype,
9775 size_binop (MINUS_EXPR,
9776 DR_INIT (first_dr_info->dr),
9777 DR_INIT (ptrdr)));
9778 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9779 stmt_info, diff);
9780 if (alignment_support_scheme == dr_explicit_realign)
9782 msq = vect_setup_realignment (vinfo,
9783 first_stmt_info_for_drptr, gsi,
9784 &realignment_token,
9785 alignment_support_scheme,
9786 dataref_ptr, &at_loop);
9787 gcc_assert (!compute_in_loop);
9790 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9792 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9793 slp_node, &gs_info, &dataref_ptr,
9794 &vec_offsets);
9796 else
9797 dataref_ptr
9798 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9799 at_loop,
9800 offset, &dummy, gsi, &ptr_incr,
9801 simd_lane_access_p, bump);
9802 if (mask)
9803 vec_mask = vec_masks[0];
9805 else
9807 if (dataref_offset)
9808 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9809 bump);
9810 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9811 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9812 stmt_info, bump);
9813 if (mask)
9814 vec_mask = vec_masks[j];
9817 if (grouped_load || slp_perm)
9818 dr_chain.create (vec_num);
9820 gimple *new_stmt = NULL;
9821 if (memory_access_type == VMAT_LOAD_STORE_LANES)
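/* A load-lanes access uses a single (possibly masked) internal call
   that fetches the whole interleaved group and de-interleaves it into
   an array of VEC_NUM vectors, so no separate permute statements are
   needed afterwards.  */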
9823 tree vec_array;
9825 vec_array = create_vector_array (vectype, vec_num);
9827 tree final_mask = NULL_TREE;
9828 if (loop_masks)
9829 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9830 vectype, j);
9831 if (vec_mask)
9832 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9833 final_mask, vec_mask, gsi);
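/* When both a loop mask (for partial vectors) and a user-supplied mask
   are present, prepare_vec_mask combines them (effectively an AND) so
   that only lanes active in both are loaded.  */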
9835 gcall *call;
9836 if (final_mask)
9838 /* Emit:
9839 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9840 VEC_MASK). */
9841 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9842 tree alias_ptr = build_int_cst (ref_type, align);
9843 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9844 dataref_ptr, alias_ptr,
9845 final_mask);
9847 else
9849 /* Emit:
9850 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9851 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9852 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9854 gimple_call_set_lhs (call, vec_array);
9855 gimple_call_set_nothrow (call, true);
9856 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9857 new_stmt = call;
9859 /* Extract each vector into an SSA_NAME. */
9860 for (i = 0; i < vec_num; i++)
9862 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9863 vec_array, i);
9864 dr_chain.quick_push (new_temp);
9867 /* Record the mapping between SSA_NAMEs and statements. */
9868 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9870 /* Record that VEC_ARRAY is now dead. */
9871 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9873 else
9875 for (i = 0; i < vec_num; i++)
9877 tree final_mask = NULL_TREE;
9878 if (loop_masks
9879 && memory_access_type != VMAT_INVARIANT)
9880 final_mask = vect_get_loop_mask (gsi, loop_masks,
9881 vec_num * ncopies,
9882 vectype, vec_num * j + i);
9883 if (vec_mask)
9884 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9885 final_mask, vec_mask, gsi);
9887 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9888 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9889 gsi, stmt_info, bump);
9891 /* 2. Create the vector-load in the loop. */
9892 switch (alignment_support_scheme)
9894 case dr_aligned:
9895 case dr_unaligned_supported:
9897 unsigned int misalign;
9898 unsigned HOST_WIDE_INT align;
9900 if (memory_access_type == VMAT_GATHER_SCATTER
9901 && gs_info.ifn != IFN_LAST)
9903 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9904 vec_offset = vec_offsets[vec_num * j + i];
9905 tree zero = build_zero_cst (vectype);
9906 tree scale = size_int (gs_info.scale);
9907 gcall *call;
9908 if (final_mask)
9909 call = gimple_build_call_internal
9910 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9911 vec_offset, scale, zero, final_mask);
9912 else
9913 call = gimple_build_call_internal
9914 (IFN_GATHER_LOAD, 4, dataref_ptr,
9915 vec_offset, scale, zero);
9916 gimple_call_set_nothrow (call, true);
9917 new_stmt = call;
9918 data_ref = NULL_TREE;
9919 break;
9921 else if (memory_access_type == VMAT_GATHER_SCATTER)
9923 /* Emulated gather-scatter. */
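/* No gather instruction is available, so the gather is open-coded:
   each lane's offset is extracted from VEC_OFFSET with a BIT_FIELD_REF,
   scaled, added to DATAREF_PTR and used for a scalar load; the scalar
   results are then collected into a CONSTRUCTOR of the vector type.  */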
9924 gcc_assert (!final_mask);
9925 unsigned HOST_WIDE_INT const_nunits
9926 = nunits.to_constant ();
9927 unsigned HOST_WIDE_INT const_offset_nunits
9928 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9929 .to_constant ();
9930 vec<constructor_elt, va_gc> *ctor_elts;
9931 vec_alloc (ctor_elts, const_nunits);
9932 gimple_seq stmts = NULL;
9933 /* We support offset vectors with more elements
9934 than the data vector for now. */
9935 unsigned HOST_WIDE_INT factor
9936 = const_offset_nunits / const_nunits;
9937 vec_offset = vec_offsets[j / factor];
9938 unsigned elt_offset = (j % factor) * const_nunits;
9939 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9940 tree scale = size_int (gs_info.scale);
9941 align
9942 = get_object_alignment (DR_REF (first_dr_info->dr));
9943 tree ltype = build_aligned_type (TREE_TYPE (vectype),
9944 align);
9945 for (unsigned k = 0; k < const_nunits; ++k)
9947 tree boff = size_binop (MULT_EXPR,
9948 TYPE_SIZE (idx_type),
9949 bitsize_int
9950 (k + elt_offset));
9951 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9952 idx_type, vec_offset,
9953 TYPE_SIZE (idx_type),
9954 boff);
9955 idx = gimple_convert (&stmts, sizetype, idx);
9956 idx = gimple_build (&stmts, MULT_EXPR,
9957 sizetype, idx, scale);
9958 tree ptr = gimple_build (&stmts, PLUS_EXPR,
9959 TREE_TYPE (dataref_ptr),
9960 dataref_ptr, idx);
9961 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9962 tree elt = make_ssa_name (TREE_TYPE (vectype));
9963 tree ref = build2 (MEM_REF, ltype, ptr,
9964 build_int_cst (ref_type, 0));
9965 new_stmt = gimple_build_assign (elt, ref);
9966 gimple_set_vuse (new_stmt,
9967 gimple_vuse (gsi_stmt (*gsi)));
9968 gimple_seq_add_stmt (&stmts, new_stmt);
9969 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
9971 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9972 new_stmt = gimple_build_assign (NULL_TREE,
9973 build_constructor
9974 (vectype, ctor_elts));
9975 data_ref = NULL_TREE;
9976 break;
9979 align =
9980 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9981 if (alignment_support_scheme == dr_aligned)
9982 misalign = 0;
9983 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9985 align = dr_alignment
9986 (vect_dr_behavior (vinfo, first_dr_info));
9987 misalign = 0;
9989 else
9990 misalign = misalignment;
9991 if (dataref_offset == NULL_TREE
9992 && TREE_CODE (dataref_ptr) == SSA_NAME)
9993 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9994 align, misalign);
9995 align = least_bit_hwi (misalign | align);
9997 if (final_mask)
9999 tree ptr = build_int_cst (ref_type,
10000 align * BITS_PER_UNIT);
10001 gcall *call
10002 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
10003 dataref_ptr, ptr,
10004 final_mask);
10005 gimple_call_set_nothrow (call, true);
10006 new_stmt = call;
10007 data_ref = NULL_TREE;
10009 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
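/* Length-controlled partial vectors: emit an IFN_LEN_LOAD. The length
   operand gets a target-specific bias (0 or -1), and some targets only
   provide the len-load on a byte-element (VnQI) view of the mode, in
   which case the result is VIEW_CONVERTed back to VECTYPE below.  */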
10011 tree final_len
10012 = vect_get_loop_len (loop_vinfo, loop_lens,
10013 vec_num * ncopies,
10014 vec_num * j + i);
10015 tree ptr = build_int_cst (ref_type,
10016 align * BITS_PER_UNIT);
10018 machine_mode vmode = TYPE_MODE (vectype);
10019 opt_machine_mode new_ovmode
10020 = get_len_load_store_mode (vmode, true);
10021 machine_mode new_vmode = new_ovmode.require ();
10022 tree qi_type = unsigned_intQI_type_node;
10024 signed char biasval =
10025 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10027 tree bias = build_int_cst (intQI_type_node, biasval);
10029 gcall *call
10030 = gimple_build_call_internal (IFN_LEN_LOAD, 4,
10031 dataref_ptr, ptr,
10032 final_len, bias);
10033 gimple_call_set_nothrow (call, true);
10034 new_stmt = call;
10035 data_ref = NULL_TREE;
10037 /* Need conversion if it's wrapped with VnQI. */
10038 if (vmode != new_vmode)
10040 tree new_vtype
10041 = build_vector_type_for_mode (qi_type, new_vmode);
10042 tree var = vect_get_new_ssa_name (new_vtype,
10043 vect_simple_var);
10044 gimple_set_lhs (call, var);
10045 vect_finish_stmt_generation (vinfo, stmt_info, call,
10046 gsi);
10047 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
10048 new_stmt
10049 = gimple_build_assign (vec_dest,
10050 VIEW_CONVERT_EXPR, op);
10053 else
10055 tree ltype = vectype;
10056 tree new_vtype = NULL_TREE;
10057 unsigned HOST_WIDE_INT gap
10058 = DR_GROUP_GAP (first_stmt_info);
10059 unsigned int vect_align
10060 = vect_known_alignment_in_bytes (first_dr_info,
10061 vectype);
10062 unsigned int scalar_dr_size
10063 = vect_get_scalar_dr_size (first_dr_info);
10064 /* If there's no peeling for gaps but we have a gap
10065 with slp loads then load the lower half of the
10066 vector only. See get_group_load_store_type for
10067 when we apply this optimization. */
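/* For instance (illustrative values): with a 4-lane vector and a group
   of size 4 whose last two elements are unused (gap of 2), only the
   lower half vector is loaded and the remaining lanes are filled with
   zeros by the CONSTRUCTOR built below.  */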
10068 if (slp
10069 && loop_vinfo
10070 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
10071 && gap != 0
10072 && known_eq (nunits, (group_size - gap) * 2)
10073 && known_eq (nunits, group_size)
10074 && gap >= (vect_align / scalar_dr_size))
10076 tree half_vtype;
10077 new_vtype
10078 = vector_vector_composition_type (vectype, 2,
10079 &half_vtype);
10080 if (new_vtype != NULL_TREE)
10081 ltype = half_vtype;
10083 tree offset
10084 = (dataref_offset ? dataref_offset
10085 : build_int_cst (ref_type, 0));
10086 if (ltype != vectype
10087 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10089 unsigned HOST_WIDE_INT gap_offset
10090 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
10091 tree gapcst = build_int_cst (ref_type, gap_offset);
10092 offset = size_binop (PLUS_EXPR, offset, gapcst);
10094 data_ref
10095 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
10096 if (alignment_support_scheme == dr_aligned)
10097 ;
10098 else
10099 TREE_TYPE (data_ref)
10100 = build_aligned_type (TREE_TYPE (data_ref),
10101 align * BITS_PER_UNIT);
10102 if (ltype != vectype)
10104 vect_copy_ref_info (data_ref,
10105 DR_REF (first_dr_info->dr));
10106 tree tem = make_ssa_name (ltype);
10107 new_stmt = gimple_build_assign (tem, data_ref);
10108 vect_finish_stmt_generation (vinfo, stmt_info,
10109 new_stmt, gsi);
10110 data_ref = NULL;
10111 vec<constructor_elt, va_gc> *v;
10112 vec_alloc (v, 2);
10113 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10115 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10116 build_zero_cst (ltype));
10117 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
10119 else
10121 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
10122 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10123 build_zero_cst (ltype));
10125 gcc_assert (new_vtype != NULL_TREE);
10126 if (new_vtype == vectype)
10127 new_stmt = gimple_build_assign (
10128 vec_dest, build_constructor (vectype, v));
10129 else
10131 tree new_vname = make_ssa_name (new_vtype);
10132 new_stmt = gimple_build_assign (
10133 new_vname, build_constructor (new_vtype, v));
10134 vect_finish_stmt_generation (vinfo, stmt_info,
10135 new_stmt, gsi);
10136 new_stmt = gimple_build_assign (
10137 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
10138 new_vname));
10142 break;
10144 case dr_explicit_realign:
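/* Explicit realignment: mask the data pointer down to the target
   alignment and load the aligned vector containing the first needed
   element (MSQ) and the one containing the last (LSQ); the two are
   combined later with a REALIGN_LOAD_EXPR using REALIGNMENT_TOKEN.  */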
10146 tree ptr, bump;
10148 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10150 if (compute_in_loop)
10151 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10152 &realignment_token,
10153 dr_explicit_realign,
10154 dataref_ptr, NULL);
10156 if (TREE_CODE (dataref_ptr) == SSA_NAME)
10157 ptr = copy_ssa_name (dataref_ptr);
10158 else
10159 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
10160 // For explicit realign the target alignment should be
10161 // known at compile time.
10162 unsigned HOST_WIDE_INT align =
10163 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10164 new_stmt = gimple_build_assign
10165 (ptr, BIT_AND_EXPR, dataref_ptr,
10166 build_int_cst
10167 (TREE_TYPE (dataref_ptr),
10168 -(HOST_WIDE_INT) align));
10169 vect_finish_stmt_generation (vinfo, stmt_info,
10170 new_stmt, gsi);
10171 data_ref
10172 = build2 (MEM_REF, vectype, ptr,
10173 build_int_cst (ref_type, 0));
10174 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10175 vec_dest = vect_create_destination_var (scalar_dest,
10176 vectype);
10177 new_stmt = gimple_build_assign (vec_dest, data_ref);
10178 new_temp = make_ssa_name (vec_dest, new_stmt);
10179 gimple_assign_set_lhs (new_stmt, new_temp);
10180 gimple_move_vops (new_stmt, stmt_info->stmt);
10181 vect_finish_stmt_generation (vinfo, stmt_info,
10182 new_stmt, gsi);
10183 msq = new_temp;
10185 bump = size_binop (MULT_EXPR, vs,
10186 TYPE_SIZE_UNIT (elem_type));
10187 bump = size_binop (MINUS_EXPR, bump, size_one_node);
10188 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
10189 stmt_info, bump);
10190 new_stmt = gimple_build_assign
10191 (NULL_TREE, BIT_AND_EXPR, ptr,
10192 build_int_cst
10193 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
10194 if (TREE_CODE (ptr) == SSA_NAME)
10195 ptr = copy_ssa_name (ptr, new_stmt);
10196 else
10197 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
10198 gimple_assign_set_lhs (new_stmt, ptr);
10199 vect_finish_stmt_generation (vinfo, stmt_info,
10200 new_stmt, gsi);
10201 data_ref
10202 = build2 (MEM_REF, vectype, ptr,
10203 build_int_cst (ref_type, 0));
10204 break;
10206 case dr_explicit_realign_optimized:
10208 if (TREE_CODE (dataref_ptr) == SSA_NAME)
10209 new_temp = copy_ssa_name (dataref_ptr);
10210 else
10211 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
10212 // We should only be doing this if we know the target
10213 // alignment at compile time.
10214 unsigned HOST_WIDE_INT align =
10215 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10216 new_stmt = gimple_build_assign
10217 (new_temp, BIT_AND_EXPR, dataref_ptr,
10218 build_int_cst (TREE_TYPE (dataref_ptr),
10219 -(HOST_WIDE_INT) align));
10220 vect_finish_stmt_generation (vinfo, stmt_info,
10221 new_stmt, gsi);
10222 data_ref
10223 = build2 (MEM_REF, vectype, new_temp,
10224 build_int_cst (ref_type, 0));
10225 break;
10227 default:
10228 gcc_unreachable ();
10230 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10231 /* DATA_REF is null if we've already built the statement. */
10232 if (data_ref)
10234 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10235 new_stmt = gimple_build_assign (vec_dest, data_ref);
10237 new_temp = make_ssa_name (vec_dest, new_stmt);
10238 gimple_set_lhs (new_stmt, new_temp);
10239 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10241 /* 3. Handle explicit realignment if necessary/supported.
10242 Create in loop:
10243 vec_dest = realign_load (msq, lsq, realignment_token) */
10244 if (alignment_support_scheme == dr_explicit_realign_optimized
10245 || alignment_support_scheme == dr_explicit_realign)
10247 lsq = gimple_assign_lhs (new_stmt);
10248 if (!realignment_token)
10249 realignment_token = dataref_ptr;
10250 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10251 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
10252 msq, lsq, realignment_token);
10253 new_temp = make_ssa_name (vec_dest, new_stmt);
10254 gimple_assign_set_lhs (new_stmt, new_temp);
10255 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10257 if (alignment_support_scheme == dr_explicit_realign_optimized)
10259 gcc_assert (phi);
10260 if (i == vec_num - 1 && j == ncopies - 1)
10261 add_phi_arg (phi, lsq,
10262 loop_latch_edge (containing_loop),
10263 UNKNOWN_LOCATION);
10264 msq = lsq;
10268 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
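/* A contiguous access with a negative step loads the elements in the
   reverse of their scalar order, so reverse the loaded vector with a
   VEC_PERM_EXPR here.  */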
10270 tree perm_mask = perm_mask_for_reverse (vectype);
10271 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
10272 perm_mask, stmt_info, gsi);
10273 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10276 /* Collect vector loads and later create their permutation in
10277 vect_transform_grouped_load (). */
10278 if (grouped_load || slp_perm)
10279 dr_chain.quick_push (new_temp);
10281 /* Store vector loads in the corresponding SLP_NODE. */
10282 if (slp && !slp_perm)
10283 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10285 /* With an SLP permutation we load the gaps as well; without
10286 one we need to skip the gaps after we manage to fully load
10287 all elements. group_gap_adj is DR_GROUP_SIZE here. */
10288 group_elt += nunits;
10289 if (maybe_ne (group_gap_adj, 0U)
10290 && !slp_perm
10291 && known_eq (group_elt, group_size - group_gap_adj))
10293 poly_wide_int bump_val
10294 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10295 * group_gap_adj);
10296 if (tree_int_cst_sgn
10297 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10298 bump_val = -bump_val;
10299 tree bump = wide_int_to_tree (sizetype, bump_val);
10300 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10301 gsi, stmt_info, bump);
10302 group_elt = 0;
10305 /* Bump the vector pointer to account for a gap or for excess
10306 elements loaded for a permuted SLP load. */
10307 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
10309 poly_wide_int bump_val
10310 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10311 * group_gap_adj);
10312 if (tree_int_cst_sgn
10313 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10314 bump_val = -bump_val;
10315 tree bump = wide_int_to_tree (sizetype, bump_val);
10316 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10317 stmt_info, bump);
10321 if (slp && !slp_perm)
10322 continue;
10324 if (slp_perm)
10326 unsigned n_perms;
10327 /* For SLP we know we've seen all possible uses of dr_chain so
10328 direct vect_transform_slp_perm_load to DCE the unused parts.
10329 ??? This is a hack to prevent compile-time issues as seen
10330 in PR101120 and friends. */
10331 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
10332 gsi, vf, false, &n_perms,
10333 nullptr, true);
10334 gcc_assert (ok);
10336 else
10338 if (grouped_load)
10340 if (memory_access_type != VMAT_LOAD_STORE_LANES)
10341 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
10342 group_size, gsi);
10343 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10345 else
10347 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10350 dr_chain.release ();
10352 if (!slp)
10353 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10355 return true;
10358 /* Function vect_is_simple_cond.
10360 Input:
10361 LOOP - the loop that is being vectorized.
10362 COND - Condition that is checked for simple use.
10364 Output:
10365 *COMP_VECTYPE - the vector type for the comparison.
10366 *DTS - The def types for the arguments of the comparison
10368 Returns whether a COND can be vectorized. Checks whether
10369 the condition operands are supportable using vect_is_simple_use. */
10371 static bool
10372 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
10373 slp_tree slp_node, tree *comp_vectype,
10374 enum vect_def_type *dts, tree vectype)
10376 tree lhs, rhs;
10377 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10378 slp_tree slp_op;
10380 /* Mask case. */
10381 if (TREE_CODE (cond) == SSA_NAME
10382 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
10384 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
10385 &slp_op, &dts[0], comp_vectype)
10386 || !*comp_vectype
10387 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
10388 return false;
10389 return true;
10392 if (!COMPARISON_CLASS_P (cond))
10393 return false;
10395 lhs = TREE_OPERAND (cond, 0);
10396 rhs = TREE_OPERAND (cond, 1);
10398 if (TREE_CODE (lhs) == SSA_NAME)
10400 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10401 &lhs, &slp_op, &dts[0], &vectype1))
10402 return false;
10404 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10405 || TREE_CODE (lhs) == FIXED_CST)
10406 dts[0] = vect_constant_def;
10407 else
10408 return false;
10410 if (TREE_CODE (rhs) == SSA_NAME)
10412 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10413 &rhs, &slp_op, &dts[1], &vectype2))
10414 return false;
10416 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10417 || TREE_CODE (rhs) == FIXED_CST)
10418 dts[1] = vect_constant_def;
10419 else
10420 return false;
10422 if (vectype1 && vectype2
10423 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10424 TYPE_VECTOR_SUBPARTS (vectype2)))
10425 return false;
10427 *comp_vectype = vectype1 ? vectype1 : vectype2;
10428 /* Invariant comparison. */
10429 if (! *comp_vectype)
10431 tree scalar_type = TREE_TYPE (lhs);
10432 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10433 *comp_vectype = truth_type_for (vectype);
10434 else
10436 /* If we can widen the comparison to match vectype do so. */
10437 if (INTEGRAL_TYPE_P (scalar_type)
10438 && !slp_node
10439 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10440 TYPE_SIZE (TREE_TYPE (vectype))))
10441 scalar_type = build_nonstandard_integer_type
10442 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10443 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10444 slp_node);
10448 return true;
10451 /* vectorizable_condition.
10453 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10454 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10455 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10456 at GSI.
10458 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10460 Return true if STMT_INFO is vectorizable in this way. */
10462 static bool
10463 vectorizable_condition (vec_info *vinfo,
10464 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10465 gimple **vec_stmt,
10466 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10468 tree scalar_dest = NULL_TREE;
10469 tree vec_dest = NULL_TREE;
10470 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10471 tree then_clause, else_clause;
10472 tree comp_vectype = NULL_TREE;
10473 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10474 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10475 tree vec_compare;
10476 tree new_temp;
10477 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10478 enum vect_def_type dts[4]
10479 = {vect_unknown_def_type, vect_unknown_def_type,
10480 vect_unknown_def_type, vect_unknown_def_type};
10481 int ndts = 4;
10482 int ncopies;
10483 int vec_num;
10484 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10485 int i;
10486 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10487 vec<tree> vec_oprnds0 = vNULL;
10488 vec<tree> vec_oprnds1 = vNULL;
10489 vec<tree> vec_oprnds2 = vNULL;
10490 vec<tree> vec_oprnds3 = vNULL;
10491 tree vec_cmp_type;
10492 bool masked = false;
10494 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10495 return false;
10497 /* Is vectorizable conditional operation? */
10498 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10499 if (!stmt)
10500 return false;
10502 code = gimple_assign_rhs_code (stmt);
10503 if (code != COND_EXPR)
10504 return false;
10506 stmt_vec_info reduc_info = NULL;
10507 int reduc_index = -1;
10508 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10509 bool for_reduction
10510 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10511 if (for_reduction)
10513 if (slp_node)
10514 return false;
10515 reduc_info = info_for_reduction (vinfo, stmt_info);
10516 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10517 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10518 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10519 || reduc_index != -1);
10521 else
10523 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10524 return false;
10527 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10528 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10530 if (slp_node)
10532 ncopies = 1;
10533 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10535 else
10537 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10538 vec_num = 1;
10541 gcc_assert (ncopies >= 1);
10542 if (for_reduction && ncopies > 1)
10543 return false; /* FORNOW */
10545 cond_expr = gimple_assign_rhs1 (stmt);
10547 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10548 &comp_vectype, &dts[0], vectype)
10549 || !comp_vectype)
10550 return false;
10552 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10553 slp_tree then_slp_node, else_slp_node;
10554 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10555 &then_clause, &then_slp_node, &dts[2], &vectype1))
10556 return false;
10557 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10558 &else_clause, &else_slp_node, &dts[3], &vectype2))
10559 return false;
10561 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10562 return false;
10564 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10565 return false;
10567 masked = !COMPARISON_CLASS_P (cond_expr);
10568 vec_cmp_type = truth_type_for (comp_vectype);
10570 if (vec_cmp_type == NULL_TREE)
10571 return false;
10573 cond_code = TREE_CODE (cond_expr);
10574 if (!masked)
10576 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10577 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10580 /* For conditional reductions, the "then" value needs to be the candidate
10581 value calculated by this iteration while the "else" value needs to be
10582 the result carried over from previous iterations. If the COND_EXPR
10583 is the other way around, we need to swap it. */
10584 bool must_invert_cmp_result = false;
10585 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10587 if (masked)
10588 must_invert_cmp_result = true;
10589 else
10591 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10592 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10593 if (new_code == ERROR_MARK)
10594 must_invert_cmp_result = true;
10595 else
10597 cond_code = new_code;
10598 /* Make sure we don't accidentally use the old condition. */
10599 cond_expr = NULL_TREE;
10602 std::swap (then_clause, else_clause);
10605 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10607 /* Boolean values may have another representation in vectors
10608 and therefore we prefer bit operations over comparison for
10609 them (which also works for scalar masks). We store opcodes
10610 to use in bitop1 and bitop2. Statement is vectorized as
10611 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10612 depending on bitop1 and bitop2 arity. */
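/* Illustrative mapping (added for exposition; A and B stand for
   boolean/mask operands): the cases below rewrite
     A >  B  as  A & ~B     (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR)
     A >= B  as  A | ~B     (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_IOR_EXPR)
     A != B  as  A ^ B      (bitop1 = BIT_XOR_EXPR only)
     A == B  as  ~(A ^ B)   (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR)
   with LT/LE handled by swapping the operands and reusing the GT/GE
   lowering.  */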
10613 switch (cond_code)
10615 case GT_EXPR:
10616 bitop1 = BIT_NOT_EXPR;
10617 bitop2 = BIT_AND_EXPR;
10618 break;
10619 case GE_EXPR:
10620 bitop1 = BIT_NOT_EXPR;
10621 bitop2 = BIT_IOR_EXPR;
10622 break;
10623 case LT_EXPR:
10624 bitop1 = BIT_NOT_EXPR;
10625 bitop2 = BIT_AND_EXPR;
10626 std::swap (cond_expr0, cond_expr1);
10627 break;
10628 case LE_EXPR:
10629 bitop1 = BIT_NOT_EXPR;
10630 bitop2 = BIT_IOR_EXPR;
10631 std::swap (cond_expr0, cond_expr1);
10632 break;
10633 case NE_EXPR:
10634 bitop1 = BIT_XOR_EXPR;
10635 break;
10636 case EQ_EXPR:
10637 bitop1 = BIT_XOR_EXPR;
10638 bitop2 = BIT_NOT_EXPR;
10639 break;
10640 default:
10641 return false;
10643 cond_code = SSA_NAME;
10646 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10647 && reduction_type == EXTRACT_LAST_REDUCTION
10648 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10650 if (dump_enabled_p ())
10651 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10652 "reduction comparison operation not supported.\n");
10653 return false;
10656 if (!vec_stmt)
10658 if (bitop1 != NOP_EXPR)
10660 machine_mode mode = TYPE_MODE (comp_vectype);
10661 optab optab;
10663 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10664 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10665 return false;
10667 if (bitop2 != NOP_EXPR)
10669 optab = optab_for_tree_code (bitop2, comp_vectype,
10670 optab_default);
10671 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10672 return false;
10676 vect_cost_for_stmt kind = vector_stmt;
10677 if (reduction_type == EXTRACT_LAST_REDUCTION)
10678 /* Count one reduction-like operation per vector. */
10679 kind = vec_to_scalar;
10680 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10681 return false;
10683 if (slp_node
10684 && (!vect_maybe_update_slp_op_vectype
10685 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10686 || (op_adjust == 1
10687 && !vect_maybe_update_slp_op_vectype
10688 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10689 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10690 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10692 if (dump_enabled_p ())
10693 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10694 "incompatible vector types for invariants\n");
10695 return false;
10698 if (loop_vinfo && for_reduction
10699 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10701 if (reduction_type == EXTRACT_LAST_REDUCTION)
10702 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10703 ncopies * vec_num, vectype, NULL);
10704 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10705 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10707 if (dump_enabled_p ())
10708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10709 "conditional reduction prevents the use"
10710 " of partial vectors.\n");
10711 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10715 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10716 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10717 cost_vec, kind);
10718 return true;
10721 /* Transform. */
10723 /* Handle def. */
10724 scalar_dest = gimple_assign_lhs (stmt);
10725 if (reduction_type != EXTRACT_LAST_REDUCTION)
10726 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10728 bool swap_cond_operands = false;
10730 /* See whether another part of the vectorized code applies a loop
10731 mask to the condition, or to its inverse. */
10733 vec_loop_masks *masks = NULL;
10734 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10736 if (reduction_type == EXTRACT_LAST_REDUCTION)
10737 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10738 else
10740 scalar_cond_masked_key cond (cond_expr, ncopies);
10741 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10742 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10743 else
10745 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10746 tree_code orig_code = cond.code;
10747 cond.code = invert_tree_comparison (cond.code, honor_nans);
10748 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
10750 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10751 cond_code = cond.code;
10752 swap_cond_operands = true;
10754 else
10756 /* Try the inverse of the current mask. We check if the
10757 inverse mask is live and if so we generate a negate of
10758 the current mask such that we still honor NaNs. */
10759 cond.inverted_p = true;
10760 cond.code = orig_code;
10761 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10763 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10764 cond_code = cond.code;
10765 swap_cond_operands = true;
10766 must_invert_cmp_result = true;
10773 /* Handle cond expr. */
10774 if (masked)
10775 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10776 cond_expr, &vec_oprnds0, comp_vectype,
10777 then_clause, &vec_oprnds2, vectype,
10778 reduction_type != EXTRACT_LAST_REDUCTION
10779 ? else_clause : NULL, &vec_oprnds3, vectype);
10780 else
10781 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10782 cond_expr0, &vec_oprnds0, comp_vectype,
10783 cond_expr1, &vec_oprnds1, comp_vectype,
10784 then_clause, &vec_oprnds2, vectype,
10785 reduction_type != EXTRACT_LAST_REDUCTION
10786 ? else_clause : NULL, &vec_oprnds3, vectype);
10788 /* Arguments are ready. Create the new vector stmt. */
10789 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10791 vec_then_clause = vec_oprnds2[i];
10792 if (reduction_type != EXTRACT_LAST_REDUCTION)
10793 vec_else_clause = vec_oprnds3[i];
10795 if (swap_cond_operands)
10796 std::swap (vec_then_clause, vec_else_clause);
10798 if (masked)
10799 vec_compare = vec_cond_lhs;
10800 else
10802 vec_cond_rhs = vec_oprnds1[i];
10803 if (bitop1 == NOP_EXPR)
10805 gimple_seq stmts = NULL;
10806 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10807 vec_cond_lhs, vec_cond_rhs);
10808 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10810 else
10812 new_temp = make_ssa_name (vec_cmp_type);
10813 gassign *new_stmt;
10814 if (bitop1 == BIT_NOT_EXPR)
10815 new_stmt = gimple_build_assign (new_temp, bitop1,
10816 vec_cond_rhs);
10817 else
10818 new_stmt
10819 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10820 vec_cond_rhs);
10821 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10822 if (bitop2 == NOP_EXPR)
10823 vec_compare = new_temp;
10824 else if (bitop2 == BIT_NOT_EXPR
10825 && reduction_type != EXTRACT_LAST_REDUCTION)
10827 /* Instead of doing ~x ? y : z do x ? z : y. */
10828 vec_compare = new_temp;
10829 std::swap (vec_then_clause, vec_else_clause);
10831 else
10833 vec_compare = make_ssa_name (vec_cmp_type);
10834 if (bitop2 == BIT_NOT_EXPR)
10835 new_stmt
10836 = gimple_build_assign (vec_compare, bitop2, new_temp);
10837 else
10838 new_stmt
10839 = gimple_build_assign (vec_compare, bitop2,
10840 vec_cond_lhs, new_temp);
10841 vect_finish_stmt_generation (vinfo, stmt_info,
10842 new_stmt, gsi);
10847 /* If we decided to apply a loop mask to the result of the vector
10848 comparison, AND the comparison with the mask now. Later passes
10849 should then be able to reuse the AND results between multiple
10850 vector statements.
10852 For example:
10853 for (int i = 0; i < 100; ++i)
10854 x[i] = y[i] ? z[i] : 10;
10856 results in following optimized GIMPLE:
10858 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10859 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10860 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10861 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10862 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10863 vect_iftmp.11_47, { 10, ... }>;
10865 instead of using masked and unmasked forms of
10866 vec != { 0, ... } (masked in the MASK_LOAD,
10867 unmasked in the VEC_COND_EXPR). */
10869 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10870 in cases where that's necessary. */
10872 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10874 if (!is_gimple_val (vec_compare))
10876 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10877 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10878 vec_compare);
10879 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10880 vec_compare = vec_compare_name;
10883 if (must_invert_cmp_result)
10885 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10886 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10887 BIT_NOT_EXPR,
10888 vec_compare);
10889 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10890 vec_compare = vec_compare_name;
10893 if (masks)
10895 tree loop_mask
10896 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10897 vectype, i);
10898 tree tmp2 = make_ssa_name (vec_cmp_type);
10899 gassign *g
10900 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10901 loop_mask);
10902 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10903 vec_compare = tmp2;
10907 gimple *new_stmt;
10908 if (reduction_type == EXTRACT_LAST_REDUCTION)
10910 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10911 tree lhs = gimple_get_lhs (old_stmt);
10912 new_stmt = gimple_build_call_internal
10913 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10914 vec_then_clause);
10915 gimple_call_set_lhs (new_stmt, lhs);
10916 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10917 if (old_stmt == gsi_stmt (*gsi))
10918 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10919 else
10921 /* In this case we're moving the definition to later in the
10922 block. That doesn't matter because the only uses of the
10923 lhs are in phi statements. */
10924 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10925 gsi_remove (&old_gsi, true);
10926 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10929 else
10931 new_temp = make_ssa_name (vec_dest);
10932 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10933 vec_then_clause, vec_else_clause);
10934 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10936 if (slp_node)
10937 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10938 else
10939 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10942 if (!slp_node)
10943 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10945 vec_oprnds0.release ();
10946 vec_oprnds1.release ();
10947 vec_oprnds2.release ();
10948 vec_oprnds3.release ();
10950 return true;
10953 /* vectorizable_comparison.
10955 Check if STMT_INFO is a comparison expression that can be vectorized.
10956 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10957 comparison, put it in VEC_STMT, and insert it at GSI.
10959 Return true if STMT_INFO is vectorizable in this way. */
10961 static bool
10962 vectorizable_comparison (vec_info *vinfo,
10963 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10964 gimple **vec_stmt,
10965 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10967 tree lhs, rhs1, rhs2;
10968 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10969 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10970 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10971 tree new_temp;
10972 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10973 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10974 int ndts = 2;
10975 poly_uint64 nunits;
10976 int ncopies;
10977 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10978 int i;
10979 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10980 vec<tree> vec_oprnds0 = vNULL;
10981 vec<tree> vec_oprnds1 = vNULL;
10982 tree mask_type;
10983 tree mask;
10985 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10986 return false;
10988 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10989 return false;
10991 mask_type = vectype;
10992 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10994 if (slp_node)
10995 ncopies = 1;
10996 else
10997 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10999 gcc_assert (ncopies >= 1);
11000 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11001 return false;
11003 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11004 if (!stmt)
11005 return false;
11007 code = gimple_assign_rhs_code (stmt);
11009 if (TREE_CODE_CLASS (code) != tcc_comparison)
11010 return false;
11012 slp_tree slp_rhs1, slp_rhs2;
11013 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
11014 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
11015 return false;
11017 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
11018 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
11019 return false;
11021 if (vectype1 && vectype2
11022 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11023 TYPE_VECTOR_SUBPARTS (vectype2)))
11024 return false;
11026 vectype = vectype1 ? vectype1 : vectype2;
11028 /* Invariant comparison. */
11029 if (!vectype)
11031 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
11032 vectype = mask_type;
11033 else
11034 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
11035 slp_node);
11036 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
11037 return false;
11039 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
11040 return false;
11042 /* Can't compare mask and non-mask types. */
11043 if (vectype1 && vectype2
11044 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
11045 return false;
11047 /* Boolean values may have another representation in vectors
11048 and therefore we prefer bit operations over comparison for
11049 them (which also works for scalar masks). We store opcodes
11050 to use in bitop1 and bitop2. Statement is vectorized as
11051 BITOP2 (rhs1 BITOP1 rhs2) or
11052 rhs1 BITOP2 (BITOP1 rhs2)
11053 depending on bitop1 and bitop2 arity. */
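/* Illustrative example (added for exposition; names are hypothetical):
   for _Bool operands vectorized as masks, a comparison such as

     c[i] = a[i] > b[i];

   is emitted below as tmp = ~b; c = a & tmp rather than as a vector
   GT comparison, since mask representations differ between targets.  */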
11054 bool swap_p = false;
11055 if (VECTOR_BOOLEAN_TYPE_P (vectype))
11057 if (code == GT_EXPR)
11059 bitop1 = BIT_NOT_EXPR;
11060 bitop2 = BIT_AND_EXPR;
11062 else if (code == GE_EXPR)
11064 bitop1 = BIT_NOT_EXPR;
11065 bitop2 = BIT_IOR_EXPR;
11067 else if (code == LT_EXPR)
11069 bitop1 = BIT_NOT_EXPR;
11070 bitop2 = BIT_AND_EXPR;
11071 swap_p = true;
11073 else if (code == LE_EXPR)
11075 bitop1 = BIT_NOT_EXPR;
11076 bitop2 = BIT_IOR_EXPR;
11077 swap_p = true;
11079 else
11081 bitop1 = BIT_XOR_EXPR;
11082 if (code == EQ_EXPR)
11083 bitop2 = BIT_NOT_EXPR;
11087 if (!vec_stmt)
11089 if (bitop1 == NOP_EXPR)
11091 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
11092 return false;
11094 else
11096 machine_mode mode = TYPE_MODE (vectype);
11097 optab optab;
11099 optab = optab_for_tree_code (bitop1, vectype, optab_default);
11100 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11101 return false;
11103 if (bitop2 != NOP_EXPR)
11105 optab = optab_for_tree_code (bitop2, vectype, optab_default);
11106 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11107 return false;
11111 /* Put types on constant and invariant SLP children. */
11112 if (slp_node
11113 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
11114 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
11116 if (dump_enabled_p ())
11117 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11118 "incompatible vector types for invariants\n");
11119 return false;
11122 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
11123 vect_model_simple_cost (vinfo, stmt_info,
11124 ncopies * (1 + (bitop2 != NOP_EXPR)),
11125 dts, ndts, slp_node, cost_vec);
11126 return true;
11129 /* Transform. */
11131 /* Handle def. */
11132 lhs = gimple_assign_lhs (stmt);
11133 mask = vect_create_destination_var (lhs, mask_type);
11135 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
11136 rhs1, &vec_oprnds0, vectype,
11137 rhs2, &vec_oprnds1, vectype);
11138 if (swap_p)
11139 std::swap (vec_oprnds0, vec_oprnds1);
11141 /* Arguments are ready. Create the new vector stmt. */
11142 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
11144 gimple *new_stmt;
11145 vec_rhs2 = vec_oprnds1[i];
11147 new_temp = make_ssa_name (mask);
11148 if (bitop1 == NOP_EXPR)
11150 new_stmt = gimple_build_assign (new_temp, code,
11151 vec_rhs1, vec_rhs2);
11152 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11154 else
11156 if (bitop1 == BIT_NOT_EXPR)
11157 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
11158 else
11159 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
11160 vec_rhs2);
11161 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11162 if (bitop2 != NOP_EXPR)
11164 tree res = make_ssa_name (mask);
11165 if (bitop2 == BIT_NOT_EXPR)
11166 new_stmt = gimple_build_assign (res, bitop2, new_temp);
11167 else
11168 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
11169 new_temp);
11170 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11173 if (slp_node)
11174 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
11175 else
11176 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11179 if (!slp_node)
11180 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11182 vec_oprnds0.release ();
11183 vec_oprnds1.release ();
11185 return true;
11188 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
11189 can handle all live statements in the node. Otherwise return true
11190 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
11191 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
11193 static bool
11194 can_vectorize_live_stmts (vec_info *vinfo,
11195 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11196 slp_tree slp_node, slp_instance slp_node_instance,
11197 bool vec_stmt_p,
11198 stmt_vector_for_cost *cost_vec)
11200 if (slp_node)
11202 stmt_vec_info slp_stmt_info;
11203 unsigned int i;
11204 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
11206 if (STMT_VINFO_LIVE_P (slp_stmt_info)
11207 && !vectorizable_live_operation (vinfo,
11208 slp_stmt_info, gsi, slp_node,
11209 slp_node_instance, i,
11210 vec_stmt_p, cost_vec))
11211 return false;
11214 else if (STMT_VINFO_LIVE_P (stmt_info)
11215 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
11216 slp_node, slp_node_instance, -1,
11217 vec_stmt_p, cost_vec))
11218 return false;
11220 return true;
11223 /* Make sure the statement is vectorizable. */
11225 opt_result
11226 vect_analyze_stmt (vec_info *vinfo,
11227 stmt_vec_info stmt_info, bool *need_to_vectorize,
11228 slp_tree node, slp_instance node_instance,
11229 stmt_vector_for_cost *cost_vec)
11231 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11232 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
11233 bool ok;
11234 gimple_seq pattern_def_seq;
11236 if (dump_enabled_p ())
11237 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
11238 stmt_info->stmt);
11240 if (gimple_has_volatile_ops (stmt_info->stmt))
11241 return opt_result::failure_at (stmt_info->stmt,
11242 "not vectorized:"
11243 " stmt has volatile operands: %G\n",
11244 stmt_info->stmt);
11246 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11247 && node == NULL
11248 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
11250 gimple_stmt_iterator si;
11252 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
11254 stmt_vec_info pattern_def_stmt_info
11255 = vinfo->lookup_stmt (gsi_stmt (si));
11256 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
11257 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
11259 /* Analyze def stmt of STMT if it's a pattern stmt. */
11260 if (dump_enabled_p ())
11261 dump_printf_loc (MSG_NOTE, vect_location,
11262 "==> examining pattern def statement: %G",
11263 pattern_def_stmt_info->stmt);
11265 opt_result res
11266 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
11267 need_to_vectorize, node, node_instance,
11268 cost_vec);
11269 if (!res)
11270 return res;
11275 /* Skip stmts that do not need to be vectorized. In loops this is expected
11276 to include:
11277 - the COND_EXPR which is the loop exit condition
11278 - any LABEL_EXPRs in the loop
11279 - computations that are used only for array indexing or loop control.
11280 In basic blocks we only analyze statements that are a part of some SLP
11281 instance, therefore, all the statements are relevant.
11283 A pattern statement needs to be analyzed instead of the original statement
11284 if the original statement is not relevant. Otherwise, we analyze both
11285 statements. In basic blocks we are called from some SLP instance
11286 traversal; don't analyze pattern stmts instead, since the pattern stmts
11287 will already be part of an SLP instance. */
11289 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
11290 if (!STMT_VINFO_RELEVANT_P (stmt_info)
11291 && !STMT_VINFO_LIVE_P (stmt_info))
11293 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11294 && pattern_stmt_info
11295 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11296 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11298 /* Analyze PATTERN_STMT instead of the original stmt. */
11299 stmt_info = pattern_stmt_info;
11300 if (dump_enabled_p ())
11301 dump_printf_loc (MSG_NOTE, vect_location,
11302 "==> examining pattern statement: %G",
11303 stmt_info->stmt);
11305 else
11307 if (dump_enabled_p ())
11308 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
11310 return opt_result::success ();
11313 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11314 && node == NULL
11315 && pattern_stmt_info
11316 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11317 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11319 /* Analyze PATTERN_STMT too. */
11320 if (dump_enabled_p ())
11321 dump_printf_loc (MSG_NOTE, vect_location,
11322 "==> examining pattern statement: %G",
11323 pattern_stmt_info->stmt);
11325 opt_result res
11326 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
11327 node_instance, cost_vec);
11328 if (!res)
11329 return res;
11332 switch (STMT_VINFO_DEF_TYPE (stmt_info))
11334 case vect_internal_def:
11335 break;
11337 case vect_reduction_def:
11338 case vect_nested_cycle:
11339 gcc_assert (!bb_vinfo
11340 && (relevance == vect_used_in_outer
11341 || relevance == vect_used_in_outer_by_reduction
11342 || relevance == vect_used_by_reduction
11343 || relevance == vect_unused_in_scope
11344 || relevance == vect_used_only_live));
11345 break;
11347 case vect_induction_def:
11348 case vect_first_order_recurrence:
11349 gcc_assert (!bb_vinfo);
11350 break;
11352 case vect_constant_def:
11353 case vect_external_def:
11354 case vect_unknown_def_type:
11355 default:
11356 gcc_unreachable ();
11359 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11360 if (node)
11361 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
11363 if (STMT_VINFO_RELEVANT_P (stmt_info))
11365 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11366 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11367 || (call && gimple_call_lhs (call) == NULL_TREE));
11368 *need_to_vectorize = true;
11371 if (PURE_SLP_STMT (stmt_info) && !node)
11373 if (dump_enabled_p ())
11374 dump_printf_loc (MSG_NOTE, vect_location,
11375 "handled only by SLP analysis\n");
11376 return opt_result::success ();
11379 ok = true;
11380 if (!bb_vinfo
11381 && (STMT_VINFO_RELEVANT_P (stmt_info)
11382 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11383 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11384 -mveclibabi= takes preference over library functions with
11385 the simd attribute. */
11386 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11387 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11388 cost_vec)
11389 || vectorizable_conversion (vinfo, stmt_info,
11390 NULL, NULL, node, cost_vec)
11391 || vectorizable_operation (vinfo, stmt_info,
11392 NULL, NULL, node, cost_vec)
11393 || vectorizable_assignment (vinfo, stmt_info,
11394 NULL, NULL, node, cost_vec)
11395 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11396 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11397 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11398 node, node_instance, cost_vec)
11399 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11400 NULL, node, cost_vec)
11401 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11402 || vectorizable_condition (vinfo, stmt_info,
11403 NULL, NULL, node, cost_vec)
11404 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11405 cost_vec)
11406 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11407 stmt_info, NULL, node)
11408 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
11409 stmt_info, NULL, node, cost_vec));
11410 else
11412 if (bb_vinfo)
11413 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11414 || vectorizable_simd_clone_call (vinfo, stmt_info,
11415 NULL, NULL, node, cost_vec)
11416 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11417 cost_vec)
11418 || vectorizable_shift (vinfo, stmt_info,
11419 NULL, NULL, node, cost_vec)
11420 || vectorizable_operation (vinfo, stmt_info,
11421 NULL, NULL, node, cost_vec)
11422 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11423 cost_vec)
11424 || vectorizable_load (vinfo, stmt_info,
11425 NULL, NULL, node, cost_vec)
11426 || vectorizable_store (vinfo, stmt_info,
11427 NULL, NULL, node, cost_vec)
11428 || vectorizable_condition (vinfo, stmt_info,
11429 NULL, NULL, node, cost_vec)
11430 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11431 cost_vec)
11432 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
11435 if (node)
11436 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11438 if (!ok)
11439 return opt_result::failure_at (stmt_info->stmt,
11440 "not vectorized:"
11441 " relevant stmt not supported: %G",
11442 stmt_info->stmt);
11444 /* Stmts that are (also) "live" (i.e. used outside the loop)
11445 need extra handling, except for vectorizable reductions. */
11446 if (!bb_vinfo
11447 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11448 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11449 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11450 stmt_info, NULL, node, node_instance,
11451 false, cost_vec))
11452 return opt_result::failure_at (stmt_info->stmt,
11453 "not vectorized:"
11454 " live stmt not supported: %G",
11455 stmt_info->stmt);
11457 return opt_result::success ();
11461 /* Function vect_transform_stmt.
11463 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11465 bool
11466 vect_transform_stmt (vec_info *vinfo,
11467 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11468 slp_tree slp_node, slp_instance slp_node_instance)
11470 bool is_store = false;
11471 gimple *vec_stmt = NULL;
11472 bool done;
11474 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11476 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11477 if (slp_node)
11478 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11480 switch (STMT_VINFO_TYPE (stmt_info))
11482 case type_demotion_vec_info_type:
11483 case type_promotion_vec_info_type:
11484 case type_conversion_vec_info_type:
11485 done = vectorizable_conversion (vinfo, stmt_info,
11486 gsi, &vec_stmt, slp_node, NULL);
11487 gcc_assert (done);
11488 break;
11490 case induc_vec_info_type:
11491 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11492 stmt_info, &vec_stmt, slp_node,
11493 NULL);
11494 gcc_assert (done);
11495 break;
11497 case shift_vec_info_type:
11498 done = vectorizable_shift (vinfo, stmt_info,
11499 gsi, &vec_stmt, slp_node, NULL);
11500 gcc_assert (done);
11501 break;
11503 case op_vec_info_type:
11504 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11505 NULL);
11506 gcc_assert (done);
11507 break;
11509 case assignment_vec_info_type:
11510 done = vectorizable_assignment (vinfo, stmt_info,
11511 gsi, &vec_stmt, slp_node, NULL);
11512 gcc_assert (done);
11513 break;
11515 case load_vec_info_type:
11516 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11517 NULL);
11518 gcc_assert (done);
11519 break;
11521 case store_vec_info_type:
11522 done = vectorizable_store (vinfo, stmt_info,
11523 gsi, &vec_stmt, slp_node, NULL);
11524 gcc_assert (done);
11525 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11527 /* In case of interleaving, the whole chain is vectorized when the
11528 last store in the chain is reached. Store stmts before the last
11529 one are skipped, and their vec_stmt_info shouldn't be freed
11530 meanwhile. */
11531 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11532 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11533 is_store = true;
11535 else
11536 is_store = true;
11537 break;
11539 case condition_vec_info_type:
11540 done = vectorizable_condition (vinfo, stmt_info,
11541 gsi, &vec_stmt, slp_node, NULL);
11542 gcc_assert (done);
11543 break;
11545 case comparison_vec_info_type:
11546 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11547 slp_node, NULL);
11548 gcc_assert (done);
11549 break;
11551 case call_vec_info_type:
11552 done = vectorizable_call (vinfo, stmt_info,
11553 gsi, &vec_stmt, slp_node, NULL);
11554 break;
11556 case call_simd_clone_vec_info_type:
11557 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11558 slp_node, NULL);
11559 break;
11561 case reduc_vec_info_type:
11562 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11563 gsi, &vec_stmt, slp_node);
11564 gcc_assert (done);
11565 break;
11567 case cycle_phi_info_type:
11568 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11569 &vec_stmt, slp_node, slp_node_instance);
11570 gcc_assert (done);
11571 break;
11573 case lc_phi_info_type:
11574 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11575 stmt_info, &vec_stmt, slp_node);
11576 gcc_assert (done);
11577 break;
11579 case recurr_info_type:
11580 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
11581 stmt_info, &vec_stmt, slp_node, NULL);
11582 gcc_assert (done);
11583 break;
11585 case phi_info_type:
11586 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11587 gcc_assert (done);
11588 break;
11590 default:
11591 if (!STMT_VINFO_LIVE_P (stmt_info))
11593 if (dump_enabled_p ())
11594 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11595 "stmt not supported.\n");
11596 gcc_unreachable ();
11598 done = true;
11601 if (!slp_node && vec_stmt)
11602 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11604 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11606 /* Handle stmts whose DEF is used outside the loop-nest that is
11607 being vectorized. */
11608 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11609 slp_node_instance, true, NULL);
11610 gcc_assert (done);
11613 if (slp_node)
11614 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11616 return is_store;
11620 /* Remove a group of stores (for SLP or interleaving), free their
11621 stmt_vec_info. */
11623 void
11624 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11626 stmt_vec_info next_stmt_info = first_stmt_info;
11628 while (next_stmt_info)
11630 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11631 next_stmt_info = vect_orig_stmt (next_stmt_info);
11632 /* Free the attached stmt_vec_info and remove the stmt. */
11633 vinfo->remove_stmt (next_stmt_info);
11634 next_stmt_info = tmp;
11638 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11639 elements of type SCALAR_TYPE, or null if the target doesn't support
11640 such a type.
11642 If NUNITS is zero, return a vector type that contains elements of
11643 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11645 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11646 for this vectorization region and want to "autodetect" the best choice.
11647 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11648 and we want the new type to be interoperable with it. PREVAILING_MODE
11649 in this case can be a scalar integer mode or a vector mode; when it
11650 is a vector mode, the function acts like a tree-level version of
11651 related_vector_mode. */
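/* Usage sketch (added for exposition; INT_TYPE is a stand-in for a
   32-bit integer scalar type and the resulting mode is target
   dependent):

     get_related_vectype_for_scalar_type (VOIDmode, int_type, 0);

   autodetects the target's preferred SIMD width, whereas passing an
   already-chosen 128-bit vector mode such as V16QImode instead of
   VOIDmode would typically yield a V4SImode vector type here.  */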
11653 tree
11654 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11655 tree scalar_type, poly_uint64 nunits)
11657 tree orig_scalar_type = scalar_type;
11658 scalar_mode inner_mode;
11659 machine_mode simd_mode;
11660 tree vectype;
11662 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11663 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11664 return NULL_TREE;
11666 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11668 /* Interoperability between modes requires one to be a constant multiple
11669 of the other, so that the number of vectors required for each operation
11670 is a compile-time constant. */
11671 if (prevailing_mode != VOIDmode
11672 && !constant_multiple_p (nunits * nbytes,
11673 GET_MODE_SIZE (prevailing_mode))
11674 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
11675 nunits * nbytes))
11676 return NULL_TREE;
11678 /* For vector types of elements whose mode precision doesn't
11679 match their type's precision we use an element type of mode
11680 precision. The vectorization routines will have to make sure
11681 they support the proper result truncation/extension.
11682 We also make sure to build vector types with INTEGER_TYPE
11683 component type only. */
11684 if (INTEGRAL_TYPE_P (scalar_type)
11685 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11686 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11687 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11688 TYPE_UNSIGNED (scalar_type));
11690 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11691 When the component mode passes the above test simply use a type
11692 corresponding to that mode. The theory is that any use that
11693 would cause problems with this will disable vectorization anyway. */
11694 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11695 && !INTEGRAL_TYPE_P (scalar_type))
11696 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11698 /* We can't build a vector type of elements with alignment bigger than
11699 their size. */
11700 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11701 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11702 TYPE_UNSIGNED (scalar_type));
11704 /* If we fell back to using the mode, fail if there was
11705 no scalar type for it. */
11706 if (scalar_type == NULL_TREE)
11707 return NULL_TREE;
11709 /* If no prevailing mode was supplied, use the mode the target prefers.
11710 Otherwise lookup a vector mode based on the prevailing mode. */
11711 if (prevailing_mode == VOIDmode)
11713 gcc_assert (known_eq (nunits, 0U));
11714 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11715 if (SCALAR_INT_MODE_P (simd_mode))
11717 /* Traditional behavior is not to take the integer mode
11718 literally, but simply to use it as a way of determining
11719 the vector size. It is up to mode_for_vector to decide
11720 what the TYPE_MODE should be.
11722 Note that nunits == 1 is allowed in order to support single
11723 element vector types. */
11724 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11725 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11726 return NULL_TREE;
11729 else if (SCALAR_INT_MODE_P (prevailing_mode)
11730 || !related_vector_mode (prevailing_mode,
11731 inner_mode, nunits).exists (&simd_mode))
11733 /* Fall back to using mode_for_vector, mostly in the hope of being
11734 able to use an integer mode. */
11735 if (known_eq (nunits, 0U)
11736 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11737 return NULL_TREE;
11739 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11740 return NULL_TREE;
11743 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11745 /* In cases where the mode was chosen by mode_for_vector, check that
11746 the target actually supports the chosen mode, or that it at least
11747 allows the vector mode to be replaced by a like-sized integer. */
11748 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11749 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11750 return NULL_TREE;
11752 /* Re-attach the address-space qualifier if we canonicalized the scalar
11753 type. */
11754 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11755 return build_qualified_type
11756 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11758 return vectype;
11761 /* Function get_vectype_for_scalar_type.
11763 Returns the vector type corresponding to SCALAR_TYPE as supported
11764 by the target. If GROUP_SIZE is nonzero and we're performing BB
11765 vectorization, make sure that the number of elements in the vector
11766 is no bigger than GROUP_SIZE. */
11768 tree
11769 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11770 unsigned int group_size)
11772 /* For BB vectorization, we should always have a group size once we've
11773 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11774 are tentative requests during things like early data reference
11775 analysis and pattern recognition. */
11776 if (is_a <bb_vec_info> (vinfo))
11777 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11778 else
11779 group_size = 0;
11781 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11782 scalar_type);
11783 if (vectype && vinfo->vector_mode == VOIDmode)
11784 vinfo->vector_mode = TYPE_MODE (vectype);
11786 /* Register the natural choice of vector type, before the group size
11787 has been applied. */
11788 if (vectype)
11789 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11791 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11792 try again with an explicit number of elements. */
11793 if (vectype
11794 && group_size
11795 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11797 /* Start with the biggest number of units that fits within
11798 GROUP_SIZE and halve it until we find a valid vector type.
11799 Usually either the first attempt will succeed or all will
11800 fail (in the latter case because GROUP_SIZE is too small
11801 for the target), but it's possible that a target could have
11802 a hole between supported vector types.
11804 If GROUP_SIZE is not a power of 2, this has the effect of
11805 trying the largest power of 2 that fits within the group,
11806 even though the group is not a multiple of that vector size.
11807 The BB vectorizer will then try to carve up the group into
11808 smaller pieces. */
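/* Worked example (added for exposition): for GROUP_SIZE == 6 the loop
   below starts from 1 << floor_log2 (6) == 4 units and, if no suitable
   4-element vector type exists for SCALAR_TYPE, retries with 2 units
   before giving up.  */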
11809 unsigned int nunits = 1 << floor_log2 (group_size);
11812 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11813 scalar_type, nunits);
11814 nunits /= 2;
11816 while (nunits > 1 && !vectype);
11819 return vectype;
11822 /* Return the vector type corresponding to SCALAR_TYPE as supported
11823 by the target. NODE, if nonnull, is the SLP tree node that will
11824 use the returned vector type. */
11826 tree
11827 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11829 unsigned int group_size = 0;
11830 if (node)
11831 group_size = SLP_TREE_LANES (node);
11832 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11835 /* Function get_mask_type_for_scalar_type.
11837 Returns the mask type corresponding to a result of comparison
11838 of vectors of specified SCALAR_TYPE as supported by target.
11839 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11840 make sure that the number of elements in the vector is no bigger
11841 than GROUP_SIZE. */
11843 tree
11844 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11845 unsigned int group_size)
11847 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11849 if (!vectype)
11850 return NULL;
11852 return truth_type_for (vectype);
11855 /* Function get_same_sized_vectype
11857 Returns a vector type corresponding to SCALAR_TYPE of size
11858 VECTOR_TYPE if supported by the target. */
11860 tree
11861 get_same_sized_vectype (tree scalar_type, tree vector_type)
11863 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11864 return truth_type_for (vector_type);
11866 poly_uint64 nunits;
11867 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11868 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11869 return NULL_TREE;
11871 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11872 scalar_type, nunits);
11875 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11876 would not change the chosen vector modes. */
11878 bool
11879 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11881 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11882 i != vinfo->used_vector_modes.end (); ++i)
11883 if (!VECTOR_MODE_P (*i)
11884 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11885 return false;
11886 return true;
11889 /* Function vect_is_simple_use.
11891 Input:
11892 VINFO - the vect info of the loop or basic block that is being vectorized.
11893 OPERAND - operand in the loop or bb.
11894 Output:
11895 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11896 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11897 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11898 the definition could be anywhere in the function
11899 DT - the type of definition
11901 Returns whether a stmt with OPERAND can be vectorized.
11902 For loops, supportable operands are constants, loop invariants, and operands
11903 that are defined by the current iteration of the loop. Unsupportable
11904 operands are those that are defined by a previous iteration of the loop (as
11905 is the case in reduction/induction computations).
11906 For basic blocks, supportable operands are constants and bb invariants.
11907 For now, operands defined outside the basic block are not supported. */
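/* Typical call pattern (added for exposition; OP and the early-return
   logic are hypothetical caller code, not taken from this file):

     enum vect_def_type dt;
     stmt_vec_info def_info;
     gimple *def_stmt;
     if (!vect_is_simple_use (op, vinfo, &dt, &def_info, &def_stmt))
       return false;
     if (dt == vect_constant_def || dt == vect_external_def)
       ... operand is invariant in the vectorized region ...

   A false return corresponds to vect_unknown_def_type below, e.g. an
   operand that is neither a constant, an invariant nor an SSA_NAME.  */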
11909 bool
11910 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11911 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11913 if (def_stmt_info_out)
11914 *def_stmt_info_out = NULL;
11915 if (def_stmt_out)
11916 *def_stmt_out = NULL;
11917 *dt = vect_unknown_def_type;
11919 if (dump_enabled_p ())
11921 dump_printf_loc (MSG_NOTE, vect_location,
11922 "vect_is_simple_use: operand ");
11923 if (TREE_CODE (operand) == SSA_NAME
11924 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11925 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11926 else
11927 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11930 if (CONSTANT_CLASS_P (operand))
11931 *dt = vect_constant_def;
11932 else if (is_gimple_min_invariant (operand))
11933 *dt = vect_external_def;
11934 else if (TREE_CODE (operand) != SSA_NAME)
11935 *dt = vect_unknown_def_type;
11936 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11937 *dt = vect_external_def;
11938 else
11940 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11941 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11942 if (!stmt_vinfo)
11943 *dt = vect_external_def;
11944 else
11946 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11947 def_stmt = stmt_vinfo->stmt;
11948 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11949 if (def_stmt_info_out)
11950 *def_stmt_info_out = stmt_vinfo;
11952 if (def_stmt_out)
11953 *def_stmt_out = def_stmt;
11956 if (dump_enabled_p ())
11958 dump_printf (MSG_NOTE, ", type of def: ");
11959 switch (*dt)
11961 case vect_uninitialized_def:
11962 dump_printf (MSG_NOTE, "uninitialized\n");
11963 break;
11964 case vect_constant_def:
11965 dump_printf (MSG_NOTE, "constant\n");
11966 break;
11967 case vect_external_def:
11968 dump_printf (MSG_NOTE, "external\n");
11969 break;
11970 case vect_internal_def:
11971 dump_printf (MSG_NOTE, "internal\n");
11972 break;
11973 case vect_induction_def:
11974 dump_printf (MSG_NOTE, "induction\n");
11975 break;
11976 case vect_reduction_def:
11977 dump_printf (MSG_NOTE, "reduction\n");
11978 break;
11979 case vect_double_reduction_def:
11980 dump_printf (MSG_NOTE, "double reduction\n");
11981 break;
11982 case vect_nested_cycle:
11983 dump_printf (MSG_NOTE, "nested cycle\n");
11984 break;
11985 case vect_first_order_recurrence:
11986 dump_printf (MSG_NOTE, "first order recurrence\n");
11987 break;
11988 case vect_unknown_def_type:
11989 dump_printf (MSG_NOTE, "unknown\n");
11990 break;
11994 if (*dt == vect_unknown_def_type)
11996 if (dump_enabled_p ())
11997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11998 "Unsupported pattern.\n");
11999 return false;
12002 return true;
12005 /* Function vect_is_simple_use.
12007 Same as vect_is_simple_use but also determines the vector operand
12008 type of OPERAND and stores it to *VECTYPE. If the definition of
12009 OPERAND is vect_uninitialized_def, vect_constant_def or
12010 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
12011 is responsible for computing the best-suited vector type for the
12012 scalar operand. */
12014 bool
12015 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
12016 tree *vectype, stmt_vec_info *def_stmt_info_out,
12017 gimple **def_stmt_out)
12019 stmt_vec_info def_stmt_info;
12020 gimple *def_stmt;
12021 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
12022 return false;
12024 if (def_stmt_out)
12025 *def_stmt_out = def_stmt;
12026 if (def_stmt_info_out)
12027 *def_stmt_info_out = def_stmt_info;
12029 /* Now get a vector type if the def is internal, otherwise supply
12030 NULL_TREE and leave it up to the caller to figure out a proper
12031 type for the use stmt. */
12032 if (*dt == vect_internal_def
12033 || *dt == vect_induction_def
12034 || *dt == vect_reduction_def
12035 || *dt == vect_double_reduction_def
12036 || *dt == vect_nested_cycle
12037 || *dt == vect_first_order_recurrence)
12039 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
12040 gcc_assert (*vectype != NULL_TREE);
12041 if (dump_enabled_p ())
12042 dump_printf_loc (MSG_NOTE, vect_location,
12043 "vect_is_simple_use: vectype %T\n", *vectype);
12045 else if (*dt == vect_uninitialized_def
12046 || *dt == vect_constant_def
12047 || *dt == vect_external_def)
12048 *vectype = NULL_TREE;
12049 else
12050 gcc_unreachable ();
12052 return true;
12055 /* Function vect_is_simple_use.
12057 Same as vect_is_simple_use but determines the operand by operand
12058 position OPERAND from either STMT or SLP_NODE, filling in *OP
12059 and *SLP_DEF (when SLP_NODE is not NULL). */
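/* Illustrative operand numbering (added for exposition): for an
   assignment whose rhs is the COND_EXPR  a < b ? x : y  with an embedded
   comparison, OPERAND 0 and 1 select a and b, OPERAND 2 selects x and
   OPERAND 3 selects y, matching the special COND_EXPR handling below;
   for plain assignments OPERAND simply indexes the rhs operands.  */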
12061 bool
12062 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
12063 unsigned operand, tree *op, slp_tree *slp_def,
12064 enum vect_def_type *dt,
12065 tree *vectype, stmt_vec_info *def_stmt_info_out)
12067 if (slp_node)
12069 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
12070 *slp_def = child;
12071 *vectype = SLP_TREE_VECTYPE (child);
12072 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
12074 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
12075 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
12077 else
12079 if (def_stmt_info_out)
12080 *def_stmt_info_out = NULL;
12081 *op = SLP_TREE_SCALAR_OPS (child)[0];
12082 *dt = SLP_TREE_DEF_TYPE (child);
12083 return true;
12086 else
12088 *slp_def = NULL;
12089 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
12091 if (gimple_assign_rhs_code (ass) == COND_EXPR
12092 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
12094 if (operand < 2)
12095 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
12096 else
12097 *op = gimple_op (ass, operand);
12099 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
12100 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
12101 else
12102 *op = gimple_op (ass, operand + 1);
12104 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
12105 *op = gimple_call_arg (call, operand);
12106 else
12107 gcc_unreachable ();
12108 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
12112 /* If OP is not NULL and is external or constant, update its vector
12113 type with VECTYPE. Returns true if successful or false if not,
12114 for example when conflicting vector types are present. */
12116 bool
12117 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
12119 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
12120 return true;
12121 if (SLP_TREE_VECTYPE (op))
12122 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
12123 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P; those
12124 should be handled by patterns. Allow vect_constant_def for now. */
12125 if (VECTOR_BOOLEAN_TYPE_P (vectype)
12126 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
12127 return false;
12128 SLP_TREE_VECTYPE (op) = vectype;
12129 return true;
12132 /* Function supportable_widening_operation
12134 Check whether an operation represented by the code CODE is a
12135 widening operation that is supported by the target platform in
12136 vector form (i.e., when operating on arguments of type VECTYPE_IN
12137 producing a result of type VECTYPE_OUT).
12139 Widening operations we currently support are NOP (CONVERT), FLOAT,
12140 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
12141 are supported by the target platform either directly (via vector
12142 tree-codes), or via target builtins.
12144 Output:
12145 - CODE1 and CODE2 are codes of vector operations to be used when
12146 vectorizing the operation, if available.
12147 - MULTI_STEP_CVT determines the number of required intermediate steps in
12148 case of multi-step conversion (like char->short->int - in that case
12149 MULTI_STEP_CVT will be 1).
12150 - INTERM_TYPES contains the intermediate type required to perform the
12151 widening operation (short in the above example). */
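/* Concrete illustration (added for exposition; the exact optabs depend
   on the target): widening a char multiplication to short normally maps
   straight to VEC_WIDEN_MULT_LO_EXPR / VEC_WIDEN_MULT_HI_EXPR with
   MULTI_STEP_CVT left at 0, whereas a char -> int conversion needs one
   intermediate short step, so MULTI_STEP_CVT becomes 1 and INTERM_TYPES
   holds the short vector type.  */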
12153 bool
12154 supportable_widening_operation (vec_info *vinfo,
12155 enum tree_code code, stmt_vec_info stmt_info,
12156 tree vectype_out, tree vectype_in,
12157 enum tree_code *code1, enum tree_code *code2,
12158 int *multi_step_cvt,
12159 vec<tree> *interm_types)
12161 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
12162 class loop *vect_loop = NULL;
12163 machine_mode vec_mode;
12164 enum insn_code icode1, icode2;
12165 optab optab1, optab2;
12166 tree vectype = vectype_in;
12167 tree wide_vectype = vectype_out;
12168 enum tree_code c1, c2;
12169 int i;
12170 tree prev_type, intermediate_type;
12171 machine_mode intermediate_mode, prev_mode;
12172 optab optab3, optab4;
12174 *multi_step_cvt = 0;
12175 if (loop_info)
12176 vect_loop = LOOP_VINFO_LOOP (loop_info);
12178 switch (code)
12180 case WIDEN_MULT_EXPR:
12181 /* The result of a vectorized widening operation usually requires
12182 two vectors (because the widened results do not fit into one vector).
12183 The generated vector results would normally be expected to be
12184 generated in the same order as in the original scalar computation,
12185 i.e. if 8 results are generated in each vector iteration, they are
12186 to be organized as follows:
12187 vect1: [res1,res2,res3,res4],
12188 vect2: [res5,res6,res7,res8].
12190 However, in the special case that the result of the widening
12191 operation is used in a reduction computation only, the order doesn't
12192 matter (because when vectorizing a reduction we change the order of
12193 the computation). Some targets can take advantage of this and
12194 generate more efficient code. For example, targets like Altivec,
12195 that support widen_mult using a sequence of {mult_even,mult_odd}
12196 generate the following vectors:
12197 vect1: [res1,res3,res5,res7],
12198 vect2: [res2,res4,res6,res8].
12200 When vectorizing outer-loops, we execute the inner-loop sequentially
12201 (each vectorized inner-loop iteration contributes to VF outer-loop
12202 iterations in parallel). We therefore don't allow changing the
12203 order of the computation in the inner-loop during outer-loop
12204 vectorization. */
12205 /* TODO: Another case in which order doesn't *really* matter is when we
12206 widen and then contract again, e.g. (short)((int)x * y >> 8).
12207 Normally, pack_trunc performs an even/odd permute, whereas the
12208 repack from an even/odd expansion would be an interleave, which
12209 would be significantly simpler for e.g. AVX2. */
12210 /* In any case, in order to avoid duplicating the code below, recurse
12211 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
12212 are properly set up for the caller. If we fail, we'll continue with
12213 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
12214 if (vect_loop
12215 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
12216 && !nested_in_vect_loop_p (vect_loop, stmt_info)
12217 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
12218 stmt_info, vectype_out,
12219 vectype_in, code1, code2,
12220 multi_step_cvt, interm_types))
12222 /* Elements in a vector with the vect_used_by_reduction property cannot
12223 be reordered if the use chain with this property does not have the
12224 same operation. One such example is s += a * b, where elements
12225 in a and b cannot be reordered. Here we check if the vector defined
12226 by STMT is only directly used in the reduction statement. */
12227 tree lhs = gimple_assign_lhs (stmt_info->stmt);
12228 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
12229 if (use_stmt_info
12230 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
12231 return true;
12233 c1 = VEC_WIDEN_MULT_LO_EXPR;
12234 c2 = VEC_WIDEN_MULT_HI_EXPR;
12235 break;
12237 case DOT_PROD_EXPR:
12238 c1 = DOT_PROD_EXPR;
12239 c2 = DOT_PROD_EXPR;
12240 break;
12242 case SAD_EXPR:
12243 c1 = SAD_EXPR;
12244 c2 = SAD_EXPR;
12245 break;
12247 case VEC_WIDEN_MULT_EVEN_EXPR:
12248 /* Support the recursion induced just above. */
12249 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
12250 c2 = VEC_WIDEN_MULT_ODD_EXPR;
12251 break;
12253 case WIDEN_LSHIFT_EXPR:
12254 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
12255 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
12256 break;
12258 case WIDEN_PLUS_EXPR:
12259 c1 = VEC_WIDEN_PLUS_LO_EXPR;
12260 c2 = VEC_WIDEN_PLUS_HI_EXPR;
12261 break;
12263 case WIDEN_MINUS_EXPR:
12264 c1 = VEC_WIDEN_MINUS_LO_EXPR;
12265 c2 = VEC_WIDEN_MINUS_HI_EXPR;
12266 break;
12268 CASE_CONVERT:
12269 c1 = VEC_UNPACK_LO_EXPR;
12270 c2 = VEC_UNPACK_HI_EXPR;
12271 break;
12273 case FLOAT_EXPR:
12274 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
12275 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
12276 break;
12278 case FIX_TRUNC_EXPR:
12279 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
12280 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
12281 break;
12283 default:
12284 gcc_unreachable ();
12287 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
12288 std::swap (c1, c2);
12290 if (code == FIX_TRUNC_EXPR)
12292 /* The signedness is determined from output operand. */
12293 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12294 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
12296 else if (CONVERT_EXPR_CODE_P (code)
12297 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
12298 && VECTOR_BOOLEAN_TYPE_P (vectype)
12299 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
12300 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12302 /* If the input and result modes are the same, a different optab
12303 is needed where we pass in the number of units in vectype. */
12304 optab1 = vec_unpacks_sbool_lo_optab;
12305 optab2 = vec_unpacks_sbool_hi_optab;
12307 else
12309 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12310 optab2 = optab_for_tree_code (c2, vectype, optab_default);
12313 if (!optab1 || !optab2)
12314 return false;
12316 vec_mode = TYPE_MODE (vectype);
12317 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
12318 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
12319 return false;
12321 *code1 = c1;
12322 *code2 = c2;
12324 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12325 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12327 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12328 return true;
12329 /* For scalar masks we may have different boolean
12330 vector types having the same QImode. Thus we
12331 add an additional check on the number of elements. */
12332 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
12333 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12334 return true;
12337 /* Check if it's a multi-step conversion that can be done using intermediate
12338 types. */
12340 prev_type = vectype;
12341 prev_mode = vec_mode;
12343 if (!CONVERT_EXPR_CODE_P (code))
12344 return false;
12346 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12347 intermediate steps in the promotion sequence. We try
12348 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
12349 not. */
12350 interm_types->create (MAX_INTERM_CVT_STEPS);
12351 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12353 intermediate_mode = insn_data[icode1].operand[0].mode;
12354 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12355 intermediate_type
12356 = vect_halve_mask_nunits (prev_type, intermediate_mode);
12357 else if (VECTOR_MODE_P (intermediate_mode))
12359 tree intermediate_element_type
12360 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
12361 TYPE_UNSIGNED (prev_type));
12362 intermediate_type
12363 = build_vector_type_for_mode (intermediate_element_type,
12364 intermediate_mode);
12366 else
12367 intermediate_type
12368 = lang_hooks.types.type_for_mode (intermediate_mode,
12369 TYPE_UNSIGNED (prev_type));
12371 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12372 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12373 && intermediate_mode == prev_mode
12374 && SCALAR_INT_MODE_P (prev_mode))
12376 /* If the input and result modes are the same, a different optab
12377 is needed where we pass in the number of units in vectype. */
12378 optab3 = vec_unpacks_sbool_lo_optab;
12379 optab4 = vec_unpacks_sbool_hi_optab;
12381 else
12383 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12384 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12387 if (!optab3 || !optab4
12388 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12389 || insn_data[icode1].operand[0].mode != intermediate_mode
12390 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12391 || insn_data[icode2].operand[0].mode != intermediate_mode
12392 || ((icode1 = optab_handler (optab3, intermediate_mode))
12393 == CODE_FOR_nothing)
12394 || ((icode2 = optab_handler (optab4, intermediate_mode))
12395 == CODE_FOR_nothing))
12396 break;
12398 interm_types->quick_push (intermediate_type);
12399 (*multi_step_cvt)++;
12401 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12402 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12404 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12405 return true;
12406 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12407 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12408 return true;
12411 prev_type = intermediate_type;
12412 prev_mode = intermediate_mode;
12415 interm_types->release ();
12416 return false;
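/* A worked illustration of the multi-step widening path above, assuming
   a hypothetical target with 128-bit vector modes (V16QI, V8HI, V4SI)
   and the usual vec_unpack patterns: widening signed char to int gives
   vectype = V16QI and wide_vectype = V4SI. The first
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR pair only reaches V8HI, so one
   intermediate step is recorded:

       V16QI --unpack lo/hi--> 2 x V8HI --unpack lo/hi--> 4 x V4SI

   and the function returns with *multi_step_cvt = 1 and
   INTERM_TYPES = { V8HI }. */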
12420 /* Function supportable_narrowing_operation
12422 Check whether an operation represented by the code CODE is a
12423 narrowing operation that is supported by the target platform in
12424 vector form (i.e., when operating on arguments of type VECTYPE_IN
12425 and producing a result of type VECTYPE_OUT).
12427 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12428 and FLOAT. This function checks if these operations are supported by
12429 the target platform directly via vector tree-codes.
12431 Output:
12432 - CODE1 is the code of a vector operation to be used when
12433 vectorizing the operation, if available.
12434 - MULTI_STEP_CVT determines the number of required intermediate steps in
12435 case of multi-step conversion (like int->short->char - in that case
12436 MULTI_STEP_CVT will be 1).
12437 - INTERM_TYPES contains the intermediate type required to perform the
12438 narrowing operation (short in the above example). */
12440 bool
12441 supportable_narrowing_operation (enum tree_code code,
12442 tree vectype_out, tree vectype_in,
12443 enum tree_code *code1, int *multi_step_cvt,
12444 vec<tree> *interm_types)
12446 machine_mode vec_mode;
12447 enum insn_code icode1;
12448 optab optab1, interm_optab;
12449 tree vectype = vectype_in;
12450 tree narrow_vectype = vectype_out;
12451 enum tree_code c1;
12452 tree intermediate_type, prev_type;
12453 machine_mode intermediate_mode, prev_mode;
12454 int i;
12455 unsigned HOST_WIDE_INT n_elts;
12456 bool uns;
12458 *multi_step_cvt = 0;
12459 switch (code)
12461 CASE_CONVERT:
12462 c1 = VEC_PACK_TRUNC_EXPR;
12463 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12464 && VECTOR_BOOLEAN_TYPE_P (vectype)
12465 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
12466 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
12467 && n_elts < BITS_PER_UNIT)
12468 optab1 = vec_pack_sbool_trunc_optab;
12469 else
12470 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12471 break;
12473 case FIX_TRUNC_EXPR:
12474 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12475 /* The signedness is determined from the output operand. */
12476 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12477 break;
12479 case FLOAT_EXPR:
12480 c1 = VEC_PACK_FLOAT_EXPR;
12481 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12482 break;
12484 default:
12485 gcc_unreachable ();
12488 if (!optab1)
12489 return false;
12491 vec_mode = TYPE_MODE (vectype);
12492 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12493 return false;
12495 *code1 = c1;
12497 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12499 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12500 return true;
12501 /* For scalar masks we may have different boolean
12502 vector types sharing the same QImode. Thus we
12503 additionally check the number of elements. */
12504 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12505 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12506 return true;
12509 if (code == FLOAT_EXPR)
12510 return false;
12512 /* Check if it's a multi-step conversion that can be done using intermediate
12513 types. */
12514 prev_mode = vec_mode;
12515 prev_type = vectype;
12516 if (code == FIX_TRUNC_EXPR)
12517 uns = TYPE_UNSIGNED (vectype_out);
12518 else
12519 uns = TYPE_UNSIGNED (vectype);
12521 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12522 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12523 costly than signed. */
12524 if (code == FIX_TRUNC_EXPR && uns)
12526 enum insn_code icode2;
12528 intermediate_type
12529 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12530 interm_optab
12531 = optab_for_tree_code (c1, intermediate_type, optab_default);
12532 if (interm_optab != unknown_optab
12533 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12534 && insn_data[icode1].operand[0].mode
12535 == insn_data[icode2].operand[0].mode)
12537 uns = false;
12538 optab1 = interm_optab;
12539 icode1 = icode2;
12543 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12544 intermediate steps in the narrowing sequence. We try
12545 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12546 interm_types->create (MAX_INTERM_CVT_STEPS);
12547 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12549 intermediate_mode = insn_data[icode1].operand[0].mode;
12550 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12551 intermediate_type
12552 = vect_double_mask_nunits (prev_type, intermediate_mode);
12553 else
12554 intermediate_type
12555 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12556 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12557 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12558 && SCALAR_INT_MODE_P (prev_mode)
12559 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
12560 && n_elts < BITS_PER_UNIT)
12561 interm_optab = vec_pack_sbool_trunc_optab;
12562 else
12563 interm_optab
12564 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12565 optab_default);
12566 if (!interm_optab
12567 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12568 || insn_data[icode1].operand[0].mode != intermediate_mode
12569 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12570 == CODE_FOR_nothing))
12571 break;
12573 interm_types->quick_push (intermediate_type);
12574 (*multi_step_cvt)++;
12576 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12578 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12579 return true;
12580 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12581 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12582 return true;
12585 prev_mode = intermediate_mode;
12586 prev_type = intermediate_type;
12587 optab1 = interm_optab;
12590 interm_types->release ();
12591 return false;
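/* A worked illustration of the narrowing path, under the same assumption
   of a 128-bit vector target (V4SI, V8HI, V16QI) with vec_pack_trunc
   patterns: narrowing int to signed char gives vectype_in = V4SI and
   vectype_out = V16QI. A single VEC_PACK_TRUNC_EXPR on two V4SI inputs
   only reaches V8HI, so one intermediate step is recorded:

       2 x V4SI --pack--> V8HI, 2 x V8HI --pack--> V16QI

   and the function returns *code1 = VEC_PACK_TRUNC_EXPR,
   *multi_step_cvt = 1 and INTERM_TYPES = { V8HI }, matching the
   int->short->char example in the comment above. */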
12594 /* Generate and return a vector mask of MASK_TYPE such that
12595 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12596 Add the statements to SEQ. */
12598 tree
12599 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12600 tree end_index, const char *name)
12602 tree cmp_type = TREE_TYPE (start_index);
12603 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12604 cmp_type, mask_type,
12605 OPTIMIZE_FOR_SPEED));
12606 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12607 start_index, end_index,
12608 build_zero_cst (mask_type));
12609 tree tmp;
12610 if (name)
12611 tmp = make_temp_ssa_name (mask_type, NULL, name);
12612 else
12613 tmp = make_ssa_name (mask_type);
12614 gimple_call_set_lhs (call, tmp);
12615 gimple_seq_add_stmt (seq, call);
12616 return tmp;
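/* For example, with an 8-element MASK_TYPE, START_INDEX = 5 and
   END_INDEX = 8, the IFN_WHILE_ULT call built above yields the mask

       { 1, 1, 1, 0, 0, 0, 0, 0 }

   since 5, 6 and 7 are below END_INDEX but 8 is not (assuming the
   indices are small enough that the unsigned comparison cannot wrap). */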
12619 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12620 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12622 tree
12623 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12624 tree end_index)
12626 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12627 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
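/* With the example inputs used for vect_gen_while above (START_INDEX = 5,
   END_INDEX = 8, eight elements) this therefore yields
   { 0, 0, 0, 1, 1, 1, 1, 1 }. */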
12630 /* Try to compute the vector types required to vectorize STMT_INFO,
12631 returning true on success and false if vectorization isn't possible.
12632 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12633 make sure that the number of elements in the vectors is no bigger
12634 than GROUP_SIZE.
12636 On success:
12638 - Set *STMT_VECTYPE_OUT to:
12639 - NULL_TREE if the statement doesn't need to be vectorized;
12640 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12642 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12643 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12644 statement does not help to determine the overall number of units. */
12646 opt_result
12647 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12648 tree *stmt_vectype_out,
12649 tree *nunits_vectype_out,
12650 unsigned int group_size)
12652 gimple *stmt = stmt_info->stmt;
12654 /* For BB vectorization, we should always have a group size once we've
12655 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12656 are tentative requests during things like early data reference
12657 analysis and pattern recognition. */
12658 if (is_a <bb_vec_info> (vinfo))
12659 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12660 else
12661 group_size = 0;
12663 *stmt_vectype_out = NULL_TREE;
12664 *nunits_vectype_out = NULL_TREE;
12666 if (gimple_get_lhs (stmt) == NULL_TREE
12667 /* MASK_STORE has no lhs, but is ok. */
12668 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12670 if (is_a <gcall *> (stmt))
12672 /* Ignore calls with no lhs. These must be calls to
12673 #pragma omp simd functions, and the vectorization
12674 factor they really need can't be determined until
12675 vectorizable_simd_clone_call. */
12676 if (dump_enabled_p ())
12677 dump_printf_loc (MSG_NOTE, vect_location,
12678 "defer to SIMD clone analysis.\n");
12679 return opt_result::success ();
12682 return opt_result::failure_at (stmt,
12683 "not vectorized: irregular stmt.%G", stmt);
12686 tree vectype;
12687 tree scalar_type = NULL_TREE;
12688 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12690 vectype = STMT_VINFO_VECTYPE (stmt_info);
12691 if (dump_enabled_p ())
12692 dump_printf_loc (MSG_NOTE, vect_location,
12693 "precomputed vectype: %T\n", vectype);
12695 else if (vect_use_mask_type_p (stmt_info))
12697 unsigned int precision = stmt_info->mask_precision;
12698 scalar_type = build_nonstandard_integer_type (precision, 1);
12699 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12700 if (!vectype)
12701 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12702 " data-type %T\n", scalar_type);
12703 if (dump_enabled_p ())
12704 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12706 else
12708 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12709 scalar_type = TREE_TYPE (DR_REF (dr));
12710 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12711 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12712 else
12713 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12715 if (dump_enabled_p ())
12717 if (group_size)
12718 dump_printf_loc (MSG_NOTE, vect_location,
12719 "get vectype for scalar type (group size %d):"
12720 " %T\n", group_size, scalar_type);
12721 else
12722 dump_printf_loc (MSG_NOTE, vect_location,
12723 "get vectype for scalar type: %T\n", scalar_type);
12725 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12726 if (!vectype)
12727 return opt_result::failure_at (stmt,
12728 "not vectorized:"
12729 " unsupported data-type %T\n",
12730 scalar_type);
12732 if (dump_enabled_p ())
12733 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12736 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12737 return opt_result::failure_at (stmt,
12738 "not vectorized: vector stmt in loop:%G",
12739 stmt);
12741 *stmt_vectype_out = vectype;
12743 /* Don't try to compute scalar types if the stmt produces a boolean
12744 vector; use the existing vector type instead. */
12745 tree nunits_vectype = vectype;
12746 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12748 /* The number of units is set according to the smallest scalar
12749 type (or the largest vector size, but we only support one
12750 vector size per vectorization). */
12751 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12752 TREE_TYPE (vectype));
12753 if (scalar_type != TREE_TYPE (vectype))
12755 if (dump_enabled_p ())
12756 dump_printf_loc (MSG_NOTE, vect_location,
12757 "get vectype for smallest scalar type: %T\n",
12758 scalar_type);
12759 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12760 group_size);
12761 if (!nunits_vectype)
12762 return opt_result::failure_at
12763 (stmt, "not vectorized: unsupported data-type %T\n",
12764 scalar_type);
12765 if (dump_enabled_p ())
12766 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12767 nunits_vectype);
12771 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12772 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12773 return opt_result::failure_at (stmt,
12774 "Not vectorized: Incompatible number "
12775 "of vector subparts between %T and %T\n",
12776 nunits_vectype, *stmt_vectype_out);
12778 if (dump_enabled_p ())
12780 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12781 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12782 dump_printf (MSG_NOTE, "\n");
12785 *nunits_vectype_out = nunits_vectype;
12786 return opt_result::success ();
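/* A worked illustration, again assuming a 128-bit vector target: for a
   widening conversion statement in a loop such as

       int_dst = (int) short_src;

   the scalar type of the lhs gives *STMT_VECTYPE_OUT = V4SI, while the
   smallest scalar type referenced by the statement is "short", so
   *NUNITS_VECTYPE_OUT = V8HI. The 8-unit type is the one that bounds
   the number of units needed for this statement, and the multiple_p
   check above guarantees the two subpart counts are compatible. */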
12789 /* Generate and return a statement sequence that sets the vector length LEN as follows:
12791 min_of_start_and_end = min (START_INDEX, END_INDEX);
12792 left_len = END_INDEX - min_of_start_and_end;
12793 rhs = min (left_len, LEN_LIMIT);
12794 LEN = rhs;
12796 Note: the cost of the code generated by this function is modeled
12797 by vect_estimate_min_profitable_iters, so changes here may need
12798 corresponding changes there. */
12800 gimple_seq
12801 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12803 gimple_seq stmts = NULL;
12804 tree len_type = TREE_TYPE (len);
12805 gcc_assert (TREE_TYPE (start_index) == len_type);
12807 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12808 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12809 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12810 gimple* stmt = gimple_build_assign (len, rhs);
12811 gimple_seq_add_stmt (&stmts, stmt);
12813 return stmts;
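/* Plugging numbers into the formula from the comment above: with
   START_INDEX = 0, END_INDEX = 100 and LEN_LIMIT = 16 the generated
   sequence computes min (0, 100) = 0, left_len = 100 and
   LEN = min (100, 16) = 16 (a full vector), while with START_INDEX = 96
   it computes left_len = 4 and LEN = 4 for the final partial group. */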